/**
 * auto-recall hook (v3): injects relevant memories + persona into agent context
 * before the agent starts processing.
 *
 * - Searches L1 memories using configurable strategy (keyword / embedding / hybrid)
 *   - keyword: FTS5 BM25 (requires FTS5; returns empty if unavailable)
 *   - embedding: VectorStore cosine similarity
 *   - hybrid: keyword + embedding merged with RRF
 * - L3 persona injection
 * - L2 scene navigation (full injection, LLM decides relevance)
 */
import fs from "node:fs/promises";
import path from "node:path";
import type { MemoryTdaiConfig } from "../../config.js";
import { readSceneIndex } from "../scene/scene-index.js";
import { generateSceneNavigation, stripSceneNavigation } from "../scene/scene-navigation.js";
import type { MemoryRecord } from "../record/l1-reader.js";
import type { IMemoryStore, L1SearchResult, L1FtsResult } from "../store/types.js";
import { buildFtsQuery } from "../store/sqlite.js";
import type { EmbeddingService, EmbeddingCallOptions } from "../store/embedding.js";
import { sanitizeText } from "../../utils/sanitize.js";

/** Prefix put in front of every log message emitted by this module. */
const TAG = "[memory-tdai] [recall]";

/**
 * Marker appended to a recalled line that was cut to fit a character budget.
 * English gloss: "(truncated; use tdai_memory_search or tdai_conversation_search for details)".
 */
const RECALL_TRUNCATION_SUFFIX = "…(已截断;可用 tdai_memory_search 或 tdai_conversation_search 查看详情)";

/**
 * When the total recall budget has fewer characters left than this, the line that
 * does not fit is dropped instead of being truncated into the remaining space.
 */
const MIN_TRUNCATED_RECALL_LINE_CHARS = 40;

/** Separator used to join recalled lines; its length is counted against the total budget. */
const RECALL_LINE_SEPARATOR = "\n";

/**
 * Memory tools usage guide — injected at the end of memory context so the
 * main agent knows how to actively retrieve deeper information.
*/
// NOTE(review): this file arrived with its line breaks collapsed. The line breaks inside the
// template literal below were reconstructed from its Markdown structure (headings and list
// items on their own lines) — confirm the exact blank-line placement against the upstream copy.
const MEMORY_TOOLS_GUIDE = `
## 记忆工具调用指南

当上方注入的记忆片段不足以回答用户问题时,可主动调用以下工具获取更多信息:

- **tdai_memory_search**:搜索结构化记忆(L1),适用于回忆用户偏好、历史事件节点、规则等关键信息。
- **tdai_conversation_search**:搜索原始对话(L0),适用于查找具体消息原文、时间线、上下文细节;也可用于补充或校验 memory_search 的结果。
- **read_file**(Scene Navigation 中的路径):当已定位到相关情境,且需要该场景的完整画像、事件经过或阶段结论时使用。

### ⚠️ 调用次数限制

每轮对话中,tdai_memory_search 和 tdai_conversation_search **合计最多调用 3 次**。
- 首次搜索无结果时,可换关键词或换工具重试,但总调用次数不要超过 3 次。
- 若 3 次搜索后仍无结果,说明该信息不在记忆中,请直接根据已有信息回复用户,不要继续搜索。
`;

/** Minimal logging surface used by the recall hook; `debug` may be absent. */
interface Logger {
  debug?: (message: string) => void;
  info: (message: string) => void;
  warn: (message: string) => void;
  error: (message: string) => void;
}

/** A single recalled L1 memory with its search score and type. */
export interface RecalledMemory {
  content: string;
  /** Always 0 when produced by this module: the formatted lines it is parsed from carry no score. */
  score: number;
  type: string;
}

export interface RecallResult {
  /** L1 relevant memories — prepended to user prompt text (dynamic, per-turn) */
  prependContext?: string;
  /** Stable recall context appended to system prompt (persona, scene nav, tools guide — cacheable) */
  appendSystemContext?: string;

  // ── Metric payload (for pendingRecallCache in index.ts) ──

  /** L1 memories that were recalled, for metric reporting */
  recalledL1Memories?: RecalledMemory[];
  /** L3 persona content loaded during recall, scene navigation stripped and trimmed (null if none) */
  recalledL3Persona?: string | null;
  /**
   * Strategy requested for this recall ("skipped" when the user text was empty).
   * NOTE(review): this is the configured value; a keyword fallback decided inside
   * `searchMemories` is not reflected here.
   */
  recallStrategy?: string;
}

/** Arguments shared by `performAutoRecall` and `performAutoRecallInner`. */
interface AutoRecallParams {
  userText: string;
  actorId: string;
  sessionKey: string;
  cfg: MemoryTdaiConfig;
  pluginDataDir: string;
  logger?: Logger;
  vectorStore?: IMemoryStore;
  embeddingService?: EmbeddingService;
}

/**
 * Run auto-recall under a deadline.
 *
 * Races the real recall against a timer of `cfg.recall.timeoutMs` (default 5000 ms).
 * If the timer wins, a warning is logged and `undefined` is returned so the caller
 * proceeds without injected memory; the recall itself is not cancelled, its result is
 * simply ignored.
 *
 * @returns the recall payload, or `undefined` on timeout / when there is nothing to inject.
 */
export async function performAutoRecall(params: AutoRecallParams): Promise<RecallResult | undefined> {
  const { cfg, logger } = params;
  const timeoutMs = cfg.recall.timeoutMs ?? 5000;
  let timer: ReturnType<typeof setTimeout> | undefined;

  const recall = performAutoRecallInner(params).finally(() => {
    // Recall settled first: make sure the pending timer does not keep the event loop alive.
    if (timer) clearTimeout(timer);
  });
  const deadline = new Promise<undefined>((resolve) => {
    timer = setTimeout(() => {
      logger?.warn?.(
        `${TAG} ⚠️ Recall timed out after ${timeoutMs}ms — skipping memory injection to avoid blocking the user`,
      );
      resolve(undefined);
    }, timeoutMs);
  });

  return Promise.race([recall, deadline]);
}

/** Shape of a formatted memory line: "- [type|scene] content (活动时间: ...)" or "- [type] content". */
const RECALLED_LINE_PATTERN = /^-\s+\[([^\]]+)\]\s+(.+?)(?:\s*\(活动时间:.*\))?$/;

/**
 * Recover `{ type, content }` from a formatted memory line for metric reporting.
 * Lines that do not match the expected shape are reported whole with type "unknown".
 */
function parseRecalledMemoryLine(line: string): RecalledMemory {
  const match = RECALLED_LINE_PATTERN.exec(line);
  if (!match) {
    return { content: line, score: 0, type: "unknown" };
  }
  const [, tag = "", body = ""] = match;
  // The tag is "type" or "type|scene"; only the type part is reported.
  return { content: body.trim(), score: 0, type: tag.split("|")[0] ?? tag };
}

/** True when `err` carries the Node.js "file does not exist" error code. */
function isFileNotFound(err: unknown): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
}

/**
 * Collect everything that can be injected for this turn:
 * L1 memories (searched, then trimmed to the recall budget), the L3 persona file
 * (`persona.md` in the plugin data dir) and the L2 scene navigation.
 *
 * @returns `undefined` when none of the three produced content; otherwise the
 *          dynamic part (`prependContext`), the stable part (`appendSystemContext`)
 *          and the metric payload.
 */
async function performAutoRecallInner(params: AutoRecallParams): Promise<RecallResult | undefined> {
  const { userText, cfg, pluginDataDir, logger, vectorStore, embeddingService } = params;
  const tRecallStart = performance.now();

  // ── L1: search relevant memories — skipped only when userText is empty/undefined ──
  const tSearchStart = performance.now();
  let memoryLines: string[] = [];
  let effectiveStrategy = "skipped";
  let recalledL1Memories: RecalledMemory[] = [];
  let searchTiming: SearchTiming = { ftsMs: 0, embeddingMs: 0, ftsHits: 0, embeddingHits: 0 };

  if (!userText) {
    logger?.debug?.(`${TAG} User text empty/undefined, skipping memory search (persona/scene still injected)`);
  } else {
    effectiveStrategy = cfg.recall.strategy ?? "hybrid";
    const searchResult = await searchMemories(
      userText,
      pluginDataDir,
      cfg,
      logger,
      effectiveStrategy as "keyword" | "embedding" | "hybrid",
      vectorStore,
      embeddingService,
    );
    searchTiming = searchResult.timing;
    memoryLines = applyRecallBudget(searchResult.lines, cfg.recall, logger);
    // Metrics are derived from the lines that survive the budget, not from the raw hits.
    recalledL1Memories = memoryLines.map((line) => parseRecalledMemoryLine(line));
  }
  const searchMs = performance.now() - tSearchStart;

  // ── L3: persona ──
  const tPersonaStart = performance.now();
  let personaContent: string | undefined;
  try {
    const raw = await fs.readFile(path.join(pluginDataDir, "persona.md"), "utf-8");
    personaContent = stripSceneNavigation(raw).trim() || undefined;
    logger?.debug?.(`${TAG} Persona loaded: ${personaContent ? `${personaContent.length} chars` : "empty"}`);
  } catch (err) {
    if (isFileNotFound(err)) {
      logger?.debug?.(`${TAG} No persona file found (expected for new users)`);
    } else {
      // Anything other than "file missing" is unexpected; surface it but keep recall best-effort.
      logger?.warn?.(`${TAG} Failed to load persona: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
  const personaMs = performance.now() - tPersonaStart;

  // ── L2: full scene navigation ──
  const tSceneStart = performance.now();
  let sceneNavigation: string | undefined;
  try {
    const sceneIndex = await readSceneIndex(pluginDataDir);
    if (sceneIndex.length > 0) {
      sceneNavigation = generateSceneNavigation(sceneIndex, pluginDataDir);
      logger?.debug?.(`${TAG} Scene navigation generated: ${sceneIndex.length} scenes`);
    }
  } catch {
    logger?.debug?.(`${TAG} No scene index found`);
  }
  const sceneMs = performance.now() - tSceneStart;

  // Search part of the timing line, shared by both log statements below.
  const searchSummary =
    `search=${searchMs.toFixed(0)}ms(strategy=${effectiveStrategy},hits=${memoryLines.length},` +
    `fts=${searchTiming.ftsMs.toFixed(0)}ms/${searchTiming.ftsHits}hits,` +
    `vec=${searchTiming.embeddingMs.toFixed(0)}ms/${searchTiming.embeddingHits}hits), `;

  if (memoryLines.length === 0 && !personaContent && !sceneNavigation) {
    const totalMs = performance.now() - tRecallStart;
    logger?.info(
      `${TAG} ⏱ Recall timing: total=${totalMs.toFixed(0)}ms, ` +
        searchSummary +
        `persona=${personaMs.toFixed(0)}ms, ` +
        `scene=${sceneMs.toFixed(0)}ms — no context to inject`,
    );
    logger?.debug?.(`${TAG} No memories/persona/scenes to inject`);
    return undefined;
  }

  // Split recall context into stable and dynamic parts to optimize prompt caching.
  //
  // appendSystemContext (system prompt end — stable, cacheable):
  //   persona, scene navigation, memory tools guide.
  //   These change infrequently; when content is identical across turns,
  //   providers with prompt caching can cache this region.
  //
  // prependContext (user prompt prefix — dynamic, per-turn):
  //   L1 relevant memories — different every turn, kept out of the system prompt
  //   so it doesn't bust the system prompt cache.
  //
  // NOTE(review): the three injected strings below carry only newline padding; if the
  // consumer expects section markers around them, confirm none were lost upstream.
  const stableParts: string[] = [];
  if (personaContent) {
    stableParts.push(`\n${personaContent}\n`);
  }
  if (sceneNavigation) {
    stableParts.push(`\n${sceneNavigation}\n`);
  }

  let prependContext: string | undefined;
  if (memoryLines.length > 0) {
    prependContext = `\n以下是当前对话召回的相关记忆,不代表当前任务进程,仅作为参考:\n\n${memoryLines.join(RECALL_LINE_SEPARATOR)}\n`;
  }

  // The tools guide tells the agent how to fetch deeper context when the injected
  // snippets are not enough. Past the early return above there is always something
  // to inject, so the guide is always appended and the stable part is never empty.
  stableParts.push(MEMORY_TOOLS_GUIDE);
  const appendSystemContext = stableParts.join("\n\n");

  const totalMs = performance.now() - tRecallStart;
  logger?.info(
    `${TAG} ⏱ Recall timing: total=${totalMs.toFixed(0)}ms, ` +
      searchSummary +
      `persona=${personaMs.toFixed(0)}ms(${personaContent ? `${personaContent.length}chars` : "none"}), ` +
      `scene=${sceneMs.toFixed(0)}ms(${sceneNavigation ? "loaded" : "none"})`,
  );

  return {
    prependContext,
    appendSystemContext,
    recalledL1Memories,
    recalledL3Persona: personaContent ?? null,
    recallStrategy: effectiveStrategy,
  };
}

// ============================
// Multi-strategy search dispatcher
// ============================

/** A memory record paired with the raw score its search backend assigned. */
interface ScoredRecord {
  record: MemoryRecord;
  score: number;
}

/** Timing breakdown from memory search */
interface SearchTiming {
  ftsMs: number;
  embeddingMs: number;
  ftsHits: number;
  embeddingHits: number;
}

/** Formatted memory lines plus the timing of the search that produced them. */
interface SearchResult {
  lines: string[];
  timing: SearchTiming;
}

/**
 * Search memories and return both formatted lines and structured details.
 *
 * This is a thin wrapper around `searchMemories` that also parses the returned
 * formatted lines into type/content pairs for metric reporting.
 * NOTE(review): nothing in this file calls it — `performAutoRecallInner` parses
 * lines itself after applying the recall budget.
 */
async function searchMemoriesWithDetails(
  userText: string,
  pluginDataDir: string,
  cfg: MemoryTdaiConfig,
  logger: Logger | undefined,
  strategy: "keyword" | "embedding" | "hybrid",
  vectorStore?: IMemoryStore,
  embeddingService?: EmbeddingService,
): Promise<{ lines: string[]; memories: RecalledMemory[]; timing: SearchTiming }> {
  const { lines, timing } = await searchMemories(
    userText,
    pluginDataDir,
    cfg,
    logger,
    strategy,
    vectorStore,
    embeddingService,
  );
  return { lines, memories: lines.map((line) => parseRecalledMemoryLine(line)), timing };
}

/**
 * Search memories using the configured strategy.
*
 * - "keyword": FTS5 BM25 full-text search — no embedding needed; yields no results when FTS5 is unavailable
 * - "embedding": VectorStore cosine similarity — requires vectorStore + embeddingService
 * - "hybrid": merge both keyword and embedding results with RRF (Reciprocal Rank Fusion),
 *   or a single store-side call when the store reports native hybrid search
 *
 * Falls back to keyword if embedding resources are unavailable.
 * Any error thrown by a strategy is logged as a warning and turned into an empty result.
 *
 * @param userText raw user text; it is sanitized before being used as a query
 * @returns formatted memory lines plus a timing breakdown (empty when the sanitized
 *          query is shorter than 2 characters)
 */
async function searchMemories(
  userText: string,
  pluginDataDir: string,
  cfg: MemoryTdaiConfig,
  logger: Logger | undefined,
  strategy: "keyword" | "embedding" | "hybrid",
  vectorStore?: IMemoryStore,
  embeddingService?: EmbeddingService,
): Promise<SearchResult> {
  const emptyResult: SearchResult = {
    lines: [],
    timing: { ftsMs: 0, embeddingMs: 0, ftsHits: 0, embeddingHits: 0 },
  };

  // Strip gateway-injected inbound metadata (Sender, timestamps, media markers,
  // base64 image data, etc.) so FTS / embedding queries are based on pure user intent.
  const cleanText = sanitizeText(userText);
  if (cleanText.length < 2) {
    logger?.debug?.(`${TAG} Query too short for memory search (raw=${userText.length}, clean=${cleanText.length})`);
    return emptyResult;
  }
  if (cleanText.length !== userText.length) {
    logger?.debug?.(`${TAG} userText sanitized: ${userText.length} → ${cleanText.length} chars`);
  }

  const maxResults = cfg.recall.maxResults ?? 5;
  const threshold = cfg.recall.scoreThreshold ?? 0.3;
  const embeddingAvailable = !!vectorStore && !!embeddingService;
  logger?.debug?.(
    `${TAG} [searchMemories] strategy=${strategy}, embeddingAvailable=${embeddingAvailable}, ` +
      `vectorStore=${vectorStore ? "available" : "UNAVAILABLE"}, ` +
      `embeddingService=${embeddingService ? "available" : "UNAVAILABLE"}, ` +
      `maxResults=${maxResults}, threshold=${threshold}`,
  );

  // Determine effective strategy (fall back to keyword if embedding not available)
  let effectiveStrategy = strategy;
  if ((strategy === "embedding" || strategy === "hybrid") && !embeddingAvailable) {
    logger?.warn?.(
      `${TAG} Strategy "${strategy}" requested but EmbeddingService not available, falling back to keyword`,
    );
    effectiveStrategy = "keyword";
  }
  logger?.debug?.(`${TAG} Search strategy: ${effectiveStrategy} (configured: ${strategy})`);

  // Resolve per-call embedding timeout for recall path.
  // Falls back to global embedding.timeoutMs when recallTimeoutMs is not configured.
  const recallEmbeddingTimeoutMs = cfg.embedding?.recallTimeoutMs ?? cfg.embedding?.timeoutMs;
  const embeddingCallOpts: EmbeddingCallOptions = { timeoutMs: recallEmbeddingTimeoutMs };

  try {
    // The two availability checks are implied by the fallback above (a non-keyword
    // strategy only survives when both resources exist); repeating them here lets the
    // compiler narrow both values for the branches below instead of relying on `!`.
    if (effectiveStrategy === "keyword" || !vectorStore || !embeddingService) {
      const tFts = performance.now();
      const lines = await searchByKeyword(cleanText, pluginDataDir, maxResults, threshold, logger, vectorStore);
      return {
        lines,
        timing: { ftsMs: performance.now() - tFts, embeddingMs: 0, ftsHits: lines.length, embeddingHits: 0 },
      };
    }

    if (effectiveStrategy === "embedding") {
      const tEmb = performance.now();
      const lines = await searchByEmbedding(
        cleanText,
        maxResults,
        threshold,
        vectorStore,
        embeddingService,
        logger,
        embeddingCallOpts,
      );
      return {
        lines,
        timing: { ftsMs: 0, embeddingMs: performance.now() - tEmb, ftsHits: 0, embeddingHits: lines.length },
      };
    }

    // Hybrid: if the store natively supports hybrid search (e.g. TCVDB does
    // server-side dense + sparse + RRF in a single API call), short-circuit
    // to avoid a redundant second HTTP request and a wasted local embed().
    // The score threshold is not applied on this path.
    if (vectorStore.getCapabilities().nativeHybridSearch) {
      const tNative = performance.now();
      const results = await vectorStore.searchL1Hybrid({ query: cleanText, topK: maxResults });
      const nativeMs = performance.now() - tNative;
      logger?.debug?.(
        `${TAG} [hybrid-native] Single-call hybrid: ${results.length} results in ${nativeMs.toFixed(0)}ms`,
      );
      const lines = results.map((r) => formatMemoryLine(vectorResultToFormatable(r)));
      return { lines, timing: { ftsMs: 0, embeddingMs: nativeMs, ftsHits: 0, embeddingHits: results.length } };
    }

    // Fallback: run keyword + embedding in parallel, merge with client-side RRF (SQLite path)
    return await searchHybrid(
      cleanText,
      pluginDataDir,
      maxResults,
      threshold,
      vectorStore,
      embeddingService,
      logger,
      embeddingCallOpts,
    );
  } catch (err) {
    logger?.warn?.(
      `${TAG} Memory search failed (strategy=${effectiveStrategy}): ${err instanceof Error ? err.message : String(err)}`,
    );
    return emptyResult;
  }
}

// ============================
// Strategy: Keyword (FTS5 BM25, no in-memory fallback)
// ============================

/**
 * Keyword search through the store's FTS5 index.
 *
 * Fetches `maxResults * 2` candidates, keeps those scoring at or above `threshold`
 * and returns at most `maxResults` formatted lines. When nothing passes the threshold
 * but the store returned no more than `maxResults` rows, those rows are returned anyway.
 * Returns an empty list when FTS5 is unavailable, no query can be built, or nothing matches.
 *
 * @param _pluginDataDir unused; kept for signature compatibility
 */
async function searchByKeyword(
  userText: string,
  _pluginDataDir: string,
  maxResults: number,
  threshold: number,
  logger?: Logger,
  vectorStore?: IMemoryStore,
): Promise<string[]> {
  // Prefer FTS5 if available
  if (vectorStore?.isFtsAvailable()) {
    const ftsQuery = buildFtsQuery(userText);
    if (ftsQuery) {
      logger?.debug?.(`${TAG} [keyword-fts] Using FTS5 BM25 search: query="${ftsQuery}"`);
      const ftsResults = await vectorStore.searchL1Fts(ftsQuery, maxResults * 2);

      if (ftsResults.length > 0) {
        logger?.debug?.(
          `${TAG} [keyword-fts] FTS5 raw results (${ftsResults.length}): ` +
            ftsResults.map((r) => `id=${r.record_id} score=${r.score.toFixed(6)}`).join(", "),
        );

        const aboveThreshold = ftsResults.filter((r) => r.score >= threshold).slice(0, maxResults);
        if (aboveThreshold.length > 0) {
          logger?.debug?.(
            `${TAG} [keyword-fts] FTS5 found ${aboveThreshold.length} results (from ${ftsResults.length} raw, threshold=${threshold})`,
          );
          return aboveThreshold.map((r) => formatMemoryLine(ftsResultToFormatable(r)));
        }

        // BM25 absolute scores are unreliable when the document set is very
        // small (e.g. 1–3 records) because IDF approaches 0. In that case,
        // trust FTS5's MATCH + rank ordering and return the top results anyway.
        if (ftsResults.length <= maxResults) {
          logger?.debug?.(
            `${TAG} [keyword-fts] All ${ftsResults.length} results below threshold=${threshold} ` +
              `but document set is small — returning all matched results`,
          );
          return ftsResults.slice(0, maxResults).map((r) => formatMemoryLine(ftsResultToFormatable(r)));
        }

        logger?.debug?.(
          `${TAG} [keyword-fts] FTS5 returned 0 results above threshold (from ${ftsResults.length} raw)`,
        );
      }
    }
  }

  // FTS5 not available or returned no results — skip in-memory fallback to avoid O(N) full scan
  logger?.debug?.(`${TAG} [keyword] FTS5 unavailable or no results, skipping keyword search`);
  return [];
}

// ============================
// Strategy: Embedding (VectorStore cosine)
// ============================

/**
 * Embedding search: embed the query, fetch `maxResults * 2` nearest candidates from
 * the store, keep those scoring at or above `threshold` and return at most
 * `maxResults` formatted lines (empty when none qualify).
 *
 * Errors from the embedding service or the store propagate to the caller.
 */
async function searchByEmbedding(
  userText: string,
  maxResults: number,
  threshold: number,
  vectorStore: IMemoryStore,
  embeddingService: EmbeddingService,
  logger?: Logger,
  embeddingCallOpts?: EmbeddingCallOptions,
): Promise<string[]> {
  logger?.debug?.(
    `${TAG} [embedding-search] START query="${userText.slice(0, 80)}...", maxResults=${maxResults}, threshold=${threshold}`,
  );

  const queryEmbedding = await embeddingService.embed(userText, embeddingCallOpts);
  logger?.debug?.(
    `${TAG} [embedding-search] Query embedding OK: dims=${queryEmbedding.length}, ` +
      `norm=${Math.sqrt(Array.from(queryEmbedding).reduce((s, v) => s + v * v, 0)).toFixed(4)}, ` +
      `searching top-${maxResults * 2}...`,
  );

  // Retrieve more candidates for subsequent filtering
  const vecResults: L1SearchResult[] = await vectorStore.searchL1Vector(queryEmbedding, maxResults * 2);
  if (vecResults.length === 0) {
    logger?.debug?.(`${TAG} [embedding-search] Returned 0 results`);
    return [];
  }

  logger?.debug?.(
    `${TAG} [embedding-search] Got ${vecResults.length} candidates, filtering by threshold=${threshold}`,
  );
  for (const r of vecResults) {
    logger?.debug?.(
      `${TAG} [embedding-search] candidate id=${r.record_id}, score=${r.score.toFixed(4)}, ` +
        `type=${r.type}, content="${r.content.slice(0, 60)}..."`,
    );
  }

  const relevant = vecResults.filter((r) => r.score >= threshold).slice(0, maxResults);
  if (relevant.length === 0) {
    logger?.debug?.(`${TAG} [embedding-search] No results above threshold ${threshold}`);
    return [];
  }

  logger?.debug?.(
    `${TAG} [embedding-search] Found ${relevant.length} relevant memories above threshold (from ${vecResults.length} candidates)`,
  );
  return relevant.map((r) => formatMemoryLine(vectorResultToFormatable(r)));
}

// ============================
// Strategy: Hybrid (Keyword + Embedding + RRF)
// ============================

/**
 * Hybrid search: run keyword (FTS5) and embedding in parallel, merge with
 * Reciprocal Rank Fusion (RRF) to combine rank lists.
 *
 * RRF score for a record at rank r = 1 / (k + r), where k=60 is a constant.
 * If a record appears in both lists, its RRF scores are summed.
 *
 * If FTS5 is unavailable, the keyword side returns empty and RRF uses
 * embedding results only.
*/
async function searchHybrid(
  userText: string,
  _pluginDataDir: string,
  maxResults: number,
  _threshold: number,
  vectorStore: IMemoryStore,
  embeddingService: EmbeddingService,
  logger?: Logger,
  embeddingCallOpts?: EmbeddingCallOptions,
): Promise<SearchResult> {
  // `_pluginDataDir` and `_threshold` are unused: ranking is rank-based, so no score
  // threshold is applied on this path.
  const candidateK = maxResults * 3; // retrieve more for merging

  // Keyword side: FTS5 only (no in-memory fallback). Never rejects — failures become an empty list.
  const runKeywordSide = async (): Promise<{ records: ScoredRecord[]; ms: number }> => {
    const tStart = performance.now();
    try {
      if (vectorStore.isFtsAvailable()) {
        const ftsQuery = buildFtsQuery(userText);
        if (ftsQuery) {
          const ftsResults = await vectorStore.searchL1Fts(ftsQuery, candidateK);
          if (ftsResults.length > 0) {
            logger?.debug?.(`${TAG} [hybrid-keyword-fts] FTS5 found ${ftsResults.length} candidates`);
            return { records: ftsResults.map(ftsResultToScoredRecord), ms: performance.now() - tStart };
          }
        }
      }
      // FTS5 not available or returned no results — skip in-memory fallback
      logger?.debug?.(`${TAG} [hybrid-keyword] FTS5 unavailable or no results, skipping keyword part`);
      return { records: [], ms: performance.now() - tStart };
    } catch (err) {
      logger?.warn?.(`${TAG} Hybrid: keyword part failed: ${err instanceof Error ? err.message : String(err)}`);
      return { records: [], ms: performance.now() - tStart };
    }
  };

  // Embedding side. Never rejects — failures become an empty list.
  const runEmbeddingSide = async (): Promise<{ results: L1SearchResult[]; ms: number }> => {
    const tStart = performance.now();
    try {
      logger?.debug?.(`${TAG} [hybrid-embedding] Generating query embedding...`);
      const queryEmbedding = await embeddingService.embed(userText, embeddingCallOpts);
      logger?.debug?.(
        `${TAG} [hybrid-embedding] Embedding OK, dims=${queryEmbedding.length}, searching top-${candidateK}...`,
      );
      const results = await vectorStore.searchL1Vector(queryEmbedding, candidateK, userText);
      logger?.debug?.(`${TAG} [hybrid-embedding] Got ${results.length} candidates`);
      return { results, ms: performance.now() - tStart };
    } catch (err) {
      logger?.warn?.(`${TAG} Hybrid: embedding part failed: ${err instanceof Error ? err.message : String(err)}`);
      return { results: [], ms: performance.now() - tStart };
    }
  };

  // Run keyword and embedding searches in parallel
  const [keywordResult, embeddingResult] = await Promise.all([runKeywordSide(), runEmbeddingSide()]);
  const keywordResults = keywordResult.records;
  const embeddingResults = embeddingResult.results;
  const timing: SearchTiming = {
    ftsMs: keywordResult.ms,
    embeddingMs: embeddingResult.ms,
    ftsHits: keywordResults.length,
    embeddingHits: embeddingResults.length,
  };

  if (keywordResults.length === 0 && embeddingResults.length === 0) {
    logger?.debug?.(`${TAG} Hybrid search: both strategies returned 0 results`);
    return { lines: [], timing };
  }

  // RRF merge: k=60 is a standard constant from the RRF paper
  const RRF_K = 60;
  type RecordId = MemoryRecord["id"] | L1SearchResult["record_id"];
  // record id → accumulated RRF score + the line data of the first list that produced the record
  const merged = new Map<RecordId, { rrfScore: number; formatable: FormatableMemory }>();
  const addVote = (id: RecordId, rank: number, toFormatable: () => FormatableMemory): void => {
    const rrfScore = 1 / (RRF_K + rank + 1);
    const existing = merged.get(id);
    if (existing) {
      existing.rrfScore += rrfScore;
    } else {
      merged.set(id, { rrfScore, formatable: toFormatable() });
    }
  };

  keywordResults.forEach((r, rank) => addVote(r.record.id, rank, () => recordToFormatable(r.record)));
  embeddingResults.forEach((r, rank) => addVote(r.record_id, rank, () => vectorResultToFormatable(r)));

  // Sort by combined RRF score and take top results
  const top = Array.from(merged.values())
    .sort((a, b) => b.rrfScore - a.rrfScore)
    .slice(0, maxResults);

  if (top.length === 0) {
    logger?.debug?.(`${TAG} Hybrid search: no results after merge`);
    return { lines: [], timing };
  }

  logger?.debug?.(
    `${TAG} Hybrid search found ${top.length} results ` +
      `(keyword=${keywordResults.length}, embedding=${embeddingResults.length})`,
  );
  return { lines: top.map(({ formatable }) => formatMemoryLine(formatable)), timing };
}

/** Parse an FTS row's metadata JSON; empty or unparsable input yields `{}`. */
function safeParseFtsMetadata(raw: L1FtsResult["metadata_json"]): MemoryRecord["metadata"] {
  if (!raw) return {};
  try {
    return JSON.parse(raw);
  } catch {
    return {};
  }
}

/**
 * Wrap an FTS row in the `ScoredRecord` shape used by the RRF merge.
 * Fields the FTS row does not carry are filled with empty values.
 */
function ftsResultToScoredRecord(r: L1FtsResult): ScoredRecord {
  return {
    record: {
      id: r.record_id,
      content: r.content,
      type: r.type as MemoryRecord["type"],
      priority: r.priority,
      scene_name: r.scene_name,
      source_message_ids: [],
      metadata: safeParseFtsMetadata(r.metadata_json),
      timestamps: [r.timestamp_str].filter(Boolean),
      createdAt: "",
      updatedAt: "",
      sessionKey: r.session_key,
      sessionId: r.session_id,
    },
    score: r.score,
  };
}

// ============================
// Unified memory line formatter
// ============================

/**
 * The fields `formatMemoryLine` needs to render one memory.
 *
 * Time semantics:
 * - timestamp: a point in time — when the activity/event happened
 * - activity_start_time / activity_end_time: an activity time range
 * - All three time fields may be empty/undefined.
 */
interface FormatableMemory {
  type: string;
  content: string;
  scene_name?: string;
  /** Activity time range start, may be empty */
  activity_start_time?: string;
  /** Activity time range end, may be empty */
  activity_end_time?: string;
  /** Activity point-in-time (when it happened), may be empty */
  timestamp?: string;
}

/**
 * Format a single memory record into a line for prompt injection.
 *
 * Output examples:
 *   - [persona] 用户叫王小明,30岁,是一名软件工程师。
 *   - [episodic|旅行计划] 用户计划五月去日本旅行。 (活动时间: 2025-05-01 ~ 2025-05-10)
 *   - [episodic] 用户今天加班到很晚。 (活动时间: 2025-03-01)
 *   - [instruction] 用户要求回答时使用中文,保持简洁。
 */
function formatMemoryLine(m: FormatableMemory): string {
  // 1. Type tag + optional scene name
  const tag = m.scene_name ? `${m.type}|${m.scene_name}` : m.type;

  // 2. Content (core)
  let line = `- [${tag}] ${m.content}`;

  // 3. Time info — prefer the activity start/end range; fall back to timestamp as point-in-time
  const start = formatTimestamp(m.activity_start_time);
  const end = formatTimestamp(m.activity_end_time);
  const point = formatTimestamp(m.timestamp);
  if (start && end) {
    line += ` (活动时间: ${start} ~ ${end})`; // range with both ends
  } else if (start) {
    line += ` (活动时间: ${start}起)`; // open-ended range: start only
  } else if (end) {
    line += ` (活动时间: 至${end})`; // open-ended range: end only
  } else if (point) {
    line += ` (活动时间: ${point})`; // single point in time
  }
  // If all three are empty → no time info appended
  return line;
}

/**
 * Trim recalled lines to the configured character budgets.
 *
 * - `recall.maxCharsPerMemory` caps each line individually.
 * - `recall.maxTotalRecallChars` caps the joined output, separators included. The first
 *   line that does not fit is truncated into the remaining space when at least
 *   `MIN_TRUNCATED_RECALL_LINE_CHARS` characters remain, otherwise dropped; all later
 *   lines are dropped.
 *
 * With neither limit set (or both non-positive / non-finite) the input array is returned as is.
 */
function applyRecallBudget(
  lines: string[],
  recall: MemoryTdaiConfig["recall"],
  logger?: Logger,
): string[] {
  const maxCharsPerMemory = normalizeBudgetLimit(recall.maxCharsPerMemory);
  const maxTotalRecallChars = normalizeBudgetLimit(recall.maxTotalRecallChars);
  if (!maxCharsPerMemory && !maxTotalRecallChars) {
    return lines;
  }

  const budgeted: string[] = [];
  let usedChars = 0;
  let truncatedCount = 0;
  let droppedCount = 0;

  for (const [i, line] of lines.entries()) {
    const perMemoryBounded = maxCharsPerMemory ? truncateRecallLine(line, maxCharsPerMemory) : line;
    let wasTruncated = perMemoryBounded !== line;

    if (!maxTotalRecallChars) {
      budgeted.push(perMemoryBounded);
      if (wasTruncated) truncatedCount++;
      continue;
    }

    // Every line after the first also costs one separator when the lines are joined.
    const separatorChars = budgeted.length > 0 ? RECALL_LINE_SEPARATOR.length : 0;
    const remainingChars = maxTotalRecallChars - usedChars - separatorChars;
    if (remainingChars <= 0) {
      droppedCount += lines.length - i;
      break;
    }

    if (perMemoryBounded.length > remainingChars) {
      const canFit = remainingChars >= MIN_TRUNCATED_RECALL_LINE_CHARS;
      if (canFit) {
        const totalBounded = truncateRecallLine(perMemoryBounded, remainingChars);
        budgeted.push(totalBounded);
        usedChars += separatorChars + totalBounded.length;
        wasTruncated = wasTruncated || totalBounded !== perMemoryBounded;
        if (wasTruncated) truncatedCount++;
      }
      // The budget is exhausted either way: everything after this line is dropped,
      // and this line too unless its truncated form was kept.
      droppedCount += lines.length - i - (canFit ? 1 : 0);
      break;
    }

    budgeted.push(perMemoryBounded);
    usedChars += separatorChars + perMemoryBounded.length;
    if (wasTruncated) truncatedCount++;
  }

  if (truncatedCount > 0 || droppedCount > 0) {
    logger?.debug?.(
      `${TAG} Recall budget applied: input=${lines.length}, output=${budgeted.length}, ` +
        `truncated=${truncatedCount}, dropped=${droppedCount}, ` +
        `maxCharsPerMemory=${recall.maxCharsPerMemory}, maxTotalRecallChars=${recall.maxTotalRecallChars}`,
    );
  }
  return budgeted;
}

/**
 * Turn a configured limit into a positive whole number of characters.
 * Missing, non-finite and non-positive values — and fractions below 1 — mean "no limit".
 */
function normalizeBudgetLimit(value: number | undefined): number | undefined {
  if (value == null || !Number.isFinite(value)) return undefined;
  const whole = Math.floor(value);
  return whole > 0 ? whole : undefined;
}

/**
 * Take at most `end` UTF-16 code units from the start of `text` without ending on
 * the first half of a surrogate pair (which would leave a lone, invalid surrogate).
 */
function sliceOnCodePointBoundary(text: string, end: number): string {
  const cut = text.slice(0, end);
  if (cut.length === text.length) return cut;
  const last = cut.charCodeAt(cut.length - 1);
  const endsOnHighSurrogate = last >= 0xd800 && last <= 0xdbff;
  return endsOnHighSurrogate ? cut.slice(0, -1) : cut;
}

/**
 * Shorten `line` to at most `maxChars` characters.
 * The truncation suffix is appended when there is room for it; when `maxChars` is
 * not larger than the suffix itself the line is cut bare.
 */
function truncateRecallLine(line: string, maxChars: number): string {
  if (line.length <= maxChars) return line;
  if (maxChars <= RECALL_TRUNCATION_SUFFIX.length) {
    return sliceOnCodePointBoundary(line, maxChars);
  }
  const head = sliceOnCodePointBoundary(line, maxChars - RECALL_TRUNCATION_SUFFIX.length).trimEnd();
  return `${head}${RECALL_TRUNCATION_SUFFIX}`;
}

/**
 * Format an ISO 8601 timestamp to a concise date or datetime string.
 * - No time part, or a time part whose HH:MM is 00:00 → date only (e.g. "2025-03-01")
 * - Otherwise → date + HH:MM (e.g. "2025-03-01 14:30"); seconds and zone are not shown
 * - Returns undefined for empty, non-string or unrecognized inputs.
 */
function formatTimestamp(ts: string | undefined): string | undefined {
  // Values may originate from parsed JSON metadata, so the static type is not trusted here.
  if (typeof ts !== "string" || !ts) return undefined;

  // Accepts "2025-03-01T14:30:00.000Z", "2025-03-01T14:30" or "2025-03-01"
  const match = ts.match(/^(\d{4}-\d{2}-\d{2})(?:T(\d{2}:\d{2})(?::\d{2})?)?/);
  if (!match) return undefined;

  const [, datePart, timePart] = match;
  if (!timePart || timePart === "00:00") {
    return datePart;
  }
  return `${datePart} ${timePart}`;
}

/**
 * Build a FormatableMemory from a full MemoryRecord (keyword search path).
 * Handles empty metadata, empty timestamps array gracefully.
*/
function recordToFormatable(record: MemoryRecord): FormatableMemory {
  const times = pickActivityTimes(record.metadata);
  return {
    type: record.type,
    content: record.content,
    scene_name: record.scene_name || undefined,
    activity_start_time: times.start,
    activity_end_time: times.end,
    // Only the first timestamp is used as the point-in-time.
    timestamp: nonEmptyString(record.timestamps?.[0]),
  };
}

/** Return `value` when it is a non-empty string, otherwise undefined. */
function nonEmptyString(value: unknown): string | undefined {
  return typeof value === "string" && value.length > 0 ? value : undefined;
}

/**
 * Read the activity time range out of an already-parsed metadata value.
 * Anything that is not an object, and any field that is not a non-empty string,
 * is treated as absent so that later formatting never receives a non-string.
 */
function pickActivityTimes(meta: unknown): { start?: string; end?: string } {
  if (typeof meta !== "object" || meta === null) return {};
  const fields = meta as { activity_start_time?: unknown; activity_end_time?: unknown };
  return {
    start: nonEmptyString(fields.activity_start_time),
    end: nonEmptyString(fields.activity_end_time),
  };
}

/**
 * Decode a store row's `metadata_json`.
 * Strings are parsed as JSON (parse errors are ignored and treated as no metadata);
 * a value that is already decoded is passed through; empty values and "{}" yield undefined.
 */
function parseMetadataJson(raw: unknown): unknown {
  if (!raw || raw === "{}") return undefined;
  if (typeof raw !== "string") return raw;
  try {
    return JSON.parse(raw);
  } catch {
    return undefined;
  }
}

/** The columns shared by vector and FTS search rows that the formatter needs. */
interface StoreRowFields {
  type: string;
  content: string;
  scene_name?: string | null;
  metadata_json?: unknown;
  timestamp_str?: string | null;
}

/** Shared conversion for vector and FTS rows, which expose the same columns. */
function storeRowToFormatable(r: StoreRowFields): FormatableMemory {
  const times = pickActivityTimes(parseMetadataJson(r.metadata_json));
  return {
    type: r.type,
    content: r.content,
    scene_name: r.scene_name || undefined,
    activity_start_time: times.start,
    activity_end_time: times.end,
    timestamp: nonEmptyString(r.timestamp_str),
  };
}

/**
 * Build a FormatableMemory from a VectorSearchResult (embedding search path).
 * Handles empty/invalid metadata_json, empty timestamp_str gracefully.
 */
function vectorResultToFormatable(r: L1SearchResult): FormatableMemory {
  return storeRowToFormatable(r);
}

/**
 * Build a FormatableMemory from an FtsSearchResult (FTS5 keyword search path).
 * Handles empty/invalid metadata_json, empty timestamp_str gracefully.
 */
function ftsResultToFormatable(r: L1FtsResult): FormatableMemory {
  return storeRowToFormatable(r);
}