import type { ISdk } from "iii-sdk";
import type { MemoryProvider, QueryExpansion } from "../types.js";
import { logger } from "../logger.js";

/**
 * System prompt sent to the provider when expanding a query.
 *
 * The element names `reformulations`, `temporal`, `query` and `entity` are
 * the ones `parseExpansionXml` searches for, so the template and the parser
 * must be kept in sync. The outer `expansion` and `entities` wrappers are
 * not inspected by the parser.
 */
const QUERY_EXPANSION_SYSTEM = `You are a query expansion engine for a memory retrieval system. Given a user query, generate diverse reformulations to maximize recall.

Output EXACTLY this XML:
<expansion>
  <reformulations>
    <query>semantically diverse rephrasing 1</query>
    <query>semantically diverse rephrasing 2</query>
    <query>semantically diverse rephrasing 3</query>
  </reformulations>
  <temporal>
    <query>time-concretized version if applicable</query>
  </temporal>
  <entities>
    <entity>extracted entity name 1</entity>
    <entity>extracted entity name 2</entity>
  </entities>
</expansion>

Rules:
- Generate 3-5 reformulations capturing different interpretations
- Include paraphrases, domain-specific restatements, and abstract/concrete variants
- Extract any named entities (people, files, projects, libraries, concepts)
- If the query mentions time ("last week", "recently"), generate temporal concretizations
- Each reformulation should capture a distinct facet of intent
- Keep reformulations concise (under 100 chars each)`;

/**
 * Words that `extractEntitiesFromQuery` ignores when they show up as
 * capitalized tokens. Built once at module load instead of on every call.
 */
const ENTITY_STOP_WORDS: ReadonlySet<string> = new Set([
  "The",
  "This",
  "That",
  "What",
  "When",
  "Where",
  "How",
  "Why",
  "Who",
  "Which",
  "Did",
  "Does",
  "Do",
  "Is",
  "Are",
  "Was",
  "Were",
  "Has",
  "Have",
  "Had",
  "Can",
  "Could",
  "Would",
  "Should",
  "Will",
  "May",
  "Might",
  "If",
  "And",
  "But",
  "Or",
  "Not",
  "For",
  "From",
  "With",
  "About",
  "After",
  "Before",
  "Between",
]);

/**
 * Returns the first `<tag>...</tag>` span found in `xml` (tags included),
 * or an empty string when the element is absent.
 *
 * `tag` is always one of this module's own literals, so it is interpolated
 * into the pattern without escaping.
 */
function extractBlock(xml: string, tag: string): string {
  const found = xml.match(new RegExp(`<${tag}>[\\s\\S]*?</${tag}>`));
  return found?.[0] ?? "";
}

/**
 * Collects the trimmed text of every `<tag>text</tag>` element in `text`.
 *
 * The opening tag is part of the pattern so the capture holds only the
 * element's text. Elements whose text is empty after trimming are skipped.
 * A fresh RegExp is built per call because a global regex carries
 * `lastIndex` state between `exec` calls.
 */
function collectTagContents(text: string, tag: string): string[] {
  const pattern = new RegExp(`<${tag}>([^<]+)</${tag}>`, "g");
  const values: string[] = [];
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(text)) !== null) {
    const value = (match[1] ?? "").trim();
    if (value.length > 0) values.push(value);
  }
  return values;
}

/**
 * Parses the provider's XML response into a `QueryExpansion`.
 *
 * - Reformulations are the `<query>` elements inside `<reformulations>`.
 * - Temporal concretizations are the `<query>` elements inside `<temporal>`.
 * - Entities are every `<entity>` element anywhere in the response.
 *
 * @param xml Raw text returned by the provider.
 * @returns The parsed expansion with `original` left empty for the caller
 *   to fill in, or `null` when no element could be extracted.
 */
function parseExpansionXml(xml: string): QueryExpansion | null {
  const reformulations = collectTagContents(
    extractBlock(xml, "reformulations"),
    "query",
  );
  const temporalConcretizations = collectTagContents(
    extractBlock(xml, "temporal"),
    "query",
  );
  const entityExtractions = collectTagContents(xml, "entity");

  const nothingParsed =
    reformulations.length === 0 &&
    temporalConcretizations.length === 0 &&
    entityExtractions.length === 0;
  if (nothingParsed) return null;

  return {
    original: "",
    reformulations,
    temporalConcretizations,
    entityExtractions,
  };
}

/** Builds the fallback expansion that carries only the original query. */
function emptyExpansion(query: string): QueryExpansion {
  return {
    original: query,
    reformulations: [],
    temporalConcretizations: [],
    entityExtractions: [],
  };
}

/**
 * Registers the `mem::expand-query` function on the SDK.
 *
 * The handler validates the payload, asks `provider.compress` to expand the
 * trimmed query using `QUERY_EXPANSION_SYSTEM`, and parses the XML reply.
 *
 * Result shapes:
 * - invalid payload: `{ success: false, error }`
 * - otherwise: `{ success: true, expansion }`. When the reply cannot be
 *   parsed or the provider throws, `expansion` contains only the original
 *   query, so expansion is best-effort and failures are only logged.
 *
 * `maxReformulations` is floored and clamped to the range 1..10; values that
 * do not convert to a finite number fall back to 5.
 *
 * @param sdk SDK instance on which the function is registered.
 * @param provider Provider whose `compress` method produces the expansion.
 */
export function registerQueryExpansionFunction(
  sdk: ISdk,
  provider: MemoryProvider,
): void {
  sdk.registerFunction(
    "mem::expand-query",
    async (
      data: { query: string; maxReformulations?: number } | undefined,
    ) => {
      if (!data || typeof data.query !== "string" || !data.query.trim()) {
        logger.warn("Invalid expand-query payload");
        return { success: false, error: "query must be a non-empty string" };
      }

      const rawMaxR = Number(data.maxReformulations);
      const maxR = Number.isFinite(rawMaxR)
        ? Math.max(1, Math.min(10, Math.floor(rawMaxR)))
        : 5;
      const query = data.query.trim();

      try {
        const response = await provider.compress(
          QUERY_EXPANSION_SYSTEM,
          `Expand this query for memory retrieval:\n\n"${query}"`,
        );

        const parsed = parseExpansionXml(response);
        if (!parsed) {
          logger.warn("Failed to parse query expansion");
          return { success: true, expansion: emptyExpansion(query) };
        }

        parsed.original = query;
        parsed.reformulations = parsed.reformulations.slice(0, maxR);

        logger.info("Query expanded", {
          original: query,
          reformulations: parsed.reformulations.length,
          entities: parsed.entityExtractions.length,
        });
        return { success: true, expansion: parsed };
      } catch (err: unknown) {
        const msg = err instanceof Error ? err.message : String(err);
        logger.error("Query expansion failed", { error: msg });
        // Deliberate best-effort: a provider failure yields an empty
        // expansion rather than an error result.
        return { success: true, expansion: emptyExpansion(query) };
      }
    },
  );
}

/**
 * Extracts candidate entity names from a query without calling the provider.
 *
 * Two sources are combined, in this order:
 * 1. every double-quoted phrase, with the quotes removed;
 * 2. every token of two or more characters that starts with an uppercase
 *    ASCII letter and continues with letters, digits, `_`, `.` or `-`,
 *    except tokens listed in `ENTITY_STOP_WORDS`.
 *
 * @param query Free-text user query.
 * @returns The candidates in first-seen order with duplicates removed.
 */
export function extractEntitiesFromQuery(query: string): string[] {
  const entities: string[] = [];

  const quoted = query.match(/"([^"]+)"/g);
  if (quoted) {
    for (const phrase of quoted) {
      entities.push(phrase.replace(/"/g, ""));
    }
  }

  const capitalized = query.match(/\b[A-Z][a-zA-Z0-9_.-]+\b/g);
  if (capitalized) {
    for (const token of capitalized) {
      if (!ENTITY_STOP_WORDS.has(token)) entities.push(token);
    }
  }

  return [...new Set(entities)];
}