/**
 * Search Query Functions
 *
 * Programmatic API for searching content using FTS5.
 */

import type { Kysely } from "kysely";
import { sql } from "kysely";

import type { Database } from "../database/types.js";
import { validateIdentifier } from "../database/validate.js";
import { getDb } from "../loader.js";
import { FTSManager } from "./fts-manager.js";
import type {
	SearchOptions,
	CollectionSearchOptions,
	SearchResult,
	SearchResponse,
	SuggestOptions,
	Suggestion,
	SearchStats,
} from "./types.js";

/** Pattern to split on whitespace for query term extraction */
const WHITESPACE_SPLIT_PATTERN = /\s+/;
/** Matches the keywords AND / OR / NOT / NEAR as whole words, case-insensitively */
const FTS_OPERATORS_PATTERN = /\b(AND|OR|NOT|NEAR)\b/i;
/** Matches every double-quote character */
const DOUBLE_QUOTE_PATTERN = /"/g;

/**
 * Detect FTS5 query syntax errors. Match specifically on the SQLite FTS5
 * error fingerprints rather than a broad "fts5" / "syntax error" filter
 * (which would also swallow internal table-corruption errors). The two
 * fingerprints we care about are:
 *
 * - "fts5: syntax error near …" — unbalanced quotes, stray operators,
 *   other malformed user input
 * - "unknown special query: …" — bare special tokens like `^*` that
 *   parse but don't resolve to a real FTS5 directive
 *
 * @param error - Value caught from a failed query; non-`Error` values never match
 * @returns `true` when the message contains one of the two fingerprints
 */
function isFts5SyntaxError(error: unknown): boolean {
	if (!(error instanceof Error)) return false;
	const message = error.message.toLowerCase();
	return message.includes("fts5: syntax error") || message.includes("unknown special query");
}

/**
 * Search across multiple collections
 *
 * Public API that auto-injects the database.
 *
 * @param query - Search query (FTS5 syntax supported)
 * @param options - Search options
 * @returns Search results with pagination
 *
 * @example
 * ```typescript
 * import { search } from "emdash";
 *
 * const results = await search("hello world", {
 *   collections: ["posts", "pages"],
 *   limit: 20
 * });
 * ```
 */
export async function search(query: string, options: SearchOptions = {}): Promise<SearchResponse> {
	const db = await getDb();
	return searchWithDb(db, query, options);
}

/**
 * Search across multiple collections (with explicit db)
 *
 * @internal Use `search()` in templates. This variant is for admin routes
 * that already have a database handle.
 *
 * @param db - Kysely database instance
 * @param query - Search query (FTS5 syntax supported)
 * @param options - Search options
 * @returns Search results with pagination
 */
export async function searchWithDb(
	db: Kysely<Database>,
	query: string,
	options: SearchOptions = {},
): Promise<SearchResponse> {
	const ftsManager = new FTSManager(db);
	const limit = options.limit ?? 20;
	const status = options.status ?? "published";

	// Get searchable collections
	let collections = options.collections;
	if (!collections || collections.length === 0) {
		collections = await getSearchableCollections(db);
	}

	if (collections.length === 0) {
		return { items: [] };
	}

	// Search each collection and merge results
	const allResults: SearchResult[] = [];

	for (const collection of collections) {
		const config = await ftsManager.getSearchConfig(collection);
		if (!config?.enabled) {
			continue;
		}

		const collectionResults = await searchSingleCollection(
			db,
			collection,
			query,
			{
				status,
				locale: options.locale,
				limit: limit * 2, // Get extra for merging
			},
			config.weights,
		);

		allResults.push(...collectionResults);
	}

	// Sort by score descending
	allResults.sort((a, b) => b.score - a.score);

	// Apply limit
	const items = allResults.slice(0, limit);

	return { items };
}

/**
 * Search within a single collection
 *
 * @param db - Kysely database instance
 * @param collection - Collection slug
 * @param query - Search query (FTS5 syntax supported)
 * @param options - Search options
 * @returns Search results with pagination
 *
 * @example
 * ```typescript
 * const results = await searchCollection(db, "posts", "hello world", {
 *   limit: 10
 * });
 * ```
 */
export async function searchCollection(
	db: Kysely<Database>,
	collection: string,
	query: string,
	options: CollectionSearchOptions = {},
): Promise<SearchResponse> {
	const ftsManager = new FTSManager(db);
	const config = await ftsManager.getSearchConfig(collection);

	if (!config?.enabled) {
		return { items: [] };
	}

	const items = await searchSingleCollection(db, collection, query, options, config.weights);

	return { items };
}

/**
 * Build the `bm25(...)` ranking expression for an FTS table.
 *
 * Format: bm25(table, weight1, weight2, ...)
 * The first two weights are for the 'id' and 'locale' columns (UNINDEXED, so 0).
 *
 * The result is interpolated into raw SQL, so every weight is emitted only if
 * it is a finite number at runtime; anything else falls back to 1.
 *
 * @param ftsTable - FTS table name (must already be a validated identifier)
 * @param searchableFields - Searchable field names, in FTS column order
 * @param weights - Optional per-field weights; fields without an entry get 1
 * @returns SQL fragment such as `bm25("tbl", 0, 0, 2, 1)` or `bm25("tbl")`
 */
function buildBm25Expression(
	ftsTable: string,
	searchableFields: readonly string[],
	weights?: Record<string, number>,
): string {
	if (!weights || searchableFields.length === 0) {
		return `bm25("${ftsTable}")`;
	}

	const weightValues = ["0", "0"]; // id column, locale column
	for (const field of searchableFields) {
		const weight = weights[field] ?? 1;
		weightValues.push(Number.isFinite(weight) ? String(weight) : "1");
	}

	return `bm25("${ftsTable}", ${weightValues.join(", ")})`;
}

/**
 * Internal function to search a single collection
 *
 * @param db - Kysely database instance
 * @param collection - Collection slug (validated before any SQL is built)
 * @param query - Search query; passed through `escapeQuery` before MATCH
 * @param options - Limit (default 20), status (default "published"), optional locale
 * @param weights - Optional per-field bm25 weights
 * @returns Matching rows, best match first; empty when the FTS table is
 *   missing, the escaped query is empty, or FTS5 rejects the query syntax
 * @throws Re-throws any query error that is not an FTS5 syntax error
 */
async function searchSingleCollection(
	db: Kysely<Database>,
	collection: string,
	query: string,
	options: CollectionSearchOptions,
	weights?: Record<string, number>,
): Promise<SearchResult[]> {
	// Validate before any raw SQL interpolation
	validateIdentifier(collection, "collection slug");

	const ftsManager = new FTSManager(db);
	const ftsTable = ftsManager.getFtsTableName(collection);
	const contentTable = ftsManager.getContentTableName(collection);
	const limit = options.limit ?? 20;
	const status = options.status ?? "published";
	const locale = options.locale;

	// Check if FTS table exists
	if (!(await ftsManager.ftsTableExists(collection))) {
		return [];
	}

	// Escape the query for FTS5
	const escapedQuery = escapeQuery(query);
	if (!escapedQuery) {
		return [];
	}

	// Get searchable fields for snippet generation
	const searchableFields = await ftsManager.getSearchableFields(collection);

	// Build and execute the search query
	// Using raw SQL because Kysely doesn't have FTS5 support
	const bm25Expr = buildBm25Expression(ftsTable, searchableFields, weights);

	// Snippet column index is 2 (after id=0, locale=1, first searchable field=2)
	// NOTE(review): both highlight-marker arguments to snippet() are empty
	// strings here, so matched terms are not wrapped — confirm this is intended.
	let results;
	try {
		results = await sql<{
			id: string;
			slug: string | null;
			locale: string;
			title: string | null;
			snippet: string | null;
			score: number;
		}>`
			SELECT
				c.id,
				c.slug,
				c.locale,
				c.title,
				snippet("${sql.raw(ftsTable)}", 2, '', '', '...', 32) as snippet,
				${sql.raw(bm25Expr)} as score
			FROM "${sql.raw(ftsTable)}" f
			JOIN "${sql.raw(contentTable)}" c ON f.id = c.id
			WHERE "${sql.raw(ftsTable)}" MATCH ${escapedQuery}
			AND c.status = ${status}
			AND c.deleted_at IS NULL
			${locale ? sql`AND c.locale = ${locale}` : sql``}
			ORDER BY score
			LIMIT ${limit}
		`.execute(db);
	} catch (error) {
		// FTS5 returns syntax errors for queries with unbalanced quotes,
		// stray operators, or other malformed input. Treat these as
		// "no matches" so the user gets an empty result rather than an
		// internals-leaking error. Other errors (table missing, IO) still
		// propagate. Intentionally not logged: any anonymous client can
		// trigger this path, and the underlying error message embeds the
		// raw query, so logging would be both noisy and a log-injection
		// vector.
		if (isFts5SyntaxError(error)) {
			return [];
		}
		throw error;
	}

	return results.rows.map((row) => ({
		collection,
		id: row.id,
		slug: row.slug,
		locale: row.locale,
		title: row.title ?? undefined,
		// SQLite's snippet() returns NULL when the targeted column is
		// NULL for that row — even if the row matched via a different
		// searchable column. Skip sanitization in that case so we don't
		// throw on `null.replace`. The SearchResult.snippet field is
		// already optional, so omitting it is the documented contract.
		snippet: row.snippet === null ? undefined : sanitizeSnippet(row.snippet),
		score: Math.abs(row.score), // bm25 returns negative scores
	}));
}

// Module-scope regexes so the engine doesn't recompile per call —
// snippet sanitization runs on every search result.
// NOTE(review): confirm the sanitizer references both SNIPPET_LT_RE and
// SNIPPET_GT_RE.
const SNIPPET_AMP_RE = /&/g;
const SNIPPET_LT_RE = /</g;
const SNIPPET_GT_RE = />/g;
const SNIPPET_QUOT_RE = /"/g;
const SNIPPET_APOS_RE = /'/g;

/**
 * Make an FTS5 snippet safe to render with `set:html` / `innerHTML`.
 *
 * SQLite's `snippet()` function splices literal `` and ``
 * markers around matched terms but does not escape the surrounding
 * source text. Posts that legitimately contain `<`, `>`, `&`, `"` or
 * `'` would render as broken markup, and a `