/**
 * Transcript Ingester
 *
 * Walks ~/.claude/projects recursively for *.jsonl files, parses Claude Code
 * conversation transcripts, and indexes human + assistant prose into Gaia's
 * FTS5 store.
 *
 * Design:
 * - Streams JSONL line-by-line (never loads full file into memory)
 * - Incremental: tracks last-ingested byte offset per file in SQLite
 * - Dedupes on (file_path, message_index) to survive re-runs
 * - Skips noise: tool_result, system messages, sidechain, pure tool_use-only turns
 */
import Database from 'better-sqlite3';
import type { MemoriaStorage } from './storage.js';

/** Caller-supplied knobs for a single ingest run. Every field is optional. */
export interface IngestOptions {
  /** Root directory to walk. Defaults to ~/.claude/projects */
  rootDir?: string;
  /** Substring to match against project slug (e.g. "foundation") */
  projectFilter?: string;
  /** Only ingest messages newer than this ISO timestamp */
  since?: string;
  /** Walk and report without writing to DB */
  dryRun?: boolean;
}

/** Summary counters reported for an ingest run. */
export interface IngestStats {
  filesFound: number;
  filesSkipped: number;
  messagesIngested: number;
  messagesSkipped: number;
  bytesProcessed: number;
  projectSlugs: string[];
  /** Both bounds are null when no dated message was recorded. */
  dateRange: {
    earliest: string | null;
    latest: string | null;
  };
  parseErrors: number;
  parseErrorSamples: string[];
  /** NOTE(review): presumably echoes IngestOptions.dryRun; confirm in the implementation. */
  dryRun: boolean;
}

/** A single parsed message ready for indexing */
export interface ParsedMessage {
  sessionId: string;
  messageIndex: number;
  // NOTE(review): numeric timestamp; the unit (ms vs. s) is not visible in this declaration file.
  timestamp: number;
  role: 'user' | 'assistant';
  text: string;
  projectSlug: string;
  cwd: string | null;
  filePath: string;
  lineOffset: number;
}

/**
 * SQLite DDL for the ingester's bookkeeping tables:
 * - `transcript_ingest_state`: one row per file (byte offset, ingest time, message count)
 * - `transcript_dedup`: one row per (file_path, message_index), pointing at a memory_id
 *
 * The literal must stay on one physical line: a double-quoted string cannot
 * contain a raw line break.
 */
export declare const INGEST_STATE_DDL = "\n CREATE TABLE IF NOT EXISTS transcript_ingest_state (\n file_path TEXT NOT NULL PRIMARY KEY,\n last_byte_offset INTEGER NOT NULL DEFAULT 0,\n last_ingested_at INTEGER NOT NULL,\n messages_ingested INTEGER NOT NULL DEFAULT 0\n );\n\n CREATE TABLE IF NOT EXISTS transcript_dedup (\n file_path TEXT NOT NULL,\n message_index INTEGER NOT NULL,\n memory_id TEXT NOT NULL,\n PRIMARY KEY (file_path, message_index)\n );\n\n CREATE INDEX IF NOT EXISTS idx_dedup_file ON transcript_dedup(file_path);\n";

/**
 * Ingests transcript files into the Memoria store, using the supplied SQLite
 * handle for its own state and dedup tables.
 */
export declare class TranscriptIngester {
  private db;
  private memoria;
  constructor(memoriaDb: Database.Database, memoria: MemoriaStorage);
  private ensureSchema;
  /**
   * Runs one ingest pass.
   *
   * @param options - Optional filters and switches; see {@link IngestOptions}.
   * @returns Resolves with the statistics gathered during the run.
   */
  ingest(options?: IngestOptions): Promise<IngestStats>;
  private collectJsonlFiles;
  private processFile;
  private streamFile;
  private updateIngestState;
  private updateDateRange;
  /** Returns ingest state rows for testing incremental behavior */
  getIngestState(): Array<{
    file_path: string;
    last_byte_offset: number;
    messages_ingested: number;
  }>;
  /** Returns dedup row count for a given file */
  getDedupCount(filePath: string): number;
  /** Nano-id helper (re-exported so tests can use it) */
  static generateId(): string;
}

/**
 * MemoriaStorage keeps its `db` private. We expose it through this module
 * by accepting the db instance directly (tests pass an in-memory DB;
 * the MCP tool passes the real db through getInternalDb).
 */
export declare function createIngester(db: Database.Database, memoria: MemoriaStorage): TranscriptIngester;
//# sourceMappingURL=transcript-ingester.d.ts.map