/**
 * Cursor Local SQLite Parser
 *
 * Parses Cursor IDE chat history from the local SQLite database to extract
 * token usage and timestamps. Read-only. Does not push to any backend.
 *
 * Storage:
 *   macOS:   ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb
 *   Linux:   ~/.config/Cursor/User/globalStorage/state.vscdb
 *   Windows: %APPDATA%/Cursor/User/globalStorage/state.vscdb
 *
 * Schema:
 *   Table cursorDiskKV (key TEXT, value BLOB)
 *   Conversations: composerData:<composerId>
 *   Messages:      bubbleId:<composerId>:<bubbleId>
 *
 * Token data lives at $.tokenCount.inputTokens and $.tokenCount.outputTokens
 * on bubble rows. Model name at $.modelInfo.modelName. Server-side dedup id
 * at $.serverBubbleId.
 *
 * Timestamps (verified in Task #30 against a real state.vscdb):
 *   - $.createdAt on bubbles is an ISO 8601 string (~56% coverage, all-or-
 *     nothing per composer — likely added in a newer Cursor version).
 *   - $.createdAt on composerData rows is a Unix milliseconds number (100%
 *     coverage). Same field name, different type — parser handles both.
 *   - $.lastUpdatedAt on composerData rows is Unix ms (~13% coverage).
 *   - $.timingInfo.client* on bubbles is performance.now()-style relative
 *     (seconds since Cursor process start), NOT absolute — never use it as
 *     a wall-clock timestamp.
 *
 * Fallback ladder for per-session timestamps: prefer min/max of bubble
 * createdAt when present, otherwise use composerData.createdAt with optional
 * composerData.lastUpdatedAt as end time. Every session gets non-null
 * timestamps; the `timestampSource` / `timestampQuality` fields surface
 * whether the values are precise or approximate.
 *
 * Workspace metadata fields (workspaceHash/workspaceName) remain unverified
 * and return null. composerData.name is a candidate for workspaceName but
 * has not been confirmed yet.
 */
import type { TokenUsage } from "./transcript-parser.js";

/**
 * Single Cursor session in parser output.
 *
 * Shape is intentionally distinct from Claude/Codex SessionUsage because
 * Cursor's timestamp coverage is partial — provenance fields surface
 * whether startTime/endTime/dailyUsage came from per-bubble timestamps
 * (precise) or a composer-level fallback (approximate).
 *
 * workspaceHash / workspaceName remain null — composerData.name is a
 * candidate for workspaceName but was not verified in Task #30 and is
 * deferred to a later release.
 */
export interface CursorSessionUsage {
    sessionId: string;
    workspaceHash: string | null;
    workspaceName: string | null;
    model: string;
    tokens: TokenUsage;
    messageCount: number;
    filePath: string;
    startTime: string | null;
    endTime: string | null;
    timestampSource: "bubble" | "composer" | "mixed" | "none";
    timestampQuality: "precise" | "approximate" | "none";
    /**
     * Daily usage buckets for this session.
     *
     * NOTE(review): the type arguments were missing from this declaration
     * (a bare `Record` does not compile). `Record<string, TokenUsage>` —
     * presumably a day key mapped to that day's token totals — is assumed;
     * confirm against the parser implementation.
     */
    dailyUsage: Record<string, TokenUsage>;
    dailyUsageSource: "bubble" | "composer" | "none";
}

/**
 * Backward-compat alias. PR1 shipped `CursorSessionUsageDryRun` in the
 * published `.d.ts`; removing it in a patch release would be a breaking
 * change for anyone who imported the type. The alias will be kept for at
 * least one release after the rename.
 *
 * @deprecated Use {@link CursorSessionUsage} instead.
 */
export type CursorSessionUsageDryRun = CursorSessionUsage;

/**
 * Transparency payload for the `what_we_read` MCP mode. Describes where
 * the parser is reading from and what it sees at the table/prefix level,
 * without leaking conversation content. UUIDs in sample keys are
 * truncated to 8 characters — enough to distinguish keys at a glance,
 * not enough to be a stable correlation handle.
 */
export interface CursorMeta {
    filePath: string;
    dbFileSize: number;
    tables: string[];
    /**
     * Key-prefix histogram.
     *
     * NOTE(review): the type arguments were missing from this declaration;
     * `Record<string, number>` (prefix mapped to a count) is assumed from
     * the "histogram" wording — confirm against the implementation.
     */
    keyPrefixes: Record<string, number>;
    sampleBubbleKeys: string[];
    sampleComposerKeys: string[];
}

/**
 * Output of the `self_test` MCP mode. Runs the full parser pipeline and
 * reports health + coverage + invariant checks.
 *
 * overallStatus semantics:
 *   PASS     — parser ran, invariants held, coverage looks healthy
 *   DEGRADED — parser ran but some bubbles lack timestamps / invariants
 *              tripped warnings / coverage is partial. Not an error.
 *   FAIL     — parser could not run (missing DB, missing sqlite3, query
 *              failure, or corrupt top-level output). Surfaces as
 *              MCP isError:true at the tool boundary.
 */
export interface CursorSelfTestResult {
    filePath: string;
    dbExists: boolean;
    sqlite3Path: string;
    canQuery: boolean;
    tokenBubbleCount: number;
    composerCount: number;
    sessionsWithTokens: number;
    timestampCoverage: {
        bubblesWithCreatedAt: number;
        totalBubbles: number;
        composersWithCreatedAt: number;
        totalComposers: number;
    };
    invariantChecks: Array<{
        name: string;
        passed: boolean;
        details?: string;
    }>;
    warnings: string[];
    errors: string[];
    overallStatus: "PASS" | "DEGRADED" | "FAIL";
}

/**
 * Structured error raised by the parser on unrecoverable failures. The
 * `code` distinguishes a missing database, a missing sqlite3 binary, and
 * a failed query.
 */
export interface CursorParserError {
    code: "CURSOR_DB_NOT_FOUND" | "CURSOR_SQLITE3_NOT_FOUND" | "CURSOR_SQLITE_QUERY_FAILED";
    message: string;
}

/**
 * Result of a parser run: the aggregated sessions plus the database path
 * they were read from.
 */
export interface CursorParserResult {
    sessions: CursorSessionUsage[];
    filePath: string;
}

/**
 * Get the default Cursor SQLite path for the current platform, honoring
 * the COSTHAWK_CURSOR_DB_PATH environment override.
 */
export declare function getCursorDbPath(): string;

/**
 * Check whether the Cursor SQLite database exists at the resolved path.
 */
export declare function cursorDbExists(): boolean;

/**
 * Type guard — narrows an unknown error to a CursorParserError.
 */
declare function isCursorParserError(value: unknown): value is CursorParserError;

/**
 * Parse Cursor usage from local SQLite. Read-only — does NOT push anything
 * to the costcanary backend.
 *
 * Returns aggregated session data per composer with per-session token totals,
 * message counts, start/end timestamps, and daily usage buckets.
 *
 * Dedup strategy: per composer, keep one entry per (serverBubbleId ?? bubbleId).
 * On collision, keep the candidate with the larger token total.
 *
 * Mixed-model handling: if a composer contains multiple non-empty model names,
 * the returned `model` field is "mixed". If no model info is present on any
 * bubble, the field is "unknown".
 *
 * Sort order: total tokens descending.
 *
 * @throws CursorParserError on unrecoverable failures (missing DB, missing
 *   sqlite3 binary, malformed SQLite output).
 */
export declare function parseCursorUsage(): CursorParserResult;

/**
 * Backward-compat alias. PR1 consumers called this function name; keep it
 * working for one release after the rename.
 *
 * @deprecated Use {@link parseCursorUsage} instead.
 */
export declare const parseCursorUsageDryRun: typeof parseCursorUsage;

/**
 * Return transparency metadata about the Cursor SQLite: file size, table
 * list, key-prefix histogram, and a small sample of bubble and composer
 * keys with their UUIDs truncated. Powers the `what_we_read` MCP mode so
 * users can see exactly what data CostHawk is reading.
 *
 * @throws CursorParserError on missing DB, missing sqlite3, or query failure.
 */
export declare function getCursorMeta(): CursorMeta;

/**
 * Run a full parser health check against the live DB. Reports coverage
 * numbers, validates invariants, and classifies the result as PASS,
 * DEGRADED, or FAIL.
 *
 *   - FAIL is reserved for unrecoverable failures (DB missing, sqlite3
 *     missing, query error). The MCP tool surfaces FAIL as isError:true.
 *   - DEGRADED means the parser ran but flagged warnings — e.g., invariant
 *     tolerance exceeded, partial timestamp coverage, unexpected row shapes.
 *   - PASS means the parser ran cleanly with full coverage and no warnings.
 *
 * Never throws — catches errors and reports them as FAIL so callers can
 * present the full structured payload to users.
 */
export declare function runCursorSelfTest(): CursorSelfTestResult;

export { isCursorParserError };
//# sourceMappingURL=cursor-parser.d.ts.map