#!/usr/bin/env npx tsx
/**
 * Bootstrap pi-memory from existing session-search index.
 * 
 * Reads session summaries, batches them, and runs LLM consolidation
 * to extract preferences, patterns, and lessons from past sessions.
 *
 * Usage: npx tsx src/bootstrap.ts [--dry-run] [--limit N] [--batch-size N]
 */
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { homedir } from "node:os";
import { execFileSync } from "node:child_process";
import { MemoryStore } from "./store.js";
import { parseConsolidationResponse, applyExtracted, CONSOLIDATION_PROMPT } from "./consolidator.js";

const INDEX_PATH = join(homedir(), ".pi", "session-search", "index", "session-index.json");
const DB_PATH = join(homedir(), ".pi", "memory", "memory.db");

/**
 * Per-session fields stored under `session` in each index entry.
 * Only `name`, `cwd`, `startedAt`, and `compactionSummaries` are read by
 * this script; the remaining fields are declared to mirror the index format.
 */
interface SessionRecord {
  file: string;
  id: string;
  cwd: string;
  /** Timestamp string; sessions are ordered by it and its first 10 characters are shown as the date. */
  startedAt: string;
  /** Optional display name; included in the prompt when present. */
  name?: string;
  userMessages: string[];
  assistantText: string;
  /** The first two entries are forwarded to the LLM as extra context. */
  compactionSummaries: string[];
  branchSummaries: string[];
}

/** One entry of the session-search index: the session record plus its summary text. */
interface IndexedSession {
  session: SessionRecord;
  summary: string;
}

// ─── Parse args ──────────────────────────────────────────────────────

/**
 * Reads an optional `<flag> N` pair from the argument list.
 *
 * @param argv - Command-line arguments (without the node/script prefix).
 * @param flag - Flag name to look for, e.g. `--limit`.
 * @param fallback - Value returned when the flag is absent.
 * @returns The parsed positive integer, or `fallback` if the flag was not given.
 * @throws Error when the flag is present but its value is missing or is not a
 *   positive integer. Failing fast matters here: a NaN limit silently selects
 *   zero sessions, and a batch size of 0 (or a negative one) would keep the
 *   batching loop from ever advancing.
 */
function parsePositiveIntFlag(argv: readonly string[], flag: string, fallback: number): number {
  const flagIdx = argv.indexOf(flag);
  if (flagIdx < 0) return fallback;

  const raw = argv[flagIdx + 1];
  // Digits only: rejects a missing value, another flag, negatives, and "12abc".
  const value = raw !== undefined && /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : NaN;
  if (!Number.isSafeInteger(value) || value < 1) {
    throw new Error(`${flag} requires a positive integer (got ${raw ?? "no value"})`);
  }
  return value;
}

const args = process.argv.slice(2);
const dryRun = args.includes("--dry-run");
const limit = parsePositiveIntFlag(args, "--limit", Infinity);
const batchSize = parsePositiveIntFlag(args, "--batch-size", 15);

// ─── Load sessions ───────────────────────────────────────────────────

console.log(`Loading session index from ${INDEX_PATH}...`);

// Report a missing or malformed index as a one-line error instead of a raw
// stack trace; the exit status is non-zero either way.
let indexData: unknown;
try {
  indexData = JSON.parse(readFileSync(INDEX_PATH, "utf8"));
} catch (err: unknown) {
  const reason = err instanceof Error ? err.message : String(err);
  console.error(`Failed to load session index from ${INDEX_PATH}: ${reason}`);
  process.exit(1);
}

// The parsed JSON is untrusted: confirm it carries a `sessions` container
// before enumerating it.
const sessionMap =
  typeof indexData === "object" && indexData !== null
    ? (indexData as { sessions?: unknown }).sessions
    : undefined;
if (typeof sessionMap !== "object" || sessionMap === null) {
  console.error(`Session index at ${INDEX_PATH} has no "sessions" object`);
  process.exit(1);
}

// Keep only entries that can be ordered and dated. Only `session.startedAt`
// is verified here; other fields are taken on trust from the index.
const indexEntries = Object.values(sessionMap as Record<string, unknown>);
const sessions = indexEntries.filter(entry => {
  const candidate = entry as Partial<IndexedSession> | null | undefined;
  return typeof candidate?.session?.startedAt === "string";
}) as IndexedSession[];
if (sessions.length < indexEntries.length) {
  console.warn(`Skipping ${indexEntries.length - sessions.length} index entries without a session start time`);
}

// Sort by date (newest first) and take limit
sessions.sort((a, b) => b.session.startedAt.localeCompare(a.session.startedAt));
const selected = sessions.slice(0, limit);

console.log(`Found ${sessions.length} sessions, processing ${selected.length}`);

// ─── Build batches of summaries ──────────────────────────────────────

/**
 * Renders one indexed session as the plain-text digest embedded in the
 * consolidation prompt: optional name, working directory, date, and
 * truncated summary/context snippets, one per line.
 */
function renderSessionDigest(entry: IndexedSession): string {
  const { session, summary } = entry;
  const lines: string[] = [];

  if (session.name) lines.push(`Session: ${session.name}`);
  lines.push(`CWD: ${session.cwd}`);
  lines.push(`Date: ${session.startedAt.slice(0, 10)}`);
  if (summary) lines.push(`Summary: ${summary.slice(0, 500)}`);

  // Compaction summaries carry rich context; forward at most two, each capped at 500 chars.
  const contextSnippets = (session.compactionSummaries || []).slice(0, 2);
  lines.push(...contextSnippets.map(snippet => `Context: ${snippet.slice(0, 500)}`));

  return lines.join("\n");
}

// Walk the selected sessions in windows of `batchSize`, rendering each window
// into one batch of digests.
const batches: string[][] = [];
let cursor = 0;
while (cursor < selected.length) {
  const window = selected.slice(cursor, cursor + batchSize);
  batches.push(window.map(entry => renderSessionDigest(entry)));
  cursor += batchSize;
}

console.log(`Created ${batches.length} batches of ~${batchSize} sessions each`);

// ─── Process batches ─────────────────────────────────────────────────

// Dry runs never open the database: `store` is null exactly when --dry-run was given.
const store = dryRun ? null : new MemoryStore(DB_PATH);
let totalSemantic = 0;
let totalLessons = 0;
// Batches whose LLM call or parsing/apply step threw; reported after the loop.
let failedBatches = 0;

for (const [i, batch] of batches.entries()) {
  console.log(`\nBatch ${i + 1}/${batches.length} (${batch.length} sessions)...`);

  const prompt = `${CONSOLIDATION_PROMPT}

You are analyzing summaries from ${batch.length} past coding sessions. Extract any recurring preferences, project patterns, tool usage habits, and corrections you can identify.

Focus on patterns that appear across multiple sessions — these are more likely to be lasting preferences.

## Session Summaries

${batch.map((s, j) => `### Session ${j + 1}\n${s}`).join("\n\n")}`;

  if (store === null) {
    console.log(`  [dry-run] Would send ${prompt.length} chars to LLM`);
    continue;
  }

  try {
    // Pass the prompt as an argv entry rather than interpolating into a shell
    // string — session summaries are arbitrary past-project content (READMEs,
    // file snippets, MCP tool output), so skipping the shell avoids any quoting
    // pitfalls. Matches the runtime path in index.ts which uses pi.exec(argv).
    const result = execFileSync(
      "pi",
      ["-p", prompt, "--print"],
      { encoding: "utf8", timeout: 120_000, cwd: homedir(), maxBuffer: 1024 * 1024 }
    );

    const extracted = parseConsolidationResponse(result);
    console.log(`  Extracted: ${extracted.semantic.length} facts, ${extracted.lessons.length} lessons`);

    if (extracted.semantic.length + extracted.lessons.length > 0) {
      const applied = applyExtracted(store, extracted, `bootstrap:batch-${i + 1}`);
      totalSemantic += applied.semantic;
      totalLessons += applied.lessons;
      console.log(`  Applied: ${applied.semantic} new facts, ${applied.lessons} new lessons`);
    }
  } catch (err: unknown) {
    // Best effort: one failed batch must not abort the rest, but it is counted
    // so the run does not look fully successful.
    failedBatches++;
    const message = err instanceof Error ? err.message : String(err);
    console.error(`  Error: ${message.slice(0, 200)}`);
  }

  // Brief pause between batches to avoid rate limiting. Kept outside the try
  // block so it also applies after a failed batch, which is when backing off
  // matters most.
  if (i < batches.length - 1) {
    await new Promise(r => setTimeout(r, 2000));
  }
}

if (failedBatches > 0) {
  console.warn(`\n⚠️  ${failedBatches} of ${batches.length} batches failed and were skipped`);
}

// ─── Summary ─────────────────────────────────────────────────────────

// Final report: a dry run only states how many batches it would have sent;
// a real run prints what was added, the store totals, and then closes the store.
if (store === null) {
  console.log(`\n[dry-run] Would have processed ${batches.length} batches`);
} else {
  const { semantic, lessons, events } = store.stats();
  const reportLines = [
    `\n✅ Bootstrap complete!`,
    `   Added: ${totalSemantic} semantic facts, ${totalLessons} lessons`,
    `   Total: ${semantic} facts, ${lessons} lessons, ${events} events`,
    `   DB: ${DB_PATH}`,
  ];
  for (const line of reportLines) {
    console.log(line);
  }
  store.close();
}