import type { AgentMessage } from "@earendil-works/pi-agent-core";
import { complete, type Message } from "@earendil-works/pi-ai";
import { convertToLlm, serializeConversation, type ExtensionContext } from "@earendil-works/pi-coding-agent";
import {
DEFAULT_BUDGET_GUARD_PERCENT,
DEFAULT_BUDGET_GUARD_TOKENS,
DEFAULT_BUDGET_MAX_INPUT_CHARS,
DEFAULT_COMPACTION_MAX_TOKENS,
DEFAULT_COMPACTION_MODEL,
DEFAULT_IDLE_COMPACTION_THRESHOLD_TOKENS,
QOL_COMPACTION_SYSTEM_PROMPT,
} from "./constants.js";
import {
chunkConversationText as chunkConversationTextRaw,
computeBudgetTrigger,
isBudgetGuardCompaction,
orchestrateChunkedSummary,
type BudgetTrigger,
type SummarizeOutcome,
type SummarizeRequest,
type TranscriptRiskResult,
} from "./budget-guard.js";
import {
buildBudgetHandoff as buildBudgetHandoffData,
writeBudgetHandoffArtifact as writeBudgetHandoffArtifactRaw,
type HandoffWriteResult,
type QolBudgetHandoff,
} from "./compaction-handoff.js";
import { settingBoolean, settingNumber, settingString } from "./settings.js";
import { stringifyError } from "./util.js";
/**
 * Level of detail requested from the summarizer. Each value selects a
 * different guidance sentence in `compactionProfileInstructions`.
 */
export type QolSummaryProfile = "concise" | "balanced" | "exhaustive";
/**
 * What is being summarized. `buildSummaryPrompt` uses it to word the task:
 * the conversation span being compacted, the branch being left during /tree
 * navigation, or a previous session being imported into the current context.
 */
export type QolSummaryPurpose = "compaction" | "branch-summary" | "session-search";
/**
 * Shows a compaction-related notification in the UI.
 *
 * Nothing happens when the context has no UI or when the
 * `compaction.notify` setting (default `true`) is turned off.
 *
 * @param ctx - Extension context providing `hasUI`, `ui` and `cwd`.
 * @param message - Text to display.
 * @param level - Notification severity; defaults to `"info"`.
 */
export function compactionNotify(ctx: ExtensionContext, message: string, level: "info" | "warning" | "error" = "info"): void {
  if (!ctx.hasUI) return;
  const notificationsEnabled = settingBoolean("compaction.notify", true, ctx.cwd);
  if (notificationsEnabled) ctx.ui.notify(message, level);
}
/**
 * Reads the `compaction.profile` setting for `cwd`.
 *
 * @returns `"concise"` or `"exhaustive"` when configured exactly so; any
 * other value (including the default) yields `"balanced"`.
 */
export function compactionProfile(cwd: string): QolSummaryProfile {
  const configured = settingString("compaction.profile", "balanced", cwd);
  if (configured === "concise" || configured === "exhaustive") return configured;
  return "balanced";
}
/**
 * Maps a summary profile to the guidance sentence embedded in the prompt.
 * Anything other than `"concise"` or `"exhaustive"` gets the balanced text.
 */
function compactionProfileInstructions(profile: QolSummaryProfile): string {
  switch (profile) {
    case "concise":
      return "Prefer a compact continuation summary. Include only decisions, current state, modified/read files, blockers, and concrete next steps.";
    case "exhaustive":
      return "Be thorough. The summary may replace substantial conversation history, so preserve all relevant implementation details, alternatives considered, exact file paths, commands, errors, and pending work.";
    default:
      return "Be complete but not verbose. Preserve enough detail for a future assistant to continue without the old transcript.";
  }
}
/**
 * Returns a copy of `messages` in which assistant messages with array content
 * have their `"thinking"` parts removed. All other messages are passed through
 * by reference; the input array and its messages are not mutated.
 */
function stripThinkingForSummary(messages: Message[]): Message[] {
  const cleaned: Message[] = [];
  for (const entry of messages) {
    if (entry.role !== "assistant" || !Array.isArray(entry.content)) {
      cleaned.push(entry);
      continue;
    }
    const visibleParts = entry.content.filter((part: any) => part?.type !== "thinking");
    cleaned.push({ ...entry, content: visibleParts });
  }
  return cleaned;
}
/**
 * Turns agent messages into the plain-text transcript fed to the summarizer:
 * converts them with `convertToLlm`, drops assistant thinking parts, then
 * serializes the result with `serializeConversation`.
 */
export function serializeMessagesForSummary(messages: AgentMessage[]): string {
  const llmMessages = convertToLlm(messages);
  const withoutThinking = stripThinkingForSummary(llmMessages);
  return serializeConversation(withoutThinking);
}
/**
 * Renders the content of a custom message as plain text.
 *
 * - A string is returned unchanged.
 * - An array is rendered part by part: text parts contribute their text,
 *   image parts become `[image <mimeType>]` (or `[image]`), any other part
 *   with a truthy `type` becomes `[<type>]`, and parts without a type are
 *   skipped. The lines are joined with newlines and trimmed.
 * - Anything else yields an empty string.
 */
function customMessageContentToText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  const lines = content
    .map((part: any): string | undefined => {
      const kind = part?.type;
      if (kind === "text" && typeof part.text === "string") return part.text;
      if (kind === "image") {
        const mimeSuffix = typeof part.mimeType === "string" ? ` ${part.mimeType}` : "";
        return `[image${mimeSuffix}]`;
      }
      return kind ? `[${String(kind)}]` : undefined;
    })
    .filter((line): line is string => line !== undefined);
  return lines.join("\n").trim();
}
/**
 * Builds the user prompt sent to the summarizer.
 *
 * Layout, in order: optional custom instructions, optional previous summary,
 * the transcript, then the task sentence, the profile guidance and the
 * markdown template the summary must follow.
 *
 * Each input section is wrapped in its own tag. Without delimiters the three
 * free-text inputs run together and the model cannot tell caller
 * instructions, the earlier summary and the transcript apart.
 * NOTE(review): confirm the tag names agree with anything the compaction
 * system prompt refers to.
 *
 * @param options.conversationText - Serialized transcript to summarize.
 * @param options.customInstructions - Extra caller instructions; ignored when blank.
 * @param options.previousSummary - Earlier summary to carry forward; ignored when empty.
 * @param options.profile - Selects the level-of-detail guidance sentence.
 * @param options.purpose - Selects how the summarized span is described.
 */
function buildSummaryPrompt(options: {
  conversationText: string;
  customInstructions?: string;
  previousSummary?: string;
  profile: QolSummaryProfile;
  purpose: QolSummaryPurpose;
}): string {
  const purposeText = options.purpose === "branch-summary"
    ? "the branch being left during /tree navigation"
    : options.purpose === "session-search"
      ? "the previous session being imported into the current context"
      : "the conversation span being compacted";
  const customInstructions = options.customInstructions?.trim();
  const custom = customInstructions ? `<custom-instructions>\n${customInstructions}\n</custom-instructions>\n\n` : "";
  const previous = options.previousSummary ? `<previous-summary>\n${options.previousSummary}\n</previous-summary>\n\n` : "";
  const conversation = `<conversation>\n${options.conversationText}\n</conversation>\n\n`;
  return `${custom}${previous}${conversation}Summarize ${purposeText} for a coding agent that must continue the work.\n\n${compactionProfileInstructions(options.profile)}\n\nUse this markdown shape:\n\n## Goal\n[What the user is trying to accomplish]\n\n## Constraints & Preferences\n- [Requirements, style, safety, or user preferences]\n\n## Progress\n### Done\n- [x] [Completed work]\n\n### In Progress\n- [ ] [Current partial work]\n\n### Blocked\n- [Blockers or none]\n\n## Key Decisions\n- **[Decision]**: [Rationale]\n\n## Files & Commands\n- [Files read/modified and important commands/results]\n\n## Next Steps\n1. [Most important next action]\n\n## Critical Context\n- [Anything easy to lose but needed later]`;
}
/**
 * Requests a summary from a remote HTTP endpoint.
 *
 * Sends a JSON `POST` with `{ maxTokens, prompt, systemPrompt }` and expects a
 * JSON object carrying the summary in a string `summary` field, or failing
 * that a string `text` field.
 *
 * @param endpoint - URL to post to.
 * @param systemPrompt - System prompt forwarded to the endpoint.
 * @param promptText - User prompt forwarded as `prompt`.
 * @param maxTokens - Output token budget forwarded to the endpoint.
 * @param signal - Optional abort signal passed to `fetch`.
 * @returns The summary text from the response.
 * @throws Error when the status is not OK (message includes the status and the
 * first 500 characters of the body), when the body is not JSON, or when
 * neither `summary` nor `text` is a string.
 */
async function summarizeWithRemote(endpoint: string, systemPrompt: string, promptText: string, maxTokens: number, signal?: AbortSignal): Promise<string> {
  const response = await fetch(endpoint, {
    body: JSON.stringify({ maxTokens, prompt: promptText, systemPrompt }),
    headers: { "content-type": "application/json" },
    method: "POST",
    signal,
  });
  // Read the body before checking the status so error responses can be quoted.
  const text = await response.text();
  if (!response.ok) throw new Error(`Remote compaction endpoint returned ${response.status}: ${text.slice(0, 500)}`);
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error("Remote compaction endpoint did not return JSON");
  }
  if (parsed && typeof parsed === "object") {
    const record = parsed as Record<string, unknown>;
    if (typeof record.summary === "string") return record.summary;
    if (typeof record.text === "string") return record.text;
  }
  throw new Error("Remote compaction response missing summary");
}
/**
 * Resolves a configured model string to a model object.
 *
 * The string is trimmed and a trailing thinking-level suffix
 * (`:off`, `:minimal`, `:low`, `:medium`, `:high`, `:xhigh`) is removed before
 * anything else, so `"current:high"`, `" current "` and a blank value all
 * resolve to the active model instead of being looked up as a model id.
 *
 * Resolution order:
 * 1. Empty or `"current"` (case-insensitive) returns `ctx.model`.
 * 2. `provider/id` is looked up in the registry for exactly that provider.
 * 3. A bare id is tried against the current model's provider first, then a
 *    fixed list of well-known providers; the first hit wins.
 *
 * @param ctx - Extension context providing `model` and `modelRegistry`.
 * @param configured - Raw setting value.
 * @returns The resolved model, or `undefined` when nothing matches.
 */
export function resolveConfiguredModel(ctx: ExtensionContext, configured: string): any | undefined {
  const modelRef = (configured ?? "")
    .trim()
    .replace(/:(off|minimal|low|medium|high|xhigh)$/i, "")
    .trim();
  if (!modelRef || modelRef.toLowerCase() === "current") return ctx.model;
  const slash = modelRef.indexOf("/");
  if (slash > 0) return ctx.modelRegistry.find(modelRef.slice(0, slash), modelRef.slice(slash + 1));
  const providers = [ctx.model?.provider, "google", "openai", "anthropic", "mistral", "moonshot", "cloudflare-ai-gateway", "cloudflare-workers-ai"].filter((value): value is string => typeof value === "string");
  for (const provider of providers) {
    const model = ctx.modelRegistry.find(provider, modelRef);
    if (model) return model;
  }
  return undefined;
}
/**
 * Formats a model as `provider/id` for messages and result metadata.
 * A falsy model is reported as `"unknown model"`.
 */
export function modelLabel(model: any): string {
  if (!model) return "unknown model";
  return `${model.provider}/${model.id}`;
}
/**
 * Maximum number of transcript characters sent in a single summary request,
 * read from `compaction.maxInputChars` and rounded down to a whole number.
 *
 * @returns `0` when the setting is zero or negative (chunking disabled);
 * otherwise the setting, raised to the 20,000-character floor if it is lower.
 */
export function budgetMaxInputChars(ctx: ExtensionContext): number {
  const configured = settingNumber("compaction.maxInputChars", DEFAULT_BUDGET_MAX_INPUT_CHARS, ctx.cwd);
  const wholeChars = Math.floor(configured);
  // Non-positive means "do not chunk"; callers treat 0 as disabled.
  if (wholeChars <= 0) return 0;
  // Positive values below the hard floor are raised to it.
  return Math.max(20_000, wholeChars);
}
/** Re-export of the chunking helper imported from `./budget-guard.js`. */
export const chunkConversationText = chunkConversationTextRaw;
/** Re-export of the handoff type declared in `./compaction-handoff.js`. */
export type { QolBudgetHandoff } from "./compaction-handoff.js";
/**
 * Produces one summary with a single request (no chunking).
 *
 * The remote endpoint is tried first when `compaction.remoteEnabled` is on and
 * `compaction.remoteEndpoint` is set. If that request fails, a warning is
 * shown and the configured model is used instead. If the failure happened
 * because the caller aborted, the error is rethrown instead, so a cancelled
 * summary does not show a misleading warning or start a second request.
 *
 * @param ctx - Extension context (settings, model registry, UI).
 * @param request - Text to summarize plus optional custom instructions and
 * previous summary. The previous summary is dropped when
 * `compaction.includePreviousSummary` is off.
 * @param options.maxTokens - Output token budget.
 * @param options.model - Model override; falls back to `compaction.model`.
 * @param options.purpose - What is being summarized.
 * @param options.signal - Optional abort signal.
 * @returns The summary, a label for what produced it, and whether it came
 * from the remote endpoint or a model.
 * @throws Error when the model cannot be resolved, authentication fails, or no
 * API key is available; errors from the model request propagate unchanged.
 */
async function singleShotSummary(ctx: ExtensionContext, request: SummarizeRequest, options: { maxTokens: number; model?: string; purpose: QolSummaryPurpose; signal?: AbortSignal }): Promise<SummarizeOutcome> {
  const promptText = buildSummaryPrompt({
    conversationText: request.text,
    customInstructions: request.customInstructions,
    previousSummary: settingBoolean("compaction.includePreviousSummary", true, ctx.cwd) ? request.previousSummary : undefined,
    profile: compactionProfile(ctx.cwd),
    purpose: options.purpose,
  });
  const remoteEndpoint = settingString("compaction.remoteEndpoint", "", ctx.cwd);
  if (settingBoolean("compaction.remoteEnabled", false, ctx.cwd) && remoteEndpoint) {
    try {
      const summary = await summarizeWithRemote(remoteEndpoint, QOL_COMPACTION_SYSTEM_PROMPT, promptText, options.maxTokens, options.signal);
      return { model: remoteEndpoint, summary, via: "remote" };
    } catch (error) {
      // Cancellation is not a remote failure: stop here rather than fall back.
      if (options.signal?.aborted) throw error;
      compactionNotify(ctx, `Remote compaction failed, trying model fallback: ${stringifyError(error)}`, "warning");
    }
  }
  const configuredModel = options.model ?? settingString("compaction.model", DEFAULT_COMPACTION_MODEL, ctx.cwd);
  const model = resolveConfiguredModel(ctx, configuredModel);
  if (!model) throw new Error(`Summary model not found: ${configuredModel}`);
  const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model);
  if (!auth.ok) throw new Error(auth.error);
  if (!auth.apiKey) throw new Error(`No API key for ${model.provider}`);
  const message: Message = {
    content: [{ text: promptText, type: "text" }],
    role: "user",
    timestamp: Date.now(),
  };
  const response = await complete(
    model,
    { messages: [message], systemPrompt: QOL_COMPACTION_SYSTEM_PROMPT },
    { apiKey: auth.apiKey, headers: auth.headers, maxTokens: options.maxTokens, signal: options.signal },
  );
  // Keep only the text parts of the reply; other content types are ignored.
  const summary = response.content
    .filter((content): content is { type: "text"; text: string } => content.type === "text")
    .map((content) => content.text)
    .join("\n")
    .trim();
  return { model: modelLabel(model), summary, via: "model" };
}
/**
 * Generates a summary of `conversationText`, chunking the input when it is
 * larger than the configured per-request character limit.
 *
 * A single request is used when `skipChunking` is set, when chunking is
 * disabled (limit of 0), or when the text fits within the limit. Otherwise the
 * work is delegated to `orchestrateChunkedSummary`, which calls back into the
 * single-request path for each piece.
 *
 * The output token budget is `options.maxTokens` or the `compaction.maxTokens`
 * setting, rounded down and never below 256.
 */
export async function generateQolSummary(ctx: ExtensionContext, options: {
  conversationText: string;
  customInstructions?: string;
  previousSummary?: string;
  maxTokens?: number;
  model?: string;
  purpose: QolSummaryPurpose;
  signal?: AbortSignal;
  /** Internal: set true on recursive summary-of-summaries pass to skip rechunking. */
  skipChunking?: boolean;
}): Promise<{ model: string; summary: string; via: "model" | "remote"; chunkCount?: number; reduceLevels?: number; requestCount?: number }> {
  const requestedMaxTokens = options.maxTokens ?? settingNumber("compaction.maxTokens", DEFAULT_COMPACTION_MAX_TOKENS, ctx.cwd);
  const maxTokens = Math.max(256, Math.floor(requestedMaxTokens));
  const maxInputChars = budgetMaxInputChars(ctx);

  // Shared single-request summarizer, used directly and by the chunk orchestrator.
  const summarize = (request: SummarizeRequest) =>
    singleShotSummary(ctx, request, {
      maxTokens,
      model: options.model,
      purpose: options.purpose,
      signal: options.signal,
    });

  const needsChunking = !options.skipChunking && maxInputChars > 0 && options.conversationText.length > maxInputChars;
  if (needsChunking) {
    const chunkedResult = await orchestrateChunkedSummary({
      customInstructions: options.customInstructions,
      maxInputChars,
      notify: (message, level = "info") => compactionNotify(ctx, message, level),
      previousSummary: options.previousSummary,
      signal: options.signal,
      summarize,
      text: options.conversationText,
    });
    return chunkedResult;
  }

  return summarize({
    customInstructions: options.customInstructions,
    previousSummary: options.previousSummary,
    skipChunking: true,
    text: options.conversationText,
  });
}
/**
 * Builds the handoff payload for a compaction by forwarding the reason, the
 * optional preparation data and the context's session manager to the
 * `buildBudgetHandoff` helper from `./compaction-handoff.js`.
 */
export function buildBudgetHandoff(ctx: ExtensionContext, options: {
  reason: string;
  preparation?: { messagesToSummarize?: AgentMessage[]; turnPrefixMessages?: AgentMessage[]; previousSummary?: string; tokensBefore?: number };
}): QolBudgetHandoff {
  const { preparation, reason } = options;
  const sessionManager = ctx.sessionManager as any;
  return buildBudgetHandoffData({ preparation, reason, sessionManager });
}
/**
 * Writes the handoff artifact, honoring the
 * `compaction.handoffArtifactEnabled` setting (default `true`).
 *
 * A write error is surfaced as a warning notification, not thrown; the
 * helper's result is returned unchanged either way.
 */
export function writeBudgetHandoffArtifact(ctx: ExtensionContext, handoff: QolBudgetHandoff): HandoffWriteResult {
  const artifactEnabled = settingBoolean("compaction.handoffArtifactEnabled", true, ctx.cwd);
  const outcome = writeBudgetHandoffArtifactRaw(handoff, { enabled: artifactEnabled });
  if (!outcome.error) return outcome;
  compactionNotify(ctx, `QOL handoff artifact write failed: ${outcome.error}`, "warning");
  return outcome;
}
/**
 * Compaction hook: replaces the default compaction summary with a QOL summary.
 *
 * Runs when `compaction.customEnabled` is on, or unconditionally when the
 * compaction was requested by the budget guard (detected from the event's
 * custom instructions).
 *
 * @param event - Compaction event; `preparation`, `customInstructions` and
 * `signal` are read from it.
 * @param ctx - Extension context.
 * @returns
 * - `undefined` when the handler does not apply, there is nothing to
 *   summarize, the request was aborted, or summarization failed and
 *   `compaction.fallbackToDefault` is on (the default);
 * - `{ cancel: true }` when summarization failed and the fallback is off;
 * - `{ compaction: { ... } }` with the summary and metadata on success.
 */
export async function handleQolCompaction(event: any, ctx: ExtensionContext): Promise<any> {
  const isBudgetGuard = isBudgetGuardCompaction(event?.customInstructions);
  // Budget-guard-triggered compactions force the QOL bounded path so the
  // chunked summarizer + handoff artifact always run, even when the user
  // has not flipped compaction.customEnabled on.
  if (!isBudgetGuard && !settingBoolean("compaction.customEnabled", false, ctx.cwd)) return undefined;
  const preparation = event?.preparation ?? {};
  const messages = [...(preparation.messagesToSummarize ?? []), ...(preparation.turnPrefixMessages ?? [])];
  if (messages.length === 0) return undefined;
  const tokensBefore = typeof preparation.tokensBefore === "number" ? preparation.tokensBefore : 0;
  // The handoff artifact is written before summarizing, so it exists even if
  // the summary request later fails.
  const handoff = buildBudgetHandoff(ctx, { preparation, reason: event.customInstructions ?? "session_before_compact" });
  const handoffResult = writeBudgetHandoffArtifact(ctx, handoff);
  const sourceLabel = isBudgetGuard ? "pi-qol budget-guard" : "pi-qol";
  compactionNotify(ctx, `QOL compaction: summarizing ${messages.length} message(s), ${tokensBefore.toLocaleString()} token(s)${isBudgetGuard ? " (budget guard)" : ""}.`, "info");
  try {
    const conversationText = serializeMessagesForSummary(messages);
    const result = await generateQolSummary(ctx, {
      conversationText,
      customInstructions: event.customInstructions,
      previousSummary: preparation.previousSummary,
      purpose: "compaction",
      signal: event.signal,
    });
    // An empty summary is treated as a failure so the fallback policy applies.
    if (!result.summary.trim()) throw new Error("Compaction summary was empty");
    const chunkSuffix = result.chunkCount && result.chunkCount > 1 ? ` (${result.chunkCount} chunks, ${result.reduceLevels ?? 1} reduce level${(result.reduceLevels ?? 1) === 1 ? "" : "s"})` : "";
    compactionNotify(ctx, `QOL compaction complete via ${result.via}: ${result.model}${chunkSuffix}`, "info");
    return {
      compaction: {
        details: {
          chunkCount: result.chunkCount,
          handoffArtifact: handoffResult.path,
          handoffArtifactLatest: handoffResult.latestPath,
          handoffArtifactError: handoffResult.error,
          messageCount: messages.length,
          model: result.model,
          profile: compactionProfile(ctx.cwd),
          reduceLevels: result.reduceLevels,
          requestCount: result.requestCount,
          source: sourceLabel,
          trigger: isBudgetGuard ? "budget-guard" : "session_before_compact",
          via: result.via,
        },
        firstKeptEntryId: preparation.firstKeptEntryId,
        summary: result.summary,
        tokensBefore: preparation.tokensBefore,
      },
    };
  } catch (error) {
    // A user-initiated abort is not an error worth reporting.
    if (event.signal?.aborted) return undefined;
    compactionNotify(ctx, `QOL compaction failed: ${stringifyError(error)}`, "error");
    return settingBoolean("compaction.fallbackToDefault", true, ctx.cwd) ? undefined : { cancel: true };
  }
}
/**
 * Converts one session entry into zero or one text fragments for a branch
 * summary. Message entries are serialized, compaction and branch-summary
 * entries contribute their stored summary, and custom messages contribute
 * their rendered content (or `[empty]`). Anything else yields no text.
 */
function summarizeEntryForBranch(entry: any): string[] {
  switch (entry?.type) {
    case "message":
      return entry.message ? [serializeMessagesForSummary([entry.message])] : [];
    case "compaction":
      return typeof entry.summary === "string" ? [`[Compaction summary]: ${entry.summary}`] : [];
    case "branch_summary":
      return typeof entry.summary === "string" ? [`[Branch summary]: ${entry.summary}`] : [];
    case "custom_message": {
      const typeSuffix = entry.customType ? `:${entry.customType}` : "";
      const body = customMessageContentToText(entry.content) || "[empty]";
      return [`[Custom message${typeSuffix}]: ${body}`];
    }
    default:
      return [];
  }
}
/**
 * Branch-summary hook: produces a QOL summary of the entries being left.
 *
 * Applies only when `compaction.branchSummaryEnabled` is on and the
 * preparation data says the user wants a summary.
 *
 * @param event - Branch-summary event; `preparation`, `customInstructions`
 * and `signal` are read from it.
 * @param ctx - Extension context.
 * @returns `{ summary: { details, summary } }` on success; `undefined` when
 * the handler does not apply, there is no text to summarize, the request was
 * aborted, or summarization failed (failures are reported as an error
 * notification).
 */
export async function handleQolBranchSummary(event: any, ctx: ExtensionContext): Promise<any> {
  if (!settingBoolean("compaction.branchSummaryEnabled", false, ctx.cwd)) return undefined;
  const preparation = event?.preparation ?? {};
  if (preparation.userWantsSummary !== true) return undefined;
  const entries = Array.isArray(preparation.entriesToSummarize) ? preparation.entriesToSummarize : [];
  const conversationText = entries.flatMap(summarizeEntryForBranch).join("\n\n").trim();
  if (!conversationText) return undefined;
  compactionNotify(ctx, `QOL branch summary: summarizing ${entries.length} entr${entries.length === 1 ? "y" : "ies"}.`, "info");
  try {
    const result = await generateQolSummary(ctx, {
      conversationText,
      customInstructions: event.customInstructions ?? preparation.customInstructions,
      purpose: "branch-summary",
      signal: event.signal,
    });
    // An empty summary is reported as a failure instead of being returned.
    if (!result.summary.trim()) throw new Error("Branch summary was empty");
    return {
      summary: {
        details: { entryCount: entries.length, model: result.model, profile: compactionProfile(ctx.cwd), source: "pi-qol", via: result.via },
        summary: result.summary,
      },
    };
  } catch (error) {
    // A user-initiated abort is not an error worth reporting.
    if (event.signal?.aborted) return undefined;
    compactionNotify(ctx, `QOL branch summary failed: ${stringifyError(error)}`, "error");
    return undefined;
  }
}
/**
 * Reads the current context usage from `ctx.getContextUsage()`.
 *
 * @returns `undefined` when no positive, finite token count is reported.
 * Otherwise the token count plus the context window, taken from the usage
 * report or else from the current model; the window is `undefined` when it is
 * not a positive, finite number.
 */
function contextUsage(ctx: ExtensionContext): { contextWindow?: number; tokens: number } | undefined {
  const reported = ctx.getContextUsage?.() as { tokens?: unknown; contextWindow?: unknown } | undefined;

  const tokens = Number(reported?.tokens);
  const hasTokens = Number.isFinite(tokens) && tokens > 0;
  if (!hasTokens) return undefined;

  const windowSize = Number(reported?.contextWindow ?? ctx.model?.contextWindow);
  const hasWindow = Number.isFinite(windowSize) && windowSize > 0;
  return { contextWindow: hasWindow ? windowSize : undefined, tokens };
}
/**
 * Decides whether current context usage warrants a compaction and, if so,
 * returns a human-readable reason.
 *
 * Thresholds are checked in this order; the first one crossed wins:
 * 1. `compaction.thresholdTokens`: absolute token limit (default -1).
 * 2. `compaction.thresholdPercent`: percent of the context window
 *    (default -1); skipped when the window size is unknown.
 * 3. `compaction.idleThresholdTokens`: idle token threshold.
 *
 * A threshold of zero or less is treated as disabled for all three. Without
 * that guard on the idle threshold, a non-positive value would match any
 * usage and trigger a compaction on every check.
 *
 * @returns The reason string, or `undefined` when usage is unknown or no
 * threshold is crossed.
 */
export function compactionTriggerReason(ctx: ExtensionContext): string | undefined {
  const usage = contextUsage(ctx);
  if (!usage) return undefined;
  const tokenLimit = settingNumber("compaction.thresholdTokens", -1, ctx.cwd);
  if (tokenLimit > 0 && usage.tokens >= tokenLimit) return `${usage.tokens.toLocaleString()} tokens >= ${Math.floor(tokenLimit).toLocaleString()} token limit`;
  const percentLimit = settingNumber("compaction.thresholdPercent", -1, ctx.cwd);
  if (percentLimit > 0 && usage.contextWindow) {
    const percent = (usage.tokens / usage.contextWindow) * 100;
    if (percent >= percentLimit) return `${percent.toFixed(1)}% context >= ${percentLimit}% limit`;
  }
  const idleLimit = settingNumber("compaction.idleThresholdTokens", DEFAULT_IDLE_COMPACTION_THRESHOLD_TOKENS, ctx.cwd);
  if (idleLimit > 0 && usage.tokens >= idleLimit) return `${usage.tokens.toLocaleString()} tokens >= ${Math.floor(idleLimit).toLocaleString()} idle threshold`;
  return undefined;
}
/** Alias of `BudgetTrigger` from `./budget-guard.js`; the result type of `budgetGuardTrigger`. */
export type BudgetGuardTrigger = BudgetTrigger;
/** Alias of `TranscriptRiskResult` from `./budget-guard.js`. */
export type TranscriptRiskState = TranscriptRiskResult;
/**
 * Evaluates the budget guard against the current context usage.
 *
 * The guard is meant to fire on agent_end, without waiting for idle, once
 * usage crosses either a percentage of the model window
 * (`compaction.budgetPercent`) or an absolute token count
 * (`compaction.budgetTokens`). The decision itself is made by
 * `computeBudgetTrigger`; its result carries a stable key per crossing so the
 * caller can avoid re-triggering while usage stays above the threshold.
 *
 * @returns `undefined` when `compaction.budgetGuardEnabled` is off or usage is
 * unknown; otherwise whatever `computeBudgetTrigger` returns.
 */
export function budgetGuardTrigger(ctx: ExtensionContext): BudgetGuardTrigger | undefined {
  const guardEnabled = settingBoolean("compaction.budgetGuardEnabled", true, ctx.cwd);
  if (!guardEnabled) return undefined;

  const usage = contextUsage(ctx);
  if (!usage) return undefined;

  const { contextWindow, tokens } = usage;
  const percentLimit = settingNumber("compaction.budgetPercent", DEFAULT_BUDGET_GUARD_PERCENT, ctx.cwd);
  const tokenLimit = settingNumber("compaction.budgetTokens", DEFAULT_BUDGET_GUARD_TOKENS, ctx.cwd);
  return computeBudgetTrigger({ contextWindow, enabled: true, percentLimit, tokenLimit, tokens });
}