/**
 * Rolling summarization — when a conversation's estimated token count
 * approaches the model's context window, summarize older turns and
 * inject the summary as a single synthetic system-role message.
 */
import { getContextWindow } from './cost-estimator'

/** Flat token charge applied to every image block (rough estimate). */
const IMAGE_TOKEN_ESTIMATE = 1500

/** Maximum number of characters of each message copied into the summary prompt. */
const MAX_CHARS_PER_MESSAGE = 1000

/**
 * Approximate the token count of a piece of text at ~4 characters per token.
 * Falsy input (empty string, or `undefined` coming from `JSON.stringify`)
 * counts as zero tokens.
 */
function estimateTokens(text: string): number {
  return Math.ceil((text || '').length / 4)
}

/**
 * Estimate the total token count of a list of messages.
 *
 * String content is measured directly. Array content is measured block by
 * block, using the first matching field of each block: `text`, `image`
 * (flat charge), `toolUse.input` (as JSON), or `toolResult.content` (as JSON).
 * Messages or blocks that are null/undefined, and content of any other shape,
 * contribute nothing.
 *
 * @param messages Conversation messages to measure.
 * @returns Approximate token count.
 */
export function estimateTotalTokens(messages: any[]): number {
  let total = 0
  for (const message of messages) {
    const content = message?.content
    if (typeof content === 'string') {
      total += estimateTokens(content)
      continue
    }
    if (!Array.isArray(content)) continue
    for (const block of content) {
      // Skip holes / null entries instead of throwing on property access.
      if (!block) continue
      if (block.text) total += estimateTokens(block.text)
      else if (block.image) total += IMAGE_TOKEN_ESTIMATE
      else if (block.toolUse) total += estimateTokens(JSON.stringify(block.toolUse.input))
      else if (block.toolResult?.content) total += estimateTokens(JSON.stringify(block.toolResult.content))
    }
  }
  return total
}

/**
 * Decide whether the conversation is large enough that older turns should be
 * summarized. This only makes the decision; the summarization itself is
 * delegated to the caller (e.g. via sub-agent).
 *
 * @param messages Conversation messages to measure.
 * @param model Model identifier passed to `getContextWindow`.
 * @param thresholdPct Percentage of the context window above which
 *   summarization is recommended (default 70).
 * @returns `true` when the estimated usage is strictly above `thresholdPct`
 *   percent of the model's context window; `false` otherwise, including when
 *   the reported window is not a positive number.
 */
export function shouldSummarize(messages: any[], model: string, thresholdPct = 70): boolean {
  // NOTE(review): getContextWindow lives in ./cost-estimator and is not visible
  // here; it is assumed to return the window size in tokens — confirm.
  const contextWindow = getContextWindow(model)
  // A zero, negative, or non-numeric window would turn the ratio below into
  // Infinity or NaN, so treat it as "cannot tell" rather than dividing by it.
  if (!(contextWindow > 0)) return false
  const tokens = estimateTotalTokens(messages)
  return (tokens * 100) / contextWindow > thresholdPct
}

/** Map a message role to the speaker label used in the summary prompt. */
function roleLabel(role: unknown): string {
  if (role === 'user') return 'User'
  // Earlier rolling summaries are injected as system-role messages; label them
  // as such so they are not attributed to the assistant.
  if (role === 'system') return 'System'
  return 'Assistant'
}

/**
 * Cut `text` to at most `maxChars` UTF-16 code units without leaving a
 * dangling high surrogate at the end.
 */
function truncate(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text
  const lastUnit = text.charCodeAt(maxChars - 1)
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
  return text.slice(0, endsOnHighSurrogate ? maxChars - 1 : maxChars)
}

/**
 * Create a plain-text summarization prompt from older messages.
 *
 * Each message becomes one `Role: content` paragraph. Array content is
 * flattened to its text blocks plus `[tool:NAME]` markers for tool-use blocks;
 * other block kinds are omitted. Each message's content is truncated to
 * `MAX_CHARS_PER_MESSAGE` characters.
 *
 * @param olderMessages Messages to be compressed into a summary.
 * @returns The prompt text to send to the summarizing model.
 */
export function summaryPrompt(olderMessages: any[]): string {
  let txt = 'Summarize this conversation concisely (under 500 words). Keep key facts, decisions, and unresolved questions.\n\n'
  for (const m of olderMessages) {
    const role = roleLabel(m?.role)
    const rawContent = m?.content
    const content = Array.isArray(rawContent)
      ? rawContent
          .map((b: any) => b?.text || (b?.toolUse ? `[tool:${b.toolUse.name}]` : ''))
          .filter(Boolean)
          .join(' ')
      : String(rawContent || '')
    txt += `${role}: ${truncate(content, MAX_CHARS_PER_MESSAGE)}\n\n`
  }
  return txt
}