/** * Shared utilities for compaction and branch summarization. */ import type { Message } from "@oh-my-pi/pi-ai"; import { formatGroupedPaths, prompt } from "@oh-my-pi/pi-utils"; import type { AgentMessage } from "../types"; import fileOperationsTemplate from "./prompts/file-operations.md" with { type: "text" }; import summarizationSystemPrompt from "./prompts/summarization-system.md" with { type: "text" }; // ============================================================================ // File Operation Tracking // ============================================================================ export interface FileOperations { read: Set; written: Set; edited: Set; } export function createFileOps(): FileOperations { return { read: new Set(), written: new Set(), edited: new Set(), }; } // Read-tool selector grammar, mirrored from the conservative filesystem splitter in // packages/coding-agent/src/tools/path-utils.ts (splitPathAndSel). Keep in sync. // A trailing `:chunk` is a selector only when it is a line-range list // (`50`, `50-200`, `50+10`, `5-16,960-973`, `..` alias), `raw`, or `conflicts` — // alone or as a `range:raw` / `raw:range` compound. const RANGE_CHUNK_SRC = String.raw`L?\d+(?:(?:[-+]|\.\.)L?\d+|-|\.\.)?`; const RANGE_LIST_SRC = `${RANGE_CHUNK_SRC}(?:,${RANGE_CHUNK_SRC})*`; const READ_SELECTOR_RE = new RegExp(`^(?:${RANGE_LIST_SRC}|raw|conflicts)$`, "i"); const READ_RANGE_ONLY_RE = new RegExp(`^${RANGE_LIST_SRC}$`, "i"); const READ_RAW_ONLY_RE = /^raw$/i; /** * Split a read-tool path into its base path and trailing selector, mirroring the * read tool's own splitter. Single source of the grammar in this package: the * file-operations list strips selectors via {@link stripReadSelector}, and the * supersede-prune pass keys on both parts via `readToolSupersedeKey`. */ export function splitReadSelector(path: string): { path: string; sel?: string } { const colon = path.lastIndexOf(":"); if (colon <= 0) return { path }; const candidate = path.slice(colon + 1); if (!READ_SELECTOR_RE.test(candidate)) return { path }; let base = path.slice(0, colon); let sel = candidate; // Compound trailing selector: `path:1-50:raw` or `path:raw:1-50`. const inner = base.lastIndexOf(":"); if (inner > 0) { const innerCandidate = base.slice(inner + 1); const innerIsRaw = READ_RAW_ONLY_RE.test(innerCandidate); const outerIsRaw = READ_RAW_ONLY_RE.test(candidate); const innerIsRange = READ_RANGE_ONLY_RE.test(innerCandidate); const outerIsRange = READ_RANGE_ONLY_RE.test(candidate); if ((innerIsRaw && outerIsRange) || (innerIsRange && outerIsRaw)) { sel = `${innerCandidate}:${candidate}`; base = base.slice(0, inner); } } return { path: base, sel }; } /** * Strip a trailing read-tool selector (`:50-200`, `:raw`, `:1-50:raw`, `:conflicts`, …) * so the same file read with different line ranges dedupes to one `` entry * and matches its write/edit path when computing Read/Write/RW markers. */ export function stripReadSelector(path: string): string { return splitReadSelector(path).path; } /** * Extract file operations from tool calls in an assistant message. */ export function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void { if (message.role !== "assistant") return; if (!("content" in message) || !Array.isArray(message.content)) return; for (const block of message.content) { if (typeof block !== "object" || block === null) continue; if (!("type" in block) || block.type !== "toolCall") continue; if (!("arguments" in block) || !("name" in block)) continue; const args = block.arguments as Record | undefined; if (!args) continue; const path = typeof args.path === "string" ? args.path : undefined; if (!path) continue; switch (block.name) { case "read": fileOps.read.add(stripReadSelector(path)); break; case "write": fileOps.written.add(path); break; case "edit": fileOps.edited.add(path); break; } } } /** * Compute final file lists from file operations. * Returns readFiles (files only read, not modified) and modifiedFiles. */ export function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } { const modified = new Set([...fileOps.edited, ...fileOps.written]); const readOnly = [...fileOps.read].filter(f => !modified.has(f)).sort(); const modifiedFiles = [...modified].sort(); return { readFiles: readOnly, modifiedFiles }; } /** * Format file operations as one `` tag: a grouped, prefix-folded * directory tree (find-tool shape — `# dir/` headers, bare basenames) with a * ` (Read)` / ` (Write)` / ` (RW)` marker per file instead of separate * read/modified lists. `readSet` is the cumulative read set (`fileOps.read`), * used to tell modified files that were also read (RW) from blind writes. */ const FILE_OPERATION_SUMMARY_LIMIT = 20; function stripFileOperationTags(summary: string): string { // Legacy / tags are still stripped so summaries // written before the combined tag self-heal on the next compaction. return summary .replace(/[\s\S]*?<\/files>\s*/g, "") .replace(/[\s\S]*?<\/read-files>\s*/g, "") .replace(/[\s\S]*?<\/modified-files>\s*/g, "") .trimEnd(); } export function formatFileOperations( readFiles: string[], modifiedFiles: string[], readSet?: ReadonlySet, ): string { if (readFiles.length === 0 && modifiedFiles.length === 0) return ""; const mode = new Map(); for (const file of readFiles) mode.set(file, "Read"); for (const file of modifiedFiles) mode.set(file, readSet?.has(file) ? "RW" : "Write"); const all = [...mode.keys()].sort(); let files = formatGroupedPaths(all.slice(0, FILE_OPERATION_SUMMARY_LIMIT), path => ` (${mode.get(path)})`); if (all.length > FILE_OPERATION_SUMMARY_LIMIT) { files += `\n… (${all.length - FILE_OPERATION_SUMMARY_LIMIT} more files omitted)`; } return prompt.render(fileOperationsTemplate, { files }); } export function upsertFileOperations( summary: string, readFiles: string[], modifiedFiles: string[], readSet?: ReadonlySet, ): string { const baseSummary = stripFileOperationTags(summary); const fileOperations = formatFileOperations(readFiles, modifiedFiles, readSet); if (!fileOperations) return baseSummary; if (!baseSummary) return fileOperations; return `${baseSummary}\n\n${fileOperations}`; } // ============================================================================ // Message Serialization // ============================================================================ /** Maximum characters for a tool result in serialized summaries. */ const TOOL_RESULT_MAX_CHARS = 2000; /** * Truncate text to a maximum character length for summarization. * Keeps the beginning and appends a truncation marker. */ function truncateForSummary(text: string, maxChars: number): string { if (text.length <= maxChars) return text; const truncatedChars = text.length - maxChars; return `${text.slice(0, maxChars)}\n\n[... ${truncatedChars} more characters truncated]`; } /** * Serialize LLM messages to text for summarization. * This prevents the model from treating it as a conversation to continue. * Call convertToLlm() first to handle custom message types. */ export function serializeConversation(messages: Message[]): string { const parts: string[] = []; for (const msg of messages) { if (msg.role === "user") { const content = typeof msg.content === "string" ? msg.content : msg.content .filter((c): c is { type: "text"; text: string } => c.type === "text") .map(c => c.text) .join(""); if (content) parts.push(`[User]: ${content}`); } else if (msg.role === "assistant") { const textParts: string[] = []; const thinkingParts: string[] = []; const toolCalls: string[] = []; for (const block of msg.content) { if (block.type === "text") { textParts.push(block.text); } else if (block.type === "thinking") { thinkingParts.push(block.thinking); } else if (block.type === "toolCall") { const args = block.arguments as Record; const argsStr = Object.entries(args) .map(([k, v]) => `${k}=${JSON.stringify(v)}`) .join(", "); toolCalls.push(`${block.name}(${argsStr})`); } } if (thinkingParts.length > 0) { parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`); } if (textParts.length > 0) { parts.push(`[Assistant]: ${textParts.join("\n")}`); } if (toolCalls.length > 0) { parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`); } } else if (msg.role === "toolResult") { const content = msg.content .filter((c): c is { type: "text"; text: string } => c.type === "text") .map(c => c.text) .join(""); if (content) { parts.push(`[Tool result]: ${truncateForSummary(content, TOOL_RESULT_MAX_CHARS)}`); } } } return parts.join("\n\n"); } // ============================================================================ // Summarization System Prompt // ============================================================================ export const SUMMARIZATION_SYSTEM_PROMPT = prompt.render(summarizationSystemPrompt);