/**
 * Compaction composition: wires all three layers into the transformContext chain.
 *
 * Layer 1 (Microcompaction): tool result trimming at threshold crossings
 * Layer 2 (Compaction): conversation summarization via LLM
 * Layer 3 (Failsafe): emergency truncation, purely mechanical
 *
 * All three layers run inside transformContext, which fires before every LLM
 * call. Compaction is fully self-contained within Cortex; no external calls
 * from the backend are needed to trigger it. Layer 2 fires when token usage
 * exceeds 70% of the context window and a completeFn + source accessors are
 * provided. Layer 3 fires whenever tokens exceed 90% of the model's context
 * window.
 *
 * References:
 * - compaction-strategy.md
 * - phase-5-compaction.md (5.5)
 */
import type { AgentMessage, AgentContext } from '../context-manager.js';
import type { CortexLogger, CortexCompactionConfig, AdaptiveThresholdConfig, CompactionResult, CompactionDegradedInfo, CompactionExhaustedInfo } from '../types.js';
import type { CompleteFn, BeforeCompactionHandler, PostCompactionHandler, CompactionErrorHandler } from './compaction.js';
import type { ObservationalMemoryState, ObservationEvent, ReflectionEvent } from './observational/types.js';
import { type CacheRetention } from '../provider-registry.js';
export { MicrocompactionEngine, capToolResult } from './microcompaction.js';
export type { TrimAction, TrimState } from './microcompaction.js';
export { runCompaction, shouldCompact, partitionHistory, buildSummaryMessage } from './compaction.js';
export type { CompleteFn } from './compaction.js';
export { emergencyTruncate, shouldTruncate, isContextOverflow } from './failsafe.js';
export type { FailsafeTruncationResult } from './failsafe.js';
export { ObservationalMemoryEngine } from './observational/index.js';
export type { ObservationalMemoryConfig, ObservationalMemoryState, ObservationChunk, ObservationEvent, ReflectionEvent, RecallResult, RecallConfig } from './observational/types.js';
export { createRecallTool } from './observational/recall-tool.js';
/** Default settings for the adaptive (interaction-recency) Layer 2 threshold. */
export declare const ADAPTIVE_DEFAULTS: AdaptiveThresholdConfig;
/** Default compaction configuration, used as the base for partial overrides. */
export declare const DEFAULT_COMPACTION_CONFIG: CortexCompactionConfig;
/**
 * Build a full compaction config from partial overrides.
 *
 * @param partial - Optional subset of config fields to override
 * @returns A complete compaction config
 */
export declare function buildCompactionConfig(partial?: Partial<CortexCompactionConfig>): CortexCompactionConfig;
/**
 * Compute the effective Layer 2 compaction threshold adjusted by interaction
 * recency. When the user has not interacted recently, the threshold is lowered
 * (i.e., compaction fires sooner), reducing token costs for idle sessions.
 *
 * @param baseThreshold - The configured Layer 2 threshold (e.g., 0.70)
 * @param adaptiveConfig - Adaptive threshold configuration
 * @param lastInteractionTime - Timestamp (ms) of the last user interaction, or null if never
 * @param now - Current timestamp (ms), injectable for testing
 * @returns The adjusted threshold (always >= 0)
 */
export declare function computeAdaptiveThreshold(baseThreshold: number, adaptiveConfig: AdaptiveThresholdConfig, lastInteractionTime: number | null, now?: number): number;
/**
 * CompactionManager orchestrates all three compaction layers.
 *
 * It is stateful: it tracks the current token count and the microcompaction
 * cache. The CortexAgent creates one instance and delegates all compaction
 * decisions to it. Compaction is fully autonomous: all three layers run
 * inside applyInTransformContext(), which fires before every LLM call.
 */
export declare class CompactionManager {
    private readonly config;
    private readonly microcompaction;
    private readonly slotCount;
    private readonly _strategy;
    private observationalEngine;
    /** Post-hoc current-context token count, updated after each parent LLM call. */
    private _currentContextTokenCount;
    /** Context budget for Layer 1/2 compaction decisions (may be artificially limited). */
    private _contextWindow;
    /** Actual model context window for Layer 3 failsafe (never artificially limited). */
    private _modelContextWindow;
    /**
     * Timestamp (ms) of the last user interaction. Used by the adaptive
     * threshold system to decide how aggressively to compact. Updated by
     * the consumer (backend) when a message-triggered tick fires.
     * Null means no interaction has been recorded yet.
     */
    private _lastInteractionTime;
    /**
     * Timestamp (ms) of the last LLM call. Used by L1 to decide whether the
     * prompt cache has gone cold. Updated automatically in
     * updateCurrentContextTokenCount() (which fires after every LLM response).
     * Null means no LLM call has been recorded yet (treated as cold).
     */
    private _lastLlmCallTimestamp;
    /**
     * Effective cache TTL (ms) for the current provider + cache retention.
     * Zero means caching is unsupported or disabled, in which case L1 treats
     * the cache as perpetually cold (trim freely). Set via setCacheInfo().
     */
    private _providerCacheTtlMs;
    /** Consumer handlers for compaction lifecycle events. */
    private beforeCompactionHandlers;
    private postCompactionHandlers;
    private compactionErrorHandlers;
    private compactionResultHandlers;
    private compactionDegradedHandlers;
    private compactionExhaustedHandlers;
    /** Consecutive Layer 2 failure count for circuit breaker. Reset on success. */
    private _consecutiveLayer2Failures;
    /** LLM completion function, set by CortexAgent. */
    private completeFn;
    /** Logger for compaction diagnostics. */
    private logger;
    /**
     * @param config - The full compaction configuration
     * @param slotCount - Number of slot messages at the start of the context
     */
    constructor(config: CortexCompactionConfig, slotCount: number);
    /** Get the compaction strategy. */
    get strategy(): 'observational' | 'classic';
    /**
     * Set the context budget (the effective limit for Layer 1/2 compaction).
     * This may be smaller than the model's actual context window when a
     * user-configured limit is applied.
     */
    setContextWindow(contextWindow: number): void;
    /**
     * Set the model's actual context window (for Layer 3 failsafe only).
     * Layer 3 emergency truncation uses this to avoid dropping messages
     * when the model still has capacity, even if the user-configured
     * budget has been exceeded.
     *
     * Also used as a proxy for the utility model context window until the
     * actual utility model window is set via setUtilityModelContextWindow().
     */
    setModelContextWindow(modelContextWindow: number): void;
    /**
     * Set the LLM completion function for Layer 2 summarization.
     */
    setCompleteFn(fn: CompleteFn): void;
    /**
     * Set the LLM completion function for observational memory (utility model).
     */
    setObservationalCompleteFn(fn: CompleteFn): void;
    /**
     * Update the utility model context window for observer/reflector clamps.
     */
    setUtilityModelContextWindow(utilityModelContextWindow: number): void;
    /**
     * Set a logger for compaction diagnostics.
     */
    setLogger(logger: CortexLogger): void;
    /**
     * Signal when the user last interacted with the system.
     * The consumer (backend) calls this during GATHER when a message-triggered
     * tick fires. For interval ticks, it is not called, so the timestamp
     * naturally ages.
     */
    setLastInteractionTime(timestamp: number): void;
    /**
     * Get the timestamp of the last user interaction, or null if none recorded.
     */
    get lastInteractionTime(): number | null;
    /**
     * Set the active provider and cache retention. Resolves the effective
     * cache TTL from PROVIDER_CACHE_CONFIG and stores it for L1's cache-aware
     * gating. Called by CortexAgent at construction, on provider changes, and
     * on cache retention changes.
     *
     * @param provider - The active provider name (e.g., "anthropic", "openai")
     * @param cacheRetention - The configured cache retention ('none' | 'short' | 'long')
     */
    setCacheInfo(provider: string, cacheRetention: CacheRetention): void;
    /**
     * Check whether the prompt cache has gone cold (or is unused).
     *
     * Returns true when:
     * - Caching is unsupported / disabled (TTL <= 0), OR
     * - No LLM call has been recorded yet, OR
     * - The elapsed time since the last LLM call >= the cache TTL.
     *
     * @param now - Current timestamp (ms), injectable for testing
     */
    isCacheCold(now?: number): boolean;
    /**
     * Get the effective cache TTL (ms) for the current provider + retention.
     * Zero means caching is unsupported or disabled.
     */
    get providerCacheTtlMs(): number;
    /**
     * Get the timestamp of the last LLM call, or null if none recorded.
     */
    get lastLlmCallTimestamp(): number | null;
    /**
     * Compute the effective Layer 2 compaction threshold, adjusted for
     * interaction recency when adaptive thresholds are enabled.
     *
     * @param now - Current timestamp (ms), injectable for testing
     */
    getEffectiveThreshold(now?: number): number;
    /**
     * Update the post-hoc current-context token count from LLM usage data.
     */
    updateCurrentContextTokenCount(inputTokens: number): void;
    /**
     * Get the post-hoc current-context token count from the most recent parent turn.
     */
    get currentContextTokenCount(): number;
    /**
     * Get the context budget (effective limit for Layer 1/2).
     */
    get contextWindow(): number;
    /**
     * Get the model's actual context window (for Layer 3 failsafe).
     */
    get modelContextWindow(): number;
    /**
     * Get the current context usage ratio.
     */
    get usageRatio(): number;
    /**
     * Estimate current context tokens from a transformed AgentContext snapshot.
     *
     * Returns the larger of:
     * - the heuristic estimate of the provided context snapshot
     * - the post-hoc token count from the most recent parent turn
     *
     * This mirrors the compaction decision logic so consumers can reason about
     * context pressure using the same semantics Cortex uses internally.
     */
    estimateCurrentContextTokens(context: AgentContext): number;
    /**
     * Register a handler called before compaction starts (awaited).
     */
    onBeforeCompaction(handler: BeforeCompactionHandler): void;
    /**
     * Register a handler called after compaction completes.
     */
    onPostCompaction(handler: PostCompactionHandler): void;
    /**
     * Register a handler called if compaction fails.
     */
    onCompactionError(handler: CompactionErrorHandler): void;
    /**
     * Register a handler that receives the CompactionResult (for CortexAgent event emission).
     */
    onCompactionResult(handler: (result: CompactionResult) => void): void;
    /**
     * Register a handler called when Layer 2 failed and Layer 3 was used as fallback.
     */
    onCompactionDegraded(handler: (info: CompactionDegradedInfo) => void): void;
    /**
     * Register a handler called when all compaction layers have failed.
     */
    onCompactionExhausted(handler: (info: CompactionExhaustedInfo) => void): void;
    /**
     * Called at turn_end to trigger async buffer checks.
     */
    onTurnEnd(totalTokens: number, contextWindow: number, messages: AgentMessage[], slotCount: number): void;
    /**
     * Register observation event handler.
     */
    onObservation(handler: (event: ObservationEvent) => void): void;
    /**
     * Register reflection event handler.
     */
    onReflection(handler: (event: ReflectionEvent) => void): void;
    /**
     * Get observational memory state for persistence.
     */
    getObservationalMemoryState(): ObservationalMemoryState | null;
    /**
     * Restore observational memory state from a previous session.
     *
     * @param state - the persisted observational memory state
     * @param historyLength - length of the restored post-slot conversation
     *   history, used to clamp the buffer watermark against drift
     */
    restoreObservationalMemoryState(state: ObservationalMemoryState, historyLength?: number): void;
    /**
     * Force a synchronous observation cycle.
     *
     * NOTE(review): the promise's resolved type was missing from this
     * declaration; `void` is assumed — confirm against the implementation.
     */
    triggerObservation(messages: AgentMessage[], slotCount: number): Promise<void>;
    /**
     * Get the observation slot content string (for ContextManager.setSlot).
     */
    getObservationSlotContent(): string;
    /**
     * Whether observations have been produced (non-empty observation text).
     */
    hasObservations(): boolean;
    /**
     * Whether the recall tool should be registered.
     */
    hasRecallTool(): boolean;
    /**
     * Get the recall config if available.
     */
    getRecallConfig(): import("./observational/types.js").RecallConfig | undefined;
    /**
     * Current token count of activated observations only.
     * Returns 0 when not using the observational strategy.
     */
    getObservationTokenCount(): number;
    /**
     * Whether the observer or reflector is currently running in the background.
     * Returns false when not using the observational strategy.
     */
    isObservationalProcessing(): boolean;
    /**
     * Whether the observer specifically is in-flight.
     */
    isObserverInFlight(): boolean;
    /**
     * Whether the reflector specifically is in-flight.
     */
    isReflectorInFlight(): boolean;
    /**
     * Cap a tool result at insertion time (before it enters conversation history).
     */
    capToolResult(content: string): string;
    /**
     * Apply insertion-time cap to all uncapped tool results in the source
     * messages array (mutates in place).
     *
     * Called from the transformContext hook on `agent.state.messages` so that
     * Tier 1 capping is automatically applied when tool results enter
     * conversation history through pi-agent-core's internal tool execution
     * loop. The cap is applied at most once per tool result part; already
     * capped content (containing the insertion marker) is skipped.
     *
     * NOTE(review): the promise's resolved type was missing from this
     * declaration; `void` is assumed because the array is mutated in place —
     * confirm against the implementation.
     *
     * @param messages - The source messages array (mutated in place)
     * @param slotCount - Number of slot messages to skip at the start
     */
    applyInsertionCap(messages: AgentMessage[], slotCount: number): Promise<void>;
    /**
     * Apply compaction layers to the context in transformContext.
     *
     * This is the main entry point called from CortexAgent.getTransformContextHook().
     * It is fully self-contained: all three compaction layers are integrated here,
     * triggered autonomously based on token thresholds. No external calls from
     * the backend are needed to trigger compaction.
     *
     * Execution order:
     * 1. Layer 1 (microcompaction): tool result trimming at threshold crossings
     * 2. Layer 2 (summarization): if tokens exceed 70% after Layer 1, run LLM
     *    summarization on the source transcript, then rebuild context from the
     *    updated messages
     * 3. Layer 3 (failsafe): if tokens still exceed 90% after Layers 1-2,
     *    emergency truncation drops the oldest turns
     *
     * @param context - The AgentContext from transformContext
     * @param getHistory - Function to get conversation history from the context
     * @param setHistory - Function to set conversation history in the context
     * @param getSourceHistory - Function to get the original source transcript history (post-slot)
     * @param setSourceHistory - Function to replace the original source transcript history
     * @returns Modified context with compacted history
     */
    applyInTransformContext(context: AgentContext, getHistory: (ctx: AgentContext) => AgentMessage[], setHistory: (ctx: AgentContext, history: AgentMessage[]) => AgentContext, getSourceHistory?: () => AgentMessage[], setSourceHistory?: (history: AgentMessage[]) => void): Promise<AgentContext>;
    /**
     * Manually check if compaction is needed and run it.
     *
     * This is a convenience API for consumers who want to trigger compaction
     * outside the agentic loop (e.g., for testing or manual maintenance).
     * The primary compaction trigger is `applyInTransformContext`, which runs
     * automatically before every LLM call.
     *
     * @param getHistory - Get current conversation history
     * @param setHistory - Replace conversation history
     * @returns CompactionResult if compaction ran, null otherwise
     */
    checkAndRunCompaction(getHistory: () => AgentMessage[], setHistory: (history: AgentMessage[]) => void): Promise<CompactionResult | null>;
    /**
     * Handle a context overflow error by performing emergency truncation.
     * Called when the API returns a context overflow error.
     *
     * @param getHistory - Get current conversation history
     * @param setHistory - Replace conversation history
     */
    handleOverflowError(getHistory: () => AgentMessage[], setHistory: (history: AgentMessage[]) => void): void;
    /**
     * Clear all state and handlers.
     */
    destroy(): void;
    /**
     * Estimate tokens for a set of history messages.
     */
    private estimateHistoryTokens;
    /**
     * Estimate total context tokens from an AgentContext object.
     */
    private estimateContextTokens;
}
//# sourceMappingURL=index.d.ts.map