/**
 * Copyright 2025 Vybestack LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import { type IContent, type ToolCallBlock } from './IContent.js';
import { EventEmitter } from 'events';
import { type TokensUpdatedEvent } from './HistoryEvents.js';
import type { DensityResult } from '../../core/compression/types.js';
/**
 * Typed EventEmitter for HistoryService events.
 *
 * Overloads constrain `on`/`emit`/`off` to the four known event names so
 * listeners and payloads are checked at compile time.
 */
interface HistoryServiceEventEmitter {
  on(
    event: 'tokensUpdated',
    listener: (eventData: TokensUpdatedEvent) => void,
  ): this;
  on(event: 'contentAdded', listener: (content: IContent) => void): this;
  on(event: 'compressionStarted', listener: () => void): this;
  on(
    event: 'compressionEnded',
    listener: (summary: IContent, itemsCompressed: number) => void,
  ): this;
  emit(event: 'tokensUpdated', eventData: TokensUpdatedEvent): boolean;
  emit(event: 'contentAdded', content: IContent): boolean;
  emit(event: 'compressionStarted'): boolean;
  emit(
    event: 'compressionEnded',
    summary: IContent,
    itemsCompressed: number,
  ): boolean;
  off(
    event: 'tokensUpdated',
    listener: (eventData: TokensUpdatedEvent) => void,
  ): this;
  off(event: 'contentAdded', listener: (content: IContent) => void): this;
  off(event: 'compressionStarted', listener: () => void): this;
  off(
    event: 'compressionEnded',
    listener: (summary: IContent, itemsCompressed: number) => void,
  ): this;
}
/**
 * Configuration for compression behavior.
 */
export interface CompressionConfig {
  // Milliseconds before an orphaned tool call is considered timed out.
  orphanTimeoutMs: number;
  // Message-count distance used when classifying a tool call as orphaned.
  // NOTE(review): exact semantics not visible here — confirm in implementation.
  orphanMessageDistance: number;
  // Grace period (ms) granted to pending operations before compression acts.
  pendingGracePeriodMs: number;
  // Minimum number of messages required before compression is attempted.
  minMessagesForCompression: number;
}
/**
 * Service for managing conversation history in a provider-agnostic way.
 * All history is stored as IContent. Providers are responsible for converting
 * to/from their own formats.
 *
 * NOTE(review): the original declaration file had its generic type arguments
 * stripped (bare `Promise` is invalid TypeScript). The `Promise<number>` /
 * `Promise<void>` / `Promise<IContent>` arguments below are reconstructed from
 * each method's documented contract — confirm against the implementation.
 */
export declare class HistoryService
  extends EventEmitter
  implements HistoryServiceEventEmitter
{
  private history;
  private totalTokens;
  private baseTokenOffset;
  private tokenizerCache;
  private tokenizerLock;
  private logger;
  private isCompressing;
  private pendingOperations;
  /**
   * Get or create tokenizer for a specific model
   */
  private getTokenizerForModel;
  /**
   * Generate a new canonical history tool ID.
   * Format: `hist_tool_<...>` — exact suffix was lost in the original doc
   * (angle-bracketed portion stripped); presumably derived from turnKey and
   * callIndex. TODO confirm against the implementation.
   */
  generateHistoryId(
    turnKey: string,
    callIndex: number,
    providerName?: string,
    rawId?: string,
    toolName?: string,
  ): string;
  /**
   * Get a callback suitable for passing into converters
   * which will generate normalized history IDs on demand.
   */
  getIdGeneratorCallback(turnKey?: string): () => string;
  generateTurnKey(): string;
  /**
   * Get the current total token count including base offset (system prompt).
   *
   * This value is used for compression threshold calculations and should always
   * reflect the total context size that will be sent to the API.
   *
   * @returns baseTokenOffset + totalTokens (history tokens)
   */
  getTotalTokens(): number;
  getBaseTokenOffset(): number;
  estimateTokensForText(text: string, modelName?: string): Promise<number>;
  /**
   * Set a base offset that is always included in the total token count.
   * Useful for accounting for system prompts or other fixed overhead.
   *
   * The system prompt token count should be set once at chat start using this method.
   * This offset is included in getTotalTokens() to ensure compression threshold
   * calculations account for the full context size (system prompt + history).
   *
   * NOTE: The system prompt itself is NEVER compressed - only conversation history
   * returned by getCurated() is subject to compression.
   *
   * @param offset - Number of tokens in the system prompt or fixed overhead
   */
  setBaseTokenOffset(offset: number): void;
  /**
   * Sync the total token count to match actual prompt tokens from a provider.
   * This adjusts the baseTokenOffset so estimates align with the real count.
   */
  syncTotalTokens(actualTotal: number): void;
  /**
   * Add content to the history
   * Note: We accept all content including empty responses for comprehensive history.
   * Filtering happens only when getting curated history.
   */
  add(content: IContent, modelName?: string): void;
  private addInternal;
  /**
   * Atomically update token count for new content
   */
  private updateTokenCount;
  /**
   * Estimate token count for content using tokenizer
   */
  private estimateContentTokens;
  /**
   * Simple token estimation for text
   */
  private simpleTokenEstimateForText;
  /**
   * Add multiple contents to the history
   */
  addAll(contents: IContent[], modelName?: string): void;
  /**
   * Estimate total tokens for hypothetical contents without mutating history.
   */
  estimateTokensForContents(
    contents: IContent[],
    modelName?: string,
  ): Promise<number>;
  /**
   * Wait for any in-flight token updates to complete.
   */
  waitForTokenUpdates(): Promise<void>;
  /**
   * Apply a density optimization result to the raw history.
   *
   * @plan PLAN-20260211-HIGHDENSITY.P08
   * @requirement REQ-HD-003.1, REQ-HD-003.2, REQ-HD-003.3, REQ-HD-001.6, REQ-HD-001.7
   * @pseudocode history-service.md lines 20-82
   */
  applyDensityResult(result: DensityResult): Promise<void>;
  /**
   * Return a read-only typed view of the backing history array.
   *
   * @plan PLAN-20260211-HIGHDENSITY.P08
   * @requirement REQ-HD-003.5
   * @pseudocode history-service.md lines 10-15
   */
  getRawHistory(): readonly IContent[];
  /**
   * Force a full token recalculation after density operations.
   *
   * @plan PLAN-20260211-HIGHDENSITY.P08
   * @requirement REQ-HD-003.6
   * @pseudocode history-service.md lines 90-120
   */
  recalculateTotalTokens(): Promise<void>;
  /**
   * Get all history
   */
  getAll(): IContent[];
  /**
   * Release all listeners and internal buffers to allow GC
   */
  dispose(): void;
  /**
   * Clear all history
   */
  clear(): void;
  private clearInternal;
  /**
   * Get the last N messages from history
   */
  getRecent(count: number): IContent[];
  /**
   * Get curated history (only valid, meaningful content)
   * Matches the behavior of extractCuratedHistory in geminiChat.ts:
   * - Always includes user/human messages
   * - Always includes tool messages
   * - Only includes AI messages if they are valid (have content)
   */
  getCurated(): IContent[];
  /**
   * Get comprehensive history (all content including invalid/empty)
   */
  getComprehensive(): IContent[];
  /**
   * Remove the last content if it matches the provided content
   */
  removeLastIfMatches(content: IContent): boolean;
  /**
   * Pop the last content from history
   */
  pop(): IContent | undefined;
  /**
   * Recalculate total tokens from scratch
   * Use this when removing content or when token counts might be stale
   */
  recalculateTokens(defaultModel?: string): Promise<void>;
  /**
   * Get the last user (human) content
   */
  getLastUserContent(): IContent | undefined;
  /**
   * Get the last AI content
   */
  getLastAIContent(): IContent | undefined;
  /**
   * Record a complete turn (user input + AI response + optional tool interactions)
   */
  recordTurn(
    userInput: IContent,
    aiResponse: IContent,
    toolInteractions?: IContent[],
  ): void;
  /**
   * Get the number of messages in history
   */
  length(): number;
  /**
   * Check if history is empty
   */
  isEmpty(): boolean;
  /**
   * Clone the history (deep copy)
   */
  clone(): IContent[];
  /**
   * Find unmatched tool calls (tool calls without responses)
   */
  findUnmatchedToolCalls(): ToolCallBlock[];
  /**
   * Validate and fix the history to ensure proper tool call/response pairing
   */
  validateAndFix(): void;
  /**
   * Get curated history with circular references removed for providers.
   * This ensures the history can be safely serialized and sent to providers.
   */
  getCuratedForProvider(tailContents?: IContent[]): IContent[];
  /**
   * Providers expect tool calls to come from the assistant and tool results to
   * come from the tool role. If history corruption produces a single "tool"
   * message that contains both tool_call and tool_response blocks, split the
   * tool_call blocks into a separate assistant message directly before the tool
   * message.
   */
  private splitToolCallsOutOfToolMessages;
  /**
   * Ensure every tool_response has a matching tool_call.
   * If compression removed the original tool_call, synthesize a minimal placeholder
   * so providers receive a structurally valid transcript without losing context.
   */
  private ensureToolCallContinuity;
  /**
   * Ensure every tool_call has a corresponding tool_response.
   *
   * Provider transcripts with orphaned tool calls can hard-fail strict APIs
   * (e.g., Anthropic requires tool_result blocks immediately after tool_use,
   * Gemini returns 400 if function response count doesn't match function call count,
   * OpenAI Chat tool messages must follow an assistant tool_calls message).
   *
   * For provider-visible payloads, synthesize a minimal "cancelled" tool result
   * so the transcript remains structurally valid.
   *
   * This is intentionally non-mutating: it does not modify the stored history,
   * only the provider-facing view.
   */
  private ensureToolResponseCompleteness;
  /**
   * Ensure tool responses appear immediately after the assistant message that
   * introduced their tool calls, and drop duplicate/out-of-order tool responses.
   *
   * Some providers strictly validate tool adjacency (e.g., OpenAI Chat tool
   * messages must follow an assistant tool_calls message; Anthropic tool_results
   * must correspond to tool_use blocks in the previous assistant message).
   */
  private ensureToolResponseAdjacency;
  /**
   * Deep clone content array, removing circular references
   */
  private deepCloneWithoutCircularRefs;
  /**
   * Sanitize parameters to remove circular references
   */
  private sanitizeParams;
  /**
   * Merge two histories, handling duplicates and conflicts
   */
  merge(other: HistoryService): void;
  /**
   * Get history within a token limit (for context window management)
   */
  getWithinTokenLimit(
    maxTokens: number,
    countTokensFn: (content: IContent) => number,
  ): IContent[];
  /**
   * Summarize older history to fit within token limits
   *
   * NOTE(review): `summarizeFn` presumably resolves to the summary content and
   * the method itself to completion only — reconstructed type arguments,
   * confirm against the implementation.
   */
  summarizeOldHistory(
    keepRecentCount: number,
    summarizeFn: (contents: IContent[]) => Promise<IContent>,
  ): Promise<void>;
  /**
   * Export history to JSON
   */
  toJSON(): string;
  /**
   * Import history from JSON
   */
  static fromJSON(json: string): HistoryService;
  /**
   * Mark compression as starting
   * This will cause add() operations to queue until compression completes
   */
  startCompression(): void;
  /**
   * Mark compression as complete
   * This will flush all queued operations.
   * When summary and itemsCompressed are provided, emits a compressionEnded
   * event so the recording service can log the compression.
   */
  endCompression(summary?: IContent, itemsCompressed?: number): void;
  /**
   * Wait for all pending operations to complete
   * For synchronous operations, this is now a no-op but kept for API compatibility
   */
  waitForPendingOperations(): Promise<void>;
  /**
   * Get conversation statistics
   */
  getStatistics(): {
    totalMessages: number;
    userMessages: number;
    aiMessages: number;
    toolCalls: number;
    toolResponses: number;
    totalTokens?: number;
  };
}
export {};