import { LanguageModel, FinishReason } from 'ai';
import { z } from 'zod';
import {
  Context,
  InferOptions,
  Owned,
  WarmupResult,
  WarmupExecutor,
  SchemaConflictResult,
  SchemaConflictBehavior,
} from '@mullion/core';
export { Context, InferOptions, Owned, Schema } from '@mullion/core';

/**
 * Provider capability matrix for cache optimization.
 */

/**
 * Describes the prompt-caching features of one provider/model pair.
 */
interface CacheCapabilities {
  /** True when this provider/model supports caching at all. */
  readonly supported: boolean;
  /** Smallest token count for which caching is effective. */
  readonly minTokens: number;
  /** Upper bound on the number of cache breakpoints. */
  readonly maxBreakpoints: number;
  /** True when cache entries can carry a TTL (time-to-live). */
  readonly supportsTtl: boolean;
  /** The concrete TTL values this provider/model accepts. */
  readonly supportedTtl: ReadonlyArray<'5m' | '1h'>;
  /** True when tool/function calls can be cached. */
  readonly supportsToolCaching: boolean;
  /** True when caching is automatic (like OpenAI) rather than explicit. */
  readonly isAutomatic: boolean;
}

/**
 * LLM providers known to the cache optimization layer.
 */
type Provider = 'anthropic' | 'openai' | 'google' | 'other';

/**
 * Look up the cache capabilities of a provider/model combination.
 */
declare function getCacheCapabilities(
  provider: Provider,
  model: string,
): CacheCapabilities;

/**
 * Report whether the given provider/model supports one caching feature.
 */
declare function supportsCacheFeature(
  provider: Provider,
  model: string,
  feature: 'ttl' | 'toolCaching' | 'automatic',
): boolean;

/**
 * Resolve the effective cache breakpoint limit for a provider/model.
 */
declare function getEffectiveBreakpointLimit(
  provider: Provider,
  model: string,
  maxPractical?: number,
): number;

/**
 * Validate if a TTL value is supported by a provider/model.
*/
declare function isValidTtl(
  provider: Provider,
  model: string,
  ttl: '5m' | '1h',
): boolean;

/**
 * Return the cache strategy recommended for a provider/model combination.
 */
declare function getRecommendedCacheStrategy(
  provider: Provider,
  model: string,
): 'explicit-segments' | 'automatic-optimization' | 'disabled';

/**
 * Provider-agnostic cache configuration types for Mullion
 *
 * This module defines the new cache configuration system with cleaner
 * abstractions and provider-specific adapters as specified in Task 7.2.
 */

/**
 * Allowed time-to-live durations for cache entries.
 *
 * Restricted to the TTL values that providers were found to support.
 */
type CacheTTL = '5m' | '1h';

/**
 * Security scope levels for cache content.
 *
 * Expresses which kinds of content may be cached, based on data
 * sensitivity and privacy requirements.
 */
type CacheScope = 'system-only' | 'developer-content' | 'allow-user-content';

/**
 * Provider-agnostic cache configuration.
 *
 * The developer-facing configuration shape; adapters translate it into
 * provider-specific options.
 */
interface CacheConfig {
  /** Turns caching on or off. */
  readonly enabled: boolean;
  /** Security scope for cache operations (default: 'developer-content'). */
  readonly scope?: CacheScope;
  /** Time-to-live for cache entries (default: '5m'). */
  readonly ttl?: CacheTTL;
  /** How many cache breakpoints to use (1-4, default: 1). */
  readonly breakpoints?: number;
}

/**
 * Anthropic provider-specific cache options.
 *
 * Mirrors Anthropic's cache_control API format.
 */
interface AnthropicProviderOptions {
  /** Cache control directives sent with the request. */
  readonly cache?: {
    readonly type: 'ephemeral';
    readonly ttl?: CacheTTL;
  };
  /** How many cache breakpoints to use. */
  readonly breakpoints: number;
}

/**
 * OpenAI provider-specific cache options.
 *
 * OpenAI uses automatic caching, so this mainly contains metadata.
*/
interface OpenAIProviderOptions {
  /** True when automatic caching is enabled. */
  readonly autoCaching: boolean;
  /** Settings for tool/function call caching. */
  readonly toolCaching?: {
    readonly enabled: boolean;
  };
}

/**
 * Google provider-specific cache options.
 *
 * Gemini prompt caching requires a pre-created cached content resource.
 */
interface GoogleProviderOptions {
  /** Name of the cached content resource (for example: cachedContents/abc123). */
  readonly cachedContent?: string;
}

/**
 * Cache config extended with a Gemini cached content handle.
 */
interface GeminiCacheConfig extends CacheConfig {
  /** Name of a cached content resource created via the Gemini API. */
  readonly cachedContent?: string;
}

/**
 * Union of every provider-specific options shape.
 */
type ProviderOptions =
  | AnthropicProviderOptions
  | OpenAIProviderOptions
  | GoogleProviderOptions;

/**
 * Adapter that turns the abstract cache config into Anthropic options.
 */
interface AnthropicCacheAdapter {
  /**
   * Translate an abstract CacheConfig into Anthropic provider options.
   *
   * @param config - Abstract cache configuration
   * @returns Anthropic provider options
   */
  toProviderOptions(config: CacheConfig): AnthropicProviderOptions;
}

/**
 * Adapter that turns the abstract cache config into OpenAI options.
 */
interface OpenAICacheAdapter {
  /**
   * Translate an abstract CacheConfig into OpenAI provider options.
   *
   * @param config - Abstract cache configuration
   * @returns OpenAI provider options
   */
  toProviderOptions(config: CacheConfig): OpenAIProviderOptions;
}

/**
 * Adapter that turns the abstract cache config into Google options.
 */
interface GeminiCacheAdapter {
  /**
   * Translate an abstract cache config into Google provider options.
   *
   * @param config - Cache configuration with optional cachedContent handle
   * @returns Google provider options
   */
  toProviderOptions(config: GeminiCacheConfig): GoogleProviderOptions;
}

/**
 * Validation result for cache configuration.
*/
interface ValidationResult$1 {
  /** True when the configuration passed validation. */
  readonly valid: boolean;
  /** Validation errors found. */
  readonly errors: Array<string>;
  /** Non-blocking issues found. */
  readonly warnings: Array<string>;
}

/**
 * Check the TTL ordering constraint.
 *
 * For providers like Anthropic, longer TTLs must come before shorter TTLs
 * in the same request to ensure proper cache hierarchy.
 *
 * @param segments - Cache segments carrying TTL values
 * @returns Validation result
 */
declare function validateTtlOrdering(
  segments: Array<{ ttl?: CacheTTL }>,
): ValidationResult$1;

/**
 * Check a requested breakpoint count against provider capabilities.
 *
 * @param count - Number of breakpoints requested
 * @param provider - Target provider
 * @param model - Target model
 * @returns Validation result
 */
declare function validateBreakpointLimit(
  count: number,
  provider: Provider,
  model: string,
): ValidationResult$1;

/**
 * Check a token count against the provider's minimum token threshold.
 *
 * @param tokens - Estimated token count for content
 * @param provider - Target provider
 * @param model - Target model
 * @returns Validation result
 */
declare function validateMinTokens(
  tokens: number,
  provider: Provider,
  model: string,
): ValidationResult$1;

/**
 * Build an Anthropic cache adapter with model-specific capabilities.
 *
 * @param model - Anthropic model name
 * @returns Configured adapter instance
 */
declare function createAnthropicAdapter(model: string): AnthropicCacheAdapter;

/**
 * Build an OpenAI cache adapter with model-specific capabilities.
 *
 * @param model - OpenAI model name
 * @returns Configured adapter instance
 */
declare function createOpenAIAdapter(model: string): OpenAICacheAdapter;

/**
 * Create a Gemini cache adapter with model-specific capabilities.
 *
 * Gemini caching is explicit and requires a pre-created cached content
 * resource id. When unavailable, adapter returns an empty options object.
*
 * @param model - Gemini model name
 * @returns Configured adapter instance
 */
declare function createGeminiAdapter(model: string): GeminiCacheAdapter;

/**
 * Default cache configuration factory.
 *
 * @param overrides - Subset of CacheConfig fields that override the defaults
 * @returns Complete cache configuration with defaults
 */
declare function createDefaultCacheConfig(
  overrides?: Partial<CacheConfig>,
): CacheConfig;

/**
 * User content cache configuration factory.
 *
 * Safe-by-default configuration for user-provided content.
 *
 * @param overrides - Subset of CacheConfig fields that override the defaults
 * @returns Cache configuration safe for user content
 */
declare function createUserContentConfig(
  overrides?: Partial<CacheConfig>,
): CacheConfig;

/**
 * Developer content cache configuration factory.
 *
 * Optimized configuration for developer-controlled content.
 *
 * @param overrides - Subset of CacheConfig fields that override the defaults
 * @returns Cache configuration optimized for developer content
 */
declare function createDeveloperContentConfig(
  overrides?: Partial<CacheConfig>,
): CacheConfig;

/**
 * Cache segments API - First-class primitive for cache management
 *
 * This module provides the CacheSegmentManager class and related utilities
 * for managing cache segments as specified in Task 7.3. It includes
 * validation, token estimation, and safe-by-default policies.
 */

/**
 * Options for creating a cache segment.
 */
interface SegmentOptions {
  /** Security scope for this cache segment (overrides default) */
  readonly scope?: CacheScope;
  /** Time-to-live for this cache entry (overrides default) */
  readonly ttl?: CacheTTL;
  /** Minimum token count to trigger caching (overrides default) */
  readonly minTokens?: number;
  /** Whether to force caching even if below provider minimum */
  readonly force?: boolean;
}

/**
 * Metadata about a cache segment.
*/
interface CacheSegment {
  /** Cache key that uniquely identifies this segment. */
  readonly key: string;
  /** The cached content, as a string. */
  readonly content: string;
  /** Estimated number of tokens in the content. */
  readonly tokenCount: number;
  /** Time-to-live of this cache entry. */
  readonly ttl: CacheTTL;
  /** Security scope of this segment. */
  readonly scope: CacheScope;
  /** Timestamp at which the segment was created. */
  readonly createdAt: number;
}

/**
 * Outcome of validating a cache operation.
 */
interface ValidationResult {
  /** True when the operation is valid. */
  readonly valid: boolean;
  /** Blocking validation errors. */
  readonly errors: Array<string>;
  /** Non-blocking warnings. */
  readonly warnings: Array<string>;
}

/**
 * Manages the cache segments that belong to one context.
 *
 * Provides the core cache segment functionality specified in Task 7.3,
 * including validation, token estimation, and safety features.
 */
declare class CacheSegmentManager {
  private readonly provider;
  private readonly model;
  private readonly capabilities;
  private readonly segments;
  private readonly defaultConfig;

  /**
   * Construct a cache segment manager.
   *
   * @param provider - LLM provider
   * @param model - Model name
   * @param config - Default cache configuration
   */
  constructor(provider: Provider, model: string, config: CacheConfig);

  /**
   * Register an explicit cache segment for arbitrary content.
   *
   * @param key - Unique cache key for this segment
   * @param content - Content to cache (will be stringified if object)
   * @param options - Cache segment options
   * @throws Error if validation fails and force is not enabled
   */
  segment(key: string, content: string | object, options?: SegmentOptions): void;

  /**
   * Cache a system prompt using optimized defaults.
   *
   * Convenience method for system prompts which use 'system-only' scope
   * and default to longer TTL ('24h' equivalent, but we only support '1h').
   *
   * @param systemPrompt - The system prompt content
   * @param options - Optional overrides for system prompt caching
   */
  system(systemPrompt: string, options?: SegmentOptions): void;

  /**
   * List every segment held by this manager.
   *
   * @returns Array of all cache segments
   */
  getSegments(): ReadonlyArray<CacheSegment>;

  /**
   * Remove every segment from this manager.
   */
  clear(): void;

  /**
   * Validate the current segments against the current model.
   *
   * @returns Validation result
   */
  validate(): ValidationResult;

  /**
   * Validate the current segments against another target model.
   *
   * @param model - Target model to validate against
   * @returns Validation result
   */
  validateForModel(model: string): ValidationResult;

  /**
   * Estimate the token count of content with a simple heuristic.
   *
   * Uses ~4 characters per token approximation which works reasonably
   * well for English text and JSON structures.
   *
   * @param content - Content to estimate
   * @returns Estimated token count
   */
  estimateTokens(content: string): number;

  /**
   * Decide whether content should be cached given configuration and constraints.
   *
   * @param content - Content to check
   * @param options - Cache options
   * @returns Whether content should be cached
   */
  shouldCache(content: string, options?: SegmentOptions): boolean;

  /**
   * Sum the estimated tokens of all segments.
   *
   * @returns Total token count
   */
  getTotalTokens(): number;

  /**
   * List the segments that have a given scope.
   *
   * @param scope - Scope to filter by
   * @returns Segments with the specified scope
   */
  getSegmentsByScope(scope: CacheScope): ReadonlyArray<CacheSegment>;

  /**
   * List the segments that have a given TTL.
   *
   * @param ttl - TTL to filter by
   * @returns Segments with the specified TTL
   */
  getSegmentsByTtl(ttl: CacheTTL): ReadonlyArray<CacheSegment>;

  /**
   * Validate a single segment before it is created.
   *
   * @param key - Segment key
   * @param content - Segment content
   * @param scope - Security scope
   * @param ttl - Time-to-live
   * @param tokenCount - Estimated tokens
   * @param minTokens - Minimum token threshold
   * @param force - Whether to force caching
   * @returns Validation result
   */
  private validateSegment;
}

/**
 * Factory for cache segment managers.
 *
 * @param provider - LLM provider
 * @param model - Model name
 * @param config - Cache configuration
 * @returns New cache segment manager instance
 */
declare function createCacheSegmentManager(
  provider: Provider,
  model: string,
  config: CacheConfig,
): CacheSegmentManager;

/**
 * Cache metrics collection and analysis for Mullion
 *
 * This module provides provider-agnostic cache metrics collection,
 * parsing provider-specific response formats, and calculating
 * cost savings from cache utilization.
 */

/**
 * Provider-agnostic cache statistics.
 *
 * Normalized metrics that work across different LLM providers,
 * following the Task 7.4 specification for consistent tracking
 * and reporting regardless of provider implementation.
*/
interface CacheStats {
  /** Provider these metrics came from. */
  readonly provider: 'anthropic' | 'openai' | 'google' | 'unknown';
  /** Tokens written to cache (cache creation cost). */
  readonly cacheWriteTokens: number;
  /** Tokens read from cache (cache hit benefit). */
  readonly cacheReadTokens: number;
  /** Total input tokens of the request. */
  readonly inputTokens: number;
  /** Total output tokens generated. */
  readonly outputTokens: number;
  /** Total tokens saved through cache usage. */
  readonly savedTokens: number;
  /** Cache hit rate (0-1, where 1 = 100% cache hit). */
  readonly cacheHitRate: number;
  /** Estimated USD saved through cache usage. */
  readonly estimatedSavingsUsd: number;
  /** Time-to-live of the cached content. */
  readonly ttl?: CacheTTL;
  /** Number of cache breakpoints the request used. */
  readonly breakpointsUsed?: number;
  /** Raw provider-specific metrics, kept for debugging. */
  readonly raw?: unknown;
}

/**
 * Cache metrics in the shape of Anthropic's API response.
 */
interface AnthropicCacheMetrics {
  /** Total input tokens of the request. */
  readonly input_tokens: number;
  /** Total output tokens generated. */
  readonly output_tokens: number;
  /** Input tokens that triggered cache creation. */
  readonly cache_creation_input_tokens?: number;
  /** Input tokens served from cache. */
  readonly cache_read_input_tokens?: number;
  /** Any additional usage metadata. */
  readonly [key: string]: unknown;
}

/**
 * OpenAI-specific cache metrics from their API response.
*/
interface OpenAICacheMetrics {
  /** Total prompt tokens of the request. */
  readonly prompt_tokens: number;
  /** Total completion tokens generated. */
  readonly completion_tokens: number;
  /** Total tokens used. */
  readonly total_tokens: number;
  /** Breakdown of the prompt tokens. */
  readonly prompt_tokens_details?: {
    /** Tokens served from cache. */
    readonly cached_tokens?: number;
    /** Audio tokens (if applicable). */
    readonly audio_tokens?: number;
  };
  /** Any additional usage metadata. */
  readonly [key: string]: unknown;
}

/**
 * Cache metrics in the shape of Google's provider metadata.
 */
interface GoogleCacheMetrics {
  /**
   * Provider metadata payload from @ai-sdk/google:
   * providerMetadata.google.usageMetadata
   */
  readonly usageMetadata?: {
    /** Total input prompt tokens. */
    readonly promptTokenCount?: number;
    /** Total output/candidate tokens. */
    readonly candidatesTokenCount?: number;
    /** Total request tokens. */
    readonly totalTokenCount?: number;
    /** Tokens loaded from cached content. */
    readonly cachedContentTokenCount?: number;
  };
  /** Fallback shape for when usageMetadata is already flattened. */
  readonly promptTokenCount?: number;
  readonly candidatesTokenCount?: number;
  readonly totalTokenCount?: number;
  readonly cachedContentTokenCount?: number;
  /** Any additional usage metadata. */
  readonly [key: string]: unknown;
}

/**
 * Parse Anthropic cache metrics from an API response.
 *
 * Reads the cache-specific fields of Anthropic's usage object,
 * namely cache_creation_input_tokens and cache_read_input_tokens.
 *
 * @param usage - Anthropic usage object from API response
 * @param provider - Provider identifier
 * @param model - Model identifier
 * @returns Normalized cache statistics
 */
declare function parseAnthropicMetrics(
  usage: AnthropicCacheMetrics,
  provider: "anthropic" | undefined,
  model: string,
): CacheStats;

/**
 * Parse OpenAI cache metrics from API response.
 *
 * Extracts cache-specific metrics from OpenAI's usage object,
 * handling the prompt_tokens_details.cached_tokens field.
*
 * @param usage - OpenAI usage object from API response
 * @param provider - Provider identifier
 * @param model - Model identifier
 * @returns Normalized cache statistics
 */
declare function parseOpenAIMetrics(
  usage: OpenAICacheMetrics,
  provider: "openai" | undefined,
  model: string,
): CacheStats;

/**
 * Parse Google cache metrics from provider metadata.
 *
 * Extracts cache usage from usageMetadata.cachedContentTokenCount.
 *
 * @param usage - Google usage metadata (nested or flattened shape)
 * @param provider - Provider identifier
 * @param model - Model identifier
 * @returns Normalized cache statistics
 */
declare function parseGoogleMetrics(
  usage: GoogleCacheMetrics,
  provider: "google" | undefined,
  model: string,
): CacheStats;

/**
 * Generic cache metrics parser that routes to provider-specific parsers.
 *
 * Accepts the raw usage object of any provider; the caller supplies the
 * provider and model identifiers.
 *
 * @param usage - Raw usage metrics from any provider
 * @param provider - LLM provider identifier
 * @param model - Model identifier
 * @returns Normalized cache statistics
 */
declare function parseCacheMetrics(
  usage: Record<string, unknown>,
  provider: Provider,
  model: string,
): CacheStats;

/**
 * Aggregate multiple cache metrics into cumulative statistics.
 *
 * Combines metrics from multiple API calls to provide session-level
 * or period-level aggregate cache performance statistics.
 *
 * @param metrics - Array of individual cache statistics
 * @returns Aggregated cache statistics
 */
declare function aggregateCacheMetrics(metrics: CacheStats[]): CacheStats;

/**
 * Calculate potential cache savings for a given request.
 *
 * Estimates how much could be saved if caching were optimally configured
 * for the given content length and provider/model combination.
*
 * @param contentTokens - Number of tokens in the content to cache
 * @param requestCount - Expected number of requests that would benefit
 * @param provider - LLM provider
 * @param model - Model identifier
 * @returns Estimated savings information
 */
declare function estimateCacheSavings(
  contentTokens: number,
  requestCount: number,
  provider: Provider,
  model: string,
): {
  readonly potentialSavedTokens: number;
  readonly potentialSavedUsd: number;
  readonly cacheEffective: boolean;
  readonly recommendation: string;
};

/**
 * Format cache statistics for human-readable display.
 *
 * Creates a formatted string representation of cache metrics
 * suitable for logging, debugging, or user interfaces.
 *
 * @param stats - Cache statistics to format
 * @returns Formatted string representation
 */
declare function formatCacheStats(stats: CacheStats): string;

/**
 * Session-level cache metrics collector.
 *
 * Tracks cache metrics across multiple requests in a session,
 * providing cumulative statistics and insights.
 */
declare class CacheMetricsCollector {
  private metrics;
  private readonly provider;
  private readonly model;

  /**
   * @param provider - LLM provider the collected metrics belong to
   * @param model - Model identifier the collected metrics belong to
   */
  constructor(provider: Provider, model: string);

  /**
   * Add metrics from a single API call.
   *
   * @param usage - Raw usage metrics from provider
   */
  addMetrics(usage: Record<string, unknown>): void;

  /**
   * Get aggregated metrics for all collected data.
   *
   * @returns Cumulative cache statistics
   */
  getAggregatedStats(): CacheStats;

  /**
   * Get individual metrics for each API call.
   *
   * @returns Array of individual cache statistics
   */
  getIndividualStats(): readonly CacheStats[];

  /**
   * Clear all collected metrics.
   */
  clear(): void;

  /**
   * Get metrics count.
   *
   * @returns Number of API calls tracked
   */
  getCallCount(): number;
}

/**
 * Model pricing data for cost calculation
 * @module cost/pricing
 */

/**
 * Pricing information for a specific model
 */
interface ModelPricing {
  /** Model identifier */
  model: string;
  /** Provider name */
  provider: 'anthropic' | 'openai' | 'google' | 'unknown';
  /** USD per 1M input tokens */
  inputPer1M: number;
  /** USD per 1M output tokens */
  outputPer1M: number;
  /** USD per 1M cached input tokens (cache read) */
  cachedInputPer1M?: number;
  /** USD per 1M cache write tokens */
  cacheWritePer1M?: number;
  /** Date when pricing was last updated (ISO format) */
  asOfDate: string;
}

/**
 * Every pricing provider except the 'unknown' placeholder.
 */
type KnownProvider = Exclude<ModelPricing['provider'], 'unknown'>;

/**
 * Complete baseline pricing database (snapshot as of 2026-02-09).
 */
declare const PRICING_DATA: Record<string, ModelPricing>;

/**
 * Get pricing for a specific model.
 *
 * Precedence: runtime catalog override > user overrides > baseline pricing.
 *
 * @param model - Model identifier
 * @param overrides - Optional user-supplied pricing fields
 * @returns Effective pricing for the model
 */
declare function getPricing(
  model: string,
  overrides?: Partial<ModelPricing>,
): ModelPricing;

/**
 * Get all available model pricing data (baseline + runtime catalog models).
 */
declare function getAllPricing(): ModelPricing[];

/**
 * Get pricing for all models from a specific provider.
 */
declare function getPricingByProvider(provider: KnownProvider): ModelPricing[];

/**
 * Calculate cache write pricing for a specific TTL.
 */
declare function calculateCacheWritePricing(
  basePricing: ModelPricing,
  ttl: '5m' | '1h',
): number;

/**
 * Export effective pricing data as JSON string.
 */
declare function exportPricingAsJSON(pretty?: boolean): string;

/**
 * Import pricing data from JSON.
*/
declare function importPricingFromJSON(json: string): Record<string, ModelPricing>;

/**
 * Cost calculation for LLM API calls with cache savings analysis
 * @module cost/calculator
 */

/**
 * Token usage information from API response
 */
interface TokenUsage {
  /** Total input tokens (including cached) */
  inputTokens: number;
  /** Total output tokens generated */
  outputTokens: number;
  /** Tokens read from cache (if applicable) */
  cachedTokens?: number;
}

/**
 * Detailed cost breakdown for an LLM API call
 */
interface CostBreakdown {
  /** Cost of input tokens (non-cached) */
  inputCost: number;
  /** Cost of output tokens */
  outputCost: number;
  /** Cost of writing to cache */
  cacheWriteCost: number;
  /** Cost of reading from cache */
  cacheReadCost: number;
  /** Total cost of the API call */
  totalCost: number;
  /** Amount saved vs no cache (can be negative if cache write cost > savings) */
  savings: number;
  /** Percentage saved vs no cache (can be negative) */
  savingsPercent: number;
  /** Cost breakdown without any caching (for comparison) */
  noCacheCost: number;
  /** Model pricing used for calculation */
  pricing: ModelPricing;
}

/**
 * Calculate cost breakdown for an LLM API call
 *
 * @param usage - Token usage from API response
 * @param cacheStats - Cache statistics (if caching was used)
 * @param model - Model identifier
 * @param pricingOverrides - Optional pricing overrides
 * @returns Detailed cost breakdown with savings analysis
 *
 * @example
 * ```typescript
 * const usage = { inputTokens: 10000, outputTokens: 500 };
 * const cacheStats = { cacheReadTokens: 8000, cacheWriteTokens: 0, ... };
 * const cost = calculateCost(usage, cacheStats, 'claude-3-5-sonnet-20241022');
 *
 * console.log(cost.totalCost); // Total cost in USD
 * console.log(cost.savings); // Amount saved by caching
 * console.log(cost.savingsPercent); // Percentage saved
 * ```
 */
declare function calculateCost(
  usage: TokenUsage,
  cacheStats: CacheStats | null,
  model: string,
  pricingOverrides?: Partial<ModelPricing>,
): CostBreakdown;

/**
 * Calculate estimated cost before making an API call
 *
 * @param estimatedInputTokens - Estimated input tokens
 * @param estimatedOutputTokens - Estimated output tokens
 * @param model - Model identifier
 * @param useCache - Whether caching will be used
 * @param pricingOverrides - Optional pricing overrides
 * @returns Estimated cost breakdown
 *
 * @example
 * ```typescript
 * const estimate = estimateCost(10000, 500, 'gpt-4', false);
 * console.log(estimate.totalCost); // Estimated total cost
 * ```
 */
declare function estimateCost(
  estimatedInputTokens: number,
  estimatedOutputTokens: number,
  model: string,
  useCache?: boolean,
  pricingOverrides?: Partial<ModelPricing>,
): CostBreakdown;

/**
 * Calculate cost for multiple API calls (batch)
 *
 * @param calls - Array of token usage and cache stats
 * @param model - Model identifier
 * @param pricingOverrides - Optional pricing overrides
 * @returns Aggregated cost breakdown
 *
 * @example
 * ```typescript
 * const calls = [
 *   { usage: { inputTokens: 1000, outputTokens: 100 }, cacheStats: null },
 *   { usage: { inputTokens: 1000, outputTokens: 100 }, cacheStats: {...} },
 * ];
 * const total = calculateBatchCost(calls, 'gpt-4');
 * console.log(total.totalCost); // Sum of all calls
 * ```
 */
declare function calculateBatchCost(
  calls: {
    usage: TokenUsage;
    cacheStats: CacheStats | null;
  }[],
  model: string,
  pricingOverrides?: Partial<ModelPricing>,
): CostBreakdown;

/**
 * Format cost breakdown as human-readable string
 *
 * @param cost - Cost breakdown to format
 * @param options - Formatting options
 * @returns Formatted string
 *
 * @example
 * ```typescript
 * const cost = calculateCost(usage, cacheStats, 'gpt-4');
 * console.log(formatCostBreakdown(cost));
 * // Output:
 * // Total: $0.0350
 * // - Input: $0.0300 (1000 tokens)
 * // - Output: $0.0050 (100 tokens)
 * // - Cache Read: $0.0000
 * // Savings: $0.0000 (0.0%)
 * ```
 */
declare function formatCostBreakdown(
  cost: CostBreakdown,
  options?: {
    showBreakdown?: boolean;
    decimals?: number;
  },
): string;

/**
 * Compare actual cost vs estimated cost
 *
 * @param actual - Actual cost breakdown
 * @param estimated - Estimated cost breakdown
 * @returns Comparison metrics
 *
 * @example
 * ```typescript
 * const estimate = estimateCost(10000, 500, 'gpt-4');
 * // ... make API call ...
 * const actual = calculateCost(usage, cacheStats, 'gpt-4');
 * const comparison = compareCosts(actual, estimate);
 *
 * console.log(comparison.accuracyPercent); // How close was estimate
 * ```
 */
declare function compareCosts(
  actual: CostBreakdown,
  estimated: CostBreakdown,
): {
  difference: number;
  differencePercent: number;
  accuracyPercent: number;
  underestimated: boolean;
};

/** Any JSON-serializable value. */
type JsonValue =
  | null
  | string
  | number
  | boolean
  | JsonValue[]
  | {
      [key: string]: JsonValue;
    };

/**
 * Provider-specific call options: an outer record whose values are records
 * of JSON values.
 * NOTE(review): type arguments reconstructed on the assumption that the outer
 * key is the provider name, as in AI SDK providerOptions; confirm against the
 * original source.
 */
type ProviderCallOptions = Record<string, Record<string, JsonValue>>;

/**
 * Extract a confidence score from the LLM finish reason.
 *
 * This function maps the finish reason to a confidence score between 0 and 1.
 * The mapping is based on how reliable the output is likely to be given
 * why the model stopped generating.
 *
 * @param finishReason - The reason the model finished generating
 * @returns A confidence score between 0 and 1
 *
 * @example
 * ```typescript
 * extractConfidenceFromFinishReason('stop'); // 1.0
 * extractConfidenceFromFinishReason('length'); // 0.75
 * extractConfidenceFromFinishReason('error'); // 0.3
 * ```
 */
declare function extractConfidenceFromFinishReason(finishReason: FinishReason): number;

/**
 * Mullion client for Vercel AI SDK integration.
 *
 * Provides a scoped execution environment with LLM inference capabilities.
* The client wraps a language model and provides type-safe context management * for all LLM operations. * * @example * ```typescript * import { createMullionClient } from '@mullion/ai-sdk'; * import { openai } from '@ai-sdk/openai'; * * const client = createMullionClient(openai('gpt-4')); * * const result = await client.scope('user-query', async (ctx) => { * const intent = await ctx.infer(IntentSchema, userMessage); * return intent.value; * }); * ``` */ interface MullionClient { /** * Create a scoped execution context for LLM operations. * * This method establishes a type-safe boundary for LLM-generated values. * All values created within the scope are tagged with the scope identifier, * enabling compile-time detection of context leaks. * * @template S - The scope identifier (string literal type) * @template R - The return type of the scope function * @param name - The scope identifier (must be a string literal for type safety) * @param fn - Async function that receives a Context and returns a value * @returns Promise resolving to the value returned by the scope function * * @example * ```typescript * import { z } from 'zod'; * * const EmailSchema = z.object({ * subject: z.string(), * category: z.enum(['support', 'sales', 'feedback']) * }); * * const result = await client.scope('email-processing', async (ctx) => { * const email = await ctx.infer(EmailSchema, rawEmailText); * * if (email.confidence < 0.8) { * throw new Error('Low confidence classification'); * } * * return ctx.use(email); * }); * ``` * * @example * ```typescript * // Nested scopes with bridging * const analysis = await client.scope('admin', async (adminCtx) => { * const adminData = await adminCtx.infer(DataSchema, document); * * return await client.scope('processing', async (processCtx) => { * // Must explicitly bridge to use admin data * const bridged = processCtx.bridge(adminData); * return bridged; * }); * }); * ``` */ scope(name: S, fn: (ctx: MullionContext) => Promise): Promise; } /** * Creates 
a Mullion client with Vercel AI SDK integration. * * This function wraps a Vercel AI SDK language model to provide type-safe * context management for LLM operations. The returned client can create * scoped contexts where all LLM-generated values are properly tagged and * tracked for provenance. * * @param model - A Vercel AI SDK language model instance * @param options - Optional client configuration * @returns A Mullion client with scope() method * * @example * ```typescript * import { createMullionClient } from '@mullion/ai-sdk'; * import { openai } from '@ai-sdk/openai'; * import { anthropic } from '@ai-sdk/anthropic'; * * // With OpenAI * const client = createMullionClient(openai('gpt-4')); * * // With Anthropic * const client = createMullionClient(anthropic('claude-3-5-sonnet-20241022')); * * // Use the client * const result = await client.scope('analysis', async (ctx) => { * const data = await ctx.infer(Schema, input); * return data.value; * }); * ``` * * @example * ```typescript * // With custom model configuration * import { openai } from '@ai-sdk/openai'; * * const model = openai('gpt-4', { * apiKey: process.env.OPENAI_API_KEY, * }); * * const client = createMullionClient(model); * ``` */ /** * Configuration options for Mullion client. */ interface MullionClientOptions { /** LLM provider name for cache optimization */ readonly provider?: Provider; /** Model identifier for provider-specific features */ readonly model?: string; /** Enable cache segments API (default: false) */ readonly enableCache?: boolean; /** Default provider-specific options for all infer() calls */ readonly providerOptions?: ProviderCallOptions; } /** * Cache options for infer() method. */ interface CacheOptions { /** Cache strategy for this inference call */ readonly cache?: 'use-segments' | 'none'; } /** * Extended InferOptions that includes cache configuration. 
*/
interface MullionInferOptions extends InferOptions, CacheOptions {
  /** Provider-specific options for this inference call */
  readonly providerOptions?: ProviderCallOptions;
}

/**
 * Extended Context interface that includes cache segments API and cost tracking.
 *
 * NOTE(review): the type parameters on this interface and on `infer` were
 * missing from the declaration text (`T` was used without being declared and
 * the return type read `Promise>`). They are reconstructed here from the
 * `Context` / `Owned` imports; confirm the exact shapes against `@mullion/core`.
 *
 * @template S - Scope name carried by the context. Defaults to `string` so
 *   references that do not pass a type argument keep compiling.
 */
interface MullionContext<S extends string = string> extends Context<S> {
  /** Cache segments manager for this context */
  readonly cache: CacheSegmentManager;

  /**
   * Enhanced infer method with cache options.
   *
   * @template T - Value type described by `schema`
   * @param schema - Zod schema for the value to infer
   * @param input - Input text for the inference call
   * @param options - Inference options, including cache and provider options
   * @returns The inferred value wrapped as `Owned`
   */
  infer<T>(
    schema: z.ZodType<T> & { _type?: T },
    input: string,
    options?: MullionInferOptions,
  ): Promise<Owned<T, S>>;

  /** Get aggregated cache statistics for this context */
  getCacheStats(): CacheStats;

  /**
   * Get cost breakdown for the last API call made in this context.
   *
   * Returns detailed cost information including input/output costs,
   * cache costs, and savings analysis. Returns null if no calls have
   * been made yet.
   *
   * @returns Cost breakdown from last infer() call or null
   *
   * @example
   * ```typescript
   * await ctx.infer(schema, prompt);
   * const cost = ctx.getLastCallCost();
   * if (cost) {
   *   console.log(`Total: $${cost.totalCost.toFixed(4)}`);
   *   console.log(`Savings: ${cost.savingsPercent.toFixed(1)}%`);
   * }
   * ```
   */
  getLastCallCost(): CostBreakdown | null;

  /**
   * Estimate cost for a potential API call before making it.
   *
   * Provides pre-call cost estimation based on token count estimation
   * and current model pricing. Useful for cost-aware decision making.
   *
   * @param prompt - The prompt text to estimate cost for
   * @param estimatedOutputTokens - Expected output tokens (default: 500)
   * @returns Estimated cost breakdown
   *
   * @example
   * ```typescript
   * const estimate = ctx.estimateNextCallCost(longDocument);
   * if (estimate.totalCost > 0.10) {
   *   console.warn('This call will be expensive!');
   * }
   * await ctx.infer(schema, longDocument);
   * ```
   *
   * @example
   * ```typescript
   * // Compare estimate vs actual
   * const estimate = ctx.estimateNextCallCost(prompt, 200);
   * await ctx.infer(schema, prompt);
   * const actual = ctx.getLastCallCost();
   *
   * if (actual) {
   *   const diff = actual.totalCost - estimate.totalCost;
   *   console.log(`Estimation error: $${Math.abs(diff).toFixed(4)}`);
   * }
   * ```
   */
  estimateNextCallCost(prompt: string, estimatedOutputTokens?: number): CostBreakdown;
}

/**
 * Creates a Mullion client around a Vercel AI SDK language model.
 *
 * @param model - Language model instance to wrap
 * @param clientOptions - Optional client configuration
 * @returns The Mullion client
 */
declare function createMullionClient(model: LanguageModel, clientOptions?: MullionClientOptions): MullionClient;

/**
 * Gemini model discovery via Google Generative Language API.
 *
 * This module intentionally avoids a hardcoded full Gemini model list.
 * Instead, it resolves available models dynamically via `models.list`.
 */

/**
 * Normalized Gemini model metadata returned by discovery helpers.
 */
interface GeminiModel {
  /** Full API name, e.g. "models/gemini-2.0-flash" */
  readonly name: string;
  /** Short model id, e.g. "gemini-2.0-flash" */
  readonly id: string;
  readonly displayName?: string;
  readonly description?: string;
  readonly version?: string;
  readonly state?: string;
  readonly inputTokenLimit?: number;
  readonly outputTokenLimit?: number;
  readonly supportedGenerationMethods: readonly string[];
}

/**
 * Options for listing Gemini models.
 */
interface ListGeminiModelsOptions {
  /** API key. Falls back to GOOGLE_GENERATIVE_AI_API_KEY when omitted. */
  readonly apiKey?: string;
  /** Override models.list endpoint. */
  readonly baseUrl?: string;
  /** Optional page size hint for models.list pagination. */
  readonly pageSize?: number;
  /** Include models marked as deprecated. Default: false. */
  readonly includeDeprecated?: boolean;
  /**
   * Include models that do not support generateContent.
   * Default: false (inference-focused list).
   */
  readonly includeNonInferenceModels?: boolean;
  /** Optional abort signal for outbound requests. */
  readonly signal?: AbortSignal;
  /** Optional fetch override for tests. */
  readonly fetcher?: typeof fetch;
}

/**
 * Cached list options.
 */
interface ListGeminiModelsCachedOptions extends ListGeminiModelsOptions {
  /** Force refresh, bypassing in-memory cache. */
  readonly forceRefresh?: boolean;
  /** Cache TTL in milliseconds. Default: 10 minutes. */
  readonly cacheTtlMs?: number;
}

/**
 * Normalize model name from API format.
 *
 * @example
 * normalizeGeminiModelName('models/gemini-2.0-flash') // gemini-2.0-flash
 */
declare function normalizeGeminiModelName(name: string): string;

/**
 * Check whether a model supports structured inference workflows.
 */
declare function supportsGenerateContent(model: GeminiModel): boolean;

/**
 * Clear in-memory Gemini models cache.
 */
declare function clearGeminiModelsCache(): void;

/**
 * Fetch Gemini models from `models.list`, handling pagination and filtering.
 *
 * NOTE(review): the `Promise` type argument was missing from the declaration
 * text; `GeminiModel[]` is reconstructed from the discovery types declared
 * alongside this function. Confirm the exact element/array type (e.g.
 * `readonly`) against the implementation.
 */
declare function listGeminiModels(options?: ListGeminiModelsOptions): Promise<GeminiModel[]>;

/**
 * Cached variant of listGeminiModels.
*/ declare function listGeminiModelsCached(options?: ListGeminiModelsCachedOptions): Promise; type CatalogProvider = 'anthropic' | 'openai' | 'google'; declare const catalogPricingEntrySchema: z.ZodObject<{ provider: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>; declare const catalogCapabilityEntrySchema: z.ZodObject<{ supported: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>; declare const catalogPricingProviderSchema: z.ZodObject<{ default: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>; declare const catalogCapabilityProviderSchema: z.ZodObject<{ default: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>; declare const modelCatalogSchema: z.ZodObject<{ schemaVersion: z.ZodLiteral<1>; snapshotDate: z.ZodString; generatedAt: z.ZodString; sources: z.ZodArray; pricing: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; 
cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>; providers: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; openai: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; google: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; }, z.core.$strict>>; models: z.ZodOptional>; inputPer1M: z.ZodOptional; outputPer1M: z.ZodOptional; cachedInputPer1M: z.ZodOptional; cacheWritePer1M: z.ZodOptional; asOfDate: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; capabilities: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>; providers: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, 
z.core.$strict>>; models: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; openai: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; google: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>; models: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; }, z.core.$strict>>; models: z.ZodOptional; minTokens: z.ZodOptional; maxBreakpoints: z.ZodOptional, z.ZodNumber]>>; supportsTtl: z.ZodOptional; supportedTtl: z.ZodOptional>>; supportsToolCaching: z.ZodOptional; isAutomatic: z.ZodOptional; }, z.core.$strict>>>; }, z.core.$strict>>; }, z.core.$strict>; type CatalogPricingEntry = z.infer; type CatalogCapabilityEntry = z.infer; type CatalogPricingProvider = z.infer; type CatalogCapabilityProvider = z.infer; type ModelCatalog = z.infer; interface LoadModelCatalogOptions { url?: string; filePath?: string; json?: string | ModelCatalog; ttlMs?: number; forceRefresh?: boolean; fetchFn?: typeof fetch; throwOnError?: boolean; } interface LoadModelCatalogResult { 
catalog: ModelCatalog | null; source: 'url' | 'file' | 'json'; fromCache: boolean; usedFallback: boolean; error?: ModelCatalogError; } declare class ModelCatalogError extends Error { constructor(message: string, options?: ErrorOptions); } declare class ModelCatalogValidationError extends ModelCatalogError { readonly issues: string[]; constructor(issues: string[], options?: ErrorOptions); } declare class ModelCatalogLoadError extends ModelCatalogError { constructor(message: string, options?: ErrorOptions); } declare function setModelCatalogOverrides(catalog: ModelCatalog): ModelCatalog; declare function clearModelCatalogOverrides(): void; declare function getModelCatalogOverrides(): Readonly | null; declare function loadModelCatalog(options: LoadModelCatalogOptions): Promise; /** * Cache warmup implementation for fork optimization. * * This module provides warmup strategies for priming the cache before * parallel fork branch execution. Warmup is essential for Anthropic * cache sharing because cache becomes available only after the first * response completes. * * @module cache/warmup */ /** * Configuration for warmup operations. */ interface WarmupConfig { /** LLM provider (anthropic, openai, etc.) */ readonly provider: Provider; /** Model identifier */ readonly model: string; /** Language model instance for API calls */ readonly languageModel: LanguageModel; /** System prompt to use for warmup (optional) */ readonly systemPrompt?: string; /** Minimal prompt for warmup (produces minimal output) */ readonly warmupPrompt?: string; /** Maximum tokens for warmup output (should be minimal) */ readonly maxTokens?: number; } /** * Performs an explicit warmup call to prime the cache. * * This function makes a minimal API call that uses the cached segments * but produces minimal output, purely to establish the cache entry * before parallel branches execute. 
*
 * @param config - Warmup configuration
 * @param cacheManager - Cache segment manager with segments to prime
 * @returns Promise resolving to warmup metrics (`WarmupResult`; the type
 *   argument was missing from the declaration text and is reconstructed,
 *   confirm against the implementation)
 *
 * @example
 * ```typescript
 * const config: WarmupConfig = {
 *   provider: 'anthropic',
 *   model: 'claude-3-5-sonnet-20241022',
 *   languageModel: anthropic('claude-3-5-sonnet-20241022'),
 * };
 *
 * // Add segments to cache manager
 * cacheManager.segment('document', longDocument, { ttl: '5m' });
 * cacheManager.system(systemPrompt, { ttl: '1h' });
 *
 * // Perform explicit warmup
 * const result = await explicitWarmup(config, cacheManager);
 * console.log(`Warmup cost: ${result.tokenCost} tokens`);
 * console.log(`Cache created: ${result.cacheCreatedTokens} tokens`);
 * ```
 */
declare function explicitWarmup(config: WarmupConfig, cacheManager?: CacheSegmentManager): Promise<WarmupResult>;

/**
 * Result of first-branch warmup.
 *
 * @template T - Return type of the first branch
 */
interface FirstBranchWarmupResult<T> {
  /** Result from the first branch */
  readonly firstResult: T;
  /** Warmup metrics */
  readonly warmup: WarmupResult;
}

/**
 * Executes first branch as warmup, then returns its result.
 *
 * This strategy uses the first branch's natural execution to prime
 * the cache. Other branches wait for the first to complete before
 * executing in parallel, benefiting from the primed cache.
 *
 * NOTE(review): the type parameters were missing from the declaration text
 * (`Promise` had no argument and the return type read `Promise>`). `T` follows
 * the `@template` tag below; the scope parameter `S` on `Context` is an
 * assumption, confirm against `@mullion/core`.
 *
 * @template T - Return type of the branch
 * @template S - Scope name of the context passed to the branch
 * @param firstBranch - The first branch function to execute
 * @param ctx - Context for the branch
 * @returns Promise resolving to first branch result and warmup metrics
 *
 * @example
 * ```typescript
 * const branches = [
 *   (c) => c.infer(OverviewSchema, 'Generate overview'),
 *   (c) => c.infer(DetailSchema, 'Generate details'),
 *   (c) => c.infer(ActionSchema, 'Generate actions'),
 * ];
 *
 * // Execute first branch as warmup
 * const { firstResult, warmup } = await firstBranchWarmup(
 *   branches[0],
 *   ctx
 * );
 *
 * // Execute remaining branches (they benefit from cache)
 * const remainingResults = await Promise.all(
 *   branches.slice(1).map((branch) => branch(ctx))
 * );
 *
 * const allResults = [firstResult, ...remainingResults];
 * ```
 */
declare function firstBranchWarmup<T, S extends string>(
  firstBranch: (ctx: Context<S>) => Promise<T>,
  ctx: Context<S>,
): Promise<FirstBranchWarmupResult<T>>;

/**
 * Creates a warmup executor for use with the fork function.
 *
 * This factory creates a WarmupExecutor that can be registered with
 * @mullion/core to enable cache-optimized fork execution.
 *
 * @param config - Warmup configuration
 * @param cacheManager - Optional cache manager for segment access
 * @returns A WarmupExecutor instance
 *
 * @example
 * ```typescript
 * import { createWarmupExecutor } from '@mullion/ai-sdk';
 * // NOTE(review): registerWarmupExecutor is not in this package's export
 * // list; presumably it is exported by @mullion/core (confirm).
 * import { registerWarmupExecutor } from '@mullion/core';
 * import { anthropic } from '@ai-sdk/anthropic';
 *
 * const executor = createWarmupExecutor({
 *   provider: 'anthropic',
 *   model: 'claude-3-5-sonnet-20241022',
 *   languageModel: anthropic('claude-3-5-sonnet-20241022'),
 * });
 *
 * registerWarmupExecutor(executor);
 *
 * // Now fork() with strategy: 'cache-optimized' will use this executor
 * const result = await fork(ctx, {
 *   strategy: 'cache-optimized',
 *   warmup: 'explicit',
 *   branches: [...],
 * });
 * ```
 */
declare function createWarmupExecutor(config: WarmupConfig, cacheManager?: CacheSegmentManager): WarmupExecutor;

/**
 * Registers a warmup executor with @mullion/core for the given configuration.
*
 * This is a convenience function that creates and registers the executor
 * in one call.
 *
 * @param config - Warmup configuration
 * @param cacheManager - Optional cache manager for segment access
 * @returns The created WarmupExecutor instance
 *
 * @example
 * ```typescript
 * import { setupWarmupExecutor } from '@mullion/ai-sdk';
 * import { anthropic } from '@ai-sdk/anthropic';
 *
 * // Register warmup for cache-optimized fork
 * setupWarmupExecutor({
 *   provider: 'anthropic',
 *   model: 'claude-3-5-sonnet-20241022',
 *   languageModel: anthropic('claude-3-5-sonnet-20241022'),
 * });
 *
 * // Now use cache-optimized fork
 * const result = await fork(ctx, {
 *   strategy: 'cache-optimized',
 *   branches: [...],
 * });
 * ```
 */
declare function setupWarmupExecutor(
  config: WarmupConfig,
  cacheManager?: CacheSegmentManager,
): WarmupExecutor;

/**
 * Gives a rough token-cost estimate for a warmup before it is executed.
 *
 * Intended for cost planning: the figure is an approximation computed
 * ahead of time, not a measurement of an actual warmup call.
 *
 * @param cacheManager - Cache manager with segments
 * @param systemPrompt - Optional system prompt
 * @returns Estimated token cost
 *
 * @example
 * ```typescript
 * const estimate = estimateWarmupCost(cacheManager, systemPrompt);
 * console.log(`Estimated warmup cost: ~${estimate} tokens`);
 *
 * if (estimate > 10000) {
 *   console.warn('Large warmup cost, consider reducing cached content');
 * }
 * ```
 */
declare function estimateWarmupCost(
  cacheManager?: CacheSegmentManager,
  systemPrompt?: string,
): number;

/**
 * Determines if warmup would be beneficial for the given configuration.
*
 * Warmup is beneficial when:
 * - Provider supports explicit caching (not automatic like OpenAI)
 * - There are cache segments to prime
 * - Expected branch count is > 1
 *
 * @param config - Warmup configuration
 * @param cacheManager - Cache manager with segments
 * @param branchCount - Number of fork branches
 * @returns Whether warmup is recommended
 *
 * @example
 * ```typescript
 * if (shouldWarmup(config, cacheManager, branches.length)) {
 *   await fork(ctx, {
 *     strategy: 'cache-optimized',
 *     warmup: 'explicit',
 *     branches,
 *   });
 * } else {
 *   await fork(ctx, {
 *     strategy: 'fast-parallel',
 *     branches,
 *   });
 * }
 * ```
 */
declare function shouldWarmup(
  config: WarmupConfig,
  cacheManager?: CacheSegmentManager,
  branchCount?: number,
): boolean;

/**
 * Schema conflict detection for fork optimization.
 *
 * Fork branches that use different schemas break Anthropic cache prefix
 * matching, and this module detects that situation. With Anthropic,
 * `generateObject` converts the schema into a tool definition, so
 * different schemas yield different tool definitions and therefore
 * different cache prefixes.
 *
 * @module cache/schema-conflict
 */

/**
 * Describes one schema used by a fork branch.
 */
interface SchemaInfo {
  /** Index of the branch, counted from 0. */
  readonly branchIndex: number;

  /** Signature of the schema, used for comparison. */
  readonly signature: string;

  /** The original schema object, kept for debugging. */
  readonly schema: unknown;

  /** Human-readable description of the schema. */
  readonly description?: string;
}

/**
 * Settings for schema conflict detection.
 */
interface DetectSchemaConflictOptions {
  /** Name of the provider (conflict detection is Anthropic-specific). */
  readonly provider?: string;

  /** Whether the result should include detailed schema information. */
  readonly includeDetails?: boolean;
}

/**
 * Extended conflict result with detailed schema information.
*/
interface DetailedSchemaConflictResult extends SchemaConflictResult {
  /** Per-group details for the schemas involved. */
  readonly schemaGroups: readonly SchemaInfo[][];

  /** Suggested ways to resolve the conflict. */
  readonly suggestions: readonly string[];
}

/**
 * Derives a comparable signature from a Zod schema.
 *
 * The signature encodes the schema's structure so that two schemas can be
 * checked for equality. Schemas with equal signatures produce the same
 * tool definition for Anthropic.
 *
 * @param schema - Zod schema to compute signature for
 * @returns A string signature representing the schema structure
 */
declare function computeSchemaSignature(schema: z.ZodTypeAny): string;

/**
 * Looks for schema conflicts within a set of schemas.
 *
 * With Anthropic's `generateObject`, every distinct schema used by a fork
 * branch becomes a distinct tool definition. That breaks cache prefix
 * matching, so branches using different schemas do not share cache even
 * after warmup.
 *
 * @param schemas - Array of Zod schemas used by fork branches
 * @param options - Detection options
 * @returns Conflict detection result
 *
 * @example
 * ```typescript
 * import { z } from 'zod';
 *
 * const RiskSchema = z.object({ risk: z.string() });
 * const OpportunitySchema = z.object({ opportunity: z.string() });
 * const SummarySchema = z.object({ summary: z.string() });
 *
 * const result = detectSchemaConflict([
 *   RiskSchema,
 *   OpportunitySchema,
 *   SummarySchema,
 * ]);
 *
 * if (result.hasConflict) {
 *   console.warn(result.message);
 *   // "3 different schemas detected across 3 branches..."
 * }
 * ```
 */
declare function detectSchemaConflict(
  schemas: readonly z.ZodTypeAny[],
  options?: DetectSchemaConflictOptions,
): DetailedSchemaConflictResult;

/**
 * Handles a schema conflict according to the specified behavior.
*
 * @param conflict - The detected conflict
 * @param behavior - How to handle the conflict
 * @returns Warning message if behavior is 'warn', undefined otherwise
 * @throws Error if behavior is 'error' and conflict exists
 *
 * @example
 * ```typescript
 * const conflict = detectSchemaConflict(schemas);
 *
 * // Warn but continue
 * const warning = handleSchemaConflict(conflict, 'warn');
 * if (warning) console.warn(warning);
 *
 * // Throw on conflict
 * handleSchemaConflict(conflict, 'error'); // throws if conflict
 *
 * // Silent
 * handleSchemaConflict(conflict, 'allow'); // no-op
 * ```
 */
declare function handleSchemaConflict(
  conflict: SchemaConflictResult,
  behavior: SchemaConflictBehavior,
): string | undefined;

/**
 * Reports whether a set of schemas can share cache.
 *
 * A boolean shortcut for callers that only need to know whether the
 * schemas match.
 *
 * @param schemas - Array of Zod schemas to check
 * @returns True if all schemas are compatible (same signature)
 *
 * @example
 * ```typescript
 * const SummarySchema = z.object({ summary: z.string() });
 *
 * // Same schema used multiple times - compatible
 * const compatible = areSchemasCompatible([
 *   SummarySchema,
 *   SummarySchema,
 *   SummarySchema,
 * ]);
 * // true
 *
 * // Different schemas - not compatible
 * const RiskSchema = z.object({ risk: z.string() });
 * const incompatible = areSchemasCompatible([
 *   SummarySchema,
 *   RiskSchema,
 * ]);
 * // false
 * ```
 */
declare function areSchemasCompatible(schemas: readonly z.ZodTypeAny[]): boolean;

/**
 * Gets a human-readable description of schema differences.
 *
 * Useful for debugging and understanding why schemas don't match.
 *
 * @param schemas - Array of Zod schemas to compare
 * @returns Description of differences between schemas
 *
 * @example
 * ```typescript
 * const diff = describeSchemasDifference([RiskSchema, OpportunitySchema]);
 * console.log(diff);
 * // "Schema 0 has properties: risk. Schema 1 has properties: opportunity."
* ```
 */
declare function describeSchemasDifference(schemas: readonly z.ZodTypeAny[]): string;

/**
 * Token estimation utilities for cost calculation
 * @module cost/tokens
 */

/**
 * Outcome of a token estimation.
 */
interface TokenEstimate {
  /** The estimated number of tokens. */
  count: number;

  /** Which estimation method produced the count. */
  method: 'tiktoken' | 'approximate' | 'exact';

  /** Model the estimate was made for, if applicable. */
  model?: string;
}

/**
 * Estimates how many tokens a piece of text contains.
 *
 * @param text - Text to estimate tokens for
 * @param model - Optional model identifier for provider-specific estimation
 * @returns Token estimate with method indication
 *
 * @example
 * ```typescript
 * const estimate = estimateTokens('Hello, world!', 'gpt-4');
 * console.log(estimate);
 * // { count: 4, method: 'tiktoken', model: 'gpt-4' }
 * ```
 *
 * @example
 * ```typescript
 * // Claude models use approximation
 * const estimate = estimateTokens('Hello, world!', 'claude-3-5-sonnet-20241022');
 * console.log(estimate);
 * // { count: 3, method: 'approximate', model: 'claude-3-5-sonnet-20241022' }
 * ```
 */
declare function estimateTokens(text: string, model?: string): TokenEstimate;

/**
 * Estimate tokens for multiple text segments
 *
 * Useful for estimating total context size including system prompts,
 * user messages, and cached segments.
*
 * @param segments - Array of text segments to estimate
 * @param model - Optional model identifier
 * @returns Total token estimate
 *
 * @example
 * ```typescript
 * const total = estimateTokensForSegments([
 *   systemPrompt,
 *   userMessage,
 *   cachedDocument
 * ], 'gpt-4');
 * console.log(total.count); // Total tokens across all segments
 * ```
 */
declare function estimateTokensForSegments(segments: string[], model?: string): TokenEstimate;

// Type-only exports.
export type {
  AnthropicCacheAdapter,
  AnthropicCacheMetrics,
  AnthropicProviderOptions,
  CacheCapabilities,
  CacheConfig,
  CacheOptions,
  CacheScope,
  CacheSegment,
  CacheStats,
  CacheTTL,
  CatalogCapabilityEntry,
  CatalogCapabilityProvider,
  CatalogPricingEntry,
  CatalogPricingProvider,
  CatalogProvider,
  CostBreakdown,
  DetailedSchemaConflictResult,
  DetectSchemaConflictOptions,
  FirstBranchWarmupResult,
  GeminiCacheAdapter,
  GeminiCacheConfig,
  GeminiModel,
  GoogleCacheMetrics,
  GoogleProviderOptions,
  ListGeminiModelsCachedOptions,
  ListGeminiModelsOptions,
  LoadModelCatalogOptions,
  LoadModelCatalogResult,
  ModelCatalog,
  ModelPricing,
  MullionClient,
  MullionClientOptions,
  MullionContext,
  MullionInferOptions,
  OpenAICacheAdapter,
  OpenAICacheMetrics,
  OpenAIProviderOptions,
  Provider,
  ProviderOptions,
  SchemaInfo,
  SegmentOptions,
  TokenEstimate,
  TokenUsage,
  ValidationResult$1 as ValidationResult,
  WarmupConfig,
};

// Value exports (classes, constants and functions).
export {
  CacheMetricsCollector,
  CacheSegmentManager,
  ModelCatalogError,
  ModelCatalogLoadError,
  ModelCatalogValidationError,
  PRICING_DATA,
  aggregateCacheMetrics,
  areSchemasCompatible,
  calculateBatchCost,
  calculateCacheWritePricing,
  calculateCost,
  clearGeminiModelsCache,
  clearModelCatalogOverrides,
  compareCosts,
  computeSchemaSignature,
  createAnthropicAdapter,
  createCacheSegmentManager,
  createDefaultCacheConfig,
  createDeveloperContentConfig,
  createGeminiAdapter,
  createMullionClient,
  createOpenAIAdapter,
  createUserContentConfig,
  createWarmupExecutor,
  describeSchemasDifference,
  detectSchemaConflict,
  estimateCacheSavings,
  estimateCost,
  estimateTokens,
  estimateTokensForSegments,
  estimateWarmupCost,
  explicitWarmup,
  exportPricingAsJSON,
  extractConfidenceFromFinishReason,
  firstBranchWarmup,
  formatCacheStats,
  formatCostBreakdown,
  getAllPricing,
  getCacheCapabilities,
  getEffectiveBreakpointLimit,
  getModelCatalogOverrides,
  getPricing,
  getPricingByProvider,
  getRecommendedCacheStrategy,
  handleSchemaConflict,
  importPricingFromJSON,
  isValidTtl,
  listGeminiModels,
  listGeminiModelsCached,
  loadModelCatalog,
  normalizeGeminiModelName,
  parseAnthropicMetrics,
  parseCacheMetrics,
  parseGoogleMetrics,
  parseOpenAIMetrics,
  setModelCatalogOverrides,
  setupWarmupExecutor,
  shouldWarmup,
  supportsCacheFeature,
  supportsGenerateContent,
  validateBreakpointLimit,
  validateMinTokens,
  validateTtlOrdering,
};