/**
 * Tokenizer-Aware Encoding - Optimize for LLM tokenizers
 *
 * Adjusts encoding based on how LLM tokenizers (GPT, Claude, etc.) process text.
 * Minimizes token count by choosing optimal delimiters, spacing, and formatting.
 *
 * Features:
 * - Tokenizer-friendly delimiter selection
 * - Optimal whitespace usage
 * - Case-sensitive optimization
 * - Number formatting optimization
 *
 * Example optimizations:
 * - Use spaces instead of tabs (1 token vs 1-2 tokens)
 * - Prefer common delimiters (comma over pipe)
 * - Avoid unnecessary quotes
 * - Use compact number formats
 *
 * Token Savings: 5-15% through tokenizer-aware choices
 */
import type { TokenizerAwareOptions } from './types.js';

/**
 * Tokenizer analysis result, as returned by {@link TokenizerAware.analyzeText}.
 */
export interface TokenizerAnalysis {
  /** Estimated number of tokens for the analyzed text. */
  estimatedTokens: number;
  /** Delimiter recommended for the analyzed text. */
  recommendedDelimiter: string;
  /** Recommended quoting strategy. */
  recommendedQuoting: 'minimal' | 'conservative';
  /** Descriptions of the optimizations identified by the analysis. */
  optimizations: string[];
}

/**
 * Tokenizer-aware encoding manager.
 *
 * This is an ambient declaration only (`declare class`); the implementation
 * is not part of this file.
 */
export declare class TokenizerAware {
  // Declared without a type, as is usual for private members in emitted
  // declaration files.
  private options;

  /**
   * Create a tokenizer-aware encoding manager.
   *
   * @param options - Optional subset of {@link TokenizerAwareOptions}.
   *   NOTE(review): how omitted fields are defaulted is decided by the
   *   implementation, which is not visible from this declaration.
   */
  constructor(options?: Partial<TokenizerAwareOptions>);

  /**
   * Analyze text for tokenizer optimization
   *
   * @param text - Text to analyze
   * @returns Analysis result
   */
  analyzeText(text: string): TokenizerAnalysis;

  /**
   * Estimate token count for text
   *
   * Simple estimation: ~4 characters per token on average
   *
   * @param text - Text to estimate
   * @returns Estimated token count
   */
  estimateTokens(text: string): number;

  /**
   * Recommend best delimiter for data
   *
   * @param sample - Sample data
   * @returns Recommended delimiter
   */
  recommendDelimiter(sample: string): string;

  /**
   * Optimize text for tokenizer
   *
   * @param text - Original text
   * @returns Optimized text
   */
  optimize(text: string): string;

  /**
   * Compact number formatting
   *
   * @param text - Text with numbers
   * @returns Text with compacted numbers
   */
  private compactNumbers;

  /**
   * Generate tokenizer hint directive
   *
   * Format: `@tokenizer target`
   *
   * @param target - Target tokenizer
   * @returns TONL directive
   */
  generateDirective(target: string): string;

  /**
   * Parse tokenizer directive
   *
   * @param directive - Directive string
   * @returns Target tokenizer
   */
  parseDirective(directive: string): string;

  /**
   * Estimate savings from optimization
   *
   * @param original - Original text
   * @returns Estimated token savings
   */
  estimateSavings(original: string): number;

  /**
   * Check if optimization would be beneficial
   *
   * @param text - Text to check
   * @returns True if recommended
   */
  shouldOptimize(text: string): boolean;
}
//# sourceMappingURL=tokenizer-aware.d.ts.map