/**
 * Token-based Chunker
 *
 * Splits text based on token counts using simple tokenization.
 * Best for controlling context window usage with LLMs.
 */
import type { Chunker, Chunk, ChunkerValidationResult, TokenChunkerConfig, BaseChunkerConfig } from "../../types/index.js";

/**
 * Token-aware chunker implementation.
 *
 * Splits text based on approximate token counts.
 *
 * Note: Uses simple word-based tokenization as an approximation.
 * For exact token counts, integrate with tiktoken or model-specific
 * tokenizers.
 */
export declare class TokenChunker implements Chunker {
  /** Strategy discriminator identifying this chunker; always the literal `"token"`. */
  readonly strategy: "token";

  /**
   * Characters-per-token data consulted when estimating token counts.
   * NOTE(review): presumably keyed by tokenizer name — the declaration
   * does not expose its shape; confirm against the implementation.
   */
  private readonly CHARS_PER_TOKEN;

  /**
   * Split `text` into chunks sized by approximate token count.
   *
   * @param text - The text to split.
   * @param config - Optional token-chunking settings.
   * @returns A promise that resolves to the produced chunks.
   */
  chunk(text: string, config?: TokenChunkerConfig): Promise<Chunk[]>;

  /**
   * Simple word-based tokenization.
   */
  private tokenize;

  /**
   * Get characters per token for a tokenizer.
   */
  private getCharsPerToken;

  /**
   * Estimate average tokens per word.
   */
  private estimateTokensPerWord;

  /**
   * Estimate the token count for `text`.
   *
   * The result is an approximation, not an exact tokenizer count.
   *
   * @param text - The text whose token count is estimated.
   * @param tokenizer - Optional tokenizer identifier.
   *   NOTE(review): presumably selects the characters-per-token ratio;
   *   accepted values are not visible in this declaration.
   * @returns The estimated number of tokens.
   */
  estimateTokenCount(text: string, tokenizer?: string): number;

  /**
   * Validate a chunker configuration.
   *
   * @param config - The configuration to check.
   * @returns The validation outcome as a `ChunkerValidationResult`.
   */
  validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}