/**
 * Text Chunking Utility
 *
 * Splits large texts into chunks suitable for embedding generation.
 * Handles token limits, word boundaries, and overlap for context preservation.
 *
 * NOTE(review): this is a declaration file with a source-map trailer, so it is
 * presumably compiler-generated; manual edits may be overwritten on the next
 * build — confirm before relying on changes made here.
 */

/**
 * Estimate the token count for a piece of text.
 *
 * Documented heuristic: roughly 4 characters per token for English text.
 * The result is an approximation, not an exact tokenizer count.
 *
 * NOTE(review): how the estimate is rounded and how non-English text is
 * treated cannot be determined from this declaration — check the
 * implementation.
 *
 * @param text - Text to estimate tokens for
 * @returns Estimated token count
 */
export declare function estimateTokens(text: string): number;

/**
 * Split text into chunks that each fit within a maximum token budget.
 *
 * Documented behavior: breaks at word boundaries when possible and can add
 * overlap between consecutive chunks to preserve context.
 *
 * NOTE(review): the default values listed below come from the documentation
 * only; this declaration shows just that both parameters are optional. The
 * handling of empty input, or of an overlap that is not smaller than the
 * maximum, is likewise not visible here — verify against the implementation.
 *
 * @param text - Text to chunk
 * @param maxTokens - Maximum tokens per chunk (documented default: 8000, chosen for OpenRouter)
 * @param overlapTokens - Number of tokens to overlap between chunks (documented default: 0)
 * @returns Array of text chunks
 */
export declare function chunkText(text: string, maxTokens?: number, overlapTokens?: number): string[];

/**
 * Chunk text specifically for embedding generation.
 *
 * Documented to use an 8000 token limit, described as OpenRouter's limit for
 * text-embedding-3-small.
 *
 * NOTE(review): presumably a thin wrapper around chunkText with the 8000 token
 * limit; whether any overlap is applied is not visible here — confirm against
 * the implementation.
 *
 * @param text - Text to chunk
 * @returns Array of text chunks suitable for embeddings
 */
export declare function chunkForEmbedding(text: string): string[];
//# sourceMappingURL=chunking.d.ts.map