import type { DocumentChunk, ChunkingOptions } from './document.types.js';

/**
 * Service for chunking documents using HybridChunker with fallback.
 *
 * NOTE(review): the `Promise` return types below had lost their type argument;
 * it is restored as `DocumentChunk[]` based on the otherwise-unused
 * `DocumentChunk` import — confirm against the implementation.
 */
export declare class DocumentChunkerService {
    private docling;

    /**
     * @param options - Optional settings; `outputDir` is presumably where
     *   conversion output is written — TODO confirm against the implementation.
     */
    constructor(options?: {
        outputDir?: string;
    });

    /**
     * Chunk a document using structure-aware chunking with fallback.
     *
     * For markdown/text: uses heading detection and recursive splitting.
     * For other formats: use {@link DocumentChunkerService.chunkWithDocling}
     * for best results.
     *
     * @param content - The document text to chunk.
     * @param options - Optional chunking settings.
     * @returns A promise resolving to the chunks produced from `content`.
     */
    chunkDocument(content: string, options?: ChunkingOptions): Promise<DocumentChunk[]>;

    /**
     * Chunk a document using a Docling document object (from conversion).
     *
     * @param doclingDoc - The Docling document object; its shape is not typed
     *   in this declaration.
     * @param options - Optional chunking settings.
     * @returns A promise resolving to the chunks produced from the document.
     */
    chunkWithDocling(doclingDoc: any, options?: ChunkingOptions): Promise<DocumentChunk[]>;

    /**
     * Process chunks from HybridChunker.
     */
    private processHybridChunks;

    /**
     * Detect chunk type from HybridChunker metadata.
     */
    private detectChunkType;

    /**
     * Extract heading hierarchy from chunk metadata.
     */
    private extractHeadingPath;

    /**
     * Extract text content from a Docling document object.
     */
    private extractTextFromDocling;

    /**
     * Structure-aware chunking for markdown and plain text.
     * Detects headings and splits intelligently.
     */
    private structureAwareChunking;

    /**
     * Detect headings in markdown and plain text.
     */
    private detectHeadings;

    /**
     * Chunk content by detected headings.
     */
    private chunkByHeadings;

    /**
     * Recursive chunking with intelligent separators.
     * Tries to split on natural boundaries (paragraphs, sentences, words).
     */
    private recursiveChunking;

    /**
     * Recursive splitting implementation.
     */
    private recursiveSplit;

    /**
     * Get overlap text from the end of a chunk.
     */
    private getOverlapText;

    /**
     * Character-level splitting (last resort).
     */
    private characterSplit;

    /**
     * Estimate token count (rough approximation: 1 token ≈ 4 characters).
     */
    private estimateTokenCount;
}
//# sourceMappingURL=document-chunker.service.d.ts.map