/**
 * Docs Chunking Engine
 *
 * Provides prose-optimized chunking configuration for documentation files.
 * Uses larger chunks and more overlap than code to preserve context in prose content.
 *
 * Features:
 * - Prose-optimized chunk sizes (8000 chars with 2000 overlap)
 * - Markdown header-aware chunking for .md files (SMCP-099)
 * - Character-based fallback for .txt files
 *
 * Based on RFC Section 3.2.1: Chunking differences
 */
import { type Chunk, type SplitOptions } from './chunking.js';
import { type MarkdownChunkOptions } from './markdownChunking.js';

/**
 * File extensions recognized as documentation files
 */
export declare const DOC_FILE_EXTENSIONS: string[];

/**
 * Glob patterns for finding documentation files
 */
export declare const DOC_FILE_PATTERNS: string[];

/**
 * Prose-optimized chunking parameters
 *
 * - chunkSize: ~2000 tokens = ~8000 characters (larger for prose)
 * - chunkOverlap: ~500 tokens = ~2000 characters (more overlap for context)
 * - separators: includes '. ' for sentence boundaries in prose
 */
export declare const DOC_SPLIT_OPTIONS: SplitOptions;

/**
 * Check if a file is a documentation file based on its extension
 *
 * @param relativePath - Relative path to the file (or just filename)
 * @returns True if the file is a documentation file (.md or .txt)
 *
 * @example
 * ```typescript
 * isDocFile('README.md'); // true
 * isDocFile('docs/guide.txt'); // true
 * isDocFile('src/index.ts'); // false
 * isDocFile('NOTES.MD'); // true (case-insensitive)
 * ```
 */
export declare function isDocFile(relativePath: string): boolean;

/**
 * Options for doc file chunking
 */
export interface DocChunkOptions {
  /** Use markdown header chunking for .md files (default: true) */
  useMarkdownChunking?: boolean;
  /**
   * Markdown chunk options (if using markdown chunking).
   * Every field is optional, so callers override only what they need.
   */
  markdownOptions?: Partial<MarkdownChunkOptions>;
}

/**
 * Chunk a documentation file with prose-optimized parameters
 *
 * For .md files (SMCP-099):
 * - Uses markdown header-aware chunking
 * - Chunks align with section boundaries (h1-h6)
 * - Preserves header hierarchy in chunk metadata
 * - Sub-chunks large sections while maintaining context
 *
 * For .txt files:
 * - Uses character-based chunking with DOC_SPLIT_OPTIONS
 * - Larger chunks (8000 chars) with more overlap (2000 chars)
 *
 * @param absolutePath - Absolute path to the file on disk
 * @param relativePath - Relative path from project root (stored in chunk)
 * @param options - Optional chunking configuration
 * @returns Promise resolving to array of chunks with IDs and metadata
 * @throws MCPError with FILE_NOT_FOUND if file doesn't exist
 * @throws MCPError with PERMISSION_DENIED if file can't be read
 *
 * @example
 * ```typescript
 * const chunks = await chunkDocFile(
 *   '/Users/dev/project/docs/README.md',
 *   'docs/README.md'
 * );
 * console.log(chunks[0].id); // 'a1b2c3d4-...'
 * console.log(chunks[0].path); // 'docs/README.md'
 * console.log(chunks[0].startLine); // 1
 * // For markdown files, chunks align with sections
 * console.log(chunks[0].metadata?.tags); // ['installation', 'section:installation']
 * ```
 */
export declare function chunkDocFile(
  absolutePath: string,
  relativePath: string,
  options?: DocChunkOptions
): Promise<Chunk[]>;
//# sourceMappingURL=docsChunking.d.ts.map