/**
 * Result of title extraction, including how the title was determined.
 */
export interface TitleExtractionResult {
    /** The extracted title text. */
    title: string;
    /** How the title was determined: one of 'metadata', 'content' or 'filename'. */
    source: 'metadata' | 'content' | 'filename';
}
/**
 * Convert a file name to a human-readable title.
 * Strips the extension and replaces hyphens/underscores with spaces.
 *
 * @param fileName - File name (e.g., "2024-annual-report.pdf")
 * @returns Human-readable title (e.g., "2024 annual report")
 */
export declare function fileNameToTitle(fileName: string): string;
/**
 * Extract a title from Markdown content.
 * Priority: YAML frontmatter title -> first `# H1` heading -> file name
 *
 * @param text - Markdown content
 * @param fileName - File name used as the fallback
 * @returns Title extraction result
 */
export declare function extractMarkdownTitle(text: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title from plain text content.
 * Priority: first line followed by an empty line -> file name
 *
 * @param text - Plain text content
 * @param fileName - File name used as the fallback
 * @returns Title extraction result
 */
export declare function extractTxtTitle(text: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title from HTML content (using the Readability title).
 * Priority: Readability title -> file name
 *
 * @param readabilityTitle - Title extracted by Readability
 * @param fileName - File name used as the fallback
 * @returns Title extraction result
 */
export declare function extractHtmlTitle(readabilityTitle: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title from PDF metadata or the first page chunk text.
 * Priority: PDF metadata /Title -> first page chunk 0 text -> file name
 *
 * Rejects metadata titles that look like file paths (contain / or \) or are
 * empty/whitespace-only.
 *
 * NOTE(review): the priority list above does not say where `firstPageFontHint`
 * ranks relative to the other sources - confirm against the implementation.
 *
 * @param metadataTitle - PDF metadata /Title value (may be undefined)
 * @param firstPageChunkText - Text of chunk 0 from semantic chunking of page 1 (may be undefined)
 * @param fileName - File name used as the fallback
 * @param firstPageFontHint - Largest-font text item from page 1 (optional, used for title detection)
 * @returns Title extraction result
 */
export declare function extractPdfTitle(
    metadataTitle: string | undefined,
    firstPageChunkText: string | undefined,
    fileName: string,
    firstPageFontHint?: {
        text: string;
        fontSize: number;
    }
): TitleExtractionResult;
/**
 * Extract a title from DOCX mammoth HTML output.
 * Priority: first `<h1>` heading from mammoth HTML -> file name
 *
 * NOTE(review): the element name was missing from the original comment
 * ("first ... from mammoth HTML"); `<h1>` is assumed - confirm against the
 * implementation.
 *
 * @param htmlContent - HTML content generated by mammoth.convertToHtml()
 * @param fileName - File name used as the fallback
 * @returns Title extraction result
 */
export declare function extractDocxTitle(htmlContent: string, fileName: string): TitleExtractionResult;
//# sourceMappingURL=title-extractor.d.ts.map