from mammoth HTML -> file name * * @param htmlContent - HTML content generated by mammoth.convertToHtml() * @param fileName - File name for fallback * @returns Title extraction result */ export declare function extractDocxTitle(htmlContent: string, fileName: string): TitleExtractionResult; //# sourceMappingURL=title-extractor.d.ts.map

/**
 * Result of title extraction, including how the title was determined.
 *
 * Returned by every `extract*Title` function declared in this file.
 */
export interface TitleExtractionResult {
    /** The title that was selected for the document. */
    title: string;
    /**
     * How the title was determined.
     *
     * NOTE(review): presumably 'metadata' means document metadata, 'content'
     * means the title was derived from the document body, and 'filename' means
     * the file-name fallback was used - confirm against the implementation,
     * which is not visible in this declaration file.
     */
    source: 'metadata' | 'content' | 'filename';
}
/**
 * Convert a file name to a human-readable title.
 *
 * Strips the extension and replaces hyphens/underscores with spaces.
 *
 * NOTE(review): handling of names without an extension, names with several
 * dots, or names that include a directory path is not documented here -
 * confirm against the implementation.
 *
 * @param fileName - File name (e.g., "2024-annual-report.pdf")
 * @returns Human-readable title (e.g., "2024 annual report")
 */
export declare function fileNameToTitle(fileName: string): string;
/**
 * Extract a title from Markdown content.
 *
 * Priority (first match wins):
 *   1. `title` field in YAML frontmatter
 *   2. first `#` (H1) heading
 *   3. file name
 *
 * NOTE(review): which `source` value each step reports is decided by the
 * implementation and is not visible in this declaration file.
 *
 * @param text - Markdown content
 * @param fileName - File name used for the fallback title
 * @returns Title extraction result
 */
export declare function extractMarkdownTitle(text: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title from plain text content.
 *
 * Priority (first match wins):
 *   1. the first line, when it is followed by an empty line
 *   2. file name
 *
 * NOTE(review): whether a whitespace-only line counts as "empty", and whether
 * any length limit applies to the first line, is not documented here - confirm
 * against the implementation.
 *
 * @param text - Plain text content
 * @param fileName - File name used for the fallback title
 * @returns Title extraction result
 */
export declare function extractTxtTitle(text: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title for HTML content, using the title produced by Readability.
 *
 * Priority (first match wins):
 *   1. Readability title
 *   2. file name
 *
 * NOTE(review): `readabilityTitle` is typed as a non-optional string, so the
 * fallback is presumably triggered by an empty or whitespace-only value -
 * confirm against the implementation.
 *
 * @param readabilityTitle - Title extracted by Readability
 * @param fileName - File name used for the fallback title
 * @returns Title extraction result
 */
export declare function extractHtmlTitle(readabilityTitle: string, fileName: string): TitleExtractionResult;
/**
 * Extract a title from PDF metadata or from the text of the first page.
 *
 * Priority (first match wins):
 *   1. PDF metadata /Title
 *   2. text of chunk 0 of the first page
 *   3. file name
 *
 * Metadata titles are rejected when they look like file paths (contain / or \)
 * or are empty/whitespace-only.
 *
 * NOTE(review): the priority list above does not say where `firstPageFontHint`
 * takes effect; it is described only as "used for title detection". Confirm
 * its exact role against the implementation.
 *
 * @param metadataTitle - PDF metadata /Title value (may be undefined)
 * @param firstPageChunkText - Text of chunk 0 from semantic chunking of page 1 (may be undefined)
 * @param fileName - File name used for the fallback title
 * @param firstPageFontHint - Largest-font text item from page 1 (optional, used for title detection)
 * @returns Title extraction result
 */
export declare function extractPdfTitle(metadataTitle: string | undefined, firstPageChunkText: string | undefined, fileName: string, firstPageFontHint?: {
    /** Text of the largest-font item found on page 1. */
    text: string;
    /** Font size of that item. NOTE(review): units are not specified here - confirm. */
    fontSize: number;
}): TitleExtractionResult;
/**
 * Extract a title from the HTML that mammoth produces for a DOCX file.
 *
 * Priority (first match wins):
 *   1. first <h1> in the mammoth HTML
 *   2. file name
 *
 * NOTE(review): how an empty <h1>, or an <h1> containing nested markup, is
 * handled is not documented here - confirm against the implementation.
 *
 * @param htmlContent - HTML content generated by mammoth.convertToHtml()
 * @param fileName - File name used for the fallback title
 * @returns Title extraction result
 */
export declare function extractDocxTitle(htmlContent: string, fileName: string): TitleExtractionResult;
//# sourceMappingURL=title-extractor.d.ts.map