/**
 * DOCX → Semantic Document Converter
 *
 * Turns a `DocxDocument` into a `SemanticDocument`, the format-agnostic
 * intermediate representation (IR). It sits between the OOXML-specific types
 * and the universal IR consumed by HTML, Markdown and other renderers.
 *
 * Covered by the conversion:
 * - Headings, detected through the style name or `outlineLevel`
 * - Resolution of inline formatting
 * - Extraction of hyperlinks
 * - Images, registered into the `ConversionContext`
 * - Tables, including merged cells (colSpan/rowSpan)
 * - Lists/numbering: runs of consecutive numbered paragraphs are gathered
 *   into ordered/unordered `list` blocks, with sub-lists nested by level
 * - Footnote/endnote references and their content
 * - Math content (as a text fallback)
 */

import type { DocxDocument } from "../types.js";
import type { ConversionContext, SemanticDocument } from "./conversion-ir.js";

/**
 * Settings accepted by {@link docxToSemantic}.
 *
 * Every member is optional and read-only.
 */
export interface DocxToSemanticOptions {
  /** Whether footnotes are included in the output. Default: true. */
  readonly includeFootnotes?: boolean;

  /** Whether endnotes are included in the output. Default: true. */
  readonly includeEndnotes?: boolean;

  /** Whether images are extracted and registered as assets. Default: true. */
  readonly extractImages?: boolean;
}

/**
 * Converts a parsed DOCX document into the format-agnostic semantic IR.
 *
 * @param doc - The parsed DOCX document to convert.
 * @param options - Optional conversion settings; see {@link DocxToSemanticOptions}.
 * @returns An object with two members: `document`, the resulting semantic
 *   document, and `context`, the conversion context (warnings and assets).
 */
export declare function docxToSemantic(
  doc: DocxDocument,
  options?: DocxToSemanticOptions,
): {
  document: SemanticDocument;
  context: ConversionContext;
};