/**
 * Document Search & Query API (read-only helpers)
 *
 * Functions for searching, counting, and extracting text from a DocxDocument.
 *
 * This file contains declarations only (`export declare`); the
 * implementations live elsewhere.
 */
import type { DocxDocument, Paragraph, Table, Hyperlink, BookmarkStart, CommentDef, ImageDef, SectionProperties } from "../types.js";
/** Result of a text search in a document (one entry per match returned by `searchText`). */
export interface SearchResult {
    /**
     * The paragraph's visit order across the entire document, counting
     * paragraphs reachable from the body (including those nested in tables,
     * SDTs, text boxes, headers, footers, footnotes, endnotes and TOC
     * caches) in walk order.
     *
     * This is **not** an index into `doc.body`: nested paragraphs are
     * counted too. Use it as a stable ordinal for ordering results, not
     * for direct array access.
     *
     * Note that `DocumentHeading.paragraphIndex` is documented differently
     * (as an index into `doc.body`), so the two values are not
     * interchangeable.
     */
    readonly paragraphIndex: number;
    /** The matched text. */
    readonly match: string;
    /**
     * Character offset of the match within the paragraph's concatenated text.
     *
     * NOTE(review): presumably measured in UTF-16 code units, as with
     * JavaScript string indices — confirm against the implementation.
     */
    readonly offset: number;
}
/** A heading extracted from a document (see `getHeadings`). */
export interface DocumentHeading {
    /** Heading level (1-9). */
    readonly level: number;
    /** Plain text of the heading. */
    readonly text: string;
    /**
     * Index into doc.body where the paragraph resides.
     *
     * Unlike `SearchResult.paragraphIndex` (a walk-order ordinal), this is
     * documented as a direct index into the body array.
     */
    readonly paragraphIndex: number;
    /**
     * Style ID used (e.g. "Heading1").
     *
     * Optional in the type. NOTE(review): presumably absent when the
     * paragraph has no style ID and was matched via `outlineLevel` only —
     * confirm against the implementation.
     */
    readonly style?: string;
}
/** A section definition found in the document (see `listSections`). */
export interface DocumentSection {
    /** The section properties. */
    readonly properties: SectionProperties;
    /**
     * Index of the paragraph containing this section break, or -1 for the
     * final section.
     *
     * NOTE(review): the declaration does not say whether this is an index
     * into `doc.body` or a walk-order ordinal — confirm before using it for
     * array access.
     */
    readonly paragraphIndex: number;
    /** Whether this is the final section (from doc.sectionProperties). */
    readonly isFinal: boolean;
}
/**
 * Extract concatenated plain text from a paragraph's runs.
 *
 * @param para - The paragraph to read.
 * @returns The paragraph's run text joined into a single string.
 */
export declare function paragraphText(para: Paragraph): string;
/**
 * Count all top-level paragraphs in the document body.
 *
 * @param doc - The document to inspect.
 * @returns The number of top-level body paragraphs.
 */
export declare function paragraphCount(doc: DocxDocument): number;
/**
 * Count words across all paragraphs in the document body.
 *
 * Uses simple whitespace splitting; for East Asian text, each CJK character
 * is counted as one "word" to approximate meaningful unit count.
 *
 * @param doc - The document to inspect.
 * @returns The approximate word count.
 */
export declare function countWords(doc: DocxDocument): number;
/**
 * Extract the heading outline from a document.
 *
 * Matches paragraphs whose style is `Heading1` through `Heading9` (case-insensitive),
 * or whose `outlineLevel` property is set (0-8).
 *
 * NOTE(review): `DocumentHeading.level` is documented as 1-9 while
 * `outlineLevel` is 0-8, so the result presumably reports
 * `outlineLevel + 1` — confirm against the implementation.
 *
 * @param doc - The document to inspect.
 * @returns The headings found in the document.
 */
export declare function getHeadings(doc: DocxDocument): DocumentHeading[];
/**
 * Find a bookmark by name.
 *
 * NOTE(review): the declaration does not state what `paragraphIndex` and
 * `childIndex` index into (presumably the containing paragraph and the
 * bookmark's position among that paragraph's children) — confirm before
 * using them for array access.
 *
 * @param doc - The document to search.
 * @param name - The bookmark name to look for.
 * @returns The bookmark start marker + its location, or `undefined` if not found.
 */
export declare function findBookmark(doc: DocxDocument, name: string): {
    bookmark: BookmarkStart;
    paragraphIndex: number;
    childIndex: number;
} | undefined;
/**
 * Find a comment by its ID.
 *
 * @param doc - The document to search.
 * @param id - The numeric comment ID.
 * @returns The comment definition; the return type allows `undefined`,
 *   presumably for the case where no comment has that ID.
 */
export declare function findComment(doc: DocxDocument, id: number): CommentDef | undefined;
/**
 * List all images registered in the document.
 *
 * @param doc - The document to inspect.
 * @returns A read-only array of image definitions.
 */
export declare function listImages(doc: DocxDocument): readonly ImageDef[];
/**
 * List all tables in the document.
 *
 * By default this returns **all** tables in the document body (including
 * tables nested inside other tables, SDTs, text boxes, and TOC cached
 * paragraphs). For top-level only behavior pass `{ topLevelOnly: true }`.
 *
 * @param doc - The document to inspect.
 * @param options - Optional settings; `topLevelOnly` restricts the result
 *   to tables that are direct children of the body.
 * @returns A read-only array of tables.
 *
 * @example
 * listTables(doc) // all tables (consistent with tableCount)
 * listTables(doc, { topLevelOnly: true }) // direct children of body only
 */
export declare function listTables(doc: DocxDocument, options?: {
    readonly topLevelOnly?: boolean;
}): readonly Table[];
/**
 * Collect all hyperlinks in the document body.
 *
 * @param doc - The document to inspect.
 * @returns A read-only array of hyperlinks.
 */
export declare function listHyperlinks(doc: DocxDocument): readonly Hyperlink[];
/**
 * Get the total number of tables in the document, counting both top-level
 * and nested tables.
 *
 * The `listTables` example describes its default (all tables) result as
 * consistent with this count.
 *
 * @param doc - The document to inspect.
 * @returns The total table count.
 */
export declare function tableCount(doc: DocxDocument): number;
/**
 * List all sections in a document.
 *
 * Sections are defined by section breaks within paragraph properties
 * and the final section at the document level.
 *
 * @param doc - The document to inspect.
 * @returns Array of section definitions in document order.
 */
export declare function listSections(doc: DocxDocument): DocumentSection[];
/**
 * Extract plain text from the entire document: body, headers, footers,
 * footnotes, and endnotes.
 *
 * Paragraphs are separated by `\n`. Tables render as tab-separated cell text.
 *
 * @param doc - The document to extract text from.
 * @returns The document's plain text as a single string.
 */
export declare function extractText(doc: DocxDocument): string;
/**
 * Search for text occurrences across the entire document: body, tables, SDTs,
 * headers, footers, footnotes, and endnotes.
 *
 * NOTE(review): the declaration does not specify how RegExp flags (e.g.
 * `g`, `i`) are treated, whether string queries are case-sensitive, or the
 * ordering of results — confirm against the implementation.
 *
 * @param doc - The document model to search.
 * @param query - String or RegExp to search for.
 * @returns Array of search results.
 */
export declare function searchText(doc: DocxDocument, query: string | RegExp): SearchResult[];