/**
 * DOCX Module - Streaming Writer
 *
 * A DOCX generator that serializes body content incrementally and pushes it
 * through a streaming compression pipeline. Uses the same streaming ZIP
 * infrastructure as the Excel module:
 *
 * - `Zip` (StreamingZip) — streams ZIP entries to output
 * - `ZipDeflate` — per-entry deflate compression
 * - `StreamBuf` — event-driven pipe from XML to ZIP
 * - `StringBuf` — efficient XML string builder with buffer reuse
 *
 * Data flow with sink (true end-to-end streaming):
 * ```
 *   add(paragraph) → XML → StreamBuf → ZipDeflate → Zip
 *                                                    ↓ (per-chunk callback)
 *                                     await SinkAdapter.write(chunk)
 *                                                    ↓
 *                                     user-supplied WritableStream /
 *                                     Node Writable / duck-typed sink
 * ```
 *
 * Memory profile:
 * - Body model is never retained: each element is serialised and
 *   compressed as it arrives, so peak per-element memory is
 *   O(largest_single_element).
 * - When `options.sink` is provided, compressed bytes are pushed
 *   into the sink as soon as they are produced (with backpressure
 *   awaited via {@link SinkAdapter}). Total writer-side memory then
 *   stays O(largest_part) regardless of final DOCX size.
 * - When `options.sink` is omitted, compressed bytes accumulate in
 *   `_outputChunks` and `finalize()` returns the assembled
 *   `Uint8Array`. Total memory is O(compressed_docx_size).
 */
import { type AnySink } from "../../stream/internal/sink-adapter.js";
import type { WordSecurityPolicy } from "../security/policy.js";
import type {
  AbstractNumbering,
  AppProperties,
  BodyContent,
  CommentDef,
  CoreProperties,
  CustomProperty,
  CustomXmlPart,
  DocDefaults,
  DocumentBackground,
  DocumentSettings,
  DocumentTheme,
  EmbeddedFont,
  EndnoteDef,
  FontDef,
  FooterDef,
  FootnoteDef,
  HeaderDef,
  ImageDef,
  NumberingInstance,
  OpaquePart,
  Paragraph,
  SectionProperties,
  StyleDef,
  Watermark
} from "../types.js";

/** Options for the streaming DOCX writer. */
export interface StreamingDocxOptions {
  /** Compression level (0-9). Default: 6. */
  readonly compressionLevel?: number;
  /** Progress callback interval: report after every N elements. Default: 1000. */
  readonly chunkSize?: number;
  /** Section properties for the final section. */
  readonly sectionProperties?: SectionProperties;
  /** Document styles. */
  readonly styles?: readonly StyleDef[];
  /** Document defaults. */
  readonly docDefaults?: DocDefaults;
  /** Abstract numbering definitions. */
  readonly abstractNumberings?: readonly AbstractNumbering[];
  /** Numbering instances. */
  readonly numberingInstances?: readonly NumberingInstance[];
  /**
   * Headers, keyed by header type (`default` / `first` / `even`) or by any
   * other caller-chosen string (e.g. a round-tripped rId name).
   */
  readonly headers?: ReadonlyMap<string, HeaderDef>;
  /** Footers, keyed the same way as {@link StreamingDocxOptions.headers}. */
  readonly footers?: ReadonlyMap<string, FooterDef>;
  /** Footnotes. */
  readonly footnotes?: readonly FootnoteDef[];
  /** Endnotes. */
  readonly endnotes?: readonly EndnoteDef[];
  /** Images. */
  readonly images?: readonly ImageDef[];
  /** Fonts. */
  readonly fonts?: readonly FontDef[];
  /** Document settings. */
  readonly settings?: DocumentSettings;
  /** Core properties. */
  readonly coreProperties?: CoreProperties;
  /** App properties. */
  readonly appProperties?: AppProperties;
  /** Comments. */
  readonly comments?: readonly CommentDef[];
  /** Background. */
  readonly background?: DocumentBackground;
  /** Custom properties. */
  readonly customProperties?: readonly CustomProperty[];
  /** Watermark. */
  readonly watermark?: Watermark;
  /** Theme. */
  readonly theme?: DocumentTheme;
  /** Custom XML parts (for SDT data binding). */
  readonly customXmlParts?: readonly CustomXmlPart[];
  /** Embedded font binaries (stored in word/fonts/). */
  readonly embeddedFonts?: readonly EmbeddedFont[];
  /** Opaque (unrecognized) parts preserved for round-trip fidelity. */
  readonly opaqueParts?: readonly OpaquePart[];
  /**
   * How to handle image references whose binary is not in `images`.
   *
   * - `"throw"` (default): throw `DocxWriteError` from `add*` so the caller
   *   notices the broken reference immediately.
   * - `"warn"`: emit a `console.warn` and skip the rId. The output will be
   *   missing this image's relationship — useful only for tooling that
   *   knows it's intentionally producing a partial document.
   *
   * The previous behaviour (silent skip) is gone because it generated
   * invalid DOCX files.
   */
  readonly missingImagePolicy?: "throw" | "warn";
  /**
   * Security policy. Currently used to surface `rawXmlPolicy` to the renderers
   * (preserve / strip / reject) so opaque rawXml fields behave consistently
   * with the buffered `packageDocx` writer.
   */
  readonly securityPolicy?: WordSecurityPolicy;
  /**
   * Output sink. When provided, compressed bytes flow through it as soon
   * as the underlying ZIP pipeline produces them, with backpressure
   * awaited via {@link SinkAdapter}. Total writer-side memory then
   * stays O(largest_part) regardless of final DOCX size.
   *
   * Accepts:
   * - Web `WritableStream` (browser, Deno, modern Node)
   * - Node `Writable` (`fs.createWriteStream`, http response, …)
   * - Any duck-typed object exposing `write(chunk)` + `end()` plus
   *   `once("drain"|"error"|"close"|"finish", …)` listeners
   *
   * When the sink is provided, {@link StreamingDocxWriter.finalize}
   * resolves to a zero-length `Uint8Array` (the bytes have already
   * been delivered to the sink — the empty return is a sentinel that
   * keeps `finalize`'s return type stable across both modes). Use
   * {@link StreamingDocxWriter.addAsync} (instead of
   * {@link StreamingDocxWriter.add}) when you want each `add` call to
   * await actual sink drain — that gives true end-to-end backpressure
   * for tight production loops.
   *
   * When omitted, behaviour is unchanged: compressed bytes accumulate
   * internally and `finalize()` returns the assembled `Uint8Array`.
   */
  readonly sink?: AnySink;
}

/** Progress callback for streaming writer. */
export type StreamingProgressCallback = (info: {
  /** Number of body elements written so far. */
  elementsWritten: number;
  /** Current phase: "body" | "finalizing". */
  phase: string;
}) => void;

/**
 * Streaming DOCX writer. Body elements are serialized to XML and compressed
 * into the ZIP pipeline as they arrive, so the body **model** is not retained
 * after each `add()`.
 *
 * When constructed with `options.sink`, compressed bytes are pushed into
 * the sink as soon as the ZIP layer produces them, with backpressure
 * awaited via {@link SinkAdapter}; in this mode peak memory is
 * O(largest_part) and `finalize()` resolves to a zero-length
 * `Uint8Array` (the bytes are already in the sink). Without a sink,
 * compressed bytes accumulate in `_outputChunks` and `finalize()`
 * returns the assembled `Uint8Array` (peak memory
 * O(compressed_docx_size)).
 *
 * Use {@link addAsync} (instead of {@link add}) when driving the sink
 * variant to obtain true end-to-end backpressure: each call awaits all
 * pending sink writes before resolving.
 */
export declare class StreamingDocxWriter {
  private readonly _options;
  private _elementCount;
  private _finalized;
  private _onProgress?;
  private readonly _xmlBuffer;
  private _zip;
  /** Compressed-byte accumulator used when no `sink` is supplied. */
  private _outputChunks;
  /** Sink-mode adapter (set when `options.sink` is provided). */
  private _sinkAdapter?;
  /**
   * Promise chain serialising every sink write. The `Zip` callback fires
   * synchronously, so we queue chunks via `.then(...)` and let
   * `addAsync` / `finalize` await the chain.
   */
  private _pendingDrain;
  private _documentStream;
  private _documentZipFile;
  private _headerWritten;
  /**
   * Whether the previously-written body element was a `<w:tbl>`. Tracked
   * so we can insert a separator `<w:p/>` between adjacent tables — Word
   * rejects (and silently merges) two `<w:tbl>` blocks that share no
   * paragraph between them per ECMA-376 §17.13.5.34.
   */
  private _prevWasTable;
  /**
   * First error reported by the underlying ZIP stream (compression failure,
   * write-after-end, etc.). Stored synchronously by the `Zip` callback and
   * surfaced from `finalize()` so callers receive a rejection instead of an
   * indefinitely-pending promise.
   */
  private _streamError;
  private _documentRels;
  private _renderCtx;
  /** Charts encountered in body content; rendered to `word/charts/chartN.xml` at finalize time. */
  private readonly _bodyCharts;
  /** ChartEx items encountered in body content. */
  private readonly _bodyChartEx;
  /**
   * Per-chart sequence numbers fixed at registration time. Both classes
   * use independent monotonic counters; the writer emits
   * `word/charts/chart{n}.xml` for the regular chart family and
   * `word/charts/chartEx{n}.xml` for the chartEx family.
   *
   * The previous scheme used `chartCount + chartExCount + 1` as the
   * sequence number for both classes, which made the rId path encoded in
   * documentRels disagree with the path used at finalize when the writer
   * iterated the two arrays separately. The result was relationships
   * pointing at non-existent chart parts.
   */
  private readonly _chartNum;
  private _nextChartSeq;
  private _nextChartExSeq;
  /** Hyperlink object identities already registered (to keep one rId per object). */
  private readonly _registeredHyperlinks;
  /** Image rIds already registered to documentRels (avoid duplicates). */
  private readonly _registeredImageRIds;
  /** header map key → newly allocated rId. Populated by `_allocateHeaderFooterRIds`. */
  private readonly _headerKeyToRid;
  /** footer map key → newly allocated rId. */
  private readonly _footerKeyToRid;
  /** rId allocated for the auto-generated watermark header (if any). */
  private _watermarkHeaderRid;

  constructor(options?: StreamingDocxOptions);

  /** Set a progress callback. */
  onProgress(cb: StreamingProgressCallback): this;

  /**
   * Add a single body element. The element is immediately serialized to XML
   * and pushed into the ZIP compression pipeline. After this call, the element
   * can be garbage collected — it is not retained.
   */
  add(element: BodyContent): this;

  /** Add multiple body elements at once. */
  addMany(elements: readonly BodyContent[]): this;

  /**
   * Async variant of {@link add}. After serialising the element, awaits
   * any pending writes to the configured `sink` so callers driving large
   * input get true end-to-end backpressure rather than letting the
   * sink-write queue grow unbounded inside the writer.
   *
   * Without `options.sink` this is equivalent to `add` (resolving
   * synchronously after element serialisation).
   *
   * Throws if the sink reports an error: previous queued writes whose
   * rejection was captured into `_streamError` surface here.
   */
  // NOTE(review): the type argument was missing from this declaration;
  // `void` is assumed — confirm against the implementation.
  addAsync(element: BodyContent): Promise<void>;

  /**
   * Async variant of {@link addMany}. Awaits `addAsync` for each element
   * so backpressure is honoured between every body element.
   */
  // NOTE(review): `Promise<void>` assumed, as for `addAsync` — confirm.
  addManyAsync(elements: readonly BodyContent[]): Promise<void>;

  /** Add a paragraph with simple text content. */
  addText(content: string, properties?: Paragraph["properties"]): this;

  /** Get the count of body elements written so far. */
  get elementCount(): number;

  /**
   * Finalize the document.
   *
   * - Without `options.sink`: returns the assembled `Uint8Array`
   *   containing the full DOCX file.
   * - With `options.sink`: drains any pending sink writes, calls
   *   `sink.end()`, and resolves to a zero-length `Uint8Array`. The
   *   DOCX bytes have already been delivered to the sink — the empty
   *   return is a sentinel signalling "writer is done; consumer keeps
   *   the data".
   */
  finalize(): Promise<Uint8Array>;

  /**
   * Reset the writer for reuse.
   *
   * Throws when the writer was constructed with an `options.sink`: a
   * sink can only be `end()`ed once, so reusing the same writer would
   * produce an undefined byte stream. Construct a new writer (with a
   * new sink) for each document instead.
   */
  reset(): this;

  private _initZip;
  private _write;
  private _writeDocumentHeader;
  private _writeBodyElement;
  /**
   * Emit an empty `<w:p/>` to separate two adjacent tables. Required by
   * ECMA-376 §17.13.5.34 — Word rejects packages where two `<w:tbl>`
   * elements appear without a paragraph between them.
   */
  private _writeSeparatorParagraph;
  /**
   * Scan a single body element and register any chart / hyperlink / image
   * references it introduces against the writer's accumulated state. This
   * must run before the element is serialized so the render context already
   * carries the relationships the renderer will look up.
   */
  private _registerElementReferences;
  private _registerParagraphReferences;
  private _registerChart;
  private _registerChartEx;
  private _registerHyperlink;
  private _registerImageRId;
  private _lookupImage;
  private _writeDocumentFooter;
  /**
   * Allocate header/footer relationship IDs deterministically (in the same
   * order auxiliary parts will be emitted). Called once during finalize so
   * `_writeDocumentFooter` and `_addAuxiliaryParts` agree on which rId
   * points at which header/footer XML part.
   */
  private _allocateHeaderFooterRIds;
  private _rewireSectionRefs;
  /**
   * Synthesise section-property header references for every header part
   * the caller registered. Recognised type keys (`default`/`first`/`even`)
   * keep their semantics; any other key (round-tripped rId names from
   * readDocx, custom strings) falls back to `"default"` so the header is
   * actually referenced — without this fallback header parts can sit in
   * the package as dangling content.
   *
   * If multiple keys map to the same logical type, only the first one is
   * kept so we don't emit two `<w:headerReference>` children for the same
   * type (Word's behaviour with duplicates is implementation-defined).
   */
  private _synthesizeHeaderRefs;
  private _synthesizeFooterRefs;
  private _addAuxiliaryParts;
  private _endStream;
  private _assembleOutput;
}

/**
 * Create a new streaming DOCX writer.
 *
 * @example
 * ```ts
 * const writer = createDocxStream({
 *   styles: [{ type: "paragraph", styleId: "Normal", name: "Normal" }]
 * });
 *
 * for (let i = 0; i < 100000; i++) {
 *   writer.addText(`Paragraph ${i}`);
 * }
 *
 * const buffer = await writer.finalize();
 * ```
 */
export declare function createDocxStream(options?: StreamingDocxOptions): StreamingDocxWriter;