/**
 * Phase 2 of document ingestion: deterministic structural extraction
 * from a Markdown intermediate to RDF triples + source-file linkage.
 *
 * This is the "Layer 1 structural" extraction defined by
 * `19_MARKDOWN_CONTENT_TYPE.md` — it runs without an LLM and produces
 * triples from explicit Markdown/YAML structure only:
 *
 *   - YAML frontmatter keys → subject properties
 *   - `type` frontmatter key → rdf:type
 *   - Wikilinks `[[Target]]` → schema:mentions
 *   - Hashtags `#keyword` → schema:keywords
 *   - Dataview `key:: value` inline fields → properties
 *   - Heading hierarchy → dkg:hasSection
 *
 * When `sourceFileIri` is provided the extractor emits the §10.1 data-
 * graph linkage triples it owns — specifically row 1
 * (`<entityUri> dkg:sourceFile <fileUri>`) and row 3
 * (`<entityUri> dkg:rootEntity <resolvedRootEntity>`). These come back
 * in the `sourceFileLinkage` return field so the daemon can keep them
 * distinct from content triples before merging them into the
 * assertion graph. The field was renamed from `provenance` in Round 13
 * Bug 39 to remove the semantic clash with its original
 * extraction-run-metadata meaning.
 *
 * Row 2 (`<entityUri> dkg:sourceContentType "<original-mime>"`) is
 * owned by the daemon (Round 9 Bug 1 / Round 9 Bug 27 rulings), not
 * this module — only the daemon has access to the original upload
 * content type that row 2 must describe. The daemon emits row 2
 * alongside the extractor's rows 1 and 3 in the same atomic insert.
 *
 * Rows 4-13 (file descriptor block + ExtractionProvenance resource
 * described in §3.2/§10.2) are also daemon-owned — the daemon has
 * natural access to the UAL, the fresh provenance URI, the agent DID,
 * and the `_meta` writes. This module stays free of `_meta` /
 * extraction-run concerns.
 *
 * Spec: 05_PROTOCOL_EXTENSIONS.md §6.3 / §6.5, 19_MARKDOWN_CONTENT_TYPE.md §10
 */
import { type ExtractionQuad as Quad } from '@origintrail-official/dkg-core';
/**
 * Input to {@link extractFromMarkdown}: the Markdown intermediate plus
 * optional IRIs that steer how the document subject, the source-file
 * linkage, and the root entity are resolved.
 */
export interface MarkdownExtractInput {
    /** Markdown source text (the Phase 1 mdIntermediate). */
    markdown: string;
    /**
     * DID of the extracting agent.
     *
     * NOTE(review): this was documented as "recorded in provenance", but
     * the `sourceFileLinkage` docs state that the extractor no longer
     * produces any extraction-run metadata (those rows are daemon-owned).
     * Whether the implementation still reads this field is not visible
     * from this declaration — confirm before relying on it.
     */
    agentDid: string;
    /** Optional ontology URI (not yet used by Layer 1 — reserved for Layer 2). */
    ontologyRef?: string;
    /**
     * Optional stable subject IRI for the document. When omitted, the extractor
     * derives a subject from frontmatter `id` or the first H1 heading.
     */
    documentIri?: string;
    /**
     * IRI of the source blob this markdown was extracted from, in the form
     * `urn:dkg:file:keccak256:<hex>`. When set, the extractor emits the
     * §10.1 `dkg:sourceFile` linkage quad (row 1) with `<entityUri>` as
     * subject and this URI as object.
     *
     * The file descriptor block (rows 4-8) is subsequently filtered out of
     * `assertionPromote`'s root-entity partition via a subject-prefix
     * filter on `urn:dkg:file:` in `packages/publisher/src/dkg-publisher.ts`
     * — that is how cross-assertion contention is prevented without using
     * blank-node subjects. See `19_MARKDOWN_CONTENT_TYPE.md §10.2` for the
     * normative rule and spec-engineer's reconciled ruling on Codex Bug 8
     * for the history (Round 3 tried blank nodes; Round 4 reverted to URI
     * subjects + promote-time filter after a `skolemizeByEntity` audit showed
     * the blank-node approach silently drops the ExtractionProvenance
     * block, which is a correctness smell).
     */
    sourceFileIri?: string;
    /**
     * Caller-supplied root-entity IRI. In V10.0 this is usually the
     * document subject IRI itself (`<entityUri> dkg:rootEntity <entityUri>`).
     *
     * Resolution precedence (see §19.10.1:508), highest first:
     *   1. a frontmatter `rootEntity` key with a string value,
     *   2. this input field,
     *   3. the document subject (reflexive default).
     *
     * The resolved value is returned on
     * `MarkdownExtractOutput.resolvedRootEntity` so the daemon can reuse it
     * for the `_meta` row 14 write without re-resolving.
     */
    rootEntityIri?: string;
}
/**
 * Result of {@link extractFromMarkdown}: content triples, source-file
 * linkage quads, and the IRIs the extractor resolved along the way.
 */
export interface MarkdownExtractOutput {
    /** Extracted RDF triples describing the document content. */
    triples: Quad[];
    /**
     * §10.1 source-file linkage quads on the document subject. Holds rows
     * 1 and 3 (`dkg:sourceFile` + `dkg:rootEntity`); row 2
     * (`dkg:sourceContentType`) is owned by the daemon because it has the
     * original upload content type and the extractor does not. Empty when
     * `sourceFileIri` is not supplied. The daemon merges these into the
     * same data graph as `triples` before committing.
     *
     * History — Round 13 Bug 39 renamed this field from `provenance` to
     * `sourceFileLinkage`. At module introduction (`ff8afe3`) the field
     * held "`dkg:ExtractionProvenance` blank-identifier records for every
     * extracted triple", i.e. extraction-run metadata (agent, timestamp,
     * method). The PR #121 chain repurposed it to hold source-file
     * linkage triples, which clashed with the old meaning. Round 9 Bug 27
     * then moved the extraction-run provenance rows (9-13 on the
     * `<urn:dkg:extraction:uuid>` subject) to the daemon's route handler,
     * so the extractor no longer produces ANY extraction-run metadata —
     * only source-file linkage. The rename makes the contract honest:
     * this field contains linkage triples, full stop.
     */
    sourceFileLinkage: Quad[];
    /** The subject IRI used for the document (useful to the caller for indexing). */
    subjectIri: string;
    /**
     * The resolved root-entity IRI, following the §19.10.1:508 precedence
     * rules: frontmatter `rootEntity` key > explicit `rootEntityIri` input >
     * reflexive fallback to the document subject. The daemon reuses this
     * value as the object of the `_meta` row 14 quad so the data-graph row 3
     * and `_meta` row 14 stay in sync without the daemon re-running the
     * resolution logic.
     */
    resolvedRootEntity: string;
}
/**
 * Run the full Phase 2 structural extraction. Deterministic, no LLM.
 *
 * Empty arrays are valid results — a Markdown document with no
 * frontmatter, no wikilinks, no tags, no dataview fields, and no
 * headings produces zero triples.
 *
 * @param input - Markdown text plus the optional subject, source-file,
 *   and root-entity IRIs described on `MarkdownExtractInput`.
 * @returns `{ triples, sourceFileLinkage, subjectIri, resolvedRootEntity }`
 *   as described on `MarkdownExtractOutput`.
 */
export declare function extractFromMarkdown(input: MarkdownExtractInput): MarkdownExtractOutput;
//# sourceMappingURL=markdown-extractor.d.ts.map