/**
 * Read and validate an `emdash-plugin.jsonc` manifest from disk.
 *
 * Failure modes, each with a distinct error code for scriptable consumers
 * (`validate --json`, programmatic API users):
 *
 * - `MANIFEST_NOT_FOUND` — file doesn't exist at the resolved path.
 * - `MANIFEST_TOO_LARGE` — file exceeds `MANIFEST_MAX_BYTES`. Reading
 *   stops at the cap; the file is never fully buffered.
 * - `MANIFEST_PARSE_ERROR` — JSONC parse failure (missing bracket, control
 *   char in string, duplicate keys). Trailing commas and comments are
 *   accepted. Includes line + column from `jsonc-parser`'s offset.
 * - `MANIFEST_VALIDATION_ERROR` — JSONC parsed cleanly but the value
 *   failed the Zod schema. Includes the field path and the offending
 *   value's location in the source where possible.
 *
 * The line/column mapping is critical for editor-side workflows: a user
 * running `emdash-plugin validate` from a CI step wants the same kind of
 * pointer they'd get from `tsc` or `eslint`, not a Zod issue tree.
 */

import { open } from "node:fs/promises";
import { resolve } from "node:path";

import { parseTree, type Node, type ParseError, printParseErrorCode } from "jsonc-parser";
import type { ZodIssue } from "zod";

import { ManifestSchema, type Manifest } from "./schema.js";

/**
 * Conventional manifest filename. Lives next to the plugin's `package.json`.
 */
export const MANIFEST_FILENAME = "emdash-plugin.jsonc";

/**
 * Hard cap on the bytes we'll buffer for a manifest. The largest real-world
 * v1 manifest under the current schema is a few hundred bytes; even a heavily-
 * populated future version with all the long-form sections from issue #1030
 * (5 sections × 20 KB cap each from the lexicon) tops out under 128 KB. We
 * pick 1 MiB so accidental mis-targets (`--manifest ./large.tar`) fail fast
 * with a clear error rather than OOMing the CLI.
 */
export const MANIFEST_MAX_BYTES = 1024 * 1024;

export type ManifestErrorCode =
	| "MANIFEST_NOT_FOUND"
	| "MANIFEST_TOO_LARGE"
	| "MANIFEST_PARSE_ERROR"
	| "MANIFEST_VALIDATION_ERROR";

/**
 * Error thrown for every documented manifest failure mode. `code` is the
 * machine-readable discriminator; `message` is the human-readable text.
 */
export class ManifestError extends Error {
	override readonly name = "ManifestError";
	readonly code: ManifestErrorCode;
	/** Resolved absolute path of the manifest file. */
	readonly path: string;
	/**
	 * Issues for `MANIFEST_VALIDATION_ERROR`. One per failed rule, each
	 * carrying a JSON pointer-style path and an optional source location.
	 * Empty for the other error codes.
	 */
	readonly issues: ManifestIssue[];

	constructor(
		code: ManifestErrorCode,
		message: string,
		path: string,
		issues: ManifestIssue[] = [],
	) {
		super(message);
		this.code = code;
		this.path = path;
		this.issues = issues;
	}
}

export interface ManifestIssue {
	/** Dotted/bracketed JSON path, e.g. `authors[0].email`. */
	path: string;
	message: string;
	/** 1-indexed line and column in the manifest source, when known. */
	location?: { line: number; column: number };
}

export interface LoadManifestResult {
	manifest: Manifest;
	/** Resolved absolute path. */
	path: string;
}

/**
 * Load and validate a manifest at `path`. `path` may be a directory (in
 * which case `emdash-plugin.jsonc` is appended) or a file.
 *
 * Successful return guarantees the manifest is schema-valid (but
 * normalisation to the publish-input shape still needs `./translate.ts`).
 *
 * @throws ManifestError for the documented failure modes (not found, too
 *   large, parse error, validation error). Any other I/O error raised while
 *   opening or reading the file (e.g. a permission error) propagates
 *   unchanged.
 */
export async function loadManifest(path: string): Promise<LoadManifestResult> {
	const resolved = resolve(path);
	// Heuristic: paths that end in `.jsonc` or `.json` are treated as
	// files; everything else is treated as a directory. We don't `stat`
	// to disambiguate because the error path "missing file" should be the
	// same regardless of which form the caller passed.
	const filePath =
		resolved.endsWith(".jsonc") || resolved.endsWith(".json")
			? resolved
			: resolve(resolved, MANIFEST_FILENAME);

	// Bounded read: open the file and read at most MANIFEST_MAX_BYTES+1
	// bytes. The extra byte is a sentinel — if we get it, the file is
	// definitely over the cap regardless of what `stat` would say.
	// This closes the stat-then-readFile race where a concurrent writer
	// could grow the file between size check and buffer.
	const source = await readBoundedUtf8(filePath);
	return parseAndValidate(source, filePath);
}

/**
 * Read a UTF-8 file with a hard cap of `MANIFEST_MAX_BYTES` bytes.
 *
 * We allocate a buffer of `MANIFEST_MAX_BYTES + 1` and read into it; if
 * the read fills the whole buffer, the file is at least one byte over
 * the limit and we reject. This avoids the TOCTOU window of a separate
 * `stat` call: a concurrent writer can grow the file between syscalls,
 * but it can never make our buffer larger than what we allocated up
 * front.
 *
 * We loop because a single `read` may return fewer bytes than requested.
 *
 * @throws ManifestError `MANIFEST_NOT_FOUND` when the file cannot be opened
 *   because it (or a directory on the way to it) does not exist;
 *   `MANIFEST_TOO_LARGE` when the file exceeds the cap. Other errors from
 *   `open`/`read` are rethrown as-is.
 */
async function readBoundedUtf8(filePath: string): Promise<string> {
	let handle: Awaited<ReturnType<typeof open>>;
	try {
		handle = await open(filePath, "r");
	} catch (error) {
		if (isNodeNotFoundError(error)) {
			throw new ManifestError(
				"MANIFEST_NOT_FOUND",
				`No manifest at ${filePath}. Create one with: emdash-plugin init`,
				filePath,
			);
		}
		throw error;
	}

	try {
		// One extra byte so we can detect oversize without reading
		// arbitrarily much.
		const buffer = Buffer.allocUnsafe(MANIFEST_MAX_BYTES + 1);
		let totalRead = 0;
		while (totalRead < buffer.length) {
			const { bytesRead } = await handle.read(
				buffer,
				totalRead,
				buffer.length - totalRead,
				totalRead,
			);
			// Zero bytes means end of file.
			if (bytesRead === 0) break;
			totalRead += bytesRead;
		}
		if (totalRead > MANIFEST_MAX_BYTES) {
			throw new ManifestError(
				"MANIFEST_TOO_LARGE",
				`Manifest at ${filePath} is larger than the ${MANIFEST_MAX_BYTES}-byte cap. Check that you pointed --manifest at the right file.`,
				filePath,
			);
		}
		// Only the filled prefix is decoded; the rest of the unsafe
		// allocation is uninitialised memory and must never be exposed.
		return buffer.subarray(0, totalRead).toString("utf8");
	} finally {
		await handle.close().catch(() => {
			// Closing a handle should never fail in practice; if it
			// does, swallow it — the read result is already in hand.
		});
	}
}

/**
 * Variant for callers that already have the source text in hand (tests,
 * editor integrations that read the buffer). The `path` argument is used
 * for error messages only.
 *
 * @throws ManifestError `MANIFEST_PARSE_ERROR` or `MANIFEST_VALIDATION_ERROR`.
 */
export function parseAndValidateManifest(source: string, path: string): LoadManifestResult {
	return parseAndValidate(source, path);
}

// ──────────────────────────────────────────────────────────────────────────
// Internals
// ──────────────────────────────────────────────────────────────────────────

/**
 * Parse `source` as JSONC, reject duplicate keys, then validate the value
 * against `ManifestSchema`.
 *
 * @throws ManifestError `MANIFEST_PARSE_ERROR` for the first syntax error,
 *   empty content, or a duplicate key; `MANIFEST_VALIDATION_ERROR` (with one
 *   `ManifestIssue` per Zod issue) when the schema rejects the value.
 */
function parseAndValidate(source: string, filePath: string): LoadManifestResult {
	const parseErrors: ParseError[] = [];
	// `parseTree` gives us both the parsed value AND the syntax tree, so we
	// can map a Zod issue's path back to a source offset. `parse` alone
	// loses that information.
	const root = parseTree(source, parseErrors, {
		// Comments are part of the JSONC contract. Trailing commas are
		// allowed because they reduce diff noise when the user adds a new
		// field at the end.
		disallowComments: false,
		allowTrailingComma: true,
		allowEmptyContent: false,
	});

	// Only the first syntax error is reported.
	const firstError = parseErrors[0];
	if (firstError !== undefined) {
		const { line, column } = offsetToLineCol(source, firstError.offset);
		throw new ManifestError(
			"MANIFEST_PARSE_ERROR",
			`${filePath}:${line}:${column}: ${printParseErrorCode(firstError.error)}`,
			filePath,
		);
	}
	if (!root) {
		// Shouldn't be reachable when `allowEmptyContent: false` is set and
		// `parseErrors` is empty, but `parseTree`'s return type is nullable.
		throw new ManifestError("MANIFEST_PARSE_ERROR", `${filePath}: file is empty`, filePath);
	}

	// Reject duplicate keys before validation. `nodeToValue` is last-wins
	// (matches JSON.parse semantics) which silently shadows earlier keys
	// — review-hostile for a security-sensitive document like this. We
	// scan once for duplicates and surface them as a parse error with a
	// source location, so a `git diff` reviewer can't be fooled by a
	// `"publisher"` entry at the top of the file that gets overridden by
	// a different `"publisher"` entry further down.
	const duplicate = findDuplicateKey(root);
	if (duplicate) {
		const { line, column } = offsetToLineCol(source, duplicate.offset);
		throw new ManifestError(
			"MANIFEST_PARSE_ERROR",
			`${filePath}:${line}:${column}: duplicate key "${duplicate.key}". Each property may only be declared once.`,
			filePath,
		);
	}

	const value = nodeToValue(root);
	const result = ManifestSchema.safeParse(value);
	if (!result.success) {
		const issues = result.error.issues.map((issue: ZodIssue) =>
			zodIssueToManifestIssue(issue, source, root),
		);
		// One `file:line:col: path: message` line per issue.
		const summary = issues
			.map((i) => {
				const loc = i.location ? `:${i.location.line}:${i.location.column}` : "";
				return `${filePath}${loc}: ${i.path ? `${i.path}: ` : ""}${i.message}`;
			})
			.join("\n");
		throw new ManifestError(
			"MANIFEST_VALIDATION_ERROR",
			`Manifest validation failed:\n${summary}`,
			filePath,
			issues,
		);
	}

	return { manifest: result.data, path: filePath };
}

/**
 * Map a Zod issue to a manifest issue. The issue path is rendered in the
 * JSONC-style `authors[0].email` syntax users will recognise.
 *
 * Zod 4 types path segments as `PropertyKey` (string | number | symbol).
 * Symbols cannot appear in a JSON-parsed value's path (JSON has no symbol
 * keys), so we narrow defensively and render any stray symbol through its
 * `toString()` form in the displayed path.
 *
 * Special-cases `unrecognized_keys` (typo'd field names): Zod reports the
 * issue at the parent path with the offending key(s) in `issue.keys`.
 * Without special handling, the line:col points at the parent object's
 * opening brace, not the actual typo. We resolve the first listed key
 * inside the parent and use ITS source offset, so a `"licens": "MIT"`
 * mistake gets the pointer landing on the bad key's line and column.
 */
function zodIssueToManifestIssue(issue: ZodIssue, source: string, root: Node): ManifestIssue {
	const path = narrowZodPath(issue.path);
	const pathStr = formatZodPath(path);

	// Default pointer: the node at the issue path (or its deepest existing
	// ancestor).
	const target = findNodeAtPath(root, path);
	let offset: number | undefined = target?.offset;

	if (issue.code === "unrecognized_keys") {
		const keys = (issue as ZodIssue & { keys?: readonly string[] }).keys;
		const firstKey = keys?.[0];
		if (firstKey !== undefined && target?.type === "object") {
			// Prefer the offending key's own position over the parent's.
			const keyNode = findProperty(target, firstKey)?.children?.[0];
			if (keyNode) offset = keyNode.offset;
		}
	}

	const location = offset !== undefined ? offsetToLineCol(source, offset) : undefined;
	// Omit `location` entirely (rather than setting it to undefined) when
	// no source position is known.
	return location
		? { path: pathStr, message: issue.message, location }
		: { path: pathStr, message: issue.message };
}

/**
 * Coerce a Zod 4 issue path (`PropertyKey[]`) to the string|number form
 * the rest of the loader uses. A symbol segment is impossible for JSONC
 * input, but we render it defensively (via `toString()`) rather than
 * crashing.
 */
function narrowZodPath(path: ReadonlyArray<PropertyKey>): Array<string | number> {
	return path.map((segment) => {
		if (typeof segment === "string" || typeof segment === "number") return segment;
		return segment.toString();
	});
}

/**
 * Format a Zod path array as `authors[0].email`. Numbers become bracketed
 * indices; strings become dot-prefixed (except when nothing has been
 * emitted yet).
 */
function formatZodPath(path: ReadonlyArray<string | number>): string {
	let out = "";
	for (const segment of path) {
		if (typeof segment === "number") {
			out += `[${segment}]`;
		} else {
			out += out.length === 0 ? segment : `.${segment}`;
		}
	}
	return out;
}

/**
 * Find the `property` node named `key` among the direct children of
 * `objectNode`. A `property` node's children are `[keyNode, valueNode]`.
 * Returns undefined when there are no children or no such property.
 */
function findProperty(objectNode: Node, key: string): Node | undefined {
	return objectNode.children?.find(
		(child) =>
			child.type === "property" &&
			child.children?.[0]?.type === "string" &&
			child.children[0].value === key,
	);
}

/**
 * Walk the JSONC syntax tree to find the node at a given path. When the
 * path traverses into a missing key or a wrong-shape value, returns the
 * deepest ancestor that DID exist — so the resulting line:col still
 * points at something useful (the parent object, where the missing
 * property "should have been"). This matters most for a missing required
 * key: Zod's path is `[key]`, the value doesn't exist; returning the
 * containing object's offset puts the pointer at its opening brace, which
 * an editor highlights as "issue with this object".
 *
 * With the fallback in place this always yields a node; the return type
 * stays nullable so existing callers keep compiling unchanged.
 */
function findNodeAtPath(root: Node, path: ReadonlyArray<string | number>): Node | undefined {
	let current: Node = root;
	for (const segment of path) {
		let next: Node | undefined;
		if (typeof segment === "number") {
			if (current.type !== "array" || !current.children) return current;
			next = current.children[segment];
		} else {
			if (current.type !== "object" || !current.children) return current;
			// We want the property's value node for further traversal.
			next = findProperty(current, segment)?.children?.[1];
		}
		// Missing index/key (e.g. typo'd key, missing required field):
		// fall back to the containing structure.
		if (!next) return current;
		current = next;
	}
	return current;
}

/**
 * Recursively scan a node for duplicate property names. Key nodes are
 * visited depth-first in source order, and the first key that repeats an
 * earlier sibling is returned together with the offset of that repeated
 * occurrence, for line:column reporting.
 *
 * We scan the entire tree, not just the root: duplicate keys inside
 * `author: { ... }` or `security: { ... }` are equally review-hostile.
 */
function findDuplicateKey(node: Node): { key: string; offset: number } | undefined {
	if (node.type === "object" && node.children) {
		const seen = new Set<string>();
		for (const prop of node.children) {
			if (prop.type !== "property") continue;
			const keyNode = prop.children?.[0];
			if (!keyNode || keyNode.type !== "string" || typeof keyNode.value !== "string") {
				continue;
			}
			if (seen.has(keyNode.value)) {
				return { key: keyNode.value, offset: keyNode.offset };
			}
			seen.add(keyNode.value);
			const valueNode = prop.children?.[1];
			if (valueNode) {
				const nested = findDuplicateKey(valueNode);
				if (nested) return nested;
			}
		}
	} else if (node.type === "array" && node.children) {
		for (const child of node.children) {
			const nested = findDuplicateKey(child);
			if (nested) return nested;
		}
	}
	return undefined;
}

/**
 * Convert a JSONC syntax-tree node to its plain JavaScript value. The
 * `parseTree` API doesn't return values directly; this walks the tree.
 *
 * We can't use `jsonc-parser`'s `parse()` (which would give us the value
 * directly) because we need the tree anyway for error-location mapping,
 * and parsing twice doubles the work for a file we're about to validate.
 */
function nodeToValue(node: Node): unknown {
	switch (node.type) {
		case "object": {
			const obj: Record<string, unknown> = {};
			for (const prop of node.children ?? []) {
				if (prop.type !== "property") continue;
				const [keyNode, valueNode] = prop.children ?? [];
				if (!keyNode || keyNode.type !== "string" || !valueNode) continue;
				if (typeof keyNode.value !== "string") continue;
				// Define rather than assign: `obj["__proto__"] = x` would
				// invoke the inherited `__proto__` setter and change the
				// object's prototype instead of creating a key. Defining
				// an own data property keeps JSON.parse semantics, so such
				// a key stays visible to schema validation.
				Object.defineProperty(obj, keyNode.value, {
					value: nodeToValue(valueNode),
					enumerable: true,
					writable: true,
					configurable: true,
				});
			}
			return obj;
		}
		case "array":
			return (node.children ?? []).map((child) => nodeToValue(child));
		case "string":
		case "number":
		case "boolean":
		case "null":
			return node.value;
		default:
			return undefined;
	}
}

/**
 * Convert an offset in `source` into 1-indexed line + column. Matches the
 * convention `tsc` and `eslint` use for error pointers.
 *
 * The offset is a string index (UTF-16 code units, as used by
 * `charCodeAt`), not a byte offset, and columns are counted in the same
 * units. Offsets past the end of `source` are clamped to its length. Only
 * `\n` starts a new line; a `\r` counts as an ordinary column.
 */
function offsetToLineCol(source: string, offset: number): { line: number; column: number } {
	let line = 1;
	let column = 1;
	const max = Math.min(offset, source.length);
	for (let i = 0; i < max; i++) {
		if (source.charCodeAt(i) === 10 /* \n */) {
			line++;
			column = 1;
		} else {
			column++;
		}
	}
	return { line, column };
}

/**
 * True when `error` is a Node.js error meaning the file cannot exist at the
 * requested path: `ENOENT` (no such file or directory) or `ENOTDIR` (a
 * component of the path is not a directory — e.g. the caller passed a
 * regular file that the directory heuristic appended the manifest name to).
 */
function isNodeNotFoundError(error: unknown): boolean {
	if (!(error instanceof Error) || !("code" in error)) return false;
	const { code } = error as { code: unknown };
	return code === "ENOENT" || code === "ENOTDIR";
}