/** * `emdash-plugin publish --url ` * * Thin citty wrapper around `publishRelease` from `../publish/api.js`. * * Responsibilities here are limited to: * - parsing args and reading filesystem credentials, * - fetching the tarball at `--url` (with URL/size guards) so the API has * bytes to work with, * - extracting the manifest from those bytes BEFORE printing any tarball * metadata so users don't see "looks good" output for a malformed file, * - opening an authenticated `PublishingClient` from the OAuth session, * - rendering the API's structured output through consola, * - exiting non-zero on `PublishError`. * * Everything else (FAIR's immutability rule, profile bootstrap validation, * deprecated-capability hard-fail, AT URI construction) lives in the API so * tests can run it against a mock PDS. */ import { lookup as dnsLookup } from "node:dns/promises"; import { readFile, stat } from "node:fs/promises"; import { dirname, join, resolve } from "node:path"; import type { PluginManifest } from "@emdash-cms/plugin-types"; import { FileCredentialStore, PublishingClient } from "@emdash-cms/registry-client"; import { defineCommand } from "citty"; import consola from "consola"; import pc from "picocolors"; import { formatBytes, MAX_BUNDLE_SIZE, validateBundleSize } from "../bundle/utils.js"; import { redirectConsolaToStderr } from "../cli-output.js"; import { loadManifest, MANIFEST_FILENAME, ManifestError } from "../manifest/load.js"; import { checkPublisher, PublisherCheckError, writePublisherBack } from "../manifest/publisher.js"; import { manifestToProfileInput, normaliseManifest, type NormalisedManifest, resolveSections, SectionError, } from "../manifest/translate.js"; import { sha256Multihash } from "../multihash.js"; import { resumeSession } from "../oauth.js"; import { PublishError, publishRelease, type ProfileInput, type PublishLogger, type ReleaseArtifactsInput, } from "../publish/api.js"; import { ArtifactUploadError, resolveReleaseArtifacts } from "../publish/upload-artifacts.js"; /** * Hard cap on the gzipped tarball we'll buffer into memory. Sized for the * decompressed bundle cap from RFC 0001 (MAX_BUNDLE_SIZE = 256 KB) plus * tar headers and worst-case gzip overhead. Anything larger is rejected at * fetch time before decompression. The decompressed bundle is then re-checked * against the per-file and total caps via `validateBundleSize`. */ const MAX_TARBALL_BYTES = 384 * 1024; export const publishCommand = defineCommand({ meta: { name: "publish", description: "Publish a sandboxed plugin release to the registry (atproto + FAIR-shaped records)", }, args: { url: { type: "string", description: "Public URL where the tarball is hosted (artifact source-of-truth)", required: true, }, local: { type: "string", description: "Optional path to a local copy of the tarball at --url. The CLI still downloads the URL (it has to compute the checksum from what consumers will fetch), but cross-checks the local bytes match. Use this to catch a stale upload before publishing.", }, license: { type: "string", description: "(deprecated: set `license` in emdash-plugin.jsonc) SPDX license expression. Required on first publish; ignored thereafter (the existing profile wins). Overrides the manifest value when both are present.", }, "author-name": { type: "string", description: "(deprecated: set `author`/`authors` in emdash-plugin.jsonc) Author display name (first publish only). Overrides the manifest authors when any --author-* flag is present.", }, "author-url": { type: "string", description: "(deprecated: use emdash-plugin.jsonc) Author URL (first publish only)", }, "author-email": { type: "string", description: "(deprecated: use emdash-plugin.jsonc) Author email (first publish only)", }, "security-email": { type: "string", description: "(deprecated: set `security`/`securityContacts` in emdash-plugin.jsonc) Security contact email. Required on first publish; ignored thereafter. Overrides the manifest security contacts when any --security-* flag is present.", }, "security-url": { type: "string", description: "(deprecated: use emdash-plugin.jsonc) Security contact URL (first publish only)", }, manifest: { type: "string", description: `Path to emdash-plugin.jsonc, or the directory containing it. Defaults to ./${MANIFEST_FILENAME}. Pass --no-manifest (or set to "false") to disable manifest loading and rely entirely on flags.`, }, "no-manifest": { type: "boolean", description: "Disable manifest loading and rely entirely on flags. Useful in CI where the manifest lives elsewhere or shouldn't be implicitly consumed.", default: false, }, "allow-overwrite": { type: "boolean", description: "Allow overwriting an existing release at :. Default refuses, since FAIR treats version records as immutable and aggregators/labellers may flag any change as a takedown event.", default: false, }, "artifact-base-url": { type: "string", description: "Base URL the CLI PUTs media artifacts (icon / screenshot / banner) to. Each file is uploaded to /// and that URL is recorded in the release. The target must serve the bytes back unchanged with a stable content type. Required when the manifest declares any `release.artifacts`.", }, json: { type: "boolean", description: "Emit a single-line JSON object on stdout instead of human output. Success: {profile, release, cid, checksum, url, profileCreated, releaseOverwritten}. Failure: {error: {code, message}}. Human-readable progress goes to stderr in either mode.", }, }, async run({ args }) { // In --json mode, stdout MUST contain only the final JSON object so // callers can `emdash-plugin publish ... --json | jq`. Route every // consola log line to stderr; capture the previous reporter set so we // can restore in the finally below (matters when the CLI is exec'd // in-process by tests or wrappers). const restoreReporters = args.json ? redirectConsolaToStderr() : null; // `process.exit` inside a catch block runs SYNCHRONOUSLY and skips // the finally. Capture the desired exit code, run finally, then // exit. The exit code mirrors the originating CliError where // available (so user-error vs publish-failure is distinguishable in // scripts) and falls back to 1 for anything else. let exitCode = 0; try { await runPublish(args); } catch (error) { exitCode = error instanceof CliError ? error.exitCode : 1; handlePublishError(error, args.json); } finally { restoreReporters?.(); } if (exitCode !== 0) process.exit(exitCode); }, }); /** * Inner publish flow. Throws on every failure path; the outer `run` catches * and renders consistently (human + JSON modes). */ async function runPublish(args: PublishArgs): Promise { // Validate URL before any network access. Empty or non-https URLs are // rejected so we never publish a record pointing at file:// or a private // IP that consumers won't be able to fetch from. const urlError = validatePublishUrl(args.url); if (urlError) throw new CliError(urlError, 2, "INVALID_URL"); // Reject empty-string flags up front. citty leaves them as "" rather // than undefined, and the publish API treats "" as missing -- bad UX. const stringFlagError = validateStringFlags({ license: args.license, "author-name": args["author-name"], "author-url": args["author-url"], "author-email": args["author-email"], "security-email": args["security-email"], "security-url": args["security-url"], }); if (stringFlagError) throw new CliError(stringFlagError, 2, "INVALID_FLAG"); // Load the manifest if present. Precedence: explicit flags win over // manifest values, manifest values fill in any gaps. With // --no-manifest, we skip loading entirely. // // The default path is `./emdash-plugin.jsonc`. If the user didn't pass // --manifest and there's no file at the default path, that's NOT an // error: legacy flag-only invocations keep working. Only `--manifest` // explicit-not-found is an error. const manifestLoad = await loadManifestBootstrap(args, consola); const manifestProfile = manifestLoad?.profileInput ?? null; // Resume the active publisher session BEFORE any network access. // The publisher-mismatch check below depends only on the session DID // and the manifest's pinned publisher; running both up front means // a wrong-account publish fails in milliseconds rather than after a // full tarball fetch + decompress + manifest extract. const credentials = new FileCredentialStore(); const session = await credentials.current(); if (!session) { throw new CliError( "Not logged in. Run: emdash-plugin login ", 1, "NOT_LOGGED_IN", ); } consola.info(`Publishing as ${pc.bold(session.handle ?? session.did)} (${pc.dim(session.did)})`); // Verify the manifest's pinned publisher matches the active session // before fetching the tarball. The check is offline (DID compare is // verbatim; handle resolution only runs if the manifest pins a handle) // so we can fail fast on the wrong-account case. if (manifestLoad?.manifest.publisher !== undefined) { try { const check = await checkPublisher({ manifestPublisher: manifestLoad.manifest.publisher, sessionDid: session.did, }); if (check.kind === "mismatch") { throw new CliError( `Manifest pins publisher to ${pc.bold(check.pinnedDisplay)} (${check.pinnedDid}), but the active session is ${session.did}. ` + `Either switch sessions (\`emdash-plugin switch ${check.pinnedDid}\`), or edit the manifest if you are transferring the plugin to a new publisher.`, 1, "MANIFEST_PUBLISHER_MISMATCH", ); } } catch (error) { if (error instanceof PublisherCheckError) { throw new CliError(error.message, 1, error.code); } throw error; } } // Fetch + checksum the tarball, then extract the manifest BEFORE we // print any reassuring "tarball looks fine" lines. A 200 from a CDN // can serve an HTML 404 page; we want the failure to land before the // user sees apparent success. consola.start(`Fetching ${args.url}...`); const tarballBytes = await fetchTarball(args.url); const checksum = sha256Multihash(tarballBytes); const manifest = await extractManifestFromTarball(tarballBytes); consola.info(`Tarball: ${formatBytes(tarballBytes.length)}`); consola.info(`Multihash: ${pc.dim(checksum)}`); consola.info(`Manifest: ${pc.bold(manifest.id)}@${manifest.version}`); // Optional local cross-check. if (args.local) { const localPath = resolve(args.local); const localBytes = await readFile(localPath); const localChecksum = sha256Multihash(localBytes); if (localChecksum !== checksum) { throw new CliError( `Local file ${localPath} does not match the bytes served at ${args.url}. Local multihash: ${localChecksum}; remote multihash: ${checksum}. Re-upload the correct tarball, or drop --local to publish whatever's at the URL.`, 1, "LOCAL_CHECKSUM_MISMATCH", ); } consola.success(`Local file at ${pc.dim(localPath)} matches the URL`); } const oauthSession = await resumeSession(session.did); const publisher = PublishingClient.fromHandler({ handler: oauthSession, did: session.did, pds: session.pds, }); // Resolve the profile block. The manifest is the base; the deprecated // flat flags override on top (explicit caller intent wins). A single // --author-* / --security-* flag replaces the whole corresponding // manifest list — the flat flags only ever model one entry, so merging // them into a multi-entry list would be ambiguous. const profileInput: ProfileInput = { ...manifestProfile }; if (args.license !== undefined) profileInput.license = args.license; if ( args["author-name"] !== undefined || args["author-url"] !== undefined || args["author-email"] !== undefined ) { const author: { name: string; url?: string; email?: string } = { name: args["author-name"] ?? "unknown", }; if (args["author-url"] !== undefined) author.url = args["author-url"]; if (args["author-email"] !== undefined) author.email = args["author-email"]; profileInput.authors = [author]; } if (args["security-email"] !== undefined || args["security-url"] !== undefined) { const contact: { url?: string; email?: string } = {}; if (args["security-email"] !== undefined) contact.email = args["security-email"]; if (args["security-url"] !== undefined) contact.url = args["security-url"]; profileInput.security = [contact]; } const usedDeprecatedFlags = args.license !== undefined || args["author-name"] !== undefined || args["author-url"] !== undefined || args["author-email"] !== undefined || args["security-email"] !== undefined || args["security-url"] !== undefined; if (usedDeprecatedFlags) { consola.warn( "The --license / --author-* / --security-* flags are deprecated. Declare these in emdash-plugin.jsonc instead; the flags will be removed in a future release.", ); } const logger: PublishLogger = { info: (m) => consola.info(m), success: (m) => consola.success(m), warn: (m) => consola.warn(m), }; const artifacts = await resolveManifestArtifacts(args, manifestLoad, logger); const result = await publishRelease({ publisher, did: session.did, manifest, checksum, url: args.url, profileInput, repo: manifestLoad?.manifest.repo, requires: manifestLoad?.manifest.requires, artifacts, allowOverwrite: args["allow-overwrite"], logger, }); // Post-publish: pin the active session's DID back to the manifest if // the user didn't pin one themselves. This is a convenience, not a // publish requirement — failures are logged but don't fail the // command (the publish already committed to the PDS). // // The handle is passed for the line-comment annotation; the CLI // itself only ever uses the DID for the equality check. if (manifestLoad && manifestLoad.manifest.publisher === undefined) { await writePublisherBack({ manifestPath: manifestLoad.path, sessionDid: session.did, // session.handle is nullable; normalise to undefined for the // optional argument so the absence-vs-empty distinction stays // clean at the writePublisherBack boundary. sessionHandle: session.handle ?? undefined, onInfo: (m) => consola.info(m), onWarn: (m) => consola.warn(m), }); } // Subsequent-publish: warn about ignored first-publish-only profile // fields. These are owned by the existing profile record; a republish // doesn't rewrite them. if (!result.profileCreated && result.ignoredProfileFields.length > 0) { const fields = result.ignoredProfileFields.join(", "); consola.warn( `Ignored on subsequent publish (existing package record wins): ${fields}. To edit these on the existing package, run \`emdash-plugin update-package\` (dry-run prints the diff; pass \`--yes\` to apply).`, ); } if (args.json) { // Stdout-clean JSON for pipe consumers. process.stdout.write( `${JSON.stringify({ profile: result.profileUri, release: result.releaseUri, cid: result.releaseCid, checksum: result.checksum, url: args.url, profileCreated: result.profileCreated, releaseOverwritten: result.releaseOverwritten, })}\n`, ); return; } consola.success(`Published ${pc.bold(`${result.slug}@${manifest.version}`)}`); consola.info(`Release URI: ${pc.dim(result.releaseUri)}`); consola.info(`Profile URI: ${pc.dim(result.profileUri)}`); console.log(); consola.info( `The aggregator will pick this up from the firehose. To verify discovery once it's indexed:`, ); console.log(` ${pc.cyan(`emdash-plugin info ${session.handle ?? session.did} ${result.slug}`)}`); } /** * Resolve the manifest's `release.artifacts` block into uploaded, embeddable * records. Returns `undefined` when no manifest was loaded or it declared no * artifacts. Throws `CliError` when artifacts are declared but the publisher * didn't supply `--artifact-base-url`, or when resolution/upload fails. */ async function resolveManifestArtifacts( args: PublishArgs, manifestLoad: ManifestLoadOutcome | null, logger: PublishLogger, ): Promise { const artifacts = manifestLoad?.manifest.artifacts; if (!manifestLoad || !artifacts) return undefined; const baseUrl = args["artifact-base-url"]; if (baseUrl === undefined || baseUrl.length === 0) { throw new CliError( "The manifest declares `release.artifacts` (icon / screenshot / banner) but no --artifact-base-url was given. Pass --artifact-base-url so the CLI can upload the images and record where they're hosted.", 2, "ARTIFACT_BASE_URL_REQUIRED", ); } let parsedBase: URL; try { parsedBase = new URL(baseUrl); } catch { throw new CliError(`--artifact-base-url is not a valid URL: ${baseUrl}`, 2, "INVALID_FLAG"); } if (parsedBase.protocol !== "https:") { throw new CliError( `--artifact-base-url must use https; got ${parsedBase.protocol}. Host artifacts over TLS.`, 2, "INVALID_FLAG", ); } try { return await resolveReleaseArtifacts({ artifacts, manifestDir: dirname(manifestLoad.path), baseUrl, slug: manifestLoad.manifest.slug, version: manifestLoad.manifest.version, logger, }); } catch (error) { if (error instanceof ArtifactUploadError) { throw new CliError(error.message, 1, error.code); } throw error; } } /** * Render any error from the publish flow, in human or JSON shape. Always * writes to stderr (consola was already redirected for --json mode); in * --json mode also writes a structured `{ error: { code, message } }` to * stdout so a piped consumer can parse failures the same way they parse * successes. */ function handlePublishError(error: unknown, jsonMode: boolean): void { let code = "INTERNAL_ERROR"; let message = "Internal error"; if (error instanceof PublishError) { code = error.code; message = error.message; consola.error(error.message); if (error.code === "RELEASE_ALREADY_PUBLISHED") { consola.error( "To overwrite anyway, pass --allow-overwrite (use only when you're sure no consumers have installed this version yet).", ); } } else if (error instanceof CliError) { code = error.code; message = error.message; consola.error(error.message); } else if (error instanceof Error) { message = error.message; consola.error(error); } else { message = String(error); consola.error(error); } if (jsonMode) { process.stdout.write(`${JSON.stringify({ error: { code, message } })}\n`); } } /** * Internal CLI-only error with a stable code, so JSON mode can surface a * structured failure shape without the caller having to map raw Error * messages. */ class CliError extends Error { override readonly name = "CliError"; constructor( message: string, readonly exitCode: number, readonly code: string, ) { super(message); } } /** citty arg shape for the publish command. Inferred from the schema below. */ type PublishArgs = { url: string; local?: string; license?: string; "author-name"?: string; "author-url"?: string; "author-email"?: string; "security-email"?: string; "security-url"?: string; manifest?: string; "no-manifest"?: boolean; "allow-overwrite"?: boolean; "artifact-base-url"?: string; json?: boolean; }; /** * Result of resolving the manifest for `runPublish`. Surfaces both the * derived ProfileInput (the publish API's input) and the normalised * manifest itself, so downstream code can run the publisher-pin check * and the post-publish write-back without re-parsing the file. */ interface ManifestLoadOutcome { /** Resolved absolute path to the manifest file. */ path: string; /** Normalised manifest (single/multi-author forms collapsed). */ manifest: NormalisedManifest; /** Structured profile block for the publish-API input. */ profileInput: ProfileInput; } /** * Resolve the manifest layer for `runPublish`. Returns `null` when no * manifest was loaded (either suppressed by --no-manifest or the * default-path file is missing). Throws a CliError when the user * explicitly named a manifest path that couldn't be loaded, and warns * when `--no-manifest` is used while a manifest exists at the default * path so the publisher-pin safety story stays visible. */ async function loadManifestBootstrap( args: PublishArgs, log: { info(m: string): void; warn(m: string): void }, ): Promise { const optedOut = args["no-manifest"] === true || args.manifest === "false" || args.manifest === ""; if (optedOut) { // `--no-manifest` is a power-user escape hatch (CI, debugging), // but silently skipping a manifest at the default path defeats // the publisher-pin safety story. If the file exists, warn that // the pin (if any) won't be checked. We probe via stat to keep // the path cheap: no parse, no schema validation. const defaultPath = `./${MANIFEST_FILENAME}`; try { await stat(defaultPath); log.warn( `Skipping manifest at ${defaultPath} (--no-manifest is set). Publisher pin and license/security defaults are NOT being applied for this publish.`, ); } catch { // No manifest at the default path; nothing to warn about. } return null; } const explicit = args.manifest !== undefined && args.manifest.length > 0; const path = args.manifest ?? `./${MANIFEST_FILENAME}`; try { const { manifest, path: resolvedPath } = await loadManifest(path); // Manifest `version` is optional; reconcile with // `package.json#version` (the canonical source for // npm-distributed plugins) before validating. const packageVersion = await readSiblingPackageVersion(dirname(resolvedPath)); let normalised: NormalisedManifest; try { normalised = normaliseManifest(manifest, packageVersion); } catch (error) { if (error instanceof Error && "code" in error) { const code = (error as { code: unknown }).code; if (code === "VERSION_MISSING" || code === "VERSION_MISMATCH") { throw new CliError(error.message, 1, String(code)); } } throw error; } try { normalised.sections = await resolveSections(manifest.sections, dirname(resolvedPath)); } catch (error) { if (error instanceof SectionError) { throw new CliError(error.message, 1, error.code); } throw error; } log.info(`Loaded manifest: ${pc.dim(resolvedPath)}`); return { path: resolvedPath, manifest: normalised, profileInput: manifestToProfileInput(normalised), }; } catch (error) { if (error instanceof ManifestError) { // Default-path miss: not an error. Legacy flag-only callers // keep working when they have no manifest file. if (!explicit && error.code === "MANIFEST_NOT_FOUND") return null; throw new CliError(error.message, 1, error.code); } throw error; } } /** * Read `package.json#version` from the directory containing the * manifest. Returns `undefined` when no `package.json` exists (the * registry-only path). Throws `CliError` when the file exists but is * malformed, so a typo like `"verison"` surfaces directly rather than * appearing as a misleading VERSION_MISSING further down. */ async function readSiblingPackageVersion(manifestDir: string): Promise { const packageJsonPath = join(manifestDir, "package.json"); let source: string; try { source = await readFile(packageJsonPath, "utf-8"); } catch (error) { // ENOENT is the registry-only path; surface anything else. if ( error instanceof Error && "code" in error && (error as { code: unknown }).code === "ENOENT" ) { return undefined; } throw new CliError( `Failed to read package.json at ${packageJsonPath}: ${error instanceof Error ? error.message : String(error)}`, 1, "PACKAGE_JSON_UNREADABLE", ); } let parsed: unknown; try { parsed = JSON.parse(source); } catch (error) { throw new CliError( `package.json at ${packageJsonPath} is not valid JSON: ${error instanceof Error ? error.message : String(error)}`, 1, "PACKAGE_JSON_INVALID", ); } if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { throw new CliError( `package.json at ${packageJsonPath} must be a JSON object.`, 1, "PACKAGE_JSON_INVALID", ); } const version = (parsed as { version?: unknown }).version; if (version !== undefined && (typeof version !== "string" || version.length === 0)) { throw new CliError( `package.json at ${packageJsonPath} has a non-string \`version\` (${JSON.stringify(version)}).`, 1, "PACKAGE_JSON_INVALID", ); } return typeof version === "string" ? version : undefined; } // ── helpers ────────────────────────────────────────────────────────────────── /** * Validate the publish URL before any network access. Returns an error message * to print, or `null` if the URL is acceptable. * * The CLI runs locally so the SSRF surface is the publisher's own machine, * not a server. The real harm is publishing a record pointing at a * non-public URL: consumers can't install from `file:///` or `http://192.x` * and end up with a broken record in the registry. We reject those up front. */ // Exported for unit tests; not part of the public CLI surface. export function validatePublishUrlForTest(url: string): string | null { return validatePublishUrl(url); } function validatePublishUrl(url: string): string | null { let parsed: URL; try { parsed = new URL(url); } catch { return `--url is not a valid URL: ${url}`; } // Require https. Tarball integrity is enforced by the multihash we publish // alongside the URL, so a MITM can't substitute the bytes -- consumers // will reject a checksum mismatch. But TLS still matters here: it // prevents an active attacker from observing which plugin versions the // publisher is shipping, and it shuts the door on novel checksum-bypass // attacks (e.g. a lexicon evolution that loosens checksum verification). // The cost is near-zero -- no public CDN serves http-only in 2026. if (parsed.protocol !== "https:") { return `--url must use https; got ${parsed.protocol}. Host the tarball over TLS.`; } // `URL.hostname` keeps the `[ ]` around IPv6 literals AND normalises any // embedded IPv4 dotted-quad to two hex groups (e.g. // `::ffff:169.254.169.254` -> `[::ffff:a9fe:a9fe]`). Strip brackets // before any check; downstream helpers operate on the bracket-free form. // Also strip a single trailing dot (FQDN form): mDNS resolvers happily // resolve both `mymachine.local` and `mymachine.local.`, so the // `.local` deny check has to canonicalise both. const host = stripTrailingDot(stripIPv6Brackets(parsed.hostname)); if (host.length === 0) { return `--url ${url} has an empty hostname after canonicalisation`; } if ( host === "localhost" || host === "127.0.0.1" || host === "::1" || host.endsWith(".local") || isPrivateIp(host) || isPrivateMappedIPv6(host) || isPrivateIPv6Literal(host) ) { return `--url ${url} resolves to a non-public host (${host}); consumers won't be able to install from it. Host the tarball publicly first.`; } return null; } /** * Strip a single trailing dot from a DNS name (FQDN form). Idempotent. * Real hostnames don't end with multiple dots after URL parsing, so we * only strip one. */ function stripTrailingDot(host: string): string { if (host.endsWith(".")) return host.slice(0, -1); return host; } /** * Remove the surrounding brackets that `URL.hostname` keeps around an IPv6 * literal. Returns the input unchanged if not bracketed. */ function stripIPv6Brackets(host: string): string { if (host.length >= 2 && host.startsWith("[") && host.endsWith("]")) { return host.slice(1, -1); } return host; } /** * Resolve `host` via DNS and check whether any A/AAAA record falls inside * the public-host deny list. Used by the redirect loop to defend against * a public hostname pointed at a private IP. * * Best-effort: errors short-circuit to "couldn't resolve, treat as * suspicious" so a transient DNS failure during publish doesn't silently * succeed against an attacker-controlled redirect chain. * * Returns an error message or null. Pass an already-validated URL. */ async function validateResolvedHost(host: string): Promise { // Literal IPs are caught by `validatePublishUrl`'s syntactic guard // (which is invariably called immediately before this in both the // initial-URL and redirect-hop paths). We skip resolving them here: // - v4 literal: matches IPV4_RE, no DNS to do. // - v6 literal: contains `:`, no DNS to do (URL keeps brackets in // hostname; the bracket form has `:` so the include check fires). // Real hostnames contain neither; that's what we resolve. // // CAVEAT (DNS rebinding): the OS resolver may cache for a TTL the // attacker controls. A 1-second TTL means the validation lookup and // the subsequent `fetch` lookup can resolve to different addresses. // A motivated attacker can return a public IP for the validation // lookup and a private IP for the fetch. Mitigating this fully // requires resolving once and binding the connection to the resolved // IP literal, which means giving up SNI / Host-header convenience. // We accept the residual risk for the publish CLI (run by a human // publishing their own content) and document it here so it isn't // surprising. const stripped = stripIPv6Brackets(host); if (IPV4_RE.test(stripped)) return null; if (stripped.includes(":")) return null; let addresses: Array<{ address: string; family: number }>; try { addresses = await dnsLookup(stripped, { all: true, verbatim: true }); } catch (error) { return `could not resolve ${stripped}: ${error instanceof Error ? error.message : String(error)}`; } for (const { address } of addresses) { if ( address === "127.0.0.1" || address === "::1" || isPrivateIp(address) || isPrivateMappedIPv6(address) || isPrivateIPv6Literal(address) ) { return `${stripped} resolves to non-public address ${address}`; } } return null; } // Module-scope regexes so we don't re-compile on every call. The IP-literal // guard is the FIRST line of defence -- the redirect loop additionally runs // a DNS-resolved check (`validateResolvedHost`) so a public hostname // pointing at a private IP is also caught. const TAR_LEADING_DOT_SLASH_RE = /^\.\//; const IPV4_RE = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/; // IPv6 ULA (fc00::/7), link-local (fe80::/10). Bracket-stripped form. const IPV6_ULA_FC_RE = /^fc[0-9a-f]{2}(?::|$)/i; const IPV6_ULA_FD_RE = /^fd[0-9a-f]{2}(?::|$)/i; const IPV6_LINK_LOCAL_RE = /^fe[89ab][0-9a-f](?::|$)/i; // Loopback (::1), unspecified (::) -- already special-cased in // `validatePublishUrl` for clarity, also caught here. Bracket-stripped. const IPV6_LOOPBACK_RE = /^::1$/i; const IPV6_UNSPECIFIED_RE = /^::$/i; // IPv4-compatible IPv6 (`::a.b.c.d`, deprecated but valid input form). const IPV6_V4_COMPAT_DOTTED_RE = /^::(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/i; // IPv4-mapped IPv6 in dotted form (`::ffff:a.b.c.d`). const IPV6_V4_MAPPED_DOTTED_RE = /^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/i; // IPv4-mapped IPv6 after URL parser normalisation (`::ffff:hhhh:hhhh`). // Two hex groups of up to 4 chars each, encoding 4 octets of v4. The URL // parser collapses leading zero groups into `::` and converts the IPv4 // suffix into hex pairs. const IPV6_V4_MAPPED_HEX_RE = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i; // IPv4-compatible IPv6 after URL parser normalisation. The dotted form // `::a.b.c.d` is normalised by Node's URL parser to `::hhhh:hhhh` (no // `ffff:` prefix) -- without this branch, the IPv4-compat case slips // through both the dotted regex (because the URL parser ate the dots) // and the v4-mapped-hex regex (because there's no `ffff:`). const IPV6_V4_COMPAT_HEX_RE = /^::([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i; // IPv6 prefixes that carry an embedded IPv4 in their last 32 bits. We // enumerate explicitly rather than treating EVERY v6 literal's trailing // 32 bits as a candidate v4 -- the broader form would false-positive // reject legitimate public v6 addresses whose last two hex groups // coincidentally encode an RFC1918 / loopback / 169.254 octet pattern. // // Currently covered: // - `64:ff9b::/96` — RFC 6052 NAT64 well-known prefix // - `64:ff9b:1::/48` — RFC 8215 NAT64 local-use prefix // - `2002:xxxx:xxxx::/16` — 6to4 (xxxx:xxxx encodes the v4) // - `::/96` — IPv4-compatible (already handled by // IPV6_V4_COMPAT_DOTTED_RE for dotted form) const IPV6_NAT64_RE = /^64:ff9b::([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i; // RFC 8215 local-use NAT64 prefix `64:ff9b:1::/48`. The URL parser // collapses runs of zero groups into `::`, so the typical form for this // prefix when carrying a v4 in the last 32 bits is // `64:ff9b:1::WWWW:XXXX`. Match the prefix permissively and capture the // trailing two hex groups. const IPV6_NAT64_LOCAL_RE = /^64:ff9b:1:[0-9a-f:]*:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i; const IPV6_6TO4_RE = /^2002:([0-9a-f]{1,4}):([0-9a-f]{1,4}):/i; /** * Detect RFC 1918, loopback, link-local, CGNAT, and 0.0.0.0/8 IPv4 * literals. The IPv6 ULA/link-local cases are handled below. */ function isPrivateIp(host: string): boolean { const v4 = IPV4_RE.exec(host); if (v4) { const [, aStr, bStr] = v4; const a = Number(aStr); const b = Number(bStr); if (a === 0) return true; // 0.0.0.0/8 ("this network") if (a === 10) return true; if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT 100.64.0.0/10 if (a === 127) return true; if (a === 169 && b === 254) return true; // link-local + cloud metadata if (a === 172 && b >= 16 && b <= 31) return true; if (a === 192 && b === 168) return true; } // IPv6 literal in URL form is wrapped in []; URL strips the brackets in // `hostname`. ULA fc00::/7 and link-local fe80::/10. if (IPV6_ULA_FC_RE.test(host) || IPV6_ULA_FD_RE.test(host)) return true; if (IPV6_LINK_LOCAL_RE.test(host)) return true; return false; } /** * IPv4-mapped (`::ffff:1.2.3.4`) and IPv4-compatible (`::1.2.3.4`) IPv6. * * Note: Node's `URL` parser normalises the dotted-quad form to hex pairs * (`::ffff:a9fe:a9fe`), so for inputs that came through `URL.hostname` we * also need the hex-pair branch. We handle both because `validatePublishUrl` * is also called with redirect Locations that may not have been re-parsed. */ function isPrivateMappedIPv6(host: string): boolean { const dotted = IPV6_V4_MAPPED_DOTTED_RE.exec(host) ?? IPV6_V4_COMPAT_DOTTED_RE.exec(host); if (dotted) { const inner = dotted[1]; return inner ? isPrivateIp(inner) : false; } const hex = IPV6_V4_MAPPED_HEX_RE.exec(host) ?? IPV6_V4_COMPAT_HEX_RE.exec(host); if (hex) { const v4 = decodeIPv6V4HexPair(hex[1], hex[2]); return v4 ? isPrivateIp(v4) : false; } return false; } /** * Detect a private IPv6 address literal in bracket-stripped form. Covers * loopback `::1`, unspecified `::`, ULA `fc00::/7`, link-local `fe80::/10`, * and any literal whose final 32 bits decode to a private IPv4 (handles * NAT64 well-known-prefix `64:ff9b::a.b.c.d` style addresses where the * URL parser has converted the dotted suffix to hex). */ function isPrivateIPv6Literal(host: string): boolean { if (IPV6_LOOPBACK_RE.test(host)) return true; if (IPV6_UNSPECIFIED_RE.test(host)) return true; if (IPV6_ULA_FC_RE.test(host)) return true; if (IPV6_ULA_FD_RE.test(host)) return true; if (IPV6_LINK_LOCAL_RE.test(host)) return true; // NAT64 (`64:ff9b::a.b.c.d` -> URL-normalised to // `64:ff9b::aabb:ccdd`). Decode the embedded v4 and re-check. const nat64 = IPV6_NAT64_RE.exec(host) ?? IPV6_NAT64_LOCAL_RE.exec(host); if (nat64) { const v4 = decodeIPv6V4HexPair(nat64[1], nat64[2]); if (v4 && isPrivateIp(v4)) return true; } // 6to4 (`2002:WWXX:YYZZ::/48` where WWXX:YYZZ is the v4). The v4 is at // the prefix, not the suffix. const sixtofour = IPV6_6TO4_RE.exec(host); if (sixtofour) { const v4 = decodeIPv6V4HexPair(sixtofour[1], sixtofour[2]); if (v4 && isPrivateIp(v4)) return true; } return false; } /** * Decode two IPv6 hex groups (each up to 4 hex digits) into an IPv4 dotted * quad. Returns null if either group is out of range. Matches the encoding * Node's `URL` parser uses when an IPv6 literal includes a dotted IPv4 * suffix. */ function decodeIPv6V4HexPair(a: string | undefined, b: string | undefined): string | null { if (!a || !b) return null; const ax = Number.parseInt(a, 16); const bx = Number.parseInt(b, 16); if (!Number.isFinite(ax) || !Number.isFinite(bx)) return null; if (ax < 0 || ax > 0xffff || bx < 0 || bx > 0xffff) return null; const o1 = (ax >> 8) & 0xff; const o2 = ax & 0xff; const o3 = (bx >> 8) & 0xff; const o4 = bx & 0xff; return `${o1}.${o2}.${o3}.${o4}`; } /** * Catch the user passing an empty-string flag value (`--license=`). citty * gives us "" which the publish API treats as missing -- the user gets a * confusing PROFILE_BOOTSTRAP_MISSING_FIELD even though they explicitly * passed the flag. */ function validateStringFlags(flags: Record): string | null { for (const [name, value] of Object.entries(flags)) { if (value !== undefined && value === "") { return `--${name} cannot be empty`; } } return null; } /** * Fetch the tarball bytes at `url`. We need the full body to compute the * checksum and to read `manifest.json` from inside it. Streams the response * with a hard cap so a malicious URL can't OOM the CLI. * * Follows redirects MANUALLY so we can re-validate every hop against * `validatePublishUrl`. Without this, a publisher could pass a public URL * that 302s to `http://169.254.169.254/...` (cloud metadata) or to a * `localhost` victim, defeating the publish-side allow-list. */ /** * Build the error message for the gzipped-fetch cap. The cap itself is an * implementation detail of the fetch buffer (sized to fit any bundle that * meets the published RFC 0001 caps); the user-facing constraint is the * decompressed total — surface that here so the next attempt aims at the * right number. */ function oversizedFetchError(url: string, fetched: number): string { return ( `tarball at ${url} is ${formatBytes(fetched)} (gzipped), exceeding the ${formatBytes(MAX_TARBALL_BYTES)} fetch cap. ` + `Sandboxed plugin bundles must decompress to at most ${formatBytes(MAX_BUNDLE_SIZE)} (RFC 0001).` ); } async function fetchTarball(url: string): Promise { const MAX_REDIRECTS = 10; let currentUrl = url; let res: Response | undefined; // Run a DNS-level check on the initial URL (validatePublishUrl already // passed, but it only catches IP literals -- a public hostname pointing // at a private IP slips through the syntactic guard). const initialDns = await validateResolvedHost(new URL(url).hostname); if (initialDns) { throw new Error(`tarball at ${url}: ${initialDns}`); } for (let hop = 0; hop <= MAX_REDIRECTS; hop++) { res = await fetch(currentUrl, { // GitHub release assets need this header to actually serve the file // (without it the API URL returns JSON metadata). Direct CDN URLs // ignore the header. headers: { Accept: "application/octet-stream" }, redirect: "manual", }); // `manual` means 3xx responses come back with a Location header rather // than being followed. fetch() in workerd / node also surfaces // "opaqueredirect" status 0 in some environments; treat any 3xx-ish // state (or status 0) WITH a Location header as a redirect. const status = res.status; const location = res.headers.get("location"); if (location === null) break; const isRedirect = (status >= 300 && status < 400) || status === 0; if (!isRedirect) break; if (hop === MAX_REDIRECTS) { throw new Error(`tarball at ${url}: too many redirects (>${MAX_REDIRECTS})`); } // Resolve relative Locations against the current URL. const next = new URL(location, currentUrl).toString(); const hopError = validatePublishUrl(next); if (hopError) { throw new Error(`tarball at ${url} redirected to a disallowed URL (${next}): ${hopError}`); } // DNS-resolve the hop target to defend against a public hostname // that returns a private A/AAAA record (the publishUrl syntactic // guard only catches IP literals). const dnsError = await validateResolvedHost(new URL(next).hostname); if (dnsError) { throw new Error(`tarball at ${url} redirected to a disallowed URL (${next}): ${dnsError}`); } currentUrl = next; } if (!res) { // Loop is structured so this is unreachable, but TS can't see it. throw new Error(`failed to fetch ${url}: no response`); } if (!res.ok) { throw new Error(`failed to fetch ${url}: ${res.status} ${res.statusText}`); } // If the server told us the size up front, reject oversized responses // before reading the body. const contentLength = res.headers.get("content-length"); if (contentLength) { const len = Number(contentLength); if (Number.isFinite(len) && len > MAX_TARBALL_BYTES) { throw new Error(oversizedFetchError(url, len)); } } if (!res.body) { const buf = await res.arrayBuffer(); if (buf.byteLength > MAX_TARBALL_BYTES) { throw new Error(oversizedFetchError(url, buf.byteLength)); } return new Uint8Array(buf); } // Stream the body so we can abort once we exceed the cap, instead of // buffering an unbounded response into memory. const reader = res.body.getReader(); const chunks: Uint8Array[] = []; let total = 0; while (true) { const { done, value } = await reader.read(); if (done) break; if (value) { total += value.length; if (total > MAX_TARBALL_BYTES) { await reader.cancel(); throw new Error(oversizedFetchError(url, total)); } chunks.push(value); } } const combined = new Uint8Array(total); let offset = 0; for (const chunk of chunks) { combined.set(chunk, offset); offset += chunk.length; } return combined; } /** * Extract `manifest.json` from a gzipped tarball, using `modern-tar`'s * stream-then-collect API. Returns the parsed-and-validated manifest. * * Accepts both `manifest.json` and `./manifest.json` since modern-tar's * exact naming behaviour isn't pinned in our contract. * * Enforces the bundle size caps from RFC 0001 against the decompressed * entries — total decompressed size, per-file size, and file count. The * gzipped fetch is already capped at MAX_TARBALL_BYTES upstream; this is * the post-decompression check that matches what aggregators enforce at * ingest, so a publisher who would be rejected at the registry sees the * same error locally first. * * Validates the parsed JSON shape against the contract before returning, * so downstream code (which iterates `capabilities`, indexes `allowedHosts`, * etc.) doesn't TypeError on garbage input from a malicious tarball. */ // Exported for unit tests; not part of the public CLI surface. export const extractManifestFromTarballForTest = extractManifestFromTarball; async function extractManifestFromTarball(bytes: Uint8Array): Promise { const { unpackTar, createGzipDecoder } = await import("modern-tar"); const source = new ReadableStream({ start(controller) { controller.enqueue(bytes); controller.close(); }, }); let entries; try { const decoded = source.pipeThrough(createGzipDecoder()) as ReadableStream; entries = await unpackTar(decoded); } catch (error) { throw new Error( `tarball at the URL is not a valid gzipped tar archive: ${error instanceof Error ? error.message : String(error)}`, { cause: error }, ); } // The bundle size caps apply to regular files only: directories, // symlinks, hardlinks, devices, and FIFOs all carry no content and // shouldn't appear in a sandboxed plugin bundle anyway. Filtering by // header.type matches what `collectBundleEntries` does for the staging // dir (where `item.isFile()` already excludes non-files), so the bundle // command and the publish command agree on what counts. const fileEntries = entries .filter((e) => (e.header.type ?? "file") === "file") .map((e) => ({ name: e.header.name.replace(TAR_LEADING_DOT_SLASH_RE, ""), bytes: e.data?.byteLength ?? 0, })); const sizeViolations = validateBundleSize(fileEntries); if (sizeViolations.length > 0) { throw new Error( `tarball at the URL violates bundle size caps:\n - ${sizeViolations.join("\n - ")}`, ); } const manifestEntry = entries.find((e) => { const name = e.header.name; return name === "manifest.json" || name === "./manifest.json"; }); if (!manifestEntry?.data) { throw new Error("manifest.json not found in tarball"); } let parsed: unknown; try { parsed = JSON.parse(new TextDecoder().decode(manifestEntry.data)); } catch (error) { throw new Error( `manifest.json in the tarball is not valid JSON: ${error instanceof Error ? error.message : String(error)}`, { cause: error }, ); } return assertManifestShape(parsed); } /** * Best-effort structural sanity check on a parsed `manifest.json`. NOT a * full lexicon validation: we check the shapes the publish flow actually * touches (`id`, `version`, `capabilities`, `allowedHosts`, plus that * `storage`/`hooks`/`routes`/`admin` are at least the right top-level * shape). Hook and route entries are only checked to be string-or-object; * we deliberately don't validate their inner structure here because (a) * the publish flow doesn't iterate them and (b) the inner shape varies * across manifest schema versions and is core's read-side problem. * * Callers who need a full validation should round-trip through the runtime * narrowing helper in `packages/core/src/plugins/manifest-schema.ts`. */ function assertManifestShape(value: unknown): PluginManifest { if (!value || typeof value !== "object" || Array.isArray(value)) { throw new Error("manifest.json must be a JSON object"); } const v = value as Record; if (typeof v.id !== "string" || v.id.length === 0) { throw new Error("manifest.json: `id` must be a non-empty string"); } if (typeof v.version !== "string" || v.version.length === 0) { throw new Error("manifest.json: `version` must be a non-empty string"); } if (!Array.isArray(v.capabilities) || v.capabilities.some((c) => typeof c !== "string")) { throw new Error("manifest.json: `capabilities` must be an array of strings"); } if (!Array.isArray(v.allowedHosts) || v.allowedHosts.some((h) => typeof h !== "string")) { throw new Error("manifest.json: `allowedHosts` must be an array of strings"); } // `typeof null === "object"` and `typeof [] === "object"`, so both checks // matter. Same for `admin` below. if (!v.storage || typeof v.storage !== "object" || Array.isArray(v.storage)) { throw new Error("manifest.json: `storage` must be an object"); } if (!Array.isArray(v.hooks)) { throw new Error("manifest.json: `hooks` must be an array"); } for (const [i, entry] of v.hooks.entries()) { // Hook entries are either bare hook-name strings or // `{ hook: string, ... }` objects per the manifest contract. Anything // else means the publisher hand-edited (or corrupted) the manifest. if (typeof entry === "string") continue; if (!entry || typeof entry !== "object" || Array.isArray(entry)) { throw new Error( `manifest.json: \`hooks[${i}]\` must be a string or object, got ${describeJsonValue(entry)}`, ); } } if (!Array.isArray(v.routes)) { throw new Error("manifest.json: `routes` must be an array"); } for (const [i, entry] of v.routes.entries()) { if (typeof entry === "string") continue; if (!entry || typeof entry !== "object" || Array.isArray(entry)) { throw new Error( `manifest.json: \`routes[${i}]\` must be a string or object, got ${describeJsonValue(entry)}`, ); } } if (!v.admin || typeof v.admin !== "object" || Array.isArray(v.admin)) { throw new Error("manifest.json: `admin` must be an object"); } return v as unknown as PluginManifest; } /** Shape-describing string for error messages. Distinguishes null/array/object. */ function describeJsonValue(value: unknown): string { if (value === null) return "null"; if (Array.isArray(value)) return "array"; return typeof value; }