/** * Request Metadata Extraction * * Extracts normalized metadata (IP, user agent, referer, geo) from * incoming requests. Used by plugin route handlers to access request * context without touching raw headers. * */ import type { EmDashConfig } from "../astro/integration/runtime.js"; import { getTrustedProxyHeaders, normalizeTrustedHeaders } from "../auth/trusted-proxy.js"; import type { GeoInfo, RequestMeta } from "./types.js"; /** * Cloudflare Workers `cf` object shape (subset we use). * Present on requests when running on Cloudflare Workers. */ interface CfProperties { country?: string; region?: string; city?: string; } /** * Loose validation for IPv4 and IPv6 addresses. * Accepts digits, hex chars, dots, and colons — rejects anything else * (e.g. HTML tags, scripts, or other non-IP garbage in spoofed headers). */ const IP_PATTERN = /^[\da-fA-F.:]+$/; /** * Extract the first IP from an X-Forwarded-For header value. * The header may contain a comma-separated list of IPs; the first * entry is the original client IP. * * Returns null if the extracted value doesn't look like an IP address. */ function parseFirstForwardedIp(header: string): string | null { const first = header.split(",")[0]; const trimmed = first?.trim(); if (!trimmed) return null; return IP_PATTERN.test(trimmed) ? trimmed : null; } /** * Read an IP from an operator-declared trusted header. XFF-style headers * (any name ending in `forwarded-for`) are parsed as comma-separated lists * and the first entry is used; everything else is treated as a single * trimmed value. */ function readIpFromHeader(headers: Headers, name: string): string | null { const value = headers.get(name); if (!value) return null; if (name.endsWith("forwarded-for")) { return parseFirstForwardedIp(value); } const trimmed = value.trim(); if (!trimmed) return null; return IP_PATTERN.test(trimmed) ? trimmed : null; } /** * Get the Cloudflare `cf` object from the request, if present. * Returns undefined when not running on Cloudflare Workers. */ function getCfObject(request: Request): CfProperties | undefined { return (request as unknown as { cf?: CfProperties }).cf; } /** * Extract geographic information from the Cloudflare `cf` object * attached to the request. Returns null when not running on CF Workers. */ function extractGeo(cf: CfProperties | undefined): GeoInfo | null { if (!cf) return null; const country = cf.country ?? null; const region = cf.region ?? null; const city = cf.city ?? null; // Only return geo if at least one field is populated if (country === null && region === null && city === null) return null; return { country, region, city }; } /** * Extract normalized request metadata from a Request object. * * IP resolution order: * 1. `CF-Connecting-IP` — trusted only when a `cf` object is present on the * request. CF edge overwrites any client-supplied value, so this is the * cryptographically trustworthy path on Workers. Operator-declared * trusted headers cannot override it. * 2. `X-Forwarded-For` first entry — trusted only with a `cf` object. * 3. Operator-declared trusted proxy headers (from `config.trustedProxyHeaders` * or the `EMDASH_TRUSTED_PROXY_HEADERS` env var), tried in order. Used as * the primary source off-CF and as a fill-in on CF. * 4. `null` * * The second argument accepts either the EmDash config or a pre-resolved * list of trusted headers, so callers that already have the list don't have * to round-trip through the config every request. */ export function extractRequestMeta( request: Request, configOrTrustedHeaders?: EmDashConfig | null | { trustedProxyHeaders?: string[] } | string[], ): RequestMeta { const headers = request.headers; const cf = getCfObject(request); const trusted = resolveTrustedHeaders(configOrTrustedHeaders); let ip: string | null = null; // On Cloudflare, prefer the cryptographically trustworthy headers first. if (cf) { const cfIp = headers.get("cf-connecting-ip")?.trim(); if (cfIp && IP_PATTERN.test(cfIp)) { ip = cfIp; } if (!ip) { const xff = headers.get("x-forwarded-for"); ip = xff ? parseFirstForwardedIp(xff) : null; } } // Fall through to operator-declared trusted headers. On CF this fills // in when the CF headers are absent; off-CF it's the primary source. if (!ip) { for (const name of trusted) { const value = readIpFromHeader(headers, name); if (value) { ip = value; break; } } } const userAgent = headers.get("user-agent")?.trim() || null; const referer = headers.get("referer")?.trim() || null; const geo = extractGeo(cf); return { ip, userAgent, referer, geo }; } function resolveTrustedHeaders( value: EmDashConfig | null | { trustedProxyHeaders?: string[] } | string[] | undefined, ): string[] { if (Array.isArray(value)) { // Apply the same RFC 7230 validation the config/env path does so a // caller passing a pre-resolved list with bad entries can't crash // `Headers.get()` downstream. return normalizeTrustedHeaders(value); } return getTrustedProxyHeaders(value); } // ============================================================================= // Header Sanitization for Sandbox // ============================================================================= /** * Headers that must never cross the RPC boundary to sandboxed plugins. * Session tokens, auth credentials, and infrastructure headers are stripped * to prevent malicious plugins from exfiltrating sensitive data. */ const SANDBOX_STRIPPED_HEADERS = new Set([ "cookie", "set-cookie", "authorization", "proxy-authorization", "cf-access-jwt-assertion", "cf-access-client-id", "cf-access-client-secret", "x-emdash-request", ]); /** * Copy request headers into a plain object, stripping sensitive headers * that must not be exposed to sandboxed plugin code. */ export function sanitizeHeadersForSandbox(headers: Headers): Record { const safe: Record = {}; headers.forEach((value, key) => { if (!SANDBOX_STRIPPED_HEADERS.has(key)) { safe[key] = value; } }); return safe; }