import type { ContentKind, DetectedContent } from './types';

/** Minimal structural view of a ``Headers``-like object: anything that
 *  exposes a ``.get(name)`` method. Declared locally so this module does
 *  not depend on the DOM ``Headers`` type being available. */
type HeaderGetter = { get(name: string): unknown };

/** Strip Content-Type parameters (``; charset=utf-8`` etc.), trim and
 *  lower-case.
 *
 *  @param raw Raw header value, or ``null`` when the header is absent.
 *  @returns The bare media type, or ``null`` when ``raw`` is missing or
 *  contains no media type at all (e.g. ``"; charset=utf-8"``).
 */
function normaliseContentType(raw: string | null): string | null {
  if (!raw) return null;
  const semi = raw.indexOf(';');
  const mime = (semi === -1 ? raw : raw.slice(0, semi)).trim().toLowerCase();
  // An empty media type carries no information — report it as absent so
  // callers only ever see ``null`` or a non-empty string.
  return mime === '' ? null : mime;
}

/** Pull Content-Type out of the headers bag. We accept both plain
 *  objects (``{ "content-type": "..." }``) and ``Headers``-like shapes
 *  with a ``.get`` method. The parameter is ``unknown`` because the
 *  calling context is untyped; everything is narrowed at runtime here.
 *
 *  @param headers Headers bag in either supported shape (or nothing).
 *  @returns The raw header value, or ``null`` when it is absent or not a
 *  string.
 */
function readContentType(headers: unknown): string | null {
  if (!headers) return null;

  // ``Headers``-like instance — delegate to its ``.get``.
  if (typeof (headers as Partial<HeaderGetter>).get === 'function') {
    const value = (headers as HeaderGetter).get('content-type');
    // ``Map``-like getters may hand back ``undefined`` or a non-string;
    // never let that escape through the ``string | null`` contract.
    return typeof value === 'string' ? value : null;
  }

  // Plain object — look up with a case-insensitive key match so both
  // ``Content-Type`` and ``content-type`` work.
  if (typeof headers === 'object') {
    for (const [key, value] of Object.entries(headers as Record<string, unknown>)) {
      if (key.toLowerCase() === 'content-type') {
        return typeof value === 'string' ? value : null;
      }
    }
  }

  return null;
}

/** Map a normalised MIME type to our short ``ContentKind``. Unknown
 *  types fall back to ``text``.
 *
 *  @param mime Media type as produced by ``normaliseContentType``.
 *  @returns The matching ``ContentKind``; ``text`` when ``mime`` is
 *  ``null`` or not recognised.
 */
function kindFromContentType(mime: string | null): ContentKind {
  if (!mime) return 'text';
  if (mime === 'application/json' || mime.endsWith('+json')) return 'json';
  // XHTML must be tested before the generic ``+xml`` suffix below.
  if (mime === 'text/html' || mime === 'application/xhtml+xml') return 'html';
  if (
    mime === 'application/xml' ||
    mime === 'text/xml' ||
    mime.endsWith('+xml')
  )
    return 'xml';
  if (mime === 'text/css') return 'css';
  if (
    mime === 'application/javascript' ||
    mime === 'text/javascript' ||
    mime === 'application/x-javascript'
  )
    return 'javascript';
  return 'text';
}

/** Heuristic fallback when ``Content-Type`` is missing or opaque.
 *  Returns ``null`` when nothing firm can be inferred; the caller
 *  then defaults to ``text``.
 *
 *  NOTE(review): the XML and JSON probes below were reconstructed from
 *  the surrounding documentation — confirm them against the intended
 *  heuristics.
 *
 *  @param body Raw response body.
 *  @returns ``html``, ``xml`` or ``json`` when the body clearly looks
 *  like one of them, otherwise ``null``.
 */
function kindFromBody(body: string): ContentKind | null {
  // The calling context is untyped, so tolerate a missing body at runtime.
  if (typeof body !== 'string') return null;

  const trimmed = body.trimStart();
  if (!trimmed) return null;

  // HTML: a doctype declaration or an opening ``html`` tag, any casing.
  if (/^<!doctype\s+html[\s>]/i.test(trimmed) || /^<html[\s>]/i.test(trimmed)) {
    return 'html';
  }

  // XML: the document starts with an XML declaration.
  if (trimmed.startsWith('<?xml')) return 'xml';

  // JSON: require matching outer delimiters so that bracketed log lines
  // such as ``[INFO] ...`` are not mistaken for JSON.
  const tail = trimmed.trimEnd();
  if (
    (trimmed.startsWith('{') && tail.endsWith('}')) ||
    (trimmed.startsWith('[') && tail.endsWith(']'))
  ) {
    return 'json';
  }

  return null;
}

/** Prism grammar name for every ``ContentKind``. */
const PRISM_BY_KIND: Record<ContentKind, string> = {
  json: 'json',
  // ``markup`` is Prism's HTML/XML grammar — there isn't a separate
  // ``html`` language. XML piggy-backs on the same tokeniser.
  html: 'markup',
  xml: 'markup',
  css: 'css',
  javascript: 'javascript',
  // NOTE(review): plain text also reuses ``markup`` — confirm this is
  // intended rather than a dedicated plain-text grammar.
  text: 'markup',
};

/** Detect content kind from headers, falling back to body sniffing
 *  when the header is missing or uninformative. Pure function — no I/O
 *  and no shared state.
 *
 *  @param headers Response headers: a plain object or a ``Headers``-like
 *  object with ``.get``.
 *  @param rawBody Raw response body, used only for sniffing.
 *  @returns The detected kind, the Prism grammar to highlight it with,
 *  and the normalised Content-Type (``null`` when absent).
 */
export function detectContent(headers: unknown, rawBody: string): DetectedContent {
  const contentType = normaliseContentType(readContentType(headers));
  const headerKind = kindFromContentType(contentType);

  // ``text`` covers a missing header, ``text/plain`` and every MIME type
  // we do not recognise. In all of those cases trust the body if it
  // clearly looks like HTML, XML or JSON — e.g. framework error pages
  // are often HTML or JSON served with a ``text/plain`` default.
  const kind =
    headerKind === 'text' ? (kindFromBody(rawBody) ?? 'text') : headerKind;

  return {
    kind,
    prism: PRISM_BY_KIND[kind],
    contentType,
  };
}