/**
 * @fileoverview parse passthrough module.
 *
 * Maps a response's Content-Type header and URL onto a coarse content kind,
 * and provides small helpers for JSON text and binary attachments.
 */

import type { BinaryDownloadMetadata } from "../../http/download.ts";

/** Coarse content categories a response can be routed to. */
export type RoutedContentKind =
  | "text"
  | "markdown"
  | "mdx"
  | "rst"
  | "source"
  | "json"
  | "xml"
  | "svg"
  | "html"
  | "pdf"
  | "binary";

/** Routing decision produced by {@link routeContentType}. */
export interface ContentRoute {
  /** The detected content category. */
  kind: RoutedContentKind;
  /** True only when `kind` is `"html"`. */
  shouldParseHtml: boolean;
  /** True only when `kind` is `"pdf"`. */
  shouldExtractPdf: boolean;
  /** True for every kind except `"pdf"` and `"binary"`. */
  isTextLike: boolean;
}

/** Download metadata tagged as a binary attachment, with an optional filename. */
export interface BinaryAttachmentInfo extends BinaryDownloadMetadata {
  kind: "binary";
  filename?: string;
}

/** Kinds reported as text-like; built once instead of on every call. */
const TEXT_LIKE_KINDS: ReadonlySet<RoutedContentKind> =
  new Set<RoutedContentKind>([
    "text",
    "markdown",
    "mdx",
    "rst",
    "source",
    "json",
    "xml",
    "svg",
    "html",
  ]);

/** File extensions routed to the "source" kind (JS/TS variants, Python, Rust). */
const SOURCE_EXTENSION = /\.(?:[cm]?[jt]sx?|py|rs)$/u;

/**
 * Leading `scheme://authority` (or protocol-relative `//authority`) of an
 * already-lowercased URL. Removed before extension matching so a host name
 * such as `example.md` or `docs.rs` is not mistaken for a file extension.
 */
const URL_AUTHORITY = /^(?:[a-z][a-z0-9+.-]*:)?\/\/[^/]*/u;

/**
 * Decides how a response should be handled from its Content-Type and URL.
 *
 * Only the media type before the first `;` is considered, compared
 * case-insensitively. Only the URL's path is used for extension matching:
 * the query string, fragment, scheme and host are ignored.
 *
 * @param contentType Raw Content-Type header value, if any.
 * @param url URL or file path the content came from; defaults to `""`.
 * @returns The detected kind plus flags derived from it.
 */
export function routeContentType(
  contentType: string | undefined,
  url = "",
): ContentRoute {
  const type = (contentType ?? "").split(";", 1)[0]?.trim().toLowerCase() ?? "";
  const withoutQuery = url.toLowerCase().split(/[?#]/u, 1)[0] ?? "";
  const path = withoutQuery.replace(URL_AUTHORITY, "");
  const kind = classify(type, path);
  return {
    kind,
    shouldParseHtml: kind === "html",
    shouldExtractPdf: kind === "pdf",
    isTextLike: TEXT_LIKE_KINDS.has(kind),
  };
}

/**
 * Parses JSON text into an untyped value.
 *
 * A single leading byte-order mark (U+FEFF) is removed first, because
 * `JSON.parse` rejects it.
 *
 * @param text JSON document text.
 * @returns The parsed value; callers must narrow it before use.
 * @throws {SyntaxError} When `text` is not valid JSON.
 */
export function parseJsonText(text: string): unknown {
  const body = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  return JSON.parse(body);
}

/**
 * Tags download metadata as a binary attachment.
 *
 * @param file Metadata of the downloaded file; copied, not mutated.
 * @param filename Optional filename to attach to the result.
 * @returns A copy of `file` with `kind: "binary"` and the given `filename`.
 */
export function binaryAttachmentInfo(
  file: BinaryDownloadMetadata,
  filename?: string,
): BinaryAttachmentInfo {
  return { ...file, kind: "binary", filename };
}

/**
 * Picks the content kind for a normalized media type and URL path.
 *
 * Checks run from most to least specific and the first match wins, so the
 * order matters: `.mdx` is tested before markdown, and `svg` before `xml`
 * (the media type `image/svg+xml` contains both substrings).
 *
 * @param type Lowercased media type without parameters, or `""`.
 * @param path Lowercased URL path without host, query or fragment, or `""`.
 * @returns The matching kind, or `"binary"` when nothing matches.
 */
function classify(type: string, path: string): RoutedContentKind {
  if (type === "application/pdf" || path.endsWith(".pdf")) return "pdf";
  if (
    type === "text/html" ||
    type === "application/xhtml+xml" ||
    path.endsWith(".html") ||
    path.endsWith(".htm") ||
    path.endsWith(".xhtml")
  ) {
    return "html";
  }
  if (path.endsWith(".mdx")) return "mdx";
  if (
    type.includes("markdown") ||
    path.endsWith(".md") ||
    path.endsWith(".markdown")
  ) {
    return "markdown";
  }
  if (path.endsWith(".rst")) return "rst";
  if (SOURCE_EXTENSION.test(path)) return "source";
  if (type.includes("json") || path.endsWith(".json")) return "json";
  if (type.includes("svg") || path.endsWith(".svg")) return "svg";
  if (type.includes("xml") || path.endsWith(".xml")) return "xml";
  if (type.startsWith("text/") || path.endsWith(".txt")) return "text";
  return "binary";
}