/**
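 * @fileoverview Content routing helpers: classifies a response by its
 * Content-Type header and URL, and provides thin passthrough wrappers for JSON
 * parsing and binary-attachment metadata.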
 */
import type { BinaryDownloadMetadata } from "../../http/download.ts";

/**
 * Content categories a response can be routed to.
 *
 * The classifier in this module derives the category from the media type
 * and/or the URL's file extension; `"binary"` is the fallback used when no
 * other rule matches.
 */
export type RoutedContentKind =
	| "text"
	| "markdown"
	| "mdx"
	| "rst"
	| "source"
	| "json"
	| "xml"
	| "svg"
	| "html"
	| "pdf"
	| "binary";

/**
 * Routing decision for a piece of fetched content, as produced by
 * `routeContentType`.
 */
export interface ContentRoute {
	/** Category assigned to the content. */
	kind: RoutedContentKind;
	/** `true` only when `kind` is `"html"`. */
	shouldParseHtml: boolean;
	/** `true` only when `kind` is `"pdf"`. */
	shouldExtractPdf: boolean;
	/** `true` for every kind except `"pdf"` and `"binary"`. */
	isTextLike: boolean;
}

/**
 * Binary download metadata tagged with the `"binary"` kind, plus an optional
 * filename. Instances are built by `binaryAttachmentInfo`.
 */
export interface BinaryAttachmentInfo extends BinaryDownloadMetadata {
	/** Discriminant; always the literal `"binary"`. */
	kind: "binary";
	/** Optional filename supplied by the caller; may be absent or `undefined`. */
	filename?: string;
}

/** Kinds whose payload is handled as text (everything except PDF and binary). */
const TEXT_LIKE_KINDS: ReadonlySet<RoutedContentKind> =
	new Set<RoutedContentKind>([
		"text",
		"markdown",
		"mdx",
		"rst",
		"source",
		"json",
		"xml",
		"svg",
		"html",
	]);

/**
 * Decides how a response should be handled from its Content-Type header and
 * URL.
 *
 * @param contentType - Raw Content-Type header value; parameters such as
 *   `; charset=utf-8` are ignored and the comparison is case-insensitive.
 *   `undefined` is treated like an empty header.
 * @param url - URL (absolute or relative) the content came from. Only its
 *   path is inspected for a file extension; defaults to `""`.
 * @returns The assigned kind together with the derived handling flags.
 */
export function routeContentType(
	contentType: string | undefined,
	url = "",
): ContentRoute {
	// Keep only the media type essence: drop parameters, whitespace and case.
	const type = (contentType ?? "").split(";", 1)[0]?.trim().toLowerCase() ?? "";
	const path = pathOfUrl(url);
	const kind = classify(type, path);
	return {
		kind,
		shouldParseHtml: kind === "html",
		shouldExtractPdf: kind === "pdf",
		isTextLike: TEXT_LIKE_KINDS.has(kind),
	};
}

/**
 * Returns the lower-cased path of `url` without query, fragment, scheme or
 * authority, so that a host name such as `docs.rs` or `example.md` is never
 * mistaken for a file extension.
 */
function pathOfUrl(url: string): string {
	const withoutQuery = url.toLowerCase().split(/[?#]/u, 1)[0] ?? "";
	// Strip `scheme://authority` (or a protocol-relative `//authority`);
	// relative paths and bare file names pass through untouched.
	return withoutQuery.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\/[^/]*/u, "");
}

/**
 * Parses a JSON document held in a string.
 *
 * A single leading byte-order mark (U+FEFF) is removed before parsing:
 * `JSON.parse` does not treat it as whitespace and would otherwise reject an
 * otherwise valid document.
 *
 * @param text - JSON source text.
 * @returns The parsed value, typed `unknown` so callers must narrow it before
 *   use.
 * @throws SyntaxError when `text` is not valid JSON (including the empty
 *   string).
 */
export function parseJsonText(text: string): unknown {
	const json = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
	return JSON.parse(json);
}

/**
 * Builds the attachment descriptor for a binary download.
 *
 * Copies every own enumerable property of `file` into a new object, then sets
 * `kind` to `"binary"` (overriding any `kind` carried by `file`) and records
 * `filename`. The `filename` key is always present on the result, holding
 * `undefined` when no name was given. `file` itself is not modified.
 *
 * @param file - Metadata of the downloaded binary.
 * @param filename - Optional name to attach to the descriptor.
 * @returns A new `BinaryAttachmentInfo` object.
 */
export function binaryAttachmentInfo(
	file: BinaryDownloadMetadata,
	filename?: string,
): BinaryAttachmentInfo {
	const attachment: BinaryAttachmentInfo = {
		...file,
		kind: "binary",
		filename,
	};
	return attachment;
}

/** Path suffixes treated as source code: js/ts (with c/m/x variants), py, rs. */
const SOURCE_EXTENSION_PATTERN = /\.(?:[cm]?[jt]sx?|py|rs)$/u;

/**
 * JavaScript/TypeScript media types, covering both the `text/` and
 * `application/` families and their legacy `x-` spellings.
 */
const SCRIPT_MEDIA_TYPE_PATTERN =
	/^(?:application|text)\/(?:x-)?(?:java|ecma|type)script$/u;

/**
 * Maps a media type and URL path onto a content kind.
 *
 * Rules are evaluated top to bottom and the first match wins, so the order is
 * significant (e.g. `image/svg+xml` must hit the SVG rule before the generic
 * XML rule, and `.mdx` must be tested before `.md`-style markdown rules).
 *
 * @param type - Media type without parameters, already trimmed and
 *   lower-cased; may be empty.
 * @param path - Lower-cased path without query or fragment; may be empty.
 * @returns The first matching kind, or `"binary"` when nothing matches.
 */
function classify(type: string, path: string): RoutedContentKind {
	if (type === "application/pdf" || path.endsWith(".pdf")) return "pdf";
	if (
		type === "text/html" ||
		type === "application/xhtml+xml" ||
		path.endsWith(".html") ||
		path.endsWith(".htm")
	)
		return "html";
	if (path.endsWith(".mdx")) return "mdx";
	if (
		type.includes("markdown") ||
		path.endsWith(".md") ||
		path.endsWith(".markdown")
	)
		return "markdown";
	if (path.endsWith(".rst")) return "rst";
	// `.ts` is also the extension of MPEG transport-stream segments
	// (`video/mp2t`); an explicit video type must not be routed as text.
	const hasSourceExtension =
		SOURCE_EXTENSION_PATTERN.test(path) && !type.startsWith("video/");
	// Scripts served from extensionless URLs (e.g. CDN bundles) are only
	// identifiable by their media type.
	if (hasSourceExtension || SCRIPT_MEDIA_TYPE_PATTERN.test(type))
		return "source";
	if (type.includes("json") || path.endsWith(".json")) return "json";
	if (type.includes("svg") || path.endsWith(".svg")) return "svg";
	if (type.includes("xml") || path.endsWith(".xml")) return "xml";
	if (type.startsWith("text/") || path.endsWith(".txt")) return "text";
	return "binary";
}