import { parseHTML } from "linkedom";
import type { RenderResult, SpecialHandler } from "./types";
import { buildResult, loadPage } from "./types";
import { convertWithMarkit, fetchBinary } from "./utils";
/**
 * Handle IACR ePrint Archive URLs.
 *
 * Recognizes `eprint.iacr.org` URLs whose path ends in `/<year>/<number>`
 * (optionally followed by `.pdf`), loads the paper's landing page, and renders
 * its metadata (title, authors, date, keywords, abstract) as Markdown. When the
 * requested URL was the `.pdf` form, the PDF is also fetched and, if its
 * conversion succeeds with more than 500 characters of content, appended as a
 * "Full Paper" section.
 *
 * @param url - URL to handle.
 * @param timeout - Timeout forwarded unchanged to `loadPage`, `fetchBinary` and `convertWithMarkit`.
 * @param signal - Optional abort signal forwarded to the same helpers.
 * @returns The value produced by `buildResult`, or `null` when the URL is not
 *   an ePrint paper URL, the landing page could not be loaded, or any step threw.
 */
export const handleIacr: SpecialHandler = async (
  url: string,
  timeout: number,
  signal?: AbortSignal,
): Promise<RenderResult | null> => {
  try {
    const parsed = new URL(url);
    if (parsed.hostname !== "eprint.iacr.org") return null;

    // Extract paper ID from /year/number or /year/number.pdf
    const match = parsed.pathname.match(/\/(\d{4})\/(\d+)(?:\.pdf)?$/);
    if (!match) return null;

    const [, year, number] = match;
    const paperId = `${year}/${number}`;
    const fetchedAt = new Date().toISOString();
    const notes: string[] = [];

    // Metadata always comes from the HTML landing page, even for .pdf URLs.
    const pageUrl = `https://eprint.iacr.org/${paperId}`;
    const result = await loadPage(pageUrl, { timeout, signal });
    if (!result.ok) return null;

    const doc = parseHTML(result.content).document;

    // Prefer the visible heading; fall back to the citation meta tag.
    const title =
      doc.querySelector("h3.mb-3")?.textContent?.trim() ||
      doc.querySelector('meta[name="citation_title"]')?.getAttribute("content");

    // One <meta name="citation_author"> per author; drop missing/empty values.
    const authors = Array.from(
      doc.querySelectorAll('meta[name="citation_author"]') as Iterable<{
        getAttribute: (name: string) => string | null;
      }>,
    )
      .map(m => m.getAttribute("content"))
      .filter((author): author is string => Boolean(author));

    // Abstract is in the first <p> under the parent of the <h5> containing "Abstract".
    const abstractHeading = Array.from(
      doc.querySelectorAll("h5") as Iterable<{
        textContent: string | null;
        parentElement?: { querySelector: (selector: string) => { textContent: string | null } | null } | null;
      }>,
    ).find(h => h.textContent?.includes("Abstract"));
    const abstract =
      abstractHeading?.parentElement?.querySelector("p")?.textContent?.trim() ||
      doc.querySelector('meta[name="description"]')?.getAttribute("content");

    const keywords = doc.querySelector(".keywords")?.textContent?.replace("Keywords:", "").trim();
    const pubDate = doc.querySelector('meta[name="citation_publication_date"]')?.getAttribute("content");

    // Assemble the Markdown header; optional fields are omitted when absent.
    let md = `# ${title || "IACR ePrint Paper"}\n\n`;
    if (authors.length) md += `**Authors:** ${authors.join(", ")}\n`;
    if (pubDate) md += `**Date:** ${pubDate}\n`;
    md += `**ePrint:** ${paperId}\n`;
    if (keywords) md += `**Keywords:** ${keywords}\n`;
    md += `\n---\n\n## Abstract\n\n${abstract || "No abstract available."}\n\n`;

    // If it was a PDF link, try to fetch and convert the PDF as well.
    if (parsed.pathname.endsWith(".pdf")) {
      const pdfUrl = `https://eprint.iacr.org/${paperId}.pdf`;
      notes.push("Fetching PDF for full content...");
      const pdfResult = await fetchBinary(pdfUrl, timeout, signal);
      if (pdfResult.ok) {
        const converted = await convertWithMarkit(pdfResult.buffer, ".pdf", timeout, signal);
        // Conversions of 500 characters or fewer are not appended.
        if (converted.ok && converted.content.length > 500) {
          md += `---\n\n## Full Paper\n\n${converted.content}\n`;
          notes.push("PDF converted via markit");
        }
      }
    }

    return buildResult(md, {
      url,
      method: "iacr",
      fetchedAt,
      notes: notes.length ? notes : ["Fetched from IACR ePrint Archive"],
    });
  } catch {
    // Best-effort handler: any failure (invalid URL, fetch or parse error)
    // falls through to `null` instead of propagating to the caller.
  }
  return null;
};