import { parseHTML } from "linkedom";
import type { RenderResult, SpecialHandler } from "./types";
import { buildResult, loadPage } from "./types";
/** Section titles that are dropped from the rendered article. */
const WIKIPEDIA_SKIPPED_SECTIONS: ReadonlySet<string> = new Set([
  "References",
  "External links",
  "See also",
  "Notes",
  "Further reading",
]);

/** Markdown heading prefix for each heading tag the section scan looks at. */
const WIKIPEDIA_HEADING_PREFIX: Readonly<Record<string, string>> = {
  H2: "##",
  H3: "###",
  H4: "####",
};

/**
 * Extract the language subdomain and decoded article title from a Wikipedia URL.
 *
 * Accepts `<lang>.wikipedia.org` and the mobile variant `<lang>.m.wikipedia.org`;
 * `<lang>` may contain hyphens (e.g. `zh-min-nan`).
 *
 * @returns `null` when the host or path does not look like a Wikipedia article.
 * @throws TypeError for an unparsable URL, URIError for malformed percent-encoding.
 */
function parseWikipediaUrl(url: string): { lang: string; title: string } | null {
  const parsed = new URL(url);
  const lang = parsed.hostname.match(/^([\w-]+)(?:\.m)?\.wikipedia\.org$/)?.[1];
  const rawTitle = parsed.pathname.match(/\/wiki\/(.+)/)?.[1];
  if (!lang || !rawTitle) return null;
  return { lang, title: decodeURIComponent(rawTitle) };
}

/**
 * Render the JSON body of the `page/summary` endpoint as a Markdown header block.
 *
 * @returns The Markdown block, or `""` when the body is not valid JSON or has
 * no string `title` (so a bad summary never blocks the article body).
 */
function renderWikipediaSummary(raw: string): string {
  let data: unknown;
  try {
    data = JSON.parse(raw);
  } catch {
    return "";
  }
  if (typeof data !== "object" || data === null) return "";

  const { title, description, extract } = data as Record<string, unknown>;
  if (typeof title !== "string") return "";

  let md = `# ${title}\n\n`;
  if (typeof description === "string" && description) md += `*${description}*\n\n`;
  // Only interpolate a real extract; a missing field must not render as "undefined".
  if (typeof extract === "string") md += `${extract}\n\n`;
  md += `---\n\n`;
  return md;
}

/**
 * Render the HTML body of the `page/mobile-html` endpoint as Markdown:
 * one heading per `<section>` followed by its paragraphs.
 *
 * Sections titled as in `WIKIPEDIA_SKIPPED_SECTIONS` are omitted, and
 * paragraphs of 20 characters or fewer are dropped.
 */
function renderWikipediaArticle(html: string): string {
  const doc = parseHTML(html).document;
  // Sections already skipped, so that sections nested inside them are skipped too.
  const skipped = new Set<unknown>();
  let md = "";

  for (const section of doc.querySelectorAll("section")) {
    // NOTE(review): the API output may nest subsection <section> elements inside
    // their parent — confirm against live responses. The `closest` checks below
    // are no-ops when sections are flat, and prevent duplicated text when nested.
    const enclosing = section.parentElement?.closest("section");
    if (enclosing && skipped.has(enclosing)) {
      skipped.add(section);
      continue;
    }

    const firstHeading = section.querySelector("h2, h3, h4");
    // Ignore a heading that belongs to a nested subsection rather than this one.
    const heading = firstHeading && firstHeading.closest("section") === section ? firstHeading : null;
    const headingText = heading?.textContent?.trim();

    if (headingText && WIKIPEDIA_SKIPPED_SECTIONS.has(headingText)) {
      skipped.add(section);
      continue;
    }

    if (heading && headingText) {
      const prefix = WIKIPEDIA_HEADING_PREFIX[heading.tagName.toUpperCase()] ?? "###";
      md += `${prefix} ${headingText}\n\n`;
    }

    for (const p of section.querySelectorAll("p")) {
      // Paragraphs inside a nested section are emitted when that section is visited.
      if (p.closest("section") !== section) continue;
      const text = p.textContent?.trim();
      if (text && text.length > 20) {
        md += `${text}\n\n`;
      }
    }
  }

  return md;
}

/**
 * Handle Wikipedia article URLs via the Wikipedia REST API.
 *
 * Fetches the page summary and the mobile HTML rendering concurrently and
 * combines them into one Markdown document.
 *
 * @param url - Candidate URL; anything that is not a `/wiki/<title>` page on a
 *   `*.wikipedia.org` host is declined.
 * @param timeout - Passed through to `loadPage` for each request.
 * @param signal - Optional abort signal, passed through to `loadPage`.
 * @returns The rendered result, or `null` when the URL is not handled, no
 *   content could be produced, or any step threw.
 */
export const handleWikipedia: SpecialHandler = async (
  url: string,
  timeout: number,
  signal?: AbortSignal,
): Promise<RenderResult | null> => {
  try {
    const target = parseWikipediaUrl(url);
    if (!target) return null;

    const fetchedAt = new Date().toISOString();
    const apiBase = `https://${target.lang}.wikipedia.org/api/rest_v1/page`;
    const encodedTitle = encodeURIComponent(target.title);

    // The two requests are independent, so issue them in parallel.
    const [summaryResult, contentResult] = await Promise.all([
      loadPage(`${apiBase}/summary/${encodedTitle}`, { timeout, signal }),
      loadPage(`${apiBase}/mobile-html/${encodedTitle}`, { timeout, signal }),
    ]);

    let md = "";
    if (summaryResult.ok) md += renderWikipediaSummary(summaryResult.content);
    if (contentResult.ok) md += renderWikipediaArticle(contentResult.content);

    if (!md) return null;
    return buildResult(md, { url, method: "wikipedia", fetchedAt, notes: ["Fetched via Wikipedia API"] });
  } catch {
    // Deliberate best-effort: an invalid URL, bad percent-encoding, or a failure
    // thrown while fetching/parsing declines the URL instead of propagating —
    // presumably so the caller can fall back to another strategy.
    return null;
  }
};