import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import { extractContent, extractHeadingTitle, fetchAllContent } from "./extract.js";
import type { ExtractedContent } from "./storage.js";

/**
 * Tests for the helpers exported by `./extract.js`.
 *
 * The global `fetch` is replaced with a Vitest mock for every test, so no
 * network traffic is generated; each test queues the responses it expects
 * to be consumed, in call order.
 */
describe("extract", () => {
  const mockFetch = vi.fn();

  /**
   * Builds a successful (HTTP 200) stub exposing only the parts of a fetch
   * response these tests provide: `ok`, `status`, `headers`, and `text()`.
   *
   * @param body - Payload returned by `text()`.
   * @param contentType - Value of the `content-type` header.
   */
  const okResponse = (body: string, contentType: string) => ({
    ok: true,
    status: 200,
    headers: new Headers({
      "content-type": contentType,
      // Fixtures are ASCII-only, so the string length equals the byte length.
      "content-length": String(body.length),
    }),
    text: async () => body,
  });

  beforeEach(() => {
    vi.stubGlobal("fetch", mockFetch);
    // Drop queued responses and recorded calls left over from the previous test.
    mockFetch.mockReset();
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  describe("extractContent", () => {
    it("returns error for invalid URL", async () => {
      const result = await extractContent("not-a-url");

      expect(result.error).toBe("Invalid URL");
      expect(result.url).toBe("not-a-url");
      // Validation must fail before any request is issued.
      expect(mockFetch).not.toHaveBeenCalled();
    });

    it("returns error for aborted request", async () => {
      // The signal is already aborted when extractContent receives it.
      const controller = new AbortController();
      controller.abort();

      const result = await extractContent("https://example.com", controller.signal);

      expect(result.error).toBe("Aborted");
      expect(result.url).toBe("https://example.com");
    });

    it("extracts readable HTML content", async () => {
      // A real HTML document, so the fixture matches its text/html content
      // type: a <title>, a heading, and two long paragraphs.
      const html = `<!DOCTYPE html>
<html>
  <head>
    <title>Test Article</title>
  </head>
  <body>
    <article>
      <h1>Test Article Title</h1>
      <p>${"Lorem ipsum dolor sit amet. ".repeat(50)}</p>
      <p>${"Consectetur adipiscing elit. ".repeat(50)}</p>
    </article>
  </body>
</html>`;
      mockFetch.mockResolvedValueOnce(okResponse(html, "text/html; charset=utf-8"));

      const result = await extractContent("https://example.com/article");

      expect(result.url).toBe("https://example.com/article");
      expect(result.title).toBeTruthy();
      expect(result.content.length).toBeGreaterThan(100);
      expect(result.error).toBeNull();
    });

    it("returns non-HTML content directly", async () => {
      const jsonContent = JSON.stringify({ key: "value", data: [1, 2, 3] });
      mockFetch.mockResolvedValueOnce(okResponse(jsonContent, "application/json"));

      const result = await extractContent("https://api.example.com/data.json");

      // The body is expected back verbatim.
      expect(result.content).toBe(jsonContent);
      expect(result.error).toBeNull();
    });

    it("returns HTTP error with status code", async () => {
      mockFetch.mockResolvedValueOnce({
        ok: false,
        status: 404,
        statusText: "Not Found",
        headers: new Headers({}),
        text: async () => "Not Found",
      });
      // Also mock the Jina fallback
      mockFetch.mockResolvedValueOnce({
        ok: false,
        status: 500,
        headers: new Headers({}),
        text: async () => "",
      });

      const result = await extractContent("https://example.com/missing");

      expect(result.error).toBeTruthy();
      // The reported error must carry the original 404 status.
      expect(result.error).toContain("404");
    });

    it("rejects unsupported content types", async () => {
      // Built inline: the advertised content-length deliberately differs from
      // the (empty) body, which the okResponse helper cannot express.
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        headers: new Headers({
          "content-type": "image/png",
          "content-length": "1024",
        }),
        text: async () => "",
      });

      const result = await extractContent("https://example.com/image.png");

      expect(result.error).toBe("Unsupported content type");
      // Should NOT try Jina fallback for non-recoverable errors
      expect(mockFetch).toHaveBeenCalledTimes(1);
    });
  });

  describe("fetchAllContent", () => {
    it("fetches multiple URLs concurrently", async () => {
      /** Builds a minimal but well-formed HTML page titled `title`. */
      const makeHtml = (title: string): string => {
        const body = "Content here. ".repeat(100);
        return `<!DOCTYPE html>
<html>
  <head>
    <title>${title}</title>
  </head>
  <body>
    <article>
      <h1>${title}</h1>
      <p>${body}</p>
    </article>
  </body>
</html>`;
      };

      // Responses are handed out in call order: first call gets Page One.
      mockFetch
        .mockResolvedValueOnce(okResponse(makeHtml("Page One"), "text/html"))
        .mockResolvedValueOnce(okResponse(makeHtml("Page Two"), "text/html"));

      const results = await fetchAllContent([
        "https://example.com/one",
        "https://example.com/two",
      ]);

      expect(results).toHaveLength(2);
      // Compare the whole URL list: this checks that result order follows
      // input order without unchecked index access.
      expect(results.map((result) => result.url)).toEqual([
        "https://example.com/one",
        "https://example.com/two",
      ]);
      expect(mockFetch).toHaveBeenCalledTimes(2);
    });
  });

  describe("extractHeadingTitle", () => {
    it("extracts h1 title", () => {
      const text = "# My Great Article\n\nSome content here.";

      expect(extractHeadingTitle(text)).toBe("My Great Article");
    });

    it("extracts h2 title", () => {
      const text = "## Section Heading\n\nMore content.";

      expect(extractHeadingTitle(text)).toBe("Section Heading");
    });

    it("returns null for no heading", () => {
      const text = "Just some plain text without any headings.";

      expect(extractHeadingTitle(text)).toBeNull();
    });
  });
});