// SAMPLE_HTML is the fixture string that the cleanHtml tests pass to
// cleanHtml() together with the base URL "https://example.com".
// NOTE(review): as it appears here the literal holds only plain text — no tags
// and no script element — even though the first test is named "removes
// scripts". Presumably the markup was lost somewhere upstream; confirm against
// the intended fixture before trusting assertions that depend on it.
import { describe, it, expect } from "vitest"; import { cleanHtml } from "../cleaner.js"; import { htmlToMarkdown, htmlToText } from "../converter.js"; import { extractContent, extractLinks, extractDescription } from "../extractor.js"; const SAMPLE_HTML = ` Test Page

Hello World

This is the main content of the page.

It has a link and some text.

NameValue
FooBar
Hidden content
Buy stuff now!
`; describe("cleanHtml", () => { it("removes scripts", () => { const result = cleanHtml(SAMPLE_HTML, "https://example.com"); expect(result).not.toContain("