/**
 * Parse an HTML string into a queryable Document object.
 *
 * @param html - Raw HTML markup to parse.
 * @returns The parsed document, queryable via `querySelector` / `querySelectorAll`.
 */
export declare function parseHTML(html: string): Document;

/**
 * Fetch a URL and parse the response into a Document.
 *
 * @param url - Address of the page to fetch.
 * @param options - Optional request settings (see {@link FetchHTMLOptions}).
 * @returns A promise for the parsed document.
 */
export declare function fetchHTML(url: string, options?: FetchHTMLOptions): Promise<Document>;

/**
 * Extract the text content from HTML, stripping all tags.
 *
 * @param html - Raw HTML markup.
 * @returns The text content with tags removed.
 */
export declare function extractText(html: string): string;

/**
 * Extract all links from HTML.
 *
 * @param html - Raw HTML markup.
 * @param baseUrl - Optional base URL; presumably used to resolve relative links (confirm against the implementation).
 * @returns The links found in the markup.
 */
export declare function extractLinks(html: string, baseUrl?: string): string[];

/**
 * Extract meta tags from HTML.
 *
 * NOTE(review): the type arguments of the returned `Record` were missing from the
 * declaration; `Record<string, string>` is assumed — confirm against the implementation.
 *
 * @param html - Raw HTML markup.
 * @returns The meta tag values found in the markup.
 */
export declare function extractMeta(html: string): Record<string, string>;

/**
 * Helper to wait for a condition (useful for client-side rendered content).
 *
 * Note: This is a simple polling implementation since we can't execute JavaScript.
 *
 * NOTE(review): the promise type arguments were missing from the declaration;
 * `Promise<boolean>` for the condition and `Promise<void>` for the result are
 * assumed — confirm against the implementation.
 *
 * @param condition - Predicate to poll; may be synchronous or asynchronous.
 * @param options - Optional `timeout` and polling `interval` (presumably milliseconds — confirm).
 */
export declare function waitFor(
  condition: () => boolean | Promise<boolean>,
  options?: { timeout?: number, interval?: number },
): Promise<void>;

/**
 * Batch fetch multiple URLs in parallel.
 *
 * NOTE(review): the element type of the result was missing from the declaration;
 * the per-URL result shape below is a reconstruction — confirm against the implementation.
 *
 * @param urls - Addresses to fetch.
 * @param options - Optional request settings (see {@link FetchHTMLOptions}).
 * @returns A promise for the per-URL results.
 */
export declare function fetchMultiple(
  urls: string[],
  options?: FetchHTMLOptions,
): Promise<Array<{ url: string, document: Document | null, error?: Error }>>;

/**
 * Extract structured data from common formats.
 *
 * NOTE(review): the `Record` type arguments were missing from the declaration;
 * `Record<string, string>` is assumed — confirm against the implementation.
 *
 * @param html - Raw HTML markup.
 * @returns The data found, grouped into `jsonLd`, `openGraph`, `twitter` and `microdata`.
 */
export declare function extractStructuredData(html: string): {
  jsonLd: any[]
  openGraph: Record<string, string>
  twitter: Record<string, string>
  microdata: any[]
};

/**
 * Lightweight web scraper using only Bun native APIs
 * No external dependencies required
 *
 * @example
 * ```ts
 * import { fetchHTML, parseHTML } from './web-scraper'
 *
 * // Simple usage
 * const doc = await fetchHTML('https://example.com')
 * const title = doc.querySelector('title')?.textContent
 *
 * // Advanced usage with custom options
 * const html = await fetch('https://example.com').then(r => r.text())
 * const doc = parseHTML(html)
 * const links = doc.querySelectorAll('a[href]')
 * links.forEach(link => console.log(link.getAttribute('href')))
 * ```
 */

/**
 * A parsed element node exposing a small DOM-like query surface.
 *
 * NOTE(review): the type arguments of `attributes` were missing from the
 * declaration; `Record<string, string>` is assumed.
 */
export declare interface HTMLElement {
  tagName: string
  attributes: Record<string, string>
  textContent: string
  innerHTML: string
  children: HTMLElement[]
  /** Enclosing element, or `null` when there is none. */
  parent: HTMLElement | null
  /** Returns a matching element for `selector`, or `null` when nothing matches. */
  querySelector: (selector: string) => HTMLElement | null
  /** Returns the elements matching `selector`. */
  querySelectorAll: (selector: string) => HTMLElement[]
  /** Returns the value of attribute `name`, or `null`. */
  getAttribute: (name: string) => string | null
  hasAttribute: (name: string) => boolean
  getElementById: (id: string) => HTMLElement | null
  getElementsByClassName: (className: string) => HTMLElement[]
  getElementsByTagName: (tagName: string) => HTMLElement[]
}

/**
 * The document returned by `parseHTML` and `fetchHTML`; it has the same query
 * surface as {@link HTMLElement}.
 */
export declare interface Document extends HTMLElement {
  querySelector: (selector: string) => HTMLElement | null
  querySelectorAll: (selector: string) => HTMLElement[]
}

/**
 * Request settings accepted by `fetchHTML` and `fetchMultiple`.
 *
 * NOTE(review): the type arguments of `headers` were missing from the
 * declaration; `Record<string, string>` is assumed.
 */
export declare interface FetchHTMLOptions {
  /** Request timeout (presumably milliseconds — confirm). */
  timeout?: number
  headers?: Record<string, string>
  userAgent?: string
  redirect?: 'follow' | 'manual' | 'error'
  signal?: AbortSignal
}