/*! Copyright (c) 2025, XAPP AI */
import type { Page } from "puppeteer-core";

/**
 * The kinds of document a detected link can point to.
 */
export type DocumentType = "PDF" | "MS_WORD" | "MS_EXCEL" | "PPT";

/**
 * A document link detected on a page.
 */
export interface DocumentLink {
  /**
   * The absolute URL to the document
   */
  url: string;
  /**
   * The type of document
   */
  type: DocumentType;
  /**
   * Optional title/text of the link
   */
  title?: string;
}

/**
 * All supported document extensions
 */
export declare const DOCUMENT_EXTENSIONS: string[];

/**
 * Detects document links on a page
 *
 * This function identifies document links by checking file extensions in both the pathname
 * and query parameters of URLs. It detects common document formats including PDF, Word,
 * Excel, and PowerPoint files.
 *
 * **Limitations**:
 * - Only detects documents with recognizable file extensions
 * - Does not detect documents served via content-type headers without extensions
 * - Does not make HEAD requests to verify content types
 *
 * @param page - The Puppeteer page to scan for document links
 * @param allowedTypes - Optional array of document types to filter by (e.g., ['PDF', 'MS_WORD'])
 * @returns Promise resolving to an array of detected document links
 *
 * @example
 * ```typescript
 * const documentLinks = await detectDocumentLinks(page);
 * // Returns all document links
 *
 * const pdfLinks = await detectDocumentLinks(page, ['PDF']);
 * // Returns only PDF links
 * ```
 */
export declare function detectDocumentLinks(
  page: Page,
  allowedTypes?: DocumentType[],
): Promise<DocumentLink[]>;