import { APIResource } from "../../core/resource.js";
import * as BrowserAPI from "./browser.js";
import { Browser, BrowserCreateParams, BrowserCreateResponse, BrowserDeleteResponse } from "./browser.js";
import { APIPromise } from "../../core/api-promise.js";
import { RequestOptions } from "../../internal/request-options.js";
/**
 * Search and scrape webpages with finance-oriented defaults.
 */
export declare class Web extends APIResource {
    browser: BrowserAPI.Browser;
    /**
     * Scrape a webpage
     *
     * @param body - Scrape request parameters; see {@link WebScrapeParams}.
     * @param options - Optional per-request options.
     * @returns A promise for the {@link WebScrapeResponse} describing the page.
     */
    scrape(body: WebScrapeParams, options?: RequestOptions): APIPromise<WebScrapeResponse>;
    /**
     * Wraps Firecrawl search with finance-focused defaults. The raw `query` is passed
     * through, `include_domains` and `exclude_domains` are appended as `site:` and
     * `-site:` operators, multiple included domains are grouped with `OR`, and
     * `date_range` is sent separately as Firecrawl's `tbs` parameter for `web` results
     * only. When `search_type="all"`, this endpoint requests both `web` and `news`
     * results and flattens them into one combined list.
     *
     * @param body - Search request parameters; see {@link WebSearchParams}.
     * @param options - Optional per-request options.
     * @returns A promise for the {@link WebSearchResponse} holding the combined results.
     */
    search(body: WebSearchParams, options?: RequestOptions): APIPromise<WebSearchResponse>;
}
export interface WebScrapeResponse {
    /**
     * Favicon URL, if available.
     */
    favicon_url: string | null;
    /**
     * Open Graph image URL, if available.
     */
    image_url: string | null;
    /**
     * Extracted markdown content for the page. Empty when markdown was not requested
     * or not returned.
     */
    markdown: string | null;
    /**
     * Parsed publication date, if available. This is primarily populated when the
     * provider returns an absolute date.
     */
    published_date: string | null;
    /**
     * Final resolved URL reported by the scraper, or the requested URL when no
     * different final URL is available.
     */
    resolved_url: string;
    /**
     * Normalized scrape status. Returns `success` for 2xx responses or when the
     * provider omits a status code; otherwise returns the provider error string or
     * `error`.
     */
    status: string;
    /**
     * Page title, if available.
     */
    title: string | null;
    /**
     * Requested URL.
     */
    url: string;
    /**
     * Extracted rendered HTML content for the page, if requested.
     */
    html?: string | null;
    /**
     * Extracted raw HTML content for the page, if requested.
     */
    raw_html?: string | null;
    /**
     * Screenshot payload returned by the scraper, if requested. This is typically
     * either a signed image URL or an inline data URL such as
     * `data:image/png;base64,...`.
     */
    screenshot?: string | null;
}
export interface WebSearchResponse {
    /**
     * Search query sent to the upstream provider after source filters were applied.
     */
    effective_query: string;
    /**
     * Effective domain denylist applied to the search, including the built-in finance
     * denylist plus any caller-provided exclusions.
     */
    excluded_domains: Array<string>;
    /**
     * Normalized combined result list. When `search_type="all"`, `web` results appear
     * first, followed by `news` results.
     */
    results: Array<WebSearchResponse.Result>;
}
export declare namespace WebSearchResponse {
    /**
     * A single normalized entry in {@link WebSearchResponse.results}.
     */
    interface Result {
        /**
         * Result author, if available.
         */
        author: string | null;
        /**
         * Favicon URL for the source website, if available.
         */
        favicon_url: string | null;
        /**
         * Representative image URL, if available.
         */
        image_url: string | null;
        /**
         * Parsed publication date, if available. This is primarily populated when the
         * provider returns an absolute date.
         */
        published_date: string | null;
        /**
         * Snippet or scraped markdown content associated with the result.
         */
        text: string | null;
        /**
         * Result title, if available.
         */
        title: string | null;
        /**
         * Canonical result URL.
         */
        url: string;
        /**
         * Original publication date string when the provider returns a relative or
         * otherwise non-ISO value such as `today`.
         */
        published_date_raw?: string | null;
    }
}
export interface WebScrapeParams {
    /**
     * HTTP(S) URL of the webpage to scrape.
     */
    url: string;
    /**
     * Whether to block ads and cookie consent banners while scraping.
     */
    block_ads?: boolean;
    /**
     * Requested scrape output formats. Defaults to `['markdown']`. Mix `markdown`,
     * `html`, `rawHtml`, and screenshot configurations to retrieve multiple
     * representations in one scrape.
     */
    formats?: Array<'markdown' | 'html' | 'rawHtml' | WebScrapeParams.WebScrapeScreenshotFormat>;
    /**
     * Optional HTTP headers to send when scraping the page, such as `User-Agent`,
     * `Authorization`, or cookies.
     */
    headers?: {
        [key: string]: string;
    } | null;
    /**
     * Maximum cache age in milliseconds. If the cached scrape result is older than
     * this, the page will be re-scraped. Defaults to 24 hours.
     */
    max_age?: number;
    /**
     * Only return the main content of the page excluding headers, navs, footers, etc.
     */
    only_main_content?: boolean;
}
export declare namespace WebScrapeParams {
    /**
     * Object form of a `formats` entry that requests a screenshot.
     */
    interface WebScrapeScreenshotFormat {
        /**
         * Discriminator for screenshot scrape output.
         */
        type: 'screenshot';
        /**
         * Whether to capture the full page instead of only the viewport.
         */
        full_page?: boolean;
        /**
         * Viewport dimensions in pixels for the screenshot render.
         */
        viewport?: WebScrapeScreenshotFormat.Viewport;
    }
    namespace WebScrapeScreenshotFormat {
        /**
         * Viewport dimensions in pixels for the screenshot render.
         */
        interface Viewport {
            /**
             * Viewport height in pixels for the screenshot render.
             */
            height: number;
            /**
             * Viewport width in pixels for the screenshot render.
             */
            width: number;
        }
    }
}
export interface WebSearchParams {
    /**
     * Base search query text. You can include inline search operators here, such as
     * exact phrases (`"..."`), excluded terms (`-term`), file extension filters
     * (`filetype:pdf`), URL matching (`inurl:` and `allinurl:`), title matching
     * (`intitle:` and `allintitle:`), and related-site lookup (`related:`). Prefer
     * `include_domains` and `exclude_domains` over manually adding `site:` operators.
     * Use `date_range` for time filtering instead of embedding time operators in the
     * query.
     */
    query: string;
    /**
     * Search source to query. Use `all` to request both `web` and `news` results in
     * one call, or `news` for news-only results. There is currently no `web`-only
     * option. This field is required and has no server-side default.
     */
    search_type: 'all' | 'news';
    /**
     * An inclusive date range for filtering web search results.
     */
    date_range?: WebSearchParams.DateRange | null;
    /**
     * Additional domains to exclude from results. Each domain is converted into a
     * `-site:` operator and merged with the default denylist.
     */
    exclude_domains?: Array<string>;
    /**
     * Optional domains to require in results. A single domain is converted into a
     * `site:` operator; multiple domains are grouped into an `OR` expression so
     * matches from any included domain are allowed.
     */
    include_domains?: Array<string>;
    /**
     * Maximum number of results requested per upstream source. With
     * `search_type="all"`, up to this many `web` results and this many `news` results
     * may be returned.
     */
    limit?: number;
}
export declare namespace WebSearchParams {
    /**
     * An inclusive date range for filtering web search results.
     */
    interface DateRange {
        /**
         * Inclusive end date for filtering web search results.
         */
        end_date: string;
        /**
         * Inclusive start date for filtering web search results.
         */
        start_date: string;
    }
}
/**
 * Re-exports the request/response types of this resource, plus the `Browser`
 * sub-resource and its types, under the `Web` namespace.
 */
export declare namespace Web {
    export { type WebScrapeResponse as WebScrapeResponse, type WebSearchResponse as WebSearchResponse, type WebScrapeParams as WebScrapeParams, type WebSearchParams as WebSearchParams, };
    export { Browser as Browser, type BrowserCreateResponse as BrowserCreateResponse, type BrowserDeleteResponse as BrowserDeleteResponse, type BrowserCreateParams as BrowserCreateParams, };
}
//# sourceMappingURL=web.d.ts.map