/**
 * @file HTTP client module.
 *
 * Exposes {@link HttpClient}, a thin orchestration layer over undici's
 * `request` that validates URLs, applies the request policy, follows
 * redirects, retries idempotent requests and optionally uses the fetch cache.
 */
import { request, type Dispatcher } from "undici";

import {
  DEFAULT_MAX_BYTES,
  DEFAULT_RETRY,
  DEFAULT_TIMEOUT_SECONDS,
  DEFAULT_USER_AGENT,
} from "../defaults.ts";
import { findFreshFetch, recordFetch } from "../storage/cache/fetch-cache.ts";
import type { ResolveStorageOptions } from "../storage/paths.ts";
import type { CommonRequestOptions } from "../types.ts";
import { normalizeHeaders } from "./download.ts";
import { HttpClientError, httpClientErrorFromUnknown } from "./errors.ts";
import { createDefaultDispatcher } from "./guarded-agent.ts";
import { PolitenessController, abortableSleep } from "./politeness.ts";
import { resolveEnvProxyForUrl } from "./proxy-config.ts";
import { createProxyDispatcher } from "./proxy-dispatcher.ts";
import { followRedirects } from "./redirects.ts";
import { fetchWithRequestPolicy } from "./request-policy.ts";
import { materializeFetchStreamResponse, type FetchUrlResult } from "./response.ts";
import {
  isRetryableStatus,
  isIdempotentMethod,
  parseRetryAfterMs,
  retryDelayMs,
  shouldStopRetrying,
} from "./retry.ts";
import { type CacheEntry, RobotsCache } from "./robots.ts";
import { getOrCreateSession, mergeSessionHeaders, updateSessionCookies } from "./session.ts";
import { withTimeout } from "./timeout.ts";
import { assertSafeFetchUrl, type SafeUrlResult, type UrlSafetyOptions } from "./url-safety.ts";

export { HttpClientError } from "./errors.ts";
export { createFetchUrlResult } from "./response.ts";
export type { FetchUrlResult } from "./response.ts";

/** Construction-time settings shared by every request an {@link HttpClient} makes. */
export interface HttpClientOptions extends UrlSafetyOptions {
  /**
   * Dispatcher used for requests that do not go through a proxy. When set,
   * environment-derived proxies are not consulted.
   */
  dispatcher?: Dispatcher;
  /** `user-agent` sent by default and used for robots.txt handling. */
  userAgent?: string;
  /** Forwarded to the politeness controller. */
  globalConcurrency?: number;
  /** Forwarded to the politeness controller. */
  perHostConcurrency?: number;
  /** Total attempts for idempotent requests; a per-request value takes precedence. */
  retryAttempts?: number;
  /** Redirect limit; a per-request value takes precedence. Falls back to 5. */
  maxRedirects?: number;
  /** Storage location options used for the fetch cache and sessions. */
  storage?: ResolveStorageOptions;
}

/** Per-request settings accepted by {@link HttpClient.fetchUrl}. */
export interface FetchUrlOptions extends CommonRequestOptions {
  /** HTTP method; `GET` when omitted. */
  method?: "GET" | "HEAD" | "POST" | "PUT" | "PATCH" | "DELETE";
  /** Request body handed to undici unchanged. */
  body?: string | Uint8Array;
  downloadBinary?: boolean;
  forceText?: boolean;
  /** Redirect limit for this request; overrides the client-level value. */
  maxRedirects?: number;
  /** When set, cookies are loaded from and stored into this session. */
  sessionId?: string;
  /** Extra cookies serialized as `name=value` pairs into the `cookie` header. */
  cookies?: Record<string, string>;
}

// NOTE(review): the type arguments here are reconstructed (the `CacheEntry`
// import has no other use in this file) — confirm against the type of
// RobotsCache's `cache` option.
const sharedRobotsCache = new Map<string, Promise<CacheEntry>>();

/** Clear the shared robots.txt cache. Intended for test isolation. */
export function clearSharedRobotsCache(): void {
  sharedRobotsCache.clear();
}

/**
 * HTTP client built on undici.
 *
 * Every URL is passed through `assertSafeFetchUrl` before use. Requests run
 * through `fetchWithRequestPolicy` and `followRedirects`; idempotent methods
 * are retried. All instances share one module-level robots.txt cache.
 */
export class HttpClient {
  private readonly dispatcher: Dispatcher;
  private readonly userAgent: string;
  private readonly politeness: PolitenessController;
  private readonly robots: RobotsCache;

  /** @param options Client-wide defaults; see {@link HttpClientOptions}. */
  constructor(private readonly options: HttpClientOptions = {}) {
    this.dispatcher = options.dispatcher ?? createDefaultDispatcher(options);
    this.userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
    this.politeness = new PolitenessController({
      globalConcurrency: options.globalConcurrency,
      perHostConcurrency: options.perHostConcurrency,
    });
    this.robots = new RobotsCache({
      userAgent: this.userAgent,
      fetchText: (url, signal) => this.fetchRobotsText(url, signal),
      cache: sharedRobotsCache,
    });
  }

  /** Validate `input` with the client's URL-safety options, keeping any trailing slash. */
  private async safeFetchUrl(input: string | URL): Promise<SafeUrlResult> {
    return await assertSafeFetchUrl(input, {
      ...this.options,
      trimTrailingSlash: false,
    });
  }

  /**
   * Fetch a URL, consulting and updating the fetch cache when a TTL is given.
   *
   * @param input URL to fetch; validated before any network activity.
   * @param fetchOptions Per-request options.
   * @param signal Optional abort signal.
   * @returns A cache hit as returned by `findFreshFetch`, otherwise the fresh
   *   result annotated with `cache: { cached: false, stale: false }`.
   * @throws HttpClientError Non-`HttpClientError` failures raised after URL
   *   validation are wrapped with code `HTTP_FETCH_FAILED`.
   */
  async fetchUrl(
    input: string | URL,
    fetchOptions: FetchUrlOptions = {},
    signal?: AbortSignal,
  ): Promise<FetchUrlResult> {
    const safe = await this.safeFetchUrl(input);
    try {
      const ttl = fetchOptions.cacheTtlSeconds;
      // NOTE(review): the cache is looked up by normalized URL for every method
      // except HEAD, so a POST/PUT with a TTL can be served from the cache —
      // confirm this is intended.
      if (fetchOptions.method !== "HEAD" && ttl && ttl > 0 && fetchOptions.refresh !== true) {
        const hit = await findFreshFetch(safe.normalizedUrl, ttl, {
          ...this.options.storage,
          maxAgeSeconds: fetchOptions.maxAgeSeconds,
        });
        if (hit) return hit;
      }

      const result = await this.fetchWithRetries(safe, fetchOptions, signal, true);
      if (fetchOptions.method !== "HEAD" && ttl && ttl > 0) {
        await recordFetch(result, { ...this.options.storage, ttlSeconds: ttl });
      }
      return { ...result, cache: { cached: false, stale: false } };
    } catch (error) {
      if (error instanceof HttpClientError) {
        throw error;
      }
      throw httpFetchError(error, safe.normalizedUrl, fetchOptions);
    }
  }

  /**
   * Fetch callback handed to the robots cache.
   *
   * Uses a 5 second timeout and a 256 KiB body limit, forces text decoding,
   * and disables robots checks for the request itself.
   */
  private async fetchRobotsText(
    url: string,
    signal?: AbortSignal,
  ): Promise<{ status: number; text: string }> {
    const safe = await this.safeFetchUrl(url);
    const result = await this.fetchWithRetries(
      safe,
      {
        respectRobots: false,
        timeoutSeconds: 5,
        maxBytes: 256 * 1024,
        headers: { accept: "text/plain,*/*;q=0.1" },
        forceText: true,
      },
      signal,
      false,
    );
    return {
      status: result.status,
      text: result.text ?? result.body?.toString("utf8") ?? "",
    };
  }

  /**
   * Run the redirect-following request, retrying on retryable statuses and
   * errors.
   *
   * @param initialSafe Validated URL to start from.
   * @param options Per-request options.
   * @param signal Optional abort signal, also used to interrupt back-off sleeps.
   * @param applyPolicy Forwarded unchanged to `fetchWithRequestPolicy`.
   * @throws HttpClientError When retrying stops on an error.
   */
  private async fetchWithRetries(
    initialSafe: SafeUrlResult,
    options: FetchUrlOptions,
    signal: AbortSignal | undefined,
    applyPolicy: boolean,
  ): Promise<FetchUrlResult> {
    const configuredAttempts =
      options.retryAttempts ?? this.options.retryAttempts ?? DEFAULT_RETRY.attempts;
    // Always make at least one attempt: a configured value of 0 (or a negative
    // or NaN value) would otherwise skip the loop and fail without sending any
    // request. Non-idempotent methods are never retried.
    const attempts =
      isIdempotentMethod(options.method) && configuredAttempts >= 1 ? configuredAttempts : 1;

    let lastError: unknown;
    for (let attempt = 1; attempt <= attempts; attempt += 1) {
      try {
        const result = await followRedirects({
          initialSafe,
          maxRedirects: options.maxRedirects ?? this.options.maxRedirects ?? 5,
          fetchRequest: (safe) =>
            fetchWithRequestPolicy({
              safe,
              respectRobots: options.respectRobots,
              applyPolicy,
              robots: this.robots,
              politeness: this.politeness,
              userAgent: this.userAgent,
              signal,
              fetch: () => this.fetchOnce(safe.normalizedUrl, options, signal),
            }),
          resolveSafeUrl: (url) => this.safeFetchUrl(url),
        });

        // Report status and Retry-After for the host that finally answered.
        this.politeness.noteResponse(
          new URL(result.finalUrl).host,
          result.status,
          parseRetryAfterMs(result.headers["retry-after"]),
        );

        if (attempt < attempts && isRetryableStatus(result.status)) {
          await abortableSleep(
            retryDelayMs(attempt, result.headers["retry-after"], options),
            signal,
          );
          continue;
        }
        // Either a non-retryable status or the last attempt: hand back as-is.
        return result;
      } catch (error) {
        lastError = error;
        if (
          shouldStopRetrying(
            error,
            signal,
            attempt,
            attempts,
            (value): value is HttpClientError => value instanceof HttpClientError,
          )
        ) {
          throw httpFetchError(error, initialSafe.normalizedUrl, options);
        }
        await abortableSleep(retryDelayMs(attempt, undefined, options), signal);
      }
    }
    throw httpFetchError(lastError, initialSafe.normalizedUrl, options);
  }

  /**
   * Perform a single HTTP request without redirects or retries.
   *
   * @param url Already-validated URL to request.
   * @param options Per-request options (headers, cookies, proxy, limits).
   * @param parentSignal Optional abort signal combined with the request timeout.
   */
  private async fetchOnce(
    url: string,
    options: FetchUrlOptions,
    parentSignal?: AbortSignal,
  ): Promise<FetchUrlResult> {
    const timeoutMs = (options.timeoutSeconds ?? DEFAULT_TIMEOUT_SECONDS) * 1_000;
    const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES;
    const { signal, cleanup } = withTimeout(parentSignal, timeoutMs);
    try {
      // Session support: load cookies for this request.
      const session = options.sessionId
        ? await getOrCreateSession(options.sessionId, this.options.storage)
        : undefined;
      const urlObj = new URL(url);
      const cookieHeader = options.cookies
        ? Object.entries(options.cookies)
            .map(([name, value]) => `${name}=${value}`)
            .join("; ")
        : "";
      const mergedHeaders = mergeSessionHeaders(
        session,
        urlObj.hostname,
        urlObj.pathname,
        urlObj.protocol === "https:" ? "https" : "http",
        options.headers,
      );

      // Normalize all header keys to lowercase to prevent HTTP/2
      // ERR_HTTP2_HEADER_SINGLE_VALUE when user-supplied headers use different
      // casing (e.g. "User-Agent") than the default lowercased keys. Object
      // spread only deduplicates identical keys, and "user-agent" and
      // "User-Agent" are distinct JS keys that HTTP/2 lowercases into one
      // duplicated header.
      const lowerHeaders: typeof mergedHeaders = {};
      for (const [key, value] of Object.entries(mergedHeaders)) {
        lowerHeaders[key.toLowerCase()] = value;
      }

      // Append explicit cookies only after normalization. Doing it earlier
      // against the literal "cookie" key misses a differently-cased "Cookie"
      // header, and one of the two values is then lost when the keys collapse.
      if (cookieHeader) {
        const existingCookie = lowerHeaders["cookie"];
        lowerHeaders["cookie"] = existingCookie
          ? `${existingCookie}; ${cookieHeader}`
          : cookieHeader;
      }

      // Use explicit proxy, then env-derived proxy (only when no custom
      // dispatcher was injected), then the default dispatcher.
      const hasExplicitProxy = options.proxy && options.proxy.length > 0;
      const effectiveProxy = hasExplicitProxy
        ? options.proxy
        : this.options.dispatcher
          ? undefined
          : resolveEnvProxyForUrl(url);
      const effectiveDispatcher = effectiveProxy
        ? createProxyDispatcher(effectiveProxy, this.options)
        : this.dispatcher;

      const response = await request(url, {
        method: options.method ?? "GET",
        body: options.body,
        dispatcher: effectiveDispatcher,
        headers: {
          "user-agent": this.userAgent,
          accept: "*/*",
          // Caller and session headers override the two defaults above.
          ...lowerHeaders,
        },
        signal,
      });

      const result = await materializeFetchStreamResponse({
        url,
        status: response.statusCode,
        headers: normalizeHeaders(response.headers),
        body: response.body,
        maxBytes,
        options,
        discardBody: () => response.body.dump(),
      });

      // Update session cookies from Set-Cookie headers.
      if (session) {
        const setCookie = result.headers["set-cookie"];
        if (setCookie) {
          updateSessionCookies(
            session,
            Array.isArray(setCookie) ? setCookie : [setCookie],
            urlObj.hostname,
            urlObj.pathname,
          );
        }
      }
      return result;
    } finally {
      // Always release the timeout, whether the request succeeded or threw.
      cleanup();
    }
  }
}

/** Create an {@link HttpClient}; equivalent to `new HttpClient(options)`. */
export function createHttpClient(options?: HttpClientOptions): HttpClient {
  return new HttpClient(options);
}

/** Convert an arbitrary thrown value into an `HttpClientError` for the fetch phase. */
function httpFetchError(error: unknown, url: string, options: FetchUrlOptions): HttpClientError {
  return httpClientErrorFromUnknown(error, url, options, {
    code: "HTTP_FETCH_FAILED",
    phase: "fetch",
    message: "HTTP fetch failed",
  });
}