import axios, { AxiosRequestConfig } from "axios";
import * as cheerio from "cheerio";
import TurndownService from "turndown";

/**
 * MIME types listed as acceptable content types.
 * NOTE(review): the code that consumes this list is not visible in this part
 * of the file; presumably it is compared against a response's Content-Type —
 * confirm against the caller.
 */
const ALLOWED_CONTENT_TYPES = [
  "text/html",
  "application/json",
  "application/xml",
  "application/javascript",
  "text/plain",
];

/**
 * Default HTTP request headers. The values mirror what a desktop Firefox
 * browser would send (see the User-Agent string), with a Google referer.
 *
 * The "Alt-Used" and "Host" entries hold the placeholder string
 * "LEAVE-THIS-KEY-SET-BY-TOOL" rather than a real host name.
 * NOTE(review): presumably these two are overwritten per request with the
 * target host before the headers are sent — confirm where that happens.
 */
const DEFAULT_HEADERS = {
  Accept:
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
  "Accept-Encoding": "gzip, deflate",
  "Accept-Language": "en-US,en;q=0.5",
  // Placeholder value, not a real host (see note above).
  "Alt-Used": "LEAVE-THIS-KEY-SET-BY-TOOL",
  Connection: "keep-alive",
  // Placeholder value, not a real host (see note above).
  Host: "LEAVE-THIS-KEY-SET-BY-TOOL",
  Referer: "https://www.google.com/",
  "Sec-Fetch-Dest": "document",
  "Sec-Fetch-Mode": "navigate",
  "Sec-Fetch-Site": "cross-site",
  "Upgrade-Insecure-Requests": "1",
  "User-Agent":
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
};

export class WebUtilities {
  public static extractText(html: string, baseUrl: string, summarize: boolean): string {
    // Parse all elements including