/**
* Web Search Skill - Searches the web using the Google Custom Search API.
*
* Tier 3 built-in skill: requires `GOOGLE_SEARCH_API_KEY` and
* `GOOGLE_SEARCH_ENGINE_ID` (legacy alias: `GOOGLE_SEARCH_CX`) environment
* variables, or the equivalent config params. Uses the Google Custom Search
* JSON API to perform web searches and return formatted results.
*/
import { SkillBase } from '../SkillBase.js';
import type { SkillToolDefinition, SkillPromptSection, SkillConfig, ParameterSchemaEntry } from '../SkillBase.js';
/**
 * Run the HTML → plain-text extraction pipeline that mirrors Python's
 * `GoogleSearchScraper.extract_html_content` (skill.py:204-282).
 *
 * Pipeline:
 * 1. Pre-process CDATA sections to plain text (Python html.parser
 *    keeps CDATA content; cheerio/htmlparser2 drops it).
 * 2. Walk {@link CONTENT_CANDIDATES} in order, picking the first match.
 *    Fall back to the document's `body` element or the raw document.
 * 3. Clone the picked subtree so tag/pattern removal only affects it —
 *    analogous to Python's `content_soup = BeautifulSoup(str(main_content))`.
 *    The selector-first-then-filter order is load-bearing: if the real
 *    content is wrapped in a sidebar-pattern div, Python still finds it
 *    because the selector runs before removal.
 * 4. Remove {@link UNWANTED_TAGS} and every element matching any of
 *    {@link UNWANTED_PATTERNS} on either `class` or `id` (case-insensitive).
 * 5. Collapse whitespace and trim.
 *
 * Exported so `tests/skills/web-search-parity.test.ts` can verify byte-
 * identical behavior against Python BeautifulSoup fixtures; not re-exported
 * from the skills barrel, so this stays internal to the skill module.
 *
 * @param html - Raw HTML markup to run through the pipeline.
 * @returns The extracted plain text, whitespace-collapsed and trimmed
 * (step 5 above).
 *
 * @internal
 */
export declare function extractTextFromHtml(html: string): string;
/**
 * Searches the web using the Google Custom Search JSON API.
 *
 * Tier 3 built-in skill. Credentials can be supplied via the `api_key` and
 * `search_engine_id` params or `GOOGLE_SEARCH_API_KEY` /
 * `GOOGLE_SEARCH_ENGINE_ID` (legacy: `GOOGLE_SEARCH_CX`) environment variables.
 *
 * The handler mirrors Python's `search_and_scrape_best` pipeline: fetches
 * `oversample_factor × num_results` candidates from Google, scrapes each
 * result page (SSRF-guarded, cheerio-based text extraction), scores for
 * quality (length + query relevance + boilerplate penalty), deduplicates by
 * domain, and returns the top `num_results` above `min_quality_score` with
 * full page content. If every scrape fails or falls below the threshold the
 * handler falls back to raw API snippets so the agent still has something
 * to say.
 *
 * Supported config: `tool_name`, `num_results`, `no_results_message`,
 * `safe_search`, `delay`, `max_content_length`, `oversample_factor`,
 * `min_quality_score`.
 *
 * @example
 * ```ts
 * agent.addSkill('web_search', {
 *   api_key: process.env.GOOGLE_SEARCH_API_KEY,
 *   search_engine_id: process.env.GOOGLE_SEARCH_ENGINE_ID,
 *   num_results: 3,
 * });
 * ```
 */
export declare class WebSearchSkill extends SkillBase {
  static SKILL_NAME: string;
  static SKILL_DESCRIPTION: string;
  static SKILL_VERSION: string;
  static REQUIRED_PACKAGES: readonly string[];
  static REQUIRED_ENV_VARS: readonly string[];
  static SUPPORTS_MULTIPLE_INSTANCES: boolean;
  /**
   * @returns Schema entries describing this skill's parameters.
   * NOTE(review): presumably keyed by parameter name — confirm against
   * `SkillBase.getParameterSchema`.
   */
  static getParameterSchema(): Record<string, ParameterSchemaEntry>;
  /*
   * NOTE(review): the comment below has no declaration attached to it in this
   * file; it is kept verbatim as a plain comment so it is not mistaken for
   * part of the `setup()` documentation.
   *
   * @returns Manifest declaring Google Search credentials as required env vars.
   * Reports `GOOGLE_SEARCH_ENGINE_ID` as the canonical name; `GOOGLE_SEARCH_CX`
   * is still accepted as a legacy fallback at runtime.
   */
  /**
   * Validate required credentials before the skill becomes active.
   *
   * Mirrors Python's `setup()` (skill.py:559-600) which checks `api_key` and
   * `search_engine_id` and returns `False` (logging an error) if either is
   * absent. In the TS SDK credentials may also arrive via environment variables
   * (`GOOGLE_SEARCH_API_KEY` / `GOOGLE_SEARCH_ENGINE_ID` or the legacy alias
   * `GOOGLE_SEARCH_CX`), so both config params and env vars are checked.
   * @returns `true` if all required credentials are present, `false` otherwise.
   */
  setup(): Promise<boolean>;
  /**
   * Instance key for the SkillManager. Includes the configured
   * `search_engine_id` (or `"default"`) and `tool_name` (or `"web_search"`)
   * to match Python's `"{SKILL_NAME}_{search_engine_id}_{tool_name}"` scheme.
   */
  getInstanceKey(): string;
  /** Global data injected into the agent's SWML context (mirrors Python). */
  getGlobalData(): Record<string, unknown>;
  /** Resolve the tool name (defaults to `web_search`, matches Python default). */
  private getToolName;
  /**
   * @returns A single tool (named via `tool_name`) that performs a Google
   * Custom Search and returns formatted results.
   */
  getTools(): SkillToolDefinition[];
  /** Apply the `{query}` template to the no-results message. */
  private static _formatNoResultsMessage;
  /**
   * Check whether the URL points at Reddit. Python parity: `is_reddit_url`
   * (skill.py:66).
   */
  private static _isRedditUrl;
  /**
   * Fetch a Reddit URL via the `.json` endpoint and build a structured summary
   * of the post + top comments.
   *
   * Python parity: `extract_reddit_content` (skill.py:71-190). Matches the
   * post-title/author/score/comments assembly and the top-20 → valid → top-5
   * comment pipeline. Returns just the compiled text — Python's
   * `search_and_scrape_best` unconditionally overwrites Reddit's
   * engagement-score metrics with the 6-factor `_calculate_content_quality`
   * (skill.py:447-448), so we skip computing the dead engagement score here
   * and let the handler score the text via `_qualityMetrics`.
   *
   * Falls through to HTML extraction on JSON fetch failure or malformed
   * payload, matching Python's `except Exception: fall back` behavior.
   */
  private _extractRedditContent;
  /**
   * Fetch a URL and extract clean text content, then score it with the
   * 6-factor `_qualityMetrics`. Reddit URLs are routed to the JSON extractor
   * first; the compiled Reddit text is scored with the same 6-factor formula
   * to match Python's `search_and_scrape_best` overwrite behavior
   * (skill.py:447-448).
   *
   * Python parity: `extract_text_from_url` (skill.py:192-202). Returns `null`
   * on any failure (network, non-200, parse error, or SSRF rejection).
   */
  private _scrapeUrl;
  /**
   * Six-factor content quality metrics (score + sub-metrics used in the
   * per-result output). Combines content length, word diversity,
   * boilerplate penalty, sentence structure, domain reputation, and query
   * relevance.
   *
   * Python parity: `_calculate_content_quality` (skill.py:284-414). Preserves
   * the weights (0.25/0.10/0.10/0.15/0.15/0.25), the 26-phrase boilerplate
   * list, the quality/low-quality domain lists, and the phrase-match bonus
   * on relevance. Also returns the same metric fields Python exposes
   * (`text_length`, `sentence_count`, `query_relevance`, `query_words_found`,
   * `domain`) so the handler can render the full Python output format.
   */
  private static _qualityMetrics;
  /** @returns Prompt section describing web search capabilities and usage guidance. */
  protected _getPromptSections(): SkillPromptSection[];
}
/**
 * Factory function for creating {@link WebSearchSkill} instances.
 *
 * @param config - Optional skill configuration; may be omitted. See the
 * `WebSearchSkill` class documentation for the credential params and the
 * supported config keys.
 * @returns A new WebSearchSkill instance.
 */
export declare function createSkill(config?: SkillConfig): WebSearchSkill;