/**
 * Xinghuan OPC Center — `opc_search` web search tool.
 *
 * Provides free web search (no API key required) so that AI employees can
 * look up industry reports, policies and regulations, competitor
 * information, and similar material.
 *
 * Search strategy: DuckDuckGo Lite HTML first, Bing RSS as the fallback.
 */

import { Type, type Static } from "@sinclair/typebox";
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import { json, toolError } from "../utils/tool-helper.js";

const SearchSchema = Type.Object({
  query: Type.String({ description: "搜索关键词,支持中英文" }),
  site: Type.Optional(Type.String({ description: "限定搜索特定网站(如 zhihu.com、36kr.com)" })),
  count: Type.Optional(Type.Number({ description: "返回结果数量,默认 10,最多 20" })),
});

type SearchParams = Static<typeof SearchSchema>;

type SearchItem = { title: string; url: string; snippet: string; source: string };

/** Number of results returned when the caller does not specify `count`. */
const DEFAULT_RESULT_COUNT = 10;

/** Upper bound on the number of results returned by a single call. */
const MAX_RESULT_COUNT = 20;

/** Per-request timeout, in milliseconds, applied to both search engines. */
const FETCH_TIMEOUT_MS = 12000;

/** Browser-like headers sent with every search request. */
const REQUEST_HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Accept-Language": "zh-CN,zh;q=0.9",
};

/** Named character references understood by {@link decodeHtml}. */
const NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Decodes HTML character references and then strips tags.
 *
 * Decoding happens in a single pass, so an already-escaped reference such as
 * `&amp;lt;` becomes the text `&lt;` rather than being decoded twice.
 * Unknown or out-of-range references are left untouched. Tags are removed
 * after decoding, which means markup that arrived escaped is stripped too.
 *
 * @param html - Raw HTML/XML fragment.
 * @returns Plain text with surrounding whitespace trimmed.
 */
function decodeHtml(html: string): string {
  return html
    .replace(
      /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g,
      (whole: string, dec?: string, hex?: string, name?: string): string => {
        if (name !== undefined) return NAMED_ENTITIES.get(name) ?? whole;
        const code = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex ?? "", 16);
        // Guard the range so String.fromCodePoint cannot throw a RangeError.
        return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
      },
    )
    .replace(/<[^>]+>/g, "")
    .trim();
}

/**
 * Extracts the real target URL from a DuckDuckGo `uddg` redirect link.
 *
 * @param href - The `href` value of a result anchor.
 * @returns The decoded `uddg` parameter when present and well-formed;
 *   otherwise `href` itself, with `https:` prepended to protocol-relative
 *   (`//…`) links.
 */
function extractRealUrl(href: string): string {
  const encoded = /uddg=([^&]+)/.exec(href)?.[1];
  if (encoded !== undefined) {
    try {
      return decodeURIComponent(encoded);
    } catch {
      /* malformed percent-encoding: fall through to the raw href */
    }
  }
  return href.startsWith("//") ? "https:" + href : href;
}

/**
 * Parses a DuckDuckGo Lite HTML result page.
 *
 * Result anchors are recognised by `class="result-link"`; the `href`
 * attribute may appear before or after `class`, so it is looked up inside
 * the captured opening tag rather than positionally.
 *
 * NOTE(review): snippet cells are assumed to be `<td>` elements carrying
 * `class="result-snippet"` — confirm against the live markup.
 *
 * Links and snippets are collected independently and paired by index, so a
 * filtered advertisement on only one side can shift the pairing.
 *
 * @param html - Full HTML of the result page.
 * @param maxResults - Maximum number of items to return.
 * @returns Parsed results in page order.
 */
function parseDdgLite(html: string, maxResults: number): SearchItem[] {
  const linkRegex = /<a\b([^>]*\bclass=['"]result-link['"][^>]*)>([\s\S]*?)<\/a>/g;
  const hrefInTag = /\bhref=(?:"([^"]*)"|'([^']*)')/;
  const snippetRegex = /<td\b[^>]*\bclass=['"]result-snippet['"][^>]*>([\s\S]*?)<\/td>/g;

  const isAd = (text: string): boolean => text.includes("ad_domain") || text.includes("ad_provider");

  // Pass 1: collect every result link.
  const links: { href: string; title: string }[] = [];
  for (const match of html.matchAll(linkRegex)) {
    const hrefMatch = hrefInTag.exec(match[1] ?? "");
    // Attribute values are HTML-escaped; un-escape `&amp;` so query strings parse.
    const href = (hrefMatch?.[1] ?? hrefMatch?.[2] ?? "").replace(/&amp;/g, "&");
    const title = decodeHtml(match[2] ?? "");
    // Skip advertisements and "more info" helper links.
    if (isAd(href) || title === "more info" || !title) continue;
    links.push({ href, title });
  }

  // Pass 2: collect every snippet, skipping those flagged as advertisements.
  const snippets: string[] = [];
  for (const match of html.matchAll(snippetRegex)) {
    const raw = match[1] ?? "";
    if (isAd(raw)) continue;
    snippets.push(decodeHtml(raw));
  }

  // Assemble results, pairing link i with snippet i.
  const results: SearchItem[] = [];
  for (let i = 0; i < links.length && results.length < maxResults; i++) {
    const link = links[i];
    if (link === undefined) continue;

    const url = extractRealUrl(link.href);
    let source = "";
    try {
      source = new URL(url).hostname;
    } catch {
      /* not an absolute URL: leave the source empty */
    }

    const snippet = snippets[i] ?? "";
    // Drop a leading host name from the snippet when it repeats the source.
    const cleanSnippet =
      source && snippet.startsWith(source) ? snippet.slice(source.length).trim() : snippet;

    results.push({ title: link.title, url, snippet: cleanSnippet, source });
  }
  return results;
}

const RSS_ITEM_REGEX = /<item>([\s\S]*?)<\/item>/g;
const RSS_TITLE_REGEX = /<title>([\s\S]*?)<\/title>/;
const RSS_LINK_REGEX = /<link>([\s\S]*?)<\/link>/;
const RSS_DESCRIPTION_REGEX = /<description>([\s\S]*?)<\/description>/;

/**
 * Reads one text field out of an RSS `<item>` body.
 *
 * A CDATA wrapper is unwrapped first; otherwise the tag stripper in
 * {@link decodeHtml} would treat the whole section as a tag and erase it.
 *
 * @param item - Inner XML of an `<item>` element.
 * @param pattern - Non-global pattern whose first group captures the field.
 * @returns Decoded, trimmed field text, or `""` when the field is absent.
 */
function readRssField(item: string, pattern: RegExp): string {
  const raw = pattern.exec(item)?.[1] ?? "";
  const cdata = /^\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*$/.exec(raw);
  return decodeHtml(cdata?.[1] ?? raw);
}

/**
 * Parses search results out of a Bing RSS XML document (fallback engine).
 *
 * Items without a title or link are skipped.
 *
 * @param xml - RSS document text.
 * @param maxResults - Maximum number of items to return.
 * @returns Parsed results in feed order.
 */
function parseBingRss(xml: string, maxResults: number): SearchItem[] {
  const results: SearchItem[] = [];
  for (const match of xml.matchAll(RSS_ITEM_REGEX)) {
    if (results.length >= maxResults) break;

    const item = match[1] ?? "";
    const title = readRssField(item, RSS_TITLE_REGEX);
    const url = readRssField(item, RSS_LINK_REGEX);
    if (!title || !url) continue;

    let source = "";
    try {
      source = new URL(url).hostname;
    } catch {
      /* not an absolute URL: leave the source empty */
    }

    results.push({ title, url, snippet: readRssField(item, RSS_DESCRIPTION_REGEX), source });
  }
  return results;
}

/**
 * Clamps the requested result count to the range 1..{@link MAX_RESULT_COUNT}.
 *
 * @param count - Caller-supplied value; may be missing or not a finite number.
 * @returns {@link DEFAULT_RESULT_COUNT} for missing/invalid input, otherwise
 *   the truncated value clamped into range.
 */
function normalizeCount(count: unknown): number {
  if (typeof count !== "number" || !Number.isFinite(count)) return DEFAULT_RESULT_COUNT;
  return Math.min(Math.max(Math.trunc(count), 1), MAX_RESULT_COUNT);
}

/**
 * Fetches a URL with the shared headers and timeout.
 *
 * @param url - Absolute URL to request.
 * @returns The response body, or `null` when the status is not OK.
 * @throws Whatever `fetch` rejects with (network failure, timeout abort).
 */
async function fetchText(url: string): Promise<string | null> {
  const resp = await fetch(url, {
    headers: REQUEST_HEADERS,
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  return resp.ok ? await resp.text() : null;
}

/** Renders a caught value as a log-friendly message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Builds the success payload, numbering results from 1. */
function buildSuccessPayload(query: string, engine: string, results: SearchItem[]) {
  return {
    query,
    engine,
    result_count: results.length,
    results: results.map((r, i) => ({ rank: i + 1, ...r })),
  };
}

/**
 * Registers the `opc_search` tool with the plugin API.
 *
 * On execution the tool queries DuckDuckGo Lite and, when that yields no
 * results or fails, Bing RSS. Engine failures are logged and never thrown;
 * when both engines come back empty the tool returns a zero-result payload
 * with a hint message.
 *
 * @param api - Plugin API used to register the tool and to log.
 */
export function registerSearchTool(api: OpenClawPluginApi): void {
  api.registerTool(
    {
      name: "opc_search",
      label: "OPC 联网搜索",
      description:
        "联网搜索工具,可搜索任何互联网信息。" +
        "用途:搜索行业报告、政策法规、竞品分析、市场数据、技术方案、最新资讯等。" +
        "支持站内搜索(site参数限定特定网站)。直接传入搜索关键词即可。",
      parameters: SearchSchema,
      async execute(_toolCallId, params) {
        const p = params as SearchParams;
        const count = normalizeCount(p.count);

        let q = p.query;
        const site = p.site?.trim();
        if (site) q += ` site:${site}`;

        api.logger.info(`opc_search: 搜索「${q}」...`);

        // Strategy 1: DuckDuckGo Lite.
        try {
          const html = await fetchText(`https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(q)}`);
          if (html !== null) {
            const results = parseDdgLite(html, count);
            if (results.length > 0) {
              api.logger.info(`opc_search: DuckDuckGo 返回 ${results.length} 条结果`);
              return json(buildSuccessPayload(p.query, "DuckDuckGo", results));
            }
          }
          api.logger.info("opc_search: DuckDuckGo 无结果,尝试 Bing RSS...");
        } catch (err) {
          api.logger.info(`opc_search: DuckDuckGo 失败 (${describeError(err)}),尝试 Bing RSS...`);
        }

        // Strategy 2: Bing RSS (fallback).
        try {
          const xml = await fetchText(
            `https://cn.bing.com/search?format=rss&q=${encodeURIComponent(q)}&count=${count}&mkt=zh-CN`,
          );
          if (xml !== null) {
            const results = parseBingRss(xml, count);
            if (results.length > 0) {
              api.logger.info(`opc_search: Bing RSS 返回 ${results.length} 条结果`);
              return json(buildSuccessPayload(p.query, "Bing", results));
            }
          }
        } catch (err) {
          api.logger.info(`opc_search: Bing RSS 也失败 (${describeError(err)})`);
        }

        // Neither engine produced results.
        api.logger.info("opc_search: 所有搜索引擎均无结果");
        return json({
          query: p.query,
          result_count: 0,
          results: [],
          message: "未找到相关结果,建议换关键词重试(如用更具体的中文描述)",
        });
      },
    },
    { name: "opc_search" },
  );

  api.logger.info("opc: 已注册 opc_search 联网搜索工具(DuckDuckGo + Bing)");
}