/**
 * OpenAI Codex Web Search Provider
 *
 * Uses Codex's built-in web_search tool via the Responses API.
 * Requires OAuth credentials stored in agent.db for provider "openai-codex".
 * Returns synthesized answers with web search sources.
 */
import * as os from "node:os";
import { getBundledModels } from "@oh-my-pi/pi-ai";
import { decodeJwt } from "@oh-my-pi/pi-ai/utils/oauth/openai-codex";
import { $env, getAgentDbPath, readSseJson } from "@oh-my-pi/pi-utils";
import packageJson from "../../../../package.json" with { type: "json" };
import { AgentStorage } from "../../../session/agent-storage";
import type { SearchResponse, SearchSource } from "../../../web/search/types";
import { SearchProviderError } from "../../../web/search/types";
import type { SearchParams } from "./base";
import { SearchProvider } from "./base";
import { classifyProviderHttpError, withHardTimeout } from "./utils";

const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
const CODEX_RESPONSES_PATH = "/codex/responses";

/** Model used when neither the environment nor the bundled model list yields one. */
const FALLBACK_MODEL = "gpt-5-codex-mini";

/** Model ids tried in order against the bundled "openai-codex" model list. */
const DEFAULT_MODEL_PREFERENCES = [
  "gpt-5-codex-mini",
  "gpt-5.4",
  "gpt-5.3-codex",
  "gpt-5.2-codex",
  "gpt-5.1-codex",
  "gpt-5-codex",
];

/** JWT claim key under which `chatgpt_account_id` is looked up. */
const JWT_CLAIM_PATH = "https://api.openai.com/auth";

const DEFAULT_INSTRUCTIONS =
  "You are a helpful assistant with web search capabilities. Search the web to answer the user's question accurately and cite your sources.";

/**
 * Picks the model id to send with the search request.
 *
 * Resolution order:
 * 1. `PI_CODEX_WEB_SEARCH_MODEL` (trimmed, if non-empty)
 * 2. the first entry of `DEFAULT_MODEL_PREFERENCES` present in the bundled models
 * 3. the first bundled model whose id contains neither "mini" nor "spark"
 * 4. the first bundled model
 * 5. `FALLBACK_MODEL`
 * @returns Model id string
 */
function getModel(): string {
  const configuredModel = $env.PI_CODEX_WEB_SEARCH_MODEL?.trim();
  if (configuredModel) return configuredModel;

  const bundledModels = getBundledModels("openai-codex");
  const bundledIds = new Set(bundledModels.map(model => model.id));
  const preferred = DEFAULT_MODEL_PREFERENCES.find(modelId => bundledIds.has(modelId));
  if (preferred) return preferred;

  const nonMini = bundledModels.find(model => !model.id.includes("mini") && !model.id.includes("spark"));
  return nonMini?.id ?? bundledModels[0]?.id ?? FALLBACK_MODEL;
}

/** Parameters accepted by {@link searchCodex}. */
export interface CodexSearchParams {
  /** Abort signal forwarded to the HTTP request and the SSE reader. */
  signal?: AbortSignal;
  query: string;
  /** Overrides `DEFAULT_INSTRUCTIONS` when provided. */
  system_prompt?: string;
  /** Maximum number of sources to return; non-positive or missing means no limit. */
  num_results?: number;
  /** Search context size: controls how much web content to include */
  search_context_size?: "low" | "medium" | "high";
}

/** OAuth credential stored in agent.db */
interface CodexOAuthCredential {
  type: "oauth";
  access: string;
  refresh?: string;
  /** Compared against `Date.now()`, i.e. treated as epoch milliseconds. */
  expires: number;
  accountId?: string;
}

/** Codex API response structure */
interface CodexResponseItem {
  type: string;
  id?: string;
  role?: string;
  name?: string;
  call_id?: string;
  status?: string;
  arguments?: string;
  content?: CodexContentPart[];
  summary?: Array<{ type: string; text: string }>;
}

interface CodexContentPart {
  type: string;
  text?: string;
  annotations?: CodexAnnotation[];
}

interface CodexAnnotation {
  type: string;
  url?: string;
  title?: string;
  start_index?: number;
  end_index?: number;
}

interface CodexUsage {
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;
  input_tokens_details?: { cached_tokens?: number };
}

interface CodexResponse {
  id?: string;
  model?: string;
  status?: string;
  usage?: CodexUsage;
}

/**
 * Reports whether the text is exactly the "(see attached image)" placeholder,
 * ignoring surrounding whitespace and letter case.
 */
function isImagePlaceholderAnswer(text: string): boolean {
  return text.trim().toLowerCase() === "(see attached image)";
}

/** Appends `source` to `sources` unless an entry with the same URL already exists. */
function addSource(sources: SearchSource[], source: SearchSource): void {
  if (!sources.some(existing => existing.url === source.url)) {
    sources.push(source);
  }
}

/** Counts how many characters of `text` (iterated by code point) equal `target`. */
function countCharacter(text: string, target: string): number {
  let count = 0;
  for (const char of text) {
    if (char === target) {
      count += 1;
    }
  }
  return count;
}

/**
 * Strips prose punctuation and unmatched closing delimiters from extracted URLs.
 * Codex often returns links in markdown or sentence text without structured annotations.
 * @param candidate - Raw URL text as it appeared in the answer
 * @returns The URL serialized by `URL#toString()`, or null when the remainder
 *   does not start with `http://` / `https://` or cannot be parsed
 */
function normalizeExtractedUrl(candidate: string): string | null {
  let url = candidate.trim();
  while (url.length > 0) {
    const lastCharacter = url.charAt(url.length - 1);
    if (/[.,!?;:'"]/u.test(lastCharacter)) {
      url = url.slice(0, -1);
      continue;
    }
    // A closing delimiter is only dropped when it has no matching opener inside
    // the URL, so balanced pairs such as ".../Foo_(bar)" are preserved.
    if (lastCharacter === ")" && countCharacter(url, ")") > countCharacter(url, "(")) {
      url = url.slice(0, -1);
      continue;
    }
    if (lastCharacter === "]" && countCharacter(url, "]") > countCharacter(url, "[")) {
      url = url.slice(0, -1);
      continue;
    }
    if (lastCharacter === "}" && countCharacter(url, "}") > countCharacter(url, "{")) {
      url = url.slice(0, -1);
      continue;
    }
    break;
  }

  if (!/^https?:\/\//.test(url)) {
    return null;
  }

  try {
    return new URL(url).toString();
  } catch {
    return null;
  }
}

/**
 * Finds the index of the `)` that closes a markdown link target.
 * @param text - Full text being scanned
 * @param openParenIndex - Index of the `(` that opens the link target
 * @returns Index of the matching `)`, or null when a newline or the end of the
 *   text is reached first, or the parentheses are unbalanced
 */
function findMarkdownLinkUrlEnd(text: string, openParenIndex: number): number | null {
  let depth = 0;
  for (let index = openParenIndex; index < text.length; index += 1) {
    const character = text[index];
    if (!character || character === "\n") {
      return null;
    }
    if (character === "(") {
      depth += 1;
      continue;
    }
    if (character !== ")") {
      continue;
    }
    depth -= 1;
    if (depth === 0) {
      return index;
    }
    if (depth < 0) {
      return null;
    }
  }
  return null;
}

/**
 * Extracts citation sources from markdown links and bare URLs in the answer text.
 * Used as a fallback when the Codex response omits `url_citation` annotations.
 * @param text - Answer text to scan
 * @returns Sources de-duplicated by URL; markdown links come first and keep their
 *   link text as title, bare URLs use the URL itself as title
 */
function extractTextSources(text: string): SearchSource[] {
  const sources: SearchSource[] = [];

  // Pass 1: markdown links of the form [title](url).
  for (let index = 0; index < text.length; index += 1) {
    if (text[index] !== "[") {
      continue;
    }
    const titleEnd = text.indexOf("]", index + 1);
    if (titleEnd === -1 || text[titleEnd + 1] !== "(") {
      continue;
    }
    const urlEnd = findMarkdownLinkUrlEnd(text, titleEnd + 1);
    if (urlEnd === null) {
      continue;
    }
    const title = text.slice(index + 1, titleEnd).trim();
    const url = normalizeExtractedUrl(text.slice(titleEnd + 2, urlEnd));
    if (url) {
      addSource(sources, { title: title || url, url });
    }
    // Resume scanning after the link that was just consumed.
    index = urlEnd;
  }

  // Pass 2: bare URLs. URLs already captured in pass 1 are skipped by addSource.
  for (const match of text.matchAll(/https?:\/\/\S+/g)) {
    const url = normalizeExtractedUrl(match[0] ?? "");
    if (!url) continue;
    addSource(sources, { title: url, url });
  }

  return sources;
}

/**
 * Extracts account ID from a Codex access token.
 * @param accessToken - JWT access token
 * @returns Account ID string, or null if not found
 */
function getAccountId(accessToken: string): string | null {
  const payload = decodeJwt(accessToken);
  const auth = payload?.[JWT_CLAIM_PATH] as { chatgpt_account_id?: string } | undefined;
  const accountId = auth?.chatgpt_account_id;
  return typeof accountId === "string" && accountId.length > 0 ? accountId : null;
}

/**
 * Finds valid Codex OAuth credentials from agent.db.
 * Checks agent credentials and returns the first non-expired credential.
 * A credential is skipped when it is not of type "oauth", has no access token,
 * expires within the next five minutes, or yields no account ID.
 * @returns OAuth credential with access token and account ID, or null if none found
 */
async function findCodexAuth(): Promise<{ accessToken: string; accountId: string } | null> {
  const expiryBuffer = 5 * 60 * 1000; // 5 minutes
  const now = Date.now();

  try {
    const storage = await AgentStorage.open(getAgentDbPath());
    const records = storage.listAuthCredentials("openai-codex");
    for (const record of records) {
      const credential = record.credential;
      if (credential.type !== "oauth") continue;
      const oauthCred = credential as CodexOAuthCredential;
      if (!oauthCred.access) continue;
      if (oauthCred.expires <= now + expiryBuffer) continue;
      // Prefer the stored account ID; otherwise derive it from the token claims.
      const accountId = oauthCred.accountId ?? getAccountId(oauthCred.access);
      if (!accountId) continue;
      return { accessToken: oauthCred.access, accountId };
    }
  } catch {
    // Best effort: any failure while opening or reading storage is reported as
    // "no credentials" rather than surfaced to the caller.
    return null;
  }

  return null;
}

/**
 * Builds HTTP headers for Codex API requests.
 * @param accessToken - OAuth access token
 * @param accountId - ChatGPT account ID
 * @returns Headers object for fetch requests
 */
function buildCodexHeaders(accessToken: string, accountId: string): Record<string, string> {
  return {
    Authorization: `Bearer ${accessToken}`,
    "chatgpt-account-id": accountId,
    "OpenAI-Beta": "responses=experimental",
    originator: "pi",
    "User-Agent": `pi/${packageJson.version} (${os.platform()} ${os.release()}; ${os.arch()})`,
    Accept: "text/event-stream",
    "Content-Type": "application/json",
  };
}

/**
 * Calls the Codex Responses API with web search tool enabled.
 * Streams the response and collects all events.
 * @param auth - Authentication info (access token and account ID)
 * @param query - Search query from the user
 * @param options - Search options including abort signal, system prompt and context size
 * @returns Parsed response with answer, sources, model, request ID and usage.
 *   `usage.inputTokens` is the reported input token count minus cached tokens.
 * @throws {SearchProviderError} If the API request fails, the response has no
 *   body, the stream reports an error/failure event, or only an image
 *   placeholder was returned
 */
async function callCodexSearch(
  auth: { accessToken: string; accountId: string },
  query: string,
  options: { signal?: AbortSignal; systemPrompt?: string; searchContextSize?: "low" | "medium" | "high" },
): Promise<{
  answer: string;
  sources: SearchSource[];
  model: string;
  requestId: string;
  usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
}> {
  const url = `${CODEX_BASE_URL}${CODEX_RESPONSES_PATH}`;
  const headers = buildCodexHeaders(auth.accessToken, auth.accountId);
  const requestedModel = getModel();

  const body: Record<string, unknown> = {
    model: requestedModel,
    stream: true,
    store: false,
    input: [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_text", text: query }],
      },
    ],
    tools: [
      {
        type: "web_search",
        search_context_size: options.searchContextSize ?? "high",
      },
    ],
    tool_choice: { type: "web_search" },
    instructions: options.systemPrompt ?? DEFAULT_INSTRUCTIONS,
  };

  const response = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
    signal: withHardTimeout(options.signal),
  });

  if (!response.ok) {
    const errorText = await response.text();
    // Prefer a classified provider error when the helper recognizes the failure.
    const classified = classifyProviderHttpError("codex", response.status, errorText);
    if (classified) throw classified;
    throw new SearchProviderError("codex", `Codex API error (${response.status}): ${errorText}`, response.status);
  }

  if (!response.body) {
    throw new SearchProviderError("codex", "Codex API returned no response body", 500);
  }

  // Parse SSE stream
  const answerParts: string[] = [];
  const streamedAnswerParts: string[] = [];
  const sources: SearchSource[] = [];
  let model = requestedModel;
  let requestId = "";
  let usage: { inputTokens: number; outputTokens: number; totalTokens: number } | undefined;

  for await (const rawEvent of readSseJson<Record<string, unknown>>(response.body, options.signal)) {
    const eventType = typeof rawEvent.type === "string" ? rawEvent.type : "";
    if (!eventType) continue;

    if (eventType === "response.output_text.delta") {
      // Deltas are kept separately and only used if no completed item text arrives.
      const delta = typeof rawEvent.delta === "string" ? rawEvent.delta : "";
      if (delta) {
        streamedAnswerParts.push(delta);
      }
    } else if (eventType === "response.output_item.done") {
      const item = rawEvent.item as CodexResponseItem | undefined;
      if (!item) continue;

      // Handle text message content and extract sources from annotations
      if (item.type === "message" && item.content) {
        for (const part of item.content) {
          if (part.type === "output_text" && part.text) {
            answerParts.push(part.text);

            // Extract sources from url_citation annotations
            if (part.annotations) {
              for (const annotation of part.annotations) {
                if (annotation.type === "url_citation" && annotation.url) {
                  // Deduplicate by URL
                  addSource(sources, { title: annotation.title ?? annotation.url, url: annotation.url });
                }
              }
            }
          }
        }
      }

      // Handle reasoning summary as part of answer
      if (item.type === "reasoning" && item.summary) {
        for (const part of item.summary) {
          if (part.type === "summary_text" && part.text) {
            answerParts.push(part.text);
          }
        }
      }
    } else if (eventType === "response.completed" || eventType === "response.done") {
      const resp = (rawEvent as { response?: CodexResponse }).response;
      if (resp) {
        if (resp.model) model = resp.model;
        if (resp.id) requestId = resp.id;
        if (resp.usage) {
          const cachedTokens = resp.usage.input_tokens_details?.cached_tokens ?? 0;
          usage = {
            inputTokens: (resp.usage.input_tokens ?? 0) - cachedTokens,
            outputTokens: resp.usage.output_tokens ?? 0,
            totalTokens: resp.usage.total_tokens ?? 0,
          };
        }
      }
    } else if (eventType === "error") {
      const code = (rawEvent as { code?: string }).code ?? "";
      const message = (rawEvent as { message?: string }).message ?? "Unknown error";
      throw new SearchProviderError("codex", `Codex error (${code}): ${message}`, 500);
    } else if (eventType === "response.failed") {
      const resp = (rawEvent as { response?: { error?: { message?: string } } }).response;
      const errorMessage = resp?.error?.message ?? "Request failed";
      throw new SearchProviderError("codex", `Codex request failed: ${errorMessage}`, 500);
    }
  }

  const finalAnswer = answerParts.join("\n\n").trim();
  const streamedAnswer = streamedAnswerParts.join("").trim();

  if (isImagePlaceholderAnswer(finalAnswer) && streamedAnswer.length === 0) {
    throw new SearchProviderError("codex", "Codex returned image-only response", 502);
  }

  // Prefer the completed-item text; fall back to the streamed deltas when that
  // text is empty or is only the image placeholder.
  const answer =
    finalAnswer.length > 0 && !isImagePlaceholderAnswer(finalAnswer)
      ? finalAnswer
      : streamedAnswer.length > 0
        ? streamedAnswer
        : finalAnswer;

  // Fallback: when Codex omits url_citation annotations, scrape markdown links
  // and bare URLs from the synthesized answer so callers still receive sources.
  if (sources.length === 0 && answer.length > 0) {
    for (const source of extractTextSources(answer)) {
      addSource(sources, source);
    }
  }

  return {
    answer,
    sources,
    model,
    requestId,
    usage,
  };
}

/**
 * Executes a web search using OpenAI Codex's built-in web search tool.
 * Requires OAuth credentials stored in agent.db for provider "openai-codex".
 * @param params - Search parameters including query and optional settings
 * @returns Search response with synthesized answer, sources, and usage
 * @throws {Error} If no Codex OAuth credentials are configured
 * @throws {SearchProviderError} If the Codex request itself fails
 */
export async function searchCodex(params: CodexSearchParams): Promise<SearchResponse> {
  const auth = await findCodexAuth();
  if (!auth) {
    throw new Error(
      "No Codex OAuth credentials found. Login with 'omp /login openai-codex' to enable Codex web search.",
    );
  }

  const result = await callCodexSearch(auth, params.query, {
    // Forward the caller's signal so cancellation reaches the request and the stream reader.
    signal: params.signal,
    systemPrompt: params.system_prompt,
    searchContextSize: params.search_context_size ?? "high",
  });

  let sources = result.sources;

  // Apply num_results limit if specified; non-positive values mean "no limit"
  // so that a negative value cannot turn into a from-the-end slice.
  const limit = params.num_results;
  if (limit !== undefined && limit > 0 && sources.length > limit) {
    sources = sources.slice(0, limit);
  }

  return {
    provider: "codex",
    answer: result.answer || undefined,
    sources,
    usage: result.usage
      ? {
          inputTokens: result.usage.inputTokens,
          outputTokens: result.usage.outputTokens,
          totalTokens: result.usage.totalTokens,
        }
      : undefined,
    model: result.model,
    requestId: result.requestId,
  };
}

/**
 * Checks if Codex web search is available.
 * @returns True if valid OAuth credentials exist for openai-codex
 */
export async function hasCodexSearch(): Promise<boolean> {
  const auth = await findCodexAuth();
  return auth !== null;
}

/** Search provider for OpenAI Codex web search. */
export class CodexProvider extends SearchProvider {
  readonly id = "codex";
  readonly label = "Codex";

  /** Resolves to true when usable Codex OAuth credentials are found. */
  isAvailable(): Promise<boolean> {
    return hasCodexSearch();
  }

  /**
   * Runs a Codex web search for the given provider-agnostic parameters.
   * `numSearchResults` takes precedence over `limit` as the source cap.
   */
  search(params: SearchParams): Promise<SearchResponse> {
    return searchCodex({
      signal: params.signal,
      query: params.query,
      system_prompt: params.systemPrompt,
      num_results: params.numSearchResults ?? params.limit,
    });
  }
}