import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { type ExtensionCommandContext, getAgentDir, type ProviderModelConfig } from "@earendil-works/pi-coding-agent";
import { resolve as resolveThinkingLevelMap } from "./thinking-levels.ts";
import { fetchJsonWithTimeout } from "./utils.ts";

// --- Constants ---

/** Directory (under the agent dir) that holds the on-disk model cache. */
const CACHE_DIR = join(getAgentDir(), "cache");
/** Path of the cached /v1/models catalog. */
const CACHE_FILE = join(CACHE_DIR, "orcarouter-models.json");
/** Cache entries older than this (30 days) are reported as stale. */
const CACHE_MAX_AGE_MS = 30 * 24 * 60 * 60 * 1000;
/** Default timeout for the catalog request, in milliseconds. */
const FETCH_TIMEOUT_MS = 10000;

/**
 * Base URL of the OrcaRouter API. Overridable through ORCAROUTER_API_BASE
 * (an empty value falls back to the default); trailing slashes are stripped
 * so paths can be appended with a single "/".
 */
export const ORCAROUTER_BASE = (process.env.ORCAROUTER_API_BASE || "https://api.orcarouter.ai").replace(/\/+$/, "");

// --- Raw API types ---

/**
 * A single entry from GET /v1/models. OrcaRouter returns the full catalog in
 * one call, so there is no per-model detail fetch.
 *
 * Fields are widely optional: router models (orcarouter/*) omit pricing, and
 * many models omit architecture, context_length, or max_completion_tokens.
 */
export interface OrcaRouterModel {
	id: string;
	object?: string;
	created?: number;
	owned_by?: string;
	/** e.g. ["openai", "openai-response", "anthropic", "gemini", "embeddings", "image-generation"] */
	supported_endpoint_types?: string[] | null;
	name?: string;
	description?: string;
	context_length?: number | null;
	max_completion_tokens?: number | null;
	architecture?: {
		input_modalities?: string[] | null;
		output_modalities?: string[] | null;
	} | null;
	top_provider?: {
		context_length?: number | null;
		max_completion_tokens?: number | null;
	} | null;
	/** Per-token pricing in USD. Strings like "0.0000010000" (per-token) and per-million variants. */
	pricing?: {
		prompt?: string;
		completion?: string;
		prompt_per_million?: string;
		completion_per_million?: string;
		/** Flat per-request fee in USD, when present. */
		request?: string;
	} | null;
}

/** On-disk cache: raw /v1/models entries keyed by model ID. */
interface CachedData {
	/** Unix epoch milliseconds used to decide when the cached metadata is stale. */
	timestamp?: number;
	models: Record<string, OrcaRouterModel>;
}

type RefreshProgressStage = "list" | "done";

/** Progress event emitted while refreshing the catalog. */
export interface RefreshProgress {
	stage: RefreshProgressStage;
	current?: number;
	total?: number;
	message: string;
}

// --- Capability detection ---

const DEFAULT_CONTEXT_WINDOW = 128000;
const DEFAULT_MAX_TOKENS = 32768;

/**
 * Whether a catalog entry can be driven as a chat-completions model.
 * The provider is registered with api: "openai-completions", so we require
 * the "openai" endpoint. Embeddings / image-generation / video models are
 * filtered out because they cannot be driven as chat models.
 */
function isChatModel(model: OrcaRouterModel): boolean {
	// A missing or null endpoint list is treated as "no endpoints".
	const endpoints = model.supported_endpoint_types ?? [];
	return endpoints.includes("openai");
}

/** Resolve the input modalities Pi understands ("text" | "image"). */
function resolveInput(model: OrcaRouterModel): ("text" | "image")[] {
	const modalities = model.architecture?.input_modalities ?? ["text"];
	// "text" is always reported; "image" is added only when the model lists it.
	const input: ("text" | "image")[] = ["text"];
	if (modalities.includes("image")) input.push("image");
	return input;
}

/**
 * OrcaRouter does not expose a reasoning/thinking capability flag, so we infer
 * it from the model id. Reasoning families accept OpenAI-compatible
 * `reasoning_effort`; everything else is treated as non-reasoning.
 *
 * @param id Model id; matching is case-insensitive.
 * @returns true when the id matches one of the known reasoning families.
 */
export function isReasoningModel(id: string): boolean {
	const lower = id.toLowerCase();
	return (
		lower.includes("claude") ||
		// Matches standalone "o1".."o9" tokens (e.g. "o3", "o4-mini") but not "gpt-4o".
		/\bo[1-9]\b/.test(lower) ||
		lower.includes("gpt-5") ||
		lower.includes("gpt-6") ||
		lower.includes("gpt-oss") ||
		lower.includes("gemini-2.5") ||
		lower.includes("gemini-3") ||
		lower.includes("grok-3") ||
		lower.includes("grok-4") ||
		lower.includes("deepseek-r") ||
		lower.includes("deepseek-v3") ||
		lower.includes("qwq") ||
		lower.includes("thinking") ||
		lower.includes("reasoner") ||
		lower.includes("minimax") ||
		lower.includes("glm-4.6") ||
		lower.includes("glm-4.5") ||
		lower.includes("kimi") ||
		lower.includes("fusion")
	);
}

// --- Pricing ---

/** Parse a USD-per-token price string into a number; returns 0 on absent/invalid. */
function parsePrice(value: string | undefined): number {
	if (typeof value !== "string") return 0;
	const n = Number.parseFloat(value);
	// Negative, NaN and infinite values are all normalized to 0.
	return Number.isFinite(n) && n >= 0 ? n : 0;
}

/**
 * Build Pi's per-token cost object from OrcaRouter pricing.
 * Pi expects USD per token (the same unit OrcaRouter's `prompt`/`completion`
 * fields use). Router models without pricing fall back to zero.
 * OrcaRouter does not expose cache-read/cache-write pricing, so those stay 0.
 *
 * NOTE(review): the unit Pi expects is not verifiable from this file. If Pi's
 * cost fields turn out to be USD per million tokens, `prompt_per_million` /
 * `completion_per_million` would be the matching inputs — confirm.
 */
function buildCost(model: OrcaRouterModel): NonNullable<ProviderModelConfig["cost"]> {
	const pricing = model.pricing;
	if (!pricing) return { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
	return {
		input: parsePrice(pricing.prompt),
		output: parsePrice(pricing.completion),
		cacheRead: 0,
		cacheWrite: 0,
	};
}

// --- Assembly: raw API data -> ProviderModelConfig[] ---

/**
 * Build an explicit OpenAICompletionsCompat for an OrcaRouter model.
 * OrcaRouter is an OpenAI-compatible gateway, so it follows standard
 * OpenAI Chat Completions semantics for the fields below.
 *
 * pi type definition: https://github.com/badlogic/pi-mono/blob/main/packages/ai/src/types.ts
 *
 * @param reasoning Whether the model is treated as reasoning-capable; only
 *   `supportsReasoningEffort` depends on it.
 */
function buildCompat(reasoning: boolean): ProviderModelConfig["compat"] {
	return {
		// OpenAI-compatible gateway uses the standard "developer"/"system" handling.
		supportsDeveloperRole: false,
		// reasoning_effort is forwarded for reasoning-capable models.
		supportsReasoningEffort: reasoning,
		// "store" is not part of the chat-completions passthrough.
		supportsStore: false,
		// Standard OpenAI chat completions field.
		maxTokensField: "max_completion_tokens",
		// stream_options.include_usage is supported.
		supportsUsageInStreaming: true,
		requiresToolResultName: false,
		requiresAssistantAfterToolResult: false,
		requiresThinkingAsText: false,
		requiresReasoningContentOnAssistantMessages: false,
		thinkingFormat: "openai",
		// tool_choice strict mode is not guaranteed across all routed providers.
		supportsStrictMode: false,
		sendSessionAffinityHeaders: false,
		supportsLongCacheRetention: false,
		zaiToolStream: false,
		// Explicitly undefined: JSON.stringify drops undefined values, keeping
		// models.generated.ts structurally consistent with assembleModels() output.
		cacheControlFormat: undefined,
		openRouterRouting: {},
		vercelGatewayRouting: {},
	};
}

/**
 * Convert a raw catalog into Pi provider model configs.
 * Entries that are not chat-capable are dropped; the record key is used as the
 * model id, and missing context/output limits fall back to the top_provider
 * values and then to the module defaults.
 *
 * @param raw Raw /v1/models entries keyed by model ID.
 */
export function assembleModels(raw: Record<string, OrcaRouterModel>): ProviderModelConfig[] {
	return Object.entries(raw)
		.filter(([, data]) => isChatModel(data))
		.map(([id, data]) => {
			const reasoning = isReasoningModel(id);
			return {
				id,
				name: data.name ?? id,
				reasoning,
				thinkingLevelMap: resolveThinkingLevelMap(id, reasoning),
				input: resolveInput(data),
				cost: buildCost(data),
				contextWindow: data.context_length ?? data.top_provider?.context_length ?? DEFAULT_CONTEXT_WINDOW,
				maxTokens:
					data.max_completion_tokens ?? data.top_provider?.max_completion_tokens ?? DEFAULT_MAX_TOKENS,
				compat: buildCompat(reasoning),
			};
		});
}

// --- Cache I/O ---

/** Result of inspecting the on-disk cache. */
type CacheState =
	| { status: "fresh"; models: Record<string, OrcaRouterModel> }
	| { status: "stale"; models: Record<string, OrcaRouterModel> }
	| { status: "missing" };

/** Wrap a catalog with the timestamp used for staleness checks. */
function createCacheData(models: Record<string, OrcaRouterModel>, now = new Date()): CachedData {
	return { timestamp: now.getTime(), models };
}

/**
 * Read and minimally validate the cache file.
 *
 * @returns The parsed cache, or null when the file cannot be read or parsed,
 *   or when `models` is not a non-empty plain object.
 */
function readCacheData(path: string): CachedData | null {
	try {
		const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
		if (typeof parsed !== "object" || parsed === null) return null;
		const { models } = parsed as Partial<CachedData>;
		// Reject strings/arrays/null: Object.keys() alone would accept e.g. a
		// non-empty string or array as a "non-empty" model map.
		if (typeof models !== "object" || models === null || Array.isArray(models)) return null;
		if (Object.keys(models).length === 0) return null;
		return parsed as CachedData;
	} catch {
		return null;
	}
}

/**
 * Whether the cache timestamp is within CACHE_MAX_AGE_MS of now.
 * A missing/non-finite timestamp, or one that lies in the future (which would
 * otherwise count as fresh until the clock catches up), is not fresh.
 */
function isFreshCache(data: CachedData): boolean {
	if (typeof data.timestamp !== "number" || !Number.isFinite(data.timestamp)) return false;
	const ageMs = Date.now() - data.timestamp;
	return ageMs >= 0 && ageMs <= CACHE_MAX_AGE_MS;
}

/**
 * Inspect the on-disk cache.
 * An unreadable or invalid cache file is deleted (best effort) and reported
 * as "missing".
 */
export function readCacheState(): CacheState {
	if (!existsSync(CACHE_FILE)) return { status: "missing" };
	const data = readCacheData(CACHE_FILE);
	if (!data) {
		try {
			rmSync(CACHE_FILE, { force: true });
		} catch {
			// Ignore cache delete errors.
		}
		return { status: "missing" };
	}
	return isFreshCache(data)
		? { status: "fresh", models: data.models }
		: { status: "stale", models: data.models };
}

/** Persist the catalog to the cache file. Best effort: write failures are ignored. */
export function writeCache(models: Record<string, OrcaRouterModel>): void {
	try {
		mkdirSync(CACHE_DIR, { recursive: true });
		writeFileSync(CACHE_FILE, JSON.stringify(createCacheData(models), null, 2));
	} catch {
		// Ignore cache write errors
	}
}

// --- Fetch Models ---

/**
 * Fetch the full catalog from GET /v1/models, keyed by model ID.
 * OrcaRouter returns all metadata in this one call.
 *
 * @param timeoutMs Request timeout in milliseconds.
 * @throws Error when the response status is 429, when the response is not ok,
 *   or when it carries no body.
 */
export async function fetchModelCatalog(timeoutMs = FETCH_TIMEOUT_MS): Promise<Record<string, OrcaRouterModel>> {
	const headers: Record<string, string> = {};
	const apiKey = process.env.ORCAROUTER_API_KEY;
	// The Authorization header is only sent when a key is configured.
	if (apiKey) {
		headers.Authorization = `Bearer ${apiKey}`;
	}

	const res = await fetchJsonWithTimeout<{ data: OrcaRouterModel[] }>(
		`${ORCAROUTER_BASE}/v1/models`,
		{ headers },
		timeoutMs,
	);

	if (res.status === 429) {
		throw new Error("OrcaRouter rate limited. Try again shortly.");
	}
	if (!res.ok || !res.data) {
		throw new Error(`Failed to fetch model list: ${res.status}${res.error ? ` - ${res.error}` : ""}`);
	}

	// The payload is external input: tolerate a missing or non-array `data`
	// field instead of failing with an opaque "not iterable" TypeError.
	const entries = Array.isArray(res.data.data) ? res.data.data : [];
	const models: Record<string, OrcaRouterModel> = {};
	for (const model of entries) {
		// Entries without an id cannot be keyed and are skipped.
		if (model?.id) models[model.id] = model;
	}
	return models;
}

/**
 * Fetch the catalog and report progress.
 *
 * @param params.notify Optional sink for a summary message.
 * @param params.onProgress Optional progress callback ("list", then "done").
 * @returns The raw catalog keyed by model ID.
 * @throws Error when the fetch fails or the catalog is empty.
 */
export async function refreshOrcaRouterModels(
	params: {
		notify?: (message: string, level?: "info" | "error") => void;
		onProgress?: (progress: RefreshProgress) => void;
	} = {},
): Promise<Record<string, OrcaRouterModel>> {
	const notify = params.notify ?? (() => undefined);
	const onProgress = params.onProgress ?? (() => undefined);

	onProgress({ stage: "list", message: "Fetching model catalog..." });
	const models = await fetchModelCatalog();

	const total = Object.keys(models).length;
	if (total === 0) throw new Error("OrcaRouter returned an empty model catalog");

	const chatCount = assembleModels(models).length;
	notify(`Fetched ${total} models (${chatCount} usable as chat models)`, "info");
	onProgress({ stage: "done", current: total, total, message: "Done" });
	return models;
}

/**
 * Refresh the catalog on behalf of an extension command.
 * Failures are reported through `ctx.ui.notify` with level "error" instead of
 * being thrown.
 *
 * @returns The raw catalog keyed by model ID, or null when the refresh failed.
 */
export async function fetchModels(
	ctx: Pick<ExtensionCommandContext, "ui">,
	onProgress?: (progress: RefreshProgress) => void,
): Promise<Record<string, OrcaRouterModel> | null> {
	try {
		return await refreshOrcaRouterModels({
			notify: (message, level) => ctx.ui.notify(message, level),
			onProgress,
		});
	} catch (error) {
		ctx.ui.notify(error instanceof Error ? error.message : String(error), "error");
		return null;
	}
}