import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { type ExtensionCommandContext, getAgentDir, type ProviderModelConfig } from "@earendil-works/pi-coding-agent"; import { resolve as resolveThinkingLevelMap } from "./thinking-levels.ts"; import { concurrentMap, fetchJsonWithTimeout, getContextLength } from "./utils.ts"; // --- Constants --- const CACHE_DIR = join(getAgentDir(), "cache"); const CACHE_FILE = join(CACHE_DIR, "ollama-cloud-models.json"); const CACHE_MAX_AGE_MS = 30 * 24 * 60 * 60 * 1000; const FETCH_TIMEOUT_MS = 10000; export const OLLAMA_BASE = (process.env.OLLAMA_API_BASE || "https://ollama.com").replace(/\/+$/, ""); // --- Raw API types --- /** Response from POST /api/show */ interface OllamaShowResponse { details: { parent_model: string; format: string; family: string; families: string[] | null; parameter_size: string; quantization_level: string; }; model_info: Record; capabilities: string[]; modified_at: string; } type CachedOllamaModel = OllamaShowResponse; /** On-disk cache: raw /api/show responses keyed by model ID. */ interface CachedData { /** Unix epoch milliseconds used to decide when the generated metadata is stale. */ timestamp?: number; models: Record; } type RefreshProgressStage = "list" | "details" | "done"; export interface RefreshProgress { stage: RefreshProgressStage; current?: number; total?: number; failed?: number; message: string; } // --- Assembly: raw API data -> ProviderModelConfig[] --- /** * Build an explicit OpenAICompletionsCompat for an Ollama Cloud model. * Every flag is set explicitly so the contract is visible to maintainers. * * Ollama API reference: https://docs.ollama.com/api/openai-compatibility * pi type definition: https://github.com/earendil-works/pi/blob/b94482762321ed0b9f8f245be57c84d786a7105d/packages/ai/src/types.ts#L361-L400 * pi compat resolution: https://docs.ollama.com/api/openai-compatibility https://github.com/badlogic/pi-mono/blob/main/packages/ai/src/types.ts#L365-L425 */ function buildCompat(): ProviderModelConfig["compat"] { return { // Ollama uses "system" role, not "developer" (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#supportsDeveloperRole). supportsDeveloperRole: false, // reasoning_effort works (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#supportsReasoningEffort, tested in think-experiment.md). supportsReasoningEffort: true, // "store" is not a supported field (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#supportsStore). supportsStore: false, // Ollama lists "max_tokens", not "max_completion_tokens" (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#maxTokensField). maxTokensField: "max_tokens", // stream_options.include_usage is supported (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#supportsUsageInStreaming). supportsUsageInStreaming: true, // Default: tool results don't need a name field (pi: types.ts#requiresToolResultName). requiresToolResultName: false, // Default: no assistant message required between tool result and user (pi: types.ts#requiresAssistantAfterToolResult). requiresAssistantAfterToolResult: false, // Ollama supports native thinking blocks (pi: types.ts#requiresThinkingAsText). requiresThinkingAsText: false, // DeepSeek-specific, not needed for Ollama (pi: types.ts#requiresReasoningContentOnAssistantMessages). requiresReasoningContentOnAssistantMessages: false, // reasoning_effort format works (pi: types.ts#thinkingFormat, tested in think-experiment.md). thinkingFormat: "openai", // Ollama does not support tool_choice, so strict mode is unavailable (ollama: docs.ollama.com/api/openai-compatibility, pi: types.ts#supportsStrictMode). supportsStrictMode: false, // Anthropic cache_control not relevant; Ollama has implicit KV cache only (pi: types.ts#cacheControlFormat). // Explicitly undefined: JSON.stringify drops undefined values, keeping // models.generated.ts structurally consistent with assembleModels() runtime output. // Session affinity headers not relevant for Ollama (pi: types.ts#sendSessionAffinityHeaders). sendSessionAffinityHeaders: false, // No explicit cache-retention API (pi: types.ts#supportsLongCacheRetention). supportsLongCacheRetention: false, // Not z.ai (pi: types.ts#zaiToolStream). zaiToolStream: false, cacheControlFormat: undefined, openRouterRouting: {}, vercelGatewayRouting: {}, }; } export function assembleModels(raw: Record): ProviderModelConfig[] { return Object.entries(raw) .filter(([, data]) => data.capabilities?.includes("tools")) .map(([id, data]) => ({ id, name: id, reasoning: data.capabilities?.includes("thinking") ?? false, thinkingLevelMap: resolveThinkingLevelMap(id, data.capabilities ?? []), input: (data.capabilities?.includes("vision") ? ["text", "image"] : ["text"]) as ("text" | "image")[], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: getContextLength(data.model_info ?? {}), // No per-model limit exposed by the API (https://docs.ollama.com/api-reference/show-model-details, // https://github.com/ollama/ollama/issues/7222). 32768 matches most Ollama Cloud context windows. maxTokens: 32768, compat: buildCompat(), })); } // --- Cache I/O --- type CacheState = | { status: "fresh"; models: Record } | { status: "stale"; models: Record } | { status: "missing" }; function createCacheData(models: Record, now = new Date()): CachedData { return { timestamp: now.getTime(), models }; } function readCacheData(path: string): CachedData | null { try { const data: CachedData = JSON.parse(readFileSync(path, "utf-8")); if (!data.models || Object.keys(data.models).length === 0) return null; return data; } catch { return null; } } function isFreshGeneratedCache(data: CachedData): boolean { if (typeof data.timestamp !== "number" || !Number.isFinite(data.timestamp)) return false; return Date.now() - data.timestamp <= CACHE_MAX_AGE_MS; } export function readCacheState(): CacheState { if (!existsSync(CACHE_FILE)) return { status: "missing" }; const data = readCacheData(CACHE_FILE); if (!data) { try { rmSync(CACHE_FILE, { force: true }); } catch { // Ignore cache delete errors. } return { status: "missing" }; } return isFreshGeneratedCache(data) ? { status: "fresh", models: data.models } : { status: "stale", models: data.models }; } export function writeCache(models: Record): void { try { mkdirSync(CACHE_DIR, { recursive: true }); writeFileSync(CACHE_FILE, JSON.stringify(createCacheData(models), null, 2)); } catch { // Ignore cache write errors } } // --- Fetch Models --- export async function fetchModelIds(timeoutMs = FETCH_TIMEOUT_MS): Promise { const headers: Record = {}; const apiKey = process.env.OLLAMA_API_KEY; if (apiKey) { headers.Authorization = `Bearer ${apiKey}`; } const res = await fetchJsonWithTimeout<{ data: { id: string }[] }>( `${OLLAMA_BASE}/v1/models`, { headers }, timeoutMs, ); if (res.status === 429) { throw new Error("Ollama Cloud rate limited. Try again shortly."); } if (!res.ok || !res.data) { throw new Error(`Failed to fetch model list: ${res.status}${res.error ? ` - ${res.error}` : ""}`); } return res.data.data.map((m) => m.id); } export async function fetchModelDetails(id: string, timeoutMs = FETCH_TIMEOUT_MS): Promise { const headers: Record = { "Content-Type": "application/json" }; const apiKey = process.env.OLLAMA_API_KEY; if (apiKey) { headers.Authorization = `Bearer ${apiKey}`; } const res = await fetchJsonWithTimeout( `${OLLAMA_BASE}/api/show`, { method: "POST", headers, body: JSON.stringify({ model: id }), }, timeoutMs, ); if (res.status === 429) { throw new Error("Ollama Cloud rate limited. Try again shortly."); } if (!res.ok || !res.data) { throw new Error(`Failed to fetch /api/show for ${id}: ${res.status}${res.error ? ` - ${res.error}` : ""}`); } return res.data; } export async function refreshOllamaCloudModels(params: { notify?: (message: string, level?: "info" | "error") => void; onProgress?: (progress: RefreshProgress) => void; workers?: number; }): Promise> { const notify = params.notify ?? (() => undefined); const onProgress = params.onProgress ?? (() => undefined); onProgress({ stage: "list", message: "Fetching model list..." }); const modelIds = await fetchModelIds(); notify(`Found ${modelIds.length} models, fetching details...`); onProgress({ stage: "details", current: 0, total: modelIds.length, failed: 0, message: "Fetching model details" }); let detailsDone = 0; let detailsFailed = 0; const detailResults = await concurrentMap(modelIds, params.workers ?? 8, async (id) => { try { return [id, await fetchModelDetails(id)] as const; } catch (error) { detailsFailed++; throw error; } finally { detailsDone++; onProgress({ stage: "details", current: detailsDone, total: modelIds.length, failed: detailsFailed, message: "Fetching model details", }); } }); const models: Record = {}; for (const result of detailResults) { if (result.status === "fulfilled") { const [id, data] = result.value; models[id] = data; } } const succeeded = Object.keys(models).length; if (succeeded === 0) throw new Error(`Failed to fetch model details${detailsFailed ? ` (${detailsFailed} failed)` : ""}`); notify(`Fetched ${succeeded} model details${detailsFailed ? ` (${detailsFailed} failed)` : ""}`, "info"); onProgress({ stage: "done", current: Object.keys(models).length, total: Object.keys(models).length, message: "Done", }); return models; } export async function fetchModels( ctx: Pick, onProgress?: (progress: RefreshProgress) => void, ): Promise | null> { try { return await refreshOllamaCloudModels({ notify: (message, level) => ctx.ui.notify(message, level), onProgress, }); } catch (error) { ctx.ui.notify(error instanceof Error ? error.message : String(error), "error"); return null; } }