/**
 * Cost estimator — live pricing from OpenRouter (primary) + LiteLLM (fallback)
 * with a 24h localStorage cache and a hardcoded offline fallback.
 *
 * Data sources:
 *   • https://openrouter.ai/api/v1/models (367+ models, CORS-clean, USD/token)
 *   • https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
 *     (2700+ entries, CORS-clean, input_cost_per_token/output_cost_per_token)
 *
 * Normalization: Bedrock IDs like `global.anthropic.claude-opus-4-7` are
 * stripped of the region/profile prefix before matching.
 */

/** Prices are USD per 1M tokens; `context` is the context window in tokens. */
type PriceRow = { input: number; output: number; context?: number }

/** Subset of an OpenRouter `/models` entry that this module reads. */
type OpenRouterModel = {
  id: string
  context_length?: number | null
  pricing?: { prompt?: string; completion?: string }
}

/** Subset of a LiteLLM price-table entry that this module reads. */
type LiteLLMRow = {
  mode?: string
  input_cost_per_token?: number
  output_cost_per_token?: number
  max_input_tokens?: number
  max_tokens?: number
}

const CACHE_KEY = 'careless-pricing-cache-v1'
const CACHE_TTL_MS = 24 * 60 * 60 * 1000 // 24h
const OPENROUTER_URL = 'https://openrouter.ai/api/v1/models'
const LITELLM_URL =
  'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'

/** In-memory parsed table: normalized-model-id → price row. Populated lazily. */
let priceTable: Record<string, PriceRow> = {}
let tableReady = false
let fetchInFlight: Promise<void> | null = null

/* ── Offline fallback (USD per 1M tokens) ──────────────────────────────── */

const FALLBACK_PRICES: Record<string, PriceRow> = {
  // Anthropic
  'claude-opus-4-20250514': { input: 15, output: 75, context: 200_000 },
  'claude-sonnet-4-20250514': { input: 3, output: 15, context: 200_000 },
  'claude-opus-4-7': { input: 15, output: 75, context: 200_000 },
  'claude-opus-4-6': { input: 15, output: 75, context: 200_000 },
  'claude-sonnet-4-6': { input: 3, output: 15, context: 200_000 },
  'claude-sonnet-4-5': { input: 3, output: 15, context: 200_000 },
  'claude-haiku-4-5': { input: 1, output: 5, context: 200_000 },
  'claude-3-5-haiku-20241022': { input: 0.8, output: 4, context: 200_000 },
  // OpenAI
  'gpt-4o': { input: 2.5, output: 10, context: 128_000 },
  'gpt-4o-mini': { input: 0.15, output: 0.6, context: 128_000 },
  'gpt-5': { input: 2, output: 10, context: 400_000 },
  'gpt-5.5': { input: 2, output: 10, context: 400_000 },
  'gpt-5.5-2026-04-23': { input: 2, output: 10, context: 400_000 },
  'gpt-5.2-2025-12-11': { input: 1.5, output: 7.5, context: 256_000 },
  'gpt-5-mini-2025-10-14': { input: 0.3, output: 1.2, context: 128_000 },
  'o1-mini': { input: 3, output: 12, context: 128_000 },
  // Google
  'gemini-2.5-flash': { input: 0.3, output: 2.5, context: 1_000_000 },
  'gemini-2.5-pro': { input: 1.25, output: 10, context: 2_000_000 },
}

/** Default row for unknown models (conservative mid-tier) */
const DEFAULT_ROW: PriceRow = { input: 3, output: 15, context: 128_000 }

/* ── Normalization ────────────────────────────────────────────────────── */

/**
 * Strip region/profile prefixes and return a lowercase canonical id.
 * Examples:
 *   global.anthropic.claude-opus-4-7        → claude-opus-4-7
 *   us.anthropic.claude-sonnet-4-20250514   → claude-sonnet-4-20250514
 *   anthropic/claude-opus-4.7               → claude-opus-4.7
 *   openai/gpt-4o                           → gpt-4o
 *
 * @returns The canonical id, or `''` when `id` is empty.
 */
function normalizeModelId(id: string): string {
  if (!id) return ''
  let m = id.toLowerCase().trim()
  // Bedrock prefixes: global. / us. / eu. / apac. / anthropic.
  m = m.replace(/^(global|us|eu|apac)\./, '')
  m = m.replace(/^anthropic\./, '')
  // OpenRouter/LiteLLM `provider/model` prefixes
  m = m.replace(/^(anthropic|openai|google|meta|mistralai|deepseek|xai|cohere|perplexity)\//, '')
  return m
}

/** Do these two normalized ids refer to the same model? */
function idMatches(needle: string, haystack: string): boolean {
  if (needle === haystack) return true
  // Either is a dotted/dashed substring of the other (e.g. claude-opus-4 vs claude-opus-4-7)
  return haystack.includes(needle) || needle.includes(haystack)
}

/* ── Fetch + parse ─────────────────────────────────────────────────────── */

/** Keep a context-window value only when it is a positive finite number. */
function toContext(value: unknown): number | undefined {
  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : undefined
}

/**
 * Download the OpenRouter model list and convert it to a price table.
 *
 * @returns Normalized-model-id → price row (USD per 1M tokens).
 * @throws Error when the HTTP response is not OK (and whatever `fetch`/`json` throw).
 */
async function fetchOpenRouter(): Promise<Record<string, PriceRow>> {
  const res = await fetch(OPENROUTER_URL, { cache: 'no-cache' })
  if (!res.ok) throw new Error(`openrouter ${res.status}`)
  const data = (await res.json()) as { data?: OpenRouterModel[] } | null
  const out: Record<string, PriceRow> = {}
  for (const m of data?.data ?? []) {
    if (!m || typeof m.id !== 'string') continue
    const prompt = parseFloat(m.pricing?.prompt || '0')
    const completion = parseFloat(m.pricing?.completion || '0')
    if (!Number.isFinite(prompt) || !Number.isFinite(completion)) continue
    // Negative values are sentinels for "variable pricing" (router models),
    // not real prices; using them would produce negative cost estimates.
    if (prompt < 0 || completion < 0) continue
    const nid = normalizeModelId(m.id)
    if (!nid) continue
    out[nid] = {
      input: prompt * 1_000_000, // USD/token → USD per 1M tokens
      output: completion * 1_000_000,
      context: toContext(m.context_length),
    }
  }
  return out
}

/**
 * Download the LiteLLM price table and convert it to a price table.
 * Only `chat` / `completion` entries (or entries without a `mode`) are kept.
 *
 * @returns Normalized-model-id → price row (USD per 1M tokens).
 * @throws Error when the HTTP response is not OK (and whatever `fetch`/`json` throw).
 */
async function fetchLiteLLM(): Promise<Record<string, PriceRow>> {
  const res = await fetch(LITELLM_URL, { cache: 'no-cache' })
  if (!res.ok) throw new Error(`litellm ${res.status}`)
  const data = (await res.json()) as Record<string, LiteLLMRow | null> | null
  const out: Record<string, PriceRow> = {}
  for (const [id, row] of Object.entries(data ?? {})) {
    // One malformed entry must not discard the whole source.
    if (!row || typeof row !== 'object') continue
    if (row.mode && row.mode !== 'chat' && row.mode !== 'completion') continue
    const inp = row.input_cost_per_token
    const outp = row.output_cost_per_token
    if (typeof inp !== 'number' || typeof outp !== 'number') continue
    if (!Number.isFinite(inp) || !Number.isFinite(outp) || inp < 0 || outp < 0) continue
    const nid = normalizeModelId(id)
    if (!nid) continue
    out[nid] = {
      input: inp * 1_000_000,
      output: outp * 1_000_000,
      context: toContext(row.max_input_tokens) ?? toContext(row.max_tokens),
    }
  }
  return out
}

/* ── Cache ─────────────────────────────────────────────────────────────── */

/**
 * Read the cached price table from localStorage.
 *
 * @returns The cached table, or `null` when it is missing, expired, malformed,
 *          or localStorage is unavailable.
 */
function loadCache(): Record<string, PriceRow> | null {
  try {
    const raw = localStorage.getItem(CACHE_KEY)
    if (!raw) return null
    const parsed = JSON.parse(raw) as { ts?: unknown; table?: unknown } | null
    if (!parsed || typeof parsed.ts !== 'number' || !parsed.ts) return null
    const age = Date.now() - parsed.ts
    // A timestamp in the future (clock change) would otherwise never expire.
    if (age < 0 || age > CACHE_TTL_MS) return null
    const table = parsed.table
    if (!table || typeof table !== 'object' || Array.isArray(table)) return null
    return table as Record<string, PriceRow>
  } catch {
    return null
  }
}

/** Persist the table with the current timestamp; storage failures are ignored. */
function saveCache(table: Record<string, PriceRow>): void {
  try {
    localStorage.setItem(CACHE_KEY, JSON.stringify({ ts: Date.now(), table }))
  } catch {
    /* quota, private mode, etc. */
  }
}

/**
 * Fetch both sources concurrently; merge (OpenRouter wins on conflict).
 * Concurrent callers share a single in-flight request.
 *
 * When both sources fail, the table that is already loaded (cache or fallback)
 * is kept and the cache is left untouched, so a transient outage cannot
 * replace good cached prices with the hardcoded fallback.
 */
async function refreshPriceTable(): Promise<void> {
  if (fetchInFlight) return fetchInFlight
  fetchInFlight = (async () => {
    const [orRes, llRes] = await Promise.allSettled([fetchOpenRouter(), fetchLiteLLM()])
    if (orRes.status === 'rejected' && llRes.status === 'rejected') {
      console.warn('[cost-estimator] both pricing sources failed, keeping existing table')
      if (!tableReady) {
        priceTable = { ...FALLBACK_PRICES }
        tableReady = true
      }
      return
    }
    const merged: Record<string, PriceRow> = { ...FALLBACK_PRICES }
    if (llRes.status === 'fulfilled') Object.assign(merged, llRes.value)
    if (orRes.status === 'fulfilled') Object.assign(merged, orRes.value)
    priceTable = merged
    tableReady = true
    saveCache(merged)
  })()
  try {
    await fetchInFlight
  } finally {
    fetchInFlight = null
  }
}

/** Boot the price table: cache → immediate; network → 24h refresh. */
export function initPricing(): void {
  const cached = loadCache()
  // The fallback is fine to use synchronously when there is no usable cache.
  priceTable = cached ?? { ...FALLBACK_PRICES }
  tableReady = true
  // Refresh in background regardless (stale-while-revalidate)
  refreshPriceTable().catch(() => {
    /* already logged */
  })
}

// Auto-init on first import (browser only).
if (typeof window !== 'undefined') {
  try {
    initPricing()
  } catch {
    /* noop */
  }
}

/* ── Lookup ────────────────────────────────────────────────────────────── */

/**
 * Resolve a model id to its price row.
 *
 * Order: exact match on the normalized id → the longest table key contained
 * in the id (so 'claude-opus-4-7' beats 'claude-opus-4' for a suffixed id) →
 * the shortest table key that contains the id (nearest superset, so 'gpt-4'
 * resolves to 'gpt-4o' rather than 'gpt-4o-mini') → `DEFAULT_ROW`.
 */
function lookup(model: string): PriceRow {
  const nid = normalizeModelId(model)
  if (!nid) return DEFAULT_ROW
  const table = tableReady ? priceTable : FALLBACK_PRICES

  // Own-property check: ids such as 'constructor' must not hit Object.prototype.
  if (Object.prototype.hasOwnProperty.call(table, nid)) {
    const exact = table[nid]
    if (exact) return exact
  }

  let contained: PriceRow | null = null // best key that is a substring of nid
  let containedLen = 0
  let containing: PriceRow | null = null // best key that has nid as a substring
  let containingLen = Number.POSITIVE_INFINITY
  for (const [key, row] of Object.entries(table)) {
    if (!key || !row || !idMatches(nid, key)) continue
    if (nid.includes(key)) {
      if (key.length > containedLen) {
        contained = row
        containedLen = key.length
      }
    } else if (key.length < containingLen) {
      containing = row
      containingLen = key.length
    }
  }
  return contained ?? containing ?? DEFAULT_ROW
}

/* ── Public API ────────────────────────────────────────────────────────── */

/** Treat missing, NaN, infinite or negative token counts as zero. */
function sanitizeTokens(count: number): number {
  return Number.isFinite(count) && count > 0 ? count : 0
}

/**
 * Estimate USD cost for a turn.
 *
 * @param model        Model id in any supported form (Bedrock, OpenRouter, bare).
 * @param inputTokens  Prompt tokens; invalid values count as 0.
 * @param outputTokens Completion tokens; invalid values count as 0.
 * @returns Estimated cost in USD.
 */
export function estimateCost(model: string, inputTokens: number, outputTokens: number): number {
  const row = lookup(model)
  const billed = sanitizeTokens(inputTokens) * row.input + sanitizeTokens(outputTokens) * row.output
  return billed / 1_000_000
}

/** Get the context window size for a model (defaults to 128k). */
export function getContextWindow(model: string): number {
  return lookup(model).context ?? 128_000
}

/**
 * Format a USD amount for display; smaller amounts are shown with more
 * decimals. Non-finite or non-positive values render as `$0.00`.
 */
export function formatCost(usd: number): string {
  if (!Number.isFinite(usd) || usd <= 0) return '$0.00'
  if (usd < 0.0001) return '<$0.0001'
  if (usd < 0.01) return `$${usd.toFixed(4)}`
  if (usd < 1) return `$${usd.toFixed(3)}`
  return `$${usd.toFixed(2)}`
}

/** Return per-1M-token input/output prices (for display, e.g. Settings tab). */
export function getPriceRow(model: string): PriceRow {
  return lookup(model)
}

/** Force-refresh the pricing table (skips cache). Useful for "Update" buttons. */
export async function forceRefreshPricing(): Promise<void> {
  try {
    localStorage.removeItem(CACHE_KEY)
  } catch {
    /* noop */
  }
  await refreshPriceTable()
}

/** Is the pricing table ready to answer lookups? */
export function isPricingReady(): boolean {
  return tableReady
}

/** How many entries are in the live table (useful for diagnostics). */
export function getPricingEntryCount(): number {
  return Object.keys(priceTable).length
}