import type { AIProviderName } from "../constants/enums.js";
import type {
  OpenAICompatBuildBodyArgs,
  OpenAICompatStreamLifecycleListeners,
} from "../types/index.js";
import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";

/**
 * LiteLLM Provider — direct HTTP, no AI SDK. Talks to a LiteLLM proxy
 * server (or any deployment that speaks OpenAI chat-completions + the
 * `/v1/models` and `/v1/embeddings` endpoints).
 *
 * All request/stream/tool-loop orchestration lives in
 * `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
 * behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
 * maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
 * 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
 * env-driven fallback list, and native `/v1/embeddings`.
 */
export declare class LiteLLMProvider extends OpenAIChatCompletionsProvider {
  // Class-level (static) state backing the model-list cache used by
  // `getAvailableModels`. Types of private members are not part of the
  // emitted declaration, so they are intentionally left unannotated here.
  private static modelsCache;
  private static modelsCacheTime;
  private static readonly MODELS_CACHE_DURATION;

  /**
   * @param modelName - Optional model identifier; presumably
   *   `getDefaultModel()` applies when omitted — confirm in the implementation.
   * @param sdk - Opaque, optional value; its use is not visible from this
   *   declaration.
   * @param _region - Optional; the leading underscore suggests it is accepted
   *   only for signature compatibility — TODO confirm.
   * @param credentials - Optional per-instance `apiKey` / `baseURL` values.
   */
  constructor(
    modelName?: string,
    sdk?: unknown,
    _region?: string,
    credentials?: {
      apiKey?: string;
      baseURL?: string;
    },
  );

  /** Returns the provider identifier for this class. */
  protected getProviderName(): AIProviderName;

  /** Returns the default model name for this provider. */
  protected getDefaultModel(): string;

  /** Returns the name of a single fallback model. */
  protected getFallbackModelName(): string;

  /**
   * Returns the list of fallback model names (the `LITELLM_FALLBACK_MODELS`
   * env-driven list described in the class overview).
   */
  protected getFallbackModels(): string[];

  /**
   * Gemini 2.5 models on LiteLLM have a known compatibility issue with
   * `max_tokens` — strip it before the wire body is built. Applies to
   * both streaming and non-streaming paths.
   *
   * @param modelId - Model the request is being built for.
   * @param opts - Build-body options supplied by the base class.
   * @returns The options to use when building the request body.
   */
  protected adjustBuildBodyOptions(
    modelId: string,
    opts: OpenAICompatBuildBodyArgs["options"],
  ): OpenAICompatBuildBodyArgs["options"];

  /**
   * Wrap the stream in an OTel span to capture provider-level latency,
   * token usage, finish reason, and cost. Matches the pre-migration
   * behaviour where streamText was wrapped in `neurolink.provider.streamText`.
   *
   * @param modelId - Model the stream is being started for.
   * @returns Stream lifecycle listeners, or `undefined` when none apply.
   */
  protected onStreamStart(
    modelId: string,
  ): OpenAICompatStreamLifecycleListeners | undefined;

  /**
   * Converts an arbitrary thrown value into an `Error`. Per the class
   * overview, a 403 response is surfaced as `ModelAccessDeniedError`.
   *
   * @param error - The raw value caught from a failed request.
   */
  formatProviderError(error: unknown): Error;

  /**
   * Get available models from LiteLLM proxy `/v1/models` endpoint.
   * Caches results for 10 minutes; falls back to env-driven list or a
   * minimal safe default if the API fetch fails.
   *
   * @returns Model identifiers. `Promise` requires a type argument; the
   *   element type mirrors `getFallbackModels(): string[]`, which supplies
   *   the fallback list.
   */
  getAvailableModels(): Promise<string[]>;

  private fetchModelsFromAPI;

  /**
   * Generate an embedding for a single text input via native /v1/embeddings.
   *
   * @param text - Input text to embed.
   * @param modelName - Optional embedding model override.
   * @returns The embedding vector.
   *   NOTE(review): `number[]` is the assumed vector shape — confirm against
   *   the implementation and the base-class signature.
   */
  embed(text: string, modelName?: string): Promise<number[]>;

  /**
   * Generate embeddings for multiple text inputs via native /v1/embeddings.
   *
   * @param texts - Input texts to embed.
   * @param modelName - Optional embedding model override.
   * @returns One embedding vector per input.
   *   NOTE(review): `number[][]` is the assumed shape — confirm against the
   *   implementation and the base-class signature.
   */
  embedMany(texts: string[], modelName?: string): Promise<number[][]>;

  private callEmbeddings;
}