import type { AIProviderName } from "../constants/enums.js";
import type { NeurolinkCredentials, StreamOptions, TextGenerationOptions } from "../types/index.js";
import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";

/**
 * Ollama Provider — direct HTTP, no AI SDK.
 *
 * Wraps a local (or remote/Cloud) Ollama server via its OpenAI-compatible
 * `/v1` API. All request / stream / multi-step tool-loop orchestration lives
 * in `OpenAIChatCompletionsProvider`; this class declares configuration plus
 * the Ollama-specific behaviour:
 *
 *   1. `/v1` base-URL normalization (accepts a bare `OLLAMA_BASE_URL` host).
 *   2. No-auth-by-default with an optional `OLLAMA_API_KEY` for Ollama Cloud.
 *   3. Configurable per-model tool gating via `OLLAMA_TOOL_CAPABLE_MODELS` /
 *      `modelConfig` (`supportsTools`).
 *   4. Elevated request timeout for slow large local models (5-minute base
 *      default, overridable via `OLLAMA_TIMEOUT`).
 *   5. Native `/v1/embeddings` (`embed` / `embedMany`).
 *   6. Rich, actionable error mapping (`ollama serve` / `ollama pull` hints).
 *
 * Model discovery uses the base's `/v1/models` probe (Ollama supports it).
 *
 * @see https://docs.ollama.com/api/openai-compatibility
 */
export declare class OllamaProvider extends OpenAIChatCompletionsProvider {
    /**
     * @param modelName - Model to use; optional.
     * @param sdk - Optional opaque SDK handle (typed `unknown` here).
     * @param _region - Optional region argument; the underscore prefix
     *   suggests it is not used by this provider.
     * @param credentials - Optional Ollama-specific credentials.
     */
    constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["ollama"]);

    /** Returns the provider identifier for this class. */
    protected getProviderName(): AIProviderName;

    /** Returns the model name used when the caller did not supply one. */
    protected getDefaultModel(): string;

    /** Returns the model name to fall back to. */
    protected getFallbackModelName(): string;

    /**
     * Converts an arbitrary thrown value into an `Error`.
     *
     * @param error - The value that was thrown or rejected.
     * @returns An `Error` describing the failure.
     */
    protected formatProviderError(error: unknown): Error;

    /**
     * Ollama proxies many local models with varying tool support. When
     * `OLLAMA_TOOL_CAPABLE_MODELS` (or `modelConfig`'s
     * `modelBehavior.toolCapableModels`) is configured, gate tools on a
     * substring match against the current model; with no list configured,
     * assume tools are supported (don't disable on absent evidence).
     *
     * @returns `true` when tools may be offered to the current model.
     */
    supportsTools(): boolean;

    /**
     * Local models are slow; the base already defaults Ollama to a 5-minute
     * timeout and honors a per-call `options.timeout`. Preserve the legacy
     * `OLLAMA_TIMEOUT` env override for callers who relied on it, applied only
     * when no explicit per-call timeout is set. Parsed with the shared
     * `parseTimeout` so both millisecond numbers ("240000") and duration strings
     * ("4m", "30s") work; a malformed value is ignored in favour of the default.
     *
     * @param options - The per-call generation or stream options.
     * @returns The request timeout as a number (milliseconds, per the
     *   examples above).
     */
    getTimeout(options: TextGenerationOptions | StreamOptions): number;

    /**
     * Health check: probe `/v1/models` and require at least one installed model.
     * A reachable server with zero models would let `resolveModelName()` fall
     * back to a model that the first real request can't serve, so report unusable.
     *
     * @returns A promise resolving to whether the server is usable.
     */
    validateConfiguration(): Promise<boolean>;

    /**
     * Returns a snapshot of the provider's effective configuration.
     *
     * @returns The provider name, the active and default model names, and the
     *   base URL.
     */
    getConfiguration(): {
        provider: AIProviderName;
        model: string;
        defaultModel: string;
        baseURL: string;
    };

    /**
     * Generate an embedding for a single text input via native /v1/embeddings.
     * Uses `OLLAMA_EMBEDDING_MODEL` (default `nomic-embed-text`); the embedding
     * model must be pulled locally (`ollama pull nomic-embed-text`).
     *
     * @param text - The text to embed.
     * @param modelName - Optional embedding model override.
     * @returns A promise resolving to the embedding vector.
     */
    embed(text: string, modelName?: string): Promise<number[]>;

    /**
     * Generate embeddings for multiple text inputs via native /v1/embeddings.
     *
     * @param texts - The texts to embed.
     * @param modelName - Optional embedding model override.
     * @returns A promise resolving to one embedding vector per input text.
     */
    embedMany(texts: string[], modelName?: string): Promise<number[][]>;

    private callEmbeddings;
}