import type { GenerationStats, LLMLoadOptions, StreamEvent } from './specs/LLM.nitro';

/** Callback that receives each typed event emitted by `LLM.streamWithEvents()`. */
export type EventCallback = (event: StreamEvent) => void;

/** A single entry in the message history returned by `LLM.getHistory()`. */
export type Message = {
  /** Who produced the message. */
  role: 'user' | 'assistant' | 'system' | 'tool';
  /** Text content of the message. */
  content: string;
};

/** Description of one tool invocation requested by the model. */
export type ToolCallInfo = {
  /** Name of the tool being called. */
  name: string;
  /**
   * Arguments passed to the tool, keyed by argument name.
   * Values are typed `unknown`; narrow them before use.
   */
  arguments: Record<string, unknown>;
};

/** Payload delivered to the `onToolCall` callback of `LLM.stream()`. */
export type ToolCallUpdate = {
  /** The tool call that triggered this update. */
  toolCall: ToolCallInfo;
  /** Accumulated list of all tool calls made so far. */
  allToolCalls: ToolCallInfo[];
};

/**
 * LLM text generation using MLX on Apple Silicon.
 *
 * @example
 * ```ts
 * import { LLM } from 'react-native-nitro-mlx'
 *
 * // Load a model (pass an `LLMLoadOptions` object as the second
 * // argument to customize loading, e.g. to provide tools)
 * await LLM.load('mlx-community/Qwen3-0.6B-4bit')
 *
 * // Stream a response
 * await LLM.stream('Hello!', token => {
 *   process.stdout.write(token)
 * })
 *
 * // Get generation stats
 * const stats = LLM.getLastGenerationStats()
 * console.log(`${stats.tokensPerSecond} tokens/sec`)
 * ```
 */
export declare const LLM: {
  /**
   * Load a model into memory. Downloads the model from HuggingFace if not already cached.
   * @param modelId - HuggingFace model ID (e.g., 'mlx-community/Qwen3-0.6B-4bit')
   * @param options - Optional load options; see `LLMLoadOptions` for the available
   *   fields. Tools used by `stream()` must be supplied here.
   * @returns Promise that settles when loading has finished
   */
  load(modelId: string, options?: LLMLoadOptions): Promise<void>;

  /**
   * Generate a complete response for a prompt. Blocks until generation is complete.
   * For streaming responses, use `stream()` instead.
   * @param prompt - The input text to generate a response for
   * @returns The complete generated text
   */
  generate(prompt: string): Promise<string>;

  /**
   * Stream a response token by token with optional tool calling support.
   * Tools must be provided when loading the model via `load()` options.
   * Tools are automatically executed when the model calls them.
   * @param prompt - The input text to generate a response for
   * @param onToken - Callback invoked for each generated token
   * @param onToolCall - Optional callback invoked when a tool is called.
   *   Receives the current tool call and an accumulated array of all tool calls so far.
   * @returns The complete generated text
   */
  stream(
    prompt: string,
    onToken: (token: string) => void,
    onToolCall?: (update: ToolCallUpdate) => void,
  ): Promise<string>;

  /**
   * Stream with typed events for thinking blocks and tool calls.
   * Provides granular lifecycle events for UI updates.
   *
   * @param prompt - The input text
   * @param onEvent - Callback receiving typed StreamEvent objects
   * @returns Promise resolving to final content string (thinking content stripped)
   *
   * @example
   * ```ts
   * await LLM.streamWithEvents(prompt, (event) => {
   *   switch (event.type) {
   *     case 'token':
   *       appendToContent(event.token)
   *       break
   *     case 'thinking_start':
   *       showThinkingIndicator()
   *       break
   *     case 'thinking_chunk':
   *       appendToThinking(event.chunk)
   *       break
   *     case 'tool_call_start':
   *       showToolCallCard(event.name, event.arguments)
   *       break
   *   }
   * })
   * ```
   */
  streamWithEvents(prompt: string, onEvent: EventCallback): Promise<string>;

  /**
   * Stop the current generation. Safe to call even if not generating.
   */
  stop(): void;

  /**
   * Unload the current model and release memory.
   * Call this when you're done with the model to free up memory.
   */
  unload(): void;

  /**
   * Get statistics from the last generation.
   * @returns Statistics including token count, tokens/sec (excluding tool execution),
   *   TTFT, total time, and tool execution time
   */
  getLastGenerationStats(): GenerationStats;

  /**
   * Get the message history if history management is enabled.
   * @returns Array of messages in the history
   */
  getHistory(): Message[];

  /**
   * Clear the message history.
   */
  clearHistory(): void;

  /** Whether a model is currently loaded and ready for generation */
  readonly isLoaded: boolean;

  /** Whether text is currently being generated */
  readonly isGenerating: boolean;

  /** The ID of the currently loaded model, or empty string if none */
  readonly modelId: string;

  /** Enable debug logging to console */
  debug: boolean;

  /**
   * System prompt used when loading the model.
   * Set this before calling `load()`. Changes require reloading the model.
   * @default "You are a helpful assistant."
   */
  systemPrompt: string;
};
//# sourceMappingURL=llm.d.ts.map