import type { GenerationStats, LLMLoadOptions, StreamEvent } from './specs/LLM.nitro';
/**
 * Callback that receives one typed `StreamEvent` per invocation.
 * Passed as the `onEvent` argument of `LLM.streamWithEvents()`.
 */
export type EventCallback = (event: StreamEvent) => void;
/**
 * A single entry in the conversation history, as returned by `LLM.getHistory()`.
 */
export type Message = {
    /** Which participant produced the message. */
    role:
        | 'user'
        | 'assistant'
        | 'system'
        | 'tool';
    /** Text body of the message. */
    content: string;
};
/**
 * Describes one tool invocation: the tool's name and the arguments it was called with.
 */
export type ToolCallInfo = {
    /** Name of the tool being called. */
    name: string;
    /** Arguments keyed by string; values are untyped and must be narrowed before use. */
    arguments: { [key: string]: unknown };
};
/**
 * Payload delivered to the `onToolCall` callback of `LLM.stream()`.
 */
export type ToolCallUpdate = {
    /** The tool call that triggered this update. */
    toolCall: ToolCallInfo;
    /** Accumulated list of all tool calls so far. */
    allToolCalls: Array<ToolCallInfo>;
};
/**
 * LLM text generation using MLX on Apple Silicon.
 *
 * @example
 * ```ts
 * import { LLM } from 'react-native-nitro-mlx'
 *
 * // Load a model. The optional second argument is an `LLMLoadOptions` object.
 * // NOTE(review): the `onProgress` field name is assumed — confirm against `LLMLoadOptions`.
 * await LLM.load('mlx-community/Qwen3-0.6B-4bit', {
 *   onProgress: progress => {
 *     console.log(`Loading: ${(progress * 100).toFixed(0)}%`)
 *   },
 * })
 *
 * // Stream a response
 * await LLM.stream('Hello!', token => {
 *   process.stdout.write(token)
 * })
 *
 * // Get generation stats
 * const stats = LLM.getLastGenerationStats()
 * console.log(`${stats.tokensPerSecond} tokens/sec`)
 * ```
 */
export declare const LLM: {
    /**
     * Load a model into memory. Downloads the model from HuggingFace if not already cached.
     * @param modelId - HuggingFace model ID (e.g., 'mlx-community/Qwen3-0.6B-4bit')
     * @param options - Optional load settings (`LLMLoadOptions`). Tools used by `stream()`
     *   are supplied here. Loading progress (0-1) is presumably reported through a callback
     *   in this object — confirm the field names against `LLMLoadOptions`.
     * @returns Promise that resolves once loading has finished
     */
    load(modelId: string, options?: LLMLoadOptions): Promise<void>;
    /**
     * Generate a complete response for a prompt. The returned promise does not resolve
     * until generation is complete.
     * For streaming responses, use `stream()` instead.
     * @param prompt - The input text to generate a response for
     * @returns The complete generated text
     */
    generate(prompt: string): Promise<string>;
    /**
     * Stream a response token by token with optional tool calling support.
     * Tools must be provided when loading the model via `load()` options.
     * Tools are automatically executed when the model calls them.
     * @param prompt - The input text to generate a response for
     * @param onToken - Callback invoked for each generated token
     * @param onToolCall - Optional callback invoked when a tool is called.
     *   Receives the current tool call and an accumulated array of all tool calls so far.
     * @returns The complete generated text
     */
    stream(prompt: string, onToken: (token: string) => void, onToolCall?: (update: ToolCallUpdate) => void): Promise<string>;
    /**
     * Stream with typed events for thinking blocks and tool calls.
     * Provides granular lifecycle events for UI updates.
     *
     * @param prompt - The input text
     * @param onEvent - Callback receiving typed StreamEvent objects
     * @returns Promise resolving to final content string (thinking content stripped)
     *
     * @example
     * ```ts
     * await LLM.streamWithEvents(prompt, (event) => {
     *   switch (event.type) {
     *     case 'token':
     *       appendToContent(event.token)
     *       break
     *     case 'thinking_start':
     *       showThinkingIndicator()
     *       break
     *     case 'thinking_chunk':
     *       appendToThinking(event.chunk)
     *       break
     *     case 'tool_call_start':
     *       showToolCallCard(event.name, event.arguments)
     *       break
     *   }
     * })
     * ```
     */
    streamWithEvents(prompt: string, onEvent: EventCallback): Promise<string>;
    /**
     * Stop the current generation. Safe to call even if not generating.
     */
    stop(): void;
    /**
     * Unload the current model and release memory.
     * Call this when you're done with the model to free up memory.
     */
    unload(): void;
    /**
     * Get statistics from the last generation.
     * @returns Statistics including token count, tokens/sec (excluding tool execution), TTFT, total time, and tool execution time
     */
    getLastGenerationStats(): GenerationStats;
    /**
     * Get the message history if history management is enabled.
     * @returns Array of messages in the history
     */
    getHistory(): Message[];
    /**
     * Clear the message history.
     */
    clearHistory(): void;
    /** Whether a model is currently loaded and ready for generation */
    readonly isLoaded: boolean;
    /** Whether text is currently being generated */
    readonly isGenerating: boolean;
    /** The ID of the currently loaded model, or empty string if none */
    readonly modelId: string;
    /** Enable debug logging to console */
    debug: boolean;
    /**
     * System prompt used when loading the model.
     * Set this before calling `load()`. Changes require reloading the model.
     * @default "You are a helpful assistant."
     */
    systemPrompt: string;
};
//# sourceMappingURL=llm.d.ts.map