import { NitroModules } from 'react-native-nitro-modules'
import type {
  GenerationStats,
  LLMLoadOptions,
  LLM as LLMSpec,
  StreamEvent,
} from './specs/LLM.nitro'

export type EventCallback = (event: StreamEvent) => void

let instance: LLMSpec | null = null

export type Message = {
  role: 'user' | 'assistant' | 'system'
  content: string
}

export type ToolCallInfo = {
  name: string
  arguments: Record<string, unknown>
}

export type ToolCallUpdate = {
  toolCall: ToolCallInfo
  allToolCalls: ToolCallInfo[]
}

function getInstance(): LLMSpec {
  if (!instance) {
    instance = NitroModules.createHybridObject<LLMSpec>('LLM')
  }
  return instance
}

/**
 * LLM text generation using MLX on Apple Silicon.
 *
 * @example
 * ```ts
 * import { LLM } from 'react-native-nitro-mlx'
 *
 * // Load a model
 * await LLM.load('mlx-community/Qwen3-0.6B-4bit', progress => {
 *   console.log(`Loading: ${(progress * 100).toFixed(0)}%`)
 * })
 *
 * // Stream a response
 * await LLM.stream('Hello!', token => {
 *   process.stdout.write(token)
 * })
 *
 * // Get generation stats
 * const stats = LLM.getLastGenerationStats()
 * console.log(`${stats.tokensPerSecond} tokens/sec`)
 * ```
 */
export const LLM = {
  /**
   * Load a model into memory. Downloads the model from HuggingFace if not already cached.
   * @param modelId - HuggingFace model ID (e.g., 'mlx-community/Qwen3-0.6B-4bit')
   * @param options - Callback invoked with loading progress (0-1)
   */
  load(modelId: string, options: LLMLoadOptions): Promise<void> {
    return getInstance().load(modelId, options)
  },

  /**
   * Generate a complete response for a prompt. Blocks until generation is complete.
   * For streaming responses, use `stream()` instead.
   * @param prompt - The input text to generate a response for
   * @returns The complete generated text
   */
  generate(prompt: string): Promise<string> {
    return getInstance().generate(prompt)
  },

  /**
   * Stream a response token by token with optional tool calling support.
   * Tools must be provided when loading the model via `load()` options.
   * Tools are automatically executed when the model calls them.
   * @param prompt - The input text to generate a response for
   * @param onToken - Callback invoked for each generated token
   * @param onToolCall - Optional callback invoked when a tool is called.
   *   Receives the current tool call and an accumulated array of all tool calls so far.
   * @returns The complete generated text
   */
  stream(
    prompt: string,
    onToken: (token: string) => void,
    onToolCall?: (update: ToolCallUpdate) => void,
  ): Promise<string> {
    const accumulatedToolCalls: ToolCallInfo[] = []

    return getInstance().stream(prompt, onToken, (name: string, argsJson: string) => {
      if (onToolCall) {
        try {
          const args = JSON.parse(argsJson) as Record<string, unknown>
          const toolCall = { name, arguments: args }
          accumulatedToolCalls.push(toolCall)
          onToolCall({
            toolCall,
            allToolCalls: [...accumulatedToolCalls],
          })
        } catch {
          const toolCall = { name, arguments: {} }
          accumulatedToolCalls.push(toolCall)
          onToolCall({
            toolCall,
            allToolCalls: [...accumulatedToolCalls],
          })
        }
      }
    })
  },

  /**
   * Stream with typed events for thinking blocks and tool calls.
   * Provides granular lifecycle events for UI updates.
   *
   * @param prompt - The input text
   * @param onEvent - Callback receiving typed StreamEvent objects
   * @returns Promise resolving to final content string (thinking content stripped)
   *
   * @example
   * ```ts
   * await LLM.streamWithEvents(prompt, (event) => {
   *   switch (event.type) {
   *     case 'token':
   *       appendToContent(event.token)
   *       break
   *     case 'thinking_start':
   *       showThinkingIndicator()
   *       break
   *     case 'thinking_chunk':
   *       appendToThinking(event.chunk)
   *       break
   *     case 'tool_call_start':
   *       showToolCallCard(event.name, event.arguments)
   *       break
   *   }
   * })
   * ```
   */
  streamWithEvents(prompt: string, onEvent: EventCallback): Promise<string> {
    return getInstance().streamWithEvents(prompt, (eventJson: string) => {
      try {
        const event = JSON.parse(eventJson) as StreamEvent
        onEvent(event)
      } catch {
        // Silently ignore malformed events
      }
    })
  },

  /**
   * Stop the current generation. Safe to call even if not generating.
   */
  stop(): void {
    getInstance().stop()
  },

  /**
   * Unload the current model and release memory.
   * Call this when you're done with the model to free up memory.
   */
  unload(): void {
    getInstance().unload()
  },

  /**
   * Get statistics from the last generation.
   * @returns Statistics including token count, tokens/sec (excluding tool execution), TTFT, total time, and tool execution time
   */
  getLastGenerationStats(): GenerationStats {
    return getInstance().getLastGenerationStats()
  },

  /**
   * Get the message history if management is enabled.
   * @returns Array of messages in the history
   */
  getHistory(): Message[] {
    return getInstance().getHistory() as Message[]
  },

  /**
   * Clear the message history.
   */
  clearHistory(): void {
    getInstance().clearHistory()
  },

  /** Whether a model is currently loaded and ready for generation */
  get isLoaded(): boolean {
    return getInstance().isLoaded
  },

  /** Whether text is currently being generated */
  get isGenerating(): boolean {
    return getInstance().isGenerating
  },

  /** The ID of the currently loaded model, or empty string if none */
  get modelId(): string {
    return getInstance().modelId
  },

  /** Enable debug logging to console */
  get debug(): boolean {
    return getInstance().debug
  },

  set debug(value: boolean) {
    getInstance().debug = value
  },

  /**
   * System prompt used when loading the model.
   * Set this before calling `load()`. Changes require reloading the model.
   * @default "You are a helpful assistant."
   */
  get systemPrompt(): string {
    return getInstance().systemPrompt
  },

  set systemPrompt(value: string) {
    getInstance().systemPrompt = value
  },

  get maxTokens(): number {
    return getInstance().maxTokens
  },

  set maxTokens(value: number) {
    getInstance().maxTokens = value
  },

  get temperature(): number {
    return getInstance().temperature
  },

  set temperature(value: number) {
    getInstance().temperature = value
  },

  get enableThinking(): boolean {
    return getInstance().enableThinking
  },

  set enableThinking(value: boolean) {
    getInstance().enableThinking = value
  },
}