/**
 * transformers-js.ts — On-device ML via 🤗 Transformers.js v4.2.0
 *
 * Runs ANY HF model from the Hub locally in the browser via ONNX Runtime Web.
 * Backends: WASM (universal) or WebGPU (faster, limited model support).
 * Models download once and are then cached in the browser via the Cache API.
 *
 * Package: @huggingface/transformers (v4.2.0)
 *
 * Coverage verified against node_modules/@huggingface/transformers/src/pipelines/index.js
 * — all 25 task types + 5 aliases supported, defaults match upstream exactly.
 */
import { tool } from '@strands-agents/sdk'
import { z } from 'zod'

/* ──────────────────────────────────────────────────────────────────────────
 * Task registry — sourced DIRECTLY from transformers.js v4.2.0 source.
 * Keeping this client-side lets the agent introspect available tasks +
 * default models without reaching into node_modules.
 * ──────────────────────────────────────────────────────────────────────── */

/** Registry metadata describing one pipeline task. */
type TaskInfo = {
  /** Category tag; the transformers_list_tasks tool filters on this value. */
  type: 'text' | 'audio' | 'image' | 'multimodal'
  /** Model id used when the caller does not pass one. */
  defaultModel: string
  /** dtype used when the caller passes none and no fp32 fallback applies. */
  defaultDtype?: string
  /** Human-readable summary of what the task does. */
  description: string
}

/**
 * Canonical task name → metadata (category, default model, optional default
 * dtype, description).
 *
 * Looked up with the alias-resolved task name when a pipeline is loaded, and
 * returned (optionally filtered by `type`) by the transformers_list_tasks tool.
 *
 * Note that `type` is a category tag, not the section heading a task sits under
 * in this table: e.g. 'text-to-audio' is tagged 'text' and
 * 'automatic-speech-recognition' is tagged 'multimodal', so filtering by
 * 'audio' only yields 'audio-classification'.
 */
export const TASK_REGISTRY: Record<string, TaskInfo> = {
  // ── Text ──
  'text-classification': {
    type: 'text',
    defaultModel: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
    description: 'Sentiment/topic classification. Returns label + score. Alias: sentiment-analysis.',
  },
  'token-classification': {
    type: 'text',
    defaultModel: 'Xenova/bert-base-multilingual-cased-ner-hrl',
    description: 'NER/POS tagging, per-token labels. Alias: ner.',
  },
  'question-answering': {
    type: 'text',
    defaultModel: 'Xenova/distilbert-base-cased-distilled-squad',
    description: 'Extractive QA. Input: { question, context }.',
  },
  'fill-mask': {
    type: 'text',
    defaultModel: 'onnx-community/ettin-encoder-32m-ONNX',
    defaultDtype: 'fp32',
    description: 'Masked-LM fill. Input must contain [MASK] (or model-specific token).',
  },
  summarization: {
    type: 'text',
    defaultModel: 'Xenova/distilbart-cnn-6-6',
    description: 'Abstractive summary of long text.',
  },
  translation: {
    type: 'text',
    defaultModel: 'Xenova/t5-small',
    description: 'Translate between languages. Task can be suffixed "_xx_to_yy".',
  },
  'text2text-generation': {
    type: 'text',
    defaultModel: 'Xenova/flan-t5-small',
    description: 'T5-style seq2seq (instruction-following, translation, QA).',
  },
  'text-generation': {
    type: 'text',
    defaultModel: 'onnx-community/Qwen3-0.6B-ONNX',
    defaultDtype: 'q4',
    description: 'Causal LM generation. Accepts plain text OR chat Message[]. Streams via TextStreamer.',
  },
  'zero-shot-classification': {
    type: 'text',
    defaultModel: 'Xenova/distilbert-base-uncased-mnli',
    description: 'Classify text against ARBITRARY candidate_labels at inference. No training.',
  },
  'feature-extraction': {
    type: 'text',
    defaultModel: 'onnx-community/all-MiniLM-L6-v2-ONNX',
    defaultDtype: 'fp32',
    description: 'Text embeddings for semantic search/clustering/RAG. Alias: embeddings.',
  },

  // ── Audio ──
  'audio-classification': {
    type: 'audio',
    defaultModel: 'Xenova/wav2vec2-base-superb-ks',
    description: 'Classify audio clip (keyword spotting, genre, speaker ID).',
  },
  'zero-shot-audio-classification': {
    type: 'multimodal',
    defaultModel: 'Xenova/clap-htsat-unfused',
    description: 'CLAP — classify audio against arbitrary text labels.',
  },
  'automatic-speech-recognition': {
    type: 'multimodal',
    defaultModel: 'Xenova/whisper-tiny.en',
    description: 'Speech-to-text. Whisper family. Alias: asr.',
  },
  'text-to-audio': {
    type: 'text',
    defaultModel: 'onnx-community/Supertonic-TTS-ONNX',
    defaultDtype: 'fp32',
    description: 'Text-to-speech. Alias: text-to-speech. Returns { audio: Float32Array, sampling_rate }.',
  },

  // ── Image ──
  'image-to-text': {
    type: 'multimodal',
    defaultModel: 'Xenova/vit-gpt2-image-captioning',
    description: 'Image captioning / VLM. Also works with Florence-2, Moondream for rich VLM tasks.',
  },
  'image-classification': {
    type: 'multimodal',
    defaultModel: 'Xenova/vit-base-patch16-224',
    description: 'Classify image into fixed label set.',
  },
  'image-segmentation': {
    type: 'multimodal',
    defaultModel: 'Xenova/detr-resnet-50-panoptic',
    description: 'Panoptic/semantic/instance segmentation. Returns masks per label.',
  },
  'background-removal': {
    type: 'image',
    defaultModel: 'Xenova/modnet',
    description: 'Alpha-matte foreground extraction (like remove.bg).',
  },
  'zero-shot-image-classification': {
    type: 'multimodal',
    defaultModel: 'Xenova/clip-vit-base-patch32',
    description: 'CLIP — classify image against arbitrary text labels.',
  },
  'object-detection': {
    type: 'multimodal',
    defaultModel: 'Xenova/detr-resnet-50',
    description: 'Detect objects with bounding boxes + labels.',
  },
  'zero-shot-object-detection': {
    type: 'multimodal',
    defaultModel: 'Xenova/owlvit-base-patch32',
    description: 'Detect arbitrary text-queried objects (OWL-ViT).',
  },
  'document-question-answering': {
    type: 'multimodal',
    defaultModel: 'Xenova/donut-base-finetuned-docvqa',
    description: 'Answer questions about a document image (Donut).',
  },
  'image-to-image': {
    type: 'image',
    defaultModel: 'Xenova/swin2SR-classical-sr-x2-64',
    description: 'Image super-resolution / enhancement.',
  },
  'depth-estimation': {
    type: 'image',
    defaultModel: 'onnx-community/depth-anything-v2-small',
    description: 'Monocular depth estimation. Returns per-pixel depth map.',
  },
  'image-feature-extraction': {
    type: 'image',
    defaultModel: 'onnx-community/dinov3-vits16-pretrain-lvd1689m-ONNX',
    defaultDtype: 'fp32',
    description: 'Image embeddings for visual search / retrieval.',
  },
}

/** Short alias → canonical task name (the key used in the task registry). */
export const TASK_ALIASES: Record<string, string> = {
  'sentiment-analysis': 'text-classification',
  ner: 'token-classification',
  asr: 'automatic-speech-recognition',
  'text-to-speech': 'text-to-audio',
  embeddings: 'feature-extraction',
}

/**
 * Map a caller-supplied task string to its canonical registry key.
 *
 * @param task Task name, alias, or suffixed name such as "translation_en_to_fr".
 * @returns The alias target when `task` is a known alias; otherwise the part of
 *          `task` before the first underscore (so "translation_en_to_fr"
 *          becomes "translation").
 */
function resolveTask(task: string): string {
  // Own-property check: a bare `TASK_ALIASES[task]` lookup also finds inherited
  // members ("constructor", "toString", …) and would return a function here.
  const alias = Object.prototype.hasOwnProperty.call(TASK_ALIASES, task)
    ? TASK_ALIASES[task]
    : undefined
  return alias ?? task.split('_', 1)[0]
}

/* ──────────────────────────────────────────────────────────────────────────
 * Lazy loader + pipeline cache with proper disposal
 * ──────────────────────────────────────────────────────────────────────── */

/** Module namespace of @huggingface/transformers once imported; null until first use. */
let _transformers: any = null

/**
 * Return the Transformers.js module, importing it dynamically on first call
 * and reusing the stored namespace afterwards.
 */
async function getTransformers() {
  if (_transformers) return _transformers
  _transformers = await import('@huggingface/transformers')
  return _transformers
}

/** dtype accepted when loading a pipeline: one dtype string, or a per-session map. */
type DtypeSpec = string | Record<string, string>

/** A loaded pipeline together with the settings it was created with. */
type CachedPipeline = {
  /** The callable returned by the upstream `pipeline()` factory. */
  pipe: any
  /** Canonical (alias-resolved) task name. */
  task: string
  /** Model id the pipeline was loaded from. */
  model: string
  /** Backend the pipeline runs on. */
  device: string
  /** dtype the pipeline was actually loaded with, if any was specified. */
  dtype?: DtypeSpec
  /** `Date.now()` timestamp (ms) taken when the pipeline finished loading. */
  loadedAt: number
}

/** Loaded pipelines, keyed by the string produced by `cacheKey`. */
const PIPELINE_CACHE = new Map<string, CachedPipeline>()

/**
 * Build the cache key for a (task, model, device, dtype) combination.
 *
 * A per-session dtype map is serialized with its keys sorted, so two maps with
 * the same entries in a different order share one key instead of loading the
 * same model twice.
 *
 * @param task   Task name.
 * @param model  Model id.
 * @param device Backend name.
 * @param dtype  dtype string, per-session map, or undefined ("default").
 * @returns `task::model::device::dtype`.
 */
function cacheKey(task: string, model: string, device: string, dtype?: DtypeSpec): string {
  let d: string
  if (dtype == null) {
    d = 'default'
  } else if (typeof dtype === 'string') {
    d = dtype
  } else {
    const sorted: Record<string, string> = {}
    for (const k of Object.keys(dtype).sort()) sorted[k] = dtype[k]
    d = JSON.stringify(sorted)
  }
  return `${task}::${model}::${device}::${d}`
}

/**
 * Firefox + ONNX Runtime Web has a known incompatibility with q8/q4 quantized
 * weights that use the MatMulNBits-with-merged-scale pattern. The error is:
 *
 *   "Missing required scale: model.<path>.weight_merged_0_scale for node:
 *    model.<path>.weight_transposed_DequantizeLinear"
 *
 * Chrome/Safari's bundled ORT tolerates this; Firefox's doesn't. It affects:
 *  - Whisper (decoder.embed_tokens)    - automatic-speech-recognition
 *  - BART/T5/NLLB (shared)             - summarization, translation
 *  - vit-gpt2 (shared)                 - image-to-text
 *  - CLIP (shared)                     - zero-shot-image-classification
 *  - BERT-NER (shared)                 - token-classification
 *  - Depth-Anything etc.               - depth-estimation
 *
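 * For the affected tasks we fall back to fp32 for all sessions whenever the
 * caller gives no dtype. The check is based on task and model name only (no
 * browser detection), so it applies in every browser. fp32 weights are
 * considerably larger than quantized ones (more download and memory), but they
 * avoid the missing-scale error.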
 *
 * Session keys covered by ALL_SESSION_KEYS below (NOTE(review): said to come
 * from upstream session_config.js — confirm against the pinned version):
 *   model, encoder_model, decoder_model_merged, decoder_model, embed_tokens,
 *   vision_encoder, audio_encoder, encodec_decode, prompt_encoder_mask_decoder,
 *   prepare_inputs_embeds, text_encoder
 */
/** Session names that the all-fp32 fallback dtype map assigns a dtype to. */
const ALL_SESSION_KEYS = [
  'model',
  'encoder_model',
  'decoder_model_merged',
  'decoder_model',
  'embed_tokens',
  'vision_encoder',
  'audio_encoder',
  'encodec_decode',
  'prompt_encoder_mask_decoder',
  'prepare_inputs_embeds',
  'text_encoder',
]

/**
 * Build a fresh per-session dtype map that pins every key in
 * ALL_SESSION_KEYS to 'fp32'.
 */
function allSessionsFp32(): Record<string, string> {
  return ALL_SESSION_KEYS.reduce<Record<string, string>>((dtypes, sessionKey) => {
    dtypes[sessionKey] = 'fp32'
    return dtypes
  }, {})
}

/**
 * Decide whether a (task, model) pair should be loaded with fp32 on every
 * session rather than with the task's default dtype.
 *
 * The decision is purely name-based; no browser detection happens here:
 *  - the 'text-generation' task, or any model id containing one of the listed
 *    decoder-only family names, never needs the fallback;
 *  - otherwise the fallback applies exactly to the tasks enumerated below.
 *
 * @param resolvedTask Canonical (alias-resolved) task name.
 * @param model        Model id; matched case-insensitively.
 * @returns true when the all-fp32 dtype map should be used.
 */
function needsFp32Fallback(resolvedTask: string, model: string): boolean {
  const loweredModel = model.toLowerCase()

  // Decoder-only LLMs are treated as safe to quantize.
  if (resolvedTask === 'text-generation') return false
  const decoderOnlyFamily = /qwen|llama|phi|gemma|smollm|tinyllama|stablelm/i
  if (decoderOnlyFamily.test(loweredModel)) return false

  switch (resolvedTask) {
    case 'automatic-speech-recognition': // Whisper
    case 'summarization': // BART, T5, distilbart
    case 'translation': // T5, NLLB, Marian
    case 'text2text-generation': // FLAN-T5, T5
    case 'image-to-text': // vit-gpt2, Florence-2, BLIP
    case 'document-question-answering': // Donut
    case 'zero-shot-image-classification': // CLIP
    case 'token-classification': // BERT-NER
    case 'image-classification': // ViT
    case 'image-segmentation': // DETR, SegFormer
    case 'object-detection': // DETR, OWL-ViT
    case 'zero-shot-object-detection':
    case 'depth-estimation':
    case 'image-feature-extraction':
      return true
    default:
      return false
  }
}

/**
 * Choose the dtype to load a pipeline with.
 *
 * Precedence: an explicit caller dtype wins; otherwise the all-fp32 session map
 * is used when needsFp32Fallback says so; otherwise the task's registry default
 * (which may be undefined).
 *
 * @param resolvedTask Canonical task name.
 * @param model        Model id.
 * @param device       Backend name (accepted but not consulted).
 * @param userDtype    dtype explicitly requested by the caller, if any.
 */
function pickSafeDtype(resolvedTask: string, model: string, device: string, userDtype?: DtypeSpec): DtypeSpec | undefined {
  if (userDtype !== undefined) return userDtype
  return needsFp32Fallback(resolvedTask, model)
    ? allSessionsFp32()
    : TASK_REGISTRY[resolvedTask]?.defaultDtype
}

/**
 * Load the pipeline for a task/model/device/dtype combination, or return the
 * cached one.
 *
 * - The registry (default model / dtype) is looked up with the canonical task
 *   name, i.e. with any "_xx_to_yy" suffix stripped.
 * - Upstream `pipeline()` receives the alias-resolved task WITH its suffix, so a
 *   request such as "translation_en_to_fr" keeps its language pair. The
 *   suffixed name is also part of the cache key.
 *   NOTE(review): assumes the pinned Transformers.js release accepts suffixed
 *   task names — confirm.
 * - If loading fails with what looks like the ONNX missing-scale quantization
 *   error and the caller gave no dtype, loading is retried once with fp32 on
 *   every session.
 * - Concurrent calls for the same key are not de-duplicated.
 *
 * @param opts.task             Task name, alias, or suffixed task name.
 * @param opts.model            Model id; defaults to the task's registry default.
 * @param opts.device           Backend; defaults to 'wasm'.
 * @param opts.dtype            Explicit dtype; disables the automatic fp32 choices.
 * @param opts.progressCallback Forwarded to upstream as `progress_callback`.
 * @returns The cache entry; `task` holds the canonical name and `dtype` the
 *          dtype that was actually used.
 * @throws Error when no model can be determined, or when loading fails (with a
 *         hint appended for quantization and WebGPU failures).
 */
async function getPipeline(opts: {
  task: string
  model?: string
  device?: string
  dtype?: DtypeSpec
  progressCallback?: (p: any) => void
}): Promise<CachedPipeline> {
  // Works for any thrown value (Error, plain object, number, null, …).
  const messageOf = (e: unknown): string => {
    const m = (e as { message?: unknown } | null | undefined)?.message
    return typeof m === 'string' && m ? m : String(e)
  }

  const resolvedTask = resolveTask(opts.task)
  // Alias-resolved task that keeps a language-pair suffix for upstream.
  const aliasTarget = Object.prototype.hasOwnProperty.call(TASK_ALIASES, opts.task)
    ? TASK_ALIASES[opts.task]
    : undefined
  const pipelineTask = aliasTarget ?? opts.task

  const info = TASK_REGISTRY[resolvedTask]
  const model = opts.model ?? info?.defaultModel
  if (!model) throw new Error(`Unknown task: ${opts.task}. See transformers_list_tasks.`)
  const device = opts.device ?? 'wasm'
  const dtype = pickSafeDtype(resolvedTask, model, device, opts.dtype)

  const key = cacheKey(pipelineTask, model, device, dtype)
  const hit = PIPELINE_CACHE.get(key)
  if (hit) return hit

  const { pipeline } = await getTransformers()
  const pipelineOpts: any = { device }
  if (dtype) pipelineOpts.dtype = dtype
  if (opts.progressCallback) pipelineOpts.progress_callback = opts.progressCallback

  let pipe: any
  let effectiveDtype: DtypeSpec | undefined = dtype
  try {
    pipe = await pipeline(pipelineTask, model, pipelineOpts)
  } catch (err: unknown) {
    const msg = messageOf(err)
    // Auto-retry once with all-sessions fp32 if it's the ORT quantization bug.
    // Don't retry if the user explicitly passed a dtype (respect their choice)
    // or if we already tried fp32 everywhere (would be a different error).
    const isQuantBug =
      msg.includes('weight_merged_0_scale') ||
      msg.includes('Missing required scale') ||
      msg.includes('TransposeDQWeightsForMatMulNBits')
    const triedAllFp32 = typeof dtype === 'object' && Object.values(dtype).every(v => v === 'fp32')
    if (isQuantBug && opts.dtype === undefined && !triedAllFp32) {
      try {
        const retryDtype = allSessionsFp32()
        const retryOpts = { ...pipelineOpts, dtype: retryDtype }
        pipe = await pipeline(pipelineTask, model, retryOpts)
        effectiveDtype = retryDtype
      } catch (retryErr: unknown) {
        const rmsg = messageOf(retryErr)
        throw new Error(
          `${rmsg}\n\n→ First attempt and fp32-fallback retry both failed. ` +
          `Original error: ${msg}\n` +
          `Try a different model id — this ONNX model may not have fp32 weights published.`,
        )
      }
    } else if (isQuantBug) {
      throw new Error(
        `${msg}\n\n→ HINT: ONNX quantization mismatch (Firefox+ORT). ` +
        `Either remove the \`dtype\` override, or use a different model id with fp32 weights available.`,
      )
    } else if (msg.includes('WebGPU') || msg.includes('webgpu')) {
      throw new Error(`${msg}\n\n→ HINT: WebGPU failed. Retry with device: "wasm".`)
    } else {
      throw err
    }
  }
  // The entry is stored under the key computed from the requested dtype, so an
  // identical later request hits the cache even if the fp32 retry was used.
  const entry: CachedPipeline = {
    pipe, task: resolvedTask, model, device, dtype: effectiveDtype, loadedAt: Date.now(),
  }
  PIPELINE_CACHE.set(key, entry)
  return entry
}

/* ──────────────────────────────────────────────────────────────────────────
 * Tools
 * ──────────────────────────────────────────────────────────────────────── */

/**
 * Discovery tool: returns the task registry (optionally restricted to one
 * category), the alias table, and the dtype/device names the other tools accept.
 */
export const transformersListTasksTool = tool({
  name: 'transformers_list_tasks',
  description:
    'List ALL supported Transformers.js tasks (25 pipeline types + 5 aliases) with their default models, ' +
    'type (text/audio/image/multimodal), and what each does. Use this to pick the right task before transformers_run.',
  inputSchema: z.object({
    filter: z.enum(['text', 'audio', 'image', 'multimodal', 'all']).optional(),
  }),
  callback: (input) => {
    const wanted = input.filter || 'all'
    // Keep registry order; 'all' disables the category check.
    const selected = Object.entries(TASK_REGISTRY).filter(
      ([, info]) => wanted === 'all' || info.type === wanted,
    )
    return JSON.stringify({
      status: 'success',
      count: selected.length,
      tasks: Object.fromEntries(selected),
      aliases: TASK_ALIASES,
      dtypes: ['fp32', 'fp16', 'q8', 'int8', 'uint8', 'q4', 'q4f16', 'bnb4'],
      devices: ['wasm', 'webgpu'],
    })
  },
})

/**
 * Generic runner: loads (or reuses) the pipeline for `task` and calls it with
 * `input` and `options`, returning a JSON string.
 *
 * Zero-shot tasks take their labels as a second positional argument, so
 * `options.candidate_labels` is moved out of the options object for them.
 * Results carrying a Float32Array `audio` field are summarized, and results
 * with `dims` + `data` are converted to nested arrays before serialization.
 * Failures are reported as `{ status: 'error', ... }` rather than thrown.
 */
export const transformersRunTool = tool({
  name: 'transformers_run',
  description:
    'Run ANY 🤗 Transformers.js pipeline on-device. Covers all 25 task types: ' +
    'text-classification, token-classification, question-answering, fill-mask, summarization, translation, ' +
    'text2text-generation, text-generation, zero-shot-classification, audio-classification, ' +
    'zero-shot-audio-classification, automatic-speech-recognition, text-to-audio, image-to-text, ' +
    'image-classification, image-segmentation, background-removal, zero-shot-image-classification, ' +
    'object-detection, zero-shot-object-detection, document-question-answering, image-to-image, ' +
    'depth-estimation, feature-extraction, image-feature-extraction. ' +
    'Call transformers_list_tasks first to see defaults. Models cache after first download.',
  inputSchema: z.object({
    task: z.string().describe('Pipeline task name (see transformers_list_tasks). Aliases accepted.'),
    input: z.any().describe(
      'Input matching the task type: string for text, URL/dataURL/blobURL for image/audio, ' +
      '{question,context} for QA, {image,question} for document-QA, Message[] for text-generation chat.',
    ),
    model: z.string().optional().describe('HF model id. Omit to use task default.'),
    options: z.record(z.string(), z.any()).optional().describe('Pipeline call options (max_new_tokens, top_k, candidate_labels, etc.).'),
    device: z.enum(['wasm', 'webgpu']).optional(),
    dtype: z.union([z.enum(['fp32', 'fp16', 'q8', 'int8', 'uint8', 'q4', 'q4f16', 'bnb4']), z.record(z.string(), z.string())]).optional().describe('Quantization. q4 for LLMs. For encoder-decoder models (Whisper, T5), pass an object: {encoder_model: "q8", decoder_model_merged: "fp32"}.'),
  }),
  callback: async (input) => {
    try {
      const loaded = await getPipeline({
        task: input.task,
        model: input.model,
        device: input.device,
        dtype: input.dtype,
      })

      // Pipelines called as pipe(input, candidate_labels, options).
      const labelArgTasks = [
        'zero-shot-classification',
        'zero-shot-object-detection',
        'zero-shot-image-classification',
        'zero-shot-audio-classification',
      ]

      let output: any
      if (labelArgTasks.includes(loaded.task)) {
        // Split the labels off; the remaining options are passed on as a copy.
        const { candidate_labels: candidateLabels, ...remaining } = (input.options || {}) as any
        if (!candidateLabels) {
          return JSON.stringify({
            status: 'error',
            task: loaded.task,
            error: `${loaded.task} requires options.candidate_labels: [ ... ] (array of class strings).`,
          })
        }
        output = await loaded.pipe(input.input, candidateLabels, remaining)
      } else {
        output = await loaded.pipe(input.input, input.options || {})
      }

      const header = { status: 'success', task: loaded.task, model: loaded.model }

      // Raw audio samples are summarized instead of being serialized.
      if (output && output.audio instanceof Float32Array) {
        return JSON.stringify({
          ...header,
          audio_samples: output.audio.length,
          sampling_rate: output.sampling_rate,
          duration_sec: output.audio.length / output.sampling_rate,
          hint: 'Use transformers_tts to synthesize AND play audio.',
        })
      }

      // Tensor-shaped result (has dims + data): emit plain arrays.
      const tensorLike =
        output && typeof output === 'object' && 'dims' in output && 'data' in output
      if (tensorLike) {
        return JSON.stringify({
          ...header,
          dims: output.dims,
          data: output.tolist ? output.tolist() : Array.from(output.data),
        })
      }

      return JSON.stringify({ ...header, result: output })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', task: input.task, error: (err as Error).message })
    }
  },
})

/**
 * Chat tool: runs a text-generation pipeline on a Message[] and returns the
 * final reply (plus the first raw output item) as a JSON string.
 *
 * The dtype defaults to 'q4' here and is always passed explicitly to the
 * loader. Generation runs to completion before anything is returned.
 */
export const transformersChatTool = tool({
  name: 'transformers_chat',
  description:
    'Chat with an on-device LLM via Transformers.js. Default: onnx-community/Qwen3-0.6B-ONNX (q4 quantized). ' +
    'Accepts a Message[] ([{role:"system"|"user"|"assistant", content:"..."}]). ' +
    'Applies the model\'s chat template automatically. Fully local, no API call.',
  inputSchema: z.object({
    messages: z.array(
      z.object({
        role: z.enum(['system', 'user', 'assistant']),
        content: z.string(),
      }),
    ),
    model: z.string().optional().describe('Default: onnx-community/Qwen3-0.6B-ONNX'),
    max_new_tokens: z.number().optional().describe('Default 256'),
    temperature: z.number().optional(),
    top_p: z.number().optional(),
    top_k: z.number().optional(),
    do_sample: z.boolean().optional(),
    dtype: z.union([z.enum(['fp32', 'fp16', 'q8', 'int8', 'uint8', 'q4', 'q4f16', 'bnb4']), z.record(z.string(), z.string())]).optional().describe('Default: q4. For non-LLMs pass fp16 or fp32.'),
    device: z.enum(['wasm', 'webgpu']).optional(),
  }),
  callback: async (input) => {
    try {
      const loaded = await getPipeline({
        task: 'text-generation',
        model: input.model,
        device: input.device,
        dtype: input.dtype ?? 'q4',
      })

      // Only forward sampling settings the caller actually provided.
      const generation: any = { max_new_tokens: input.max_new_tokens ?? 256 }
      for (const name of ['temperature', 'top_p', 'top_k', 'do_sample'] as const) {
        if (input[name] !== undefined) generation[name] = input[name]
      }

      const raw = await loaded.pipe(input.messages, generation)
      const first = Array.isArray(raw) ? raw[0] : raw

      // generated_text is either the whole conversation (last item = reply)
      // or a plain string.
      const generated = first?.generated_text
      let reply: string | null
      if (typeof generated === 'string') {
        reply = generated
      } else if (Array.isArray(generated)) {
        const tail = generated[generated.length - 1]
        reply = typeof tail === 'string' ? tail : tail?.content
      } else {
        reply = null
      }

      return JSON.stringify({
        status: 'success',
        model: loaded.model,
        reply,
        full_output: first,
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/**
 * Text-to-speech tool: synthesizes `text` with a text-to-audio pipeline and,
 * unless `autoplay` is false, plays the samples through the Web Audio API.
 *
 * Each playback uses its own AudioContext, which is closed once the clip has
 * finished so repeated calls do not accumulate open contexts. The returned JSON
 * contains metadata only (sample count, rate, duration), never the samples.
 * Failures are reported as `{ status: 'error', ... }` rather than thrown.
 */
export const transformersTtsTool = tool({
  name: 'transformers_tts',
  description:
    'On-device text-to-speech. Default: onnx-community/Supertonic-TTS-ONNX. ' +
    'Synthesizes AND plays audio via Web Audio API. Also works with SpeechT5 (set speaker_embeddings URL).',
  inputSchema: z.object({
    text: z.string(),
    model: z.string().optional().describe('Default: Supertonic. For SpeechT5: Xenova/speecht5_tts.'),
    speaker_embeddings: z.string().optional().describe('Required for SpeechT5 (URL to .bin).'),
    autoplay: z.boolean().optional().describe('Default true. false returns audio metadata only.'),
    device: z.enum(['wasm', 'webgpu']).optional(),
    dtype: z.union([z.enum(['fp32', 'fp16', 'q8', 'int8', 'uint8', 'q4', 'q4f16', 'bnb4']), z.record(z.string(), z.string())]).optional(),
  }),
  callback: async (input) => {
    try {
      const entry = await getPipeline({
        task: 'text-to-audio',
        model: input.model,
        device: input.device,
        dtype: input.dtype,
      })
      const opts: any = {}
      if (input.speaker_embeddings) {
        opts.speaker_embeddings = input.speaker_embeddings
      } else if ((input.model || '').toLowerCase().includes('speecht5')) {
        // SpeechT5 needs a speaker embedding; fall back to a published default.
        opts.speaker_embeddings =
          'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'
      }
      const out = await entry.pipe(input.text, opts)

      const shouldPlay = input.autoplay !== false
      if (shouldPlay) {
        // globalThis instead of window so a missing global gives the clear
        // error below rather than a ReferenceError.
        const g = globalThis as any
        const AC = g.AudioContext || g.webkitAudioContext
        if (!AC) {
          throw new Error('Web Audio API (AudioContext) is not available here. Pass autoplay: false to get metadata only.')
        }
        const ctx = new AC()
        const buf = ctx.createBuffer(1, out.audio.length, out.sampling_rate)
        buf.getChannelData(0).set(out.audio)
        const src = ctx.createBufferSource()
        src.buffer = buf
        src.connect(ctx.destination)
        // Release the context when playback ends. Best-effort: a failure to
        // close (e.g. already closed) must not surface as an unhandled rejection.
        src.onended = () => {
          void Promise.resolve()
            .then(() => ctx.close())
            .catch(() => {})
        }
        src.start()
      }
      return JSON.stringify({
        status: shouldPlay ? 'playing' : 'synthesized',
        model: entry.model,
        samples: out.audio.length,
        sampling_rate: out.sampling_rate,
        duration_sec: out.audio.length / out.sampling_rate,
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/**
 * Speech-to-text tool: runs an automatic-speech-recognition pipeline on an
 * audio URL and returns the pipeline's result fields merged into the JSON
 * response next to `status` and `model`.
 */
export const transformersSttTool = tool({
  name: 'transformers_stt',
  description:
    'On-device speech-to-text via Whisper (default: Xenova/whisper-tiny.en). ' +
    'Pass an audio URL (http/blob/data). Supports chunked long-form transcription with return_timestamps.',
  inputSchema: z.object({
    audio: z.string().describe('Audio URL (http/blob/data).'),
    model: z.string().optional().describe('Default: Xenova/whisper-tiny.en. Others: whisper-base, whisper-small.en, whisper-large-v3-turbo.'),
    language: z.string().optional().describe('ISO-639-1 code, e.g. "en", "tr", "fr". Multilingual Whisper only.'),
    task: z.enum(['transcribe', 'translate']).optional(),
    return_timestamps: z.union([z.boolean(), z.literal('word')]).optional(),
    chunk_length_s: z.number().optional().describe('For long-form audio (e.g. 30).'),
    stride_length_s: z.number().optional(),
    device: z.enum(['wasm', 'webgpu']).optional(),
    dtype: z.union([z.enum(['fp32', 'fp16', 'q8', 'int8', 'uint8', 'q4', 'q4f16', 'bnb4']), z.record(z.string(), z.string())]).optional().describe('Auto-selects a Firefox-compatible dtype if omitted. Override with e.g. {encoder_model: "fp32", decoder_model_merged: "fp32"} if you hit ONNX errors.'),
  }),
  callback: async (input) => {
    try {
      const loaded = await getPipeline({
        task: 'automatic-speech-recognition',
        model: input.model,
        device: input.device,
        dtype: input.dtype,
      })

      // Forward only what the caller set: language/task when non-empty,
      // the remaining settings whenever they are defined (false/0 included).
      const callOptions: any = {
        ...(input.language ? { language: input.language } : {}),
        ...(input.task ? { task: input.task } : {}),
        ...(input.return_timestamps !== undefined ? { return_timestamps: input.return_timestamps } : {}),
        ...(input.chunk_length_s !== undefined ? { chunk_length_s: input.chunk_length_s } : {}),
        ...(input.stride_length_s !== undefined ? { stride_length_s: input.stride_length_s } : {}),
      }

      const transcription = await loaded.pipe(input.audio, callOptions)
      return JSON.stringify({ status: 'success', model: loaded.model, ...transcription })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/**
 * Embedding tool: runs a feature-extraction (text, default) or
 * image-feature-extraction pipeline and returns the vectors as nested arrays
 * together with the tensor dims.
 *
 * `pooling` defaults to 'mean' and `normalize` to true; both are passed to the
 * pipeline call for either task.
 * NOTE(review): whether the image pipeline honours these two options is not
 * visible from this file — confirm against upstream.
 * Failures are reported as `{ status: 'error', ... }` rather than thrown.
 */
export const transformersEmbedTool = tool({
  name: 'transformers_embed',
  description:
    // Default id matches what the task registry actually loads.
    'Generate embeddings on-device. For text: onnx-community/all-MiniLM-L6-v2-ONNX (default). ' +
    'For images: pass task="image-feature-extraction" with default DINOv3. Returns vector(s) + dims.',
  inputSchema: z.object({
    input: z.union([z.string(), z.array(z.string())]).describe('Text string(s) or image URL(s)'),
    task: z.enum(['feature-extraction', 'image-feature-extraction']).optional().describe('Default: feature-extraction (text)'),
    model: z.string().optional(),
    pooling: z.enum(['none', 'mean', 'cls']).optional(),
    normalize: z.boolean().optional(),
    device: z.enum(['wasm', 'webgpu']).optional(),
    dtype: z.union([z.enum(['fp32', 'fp16', 'q8']), z.record(z.string(), z.string())]).optional(),
  }),
  callback: async (input) => {
    try {
      const entry = await getPipeline({
        task: input.task ?? 'feature-extraction',
        model: input.model,
        device: input.device,
        dtype: input.dtype,
      })
      const opts: any = {
        pooling: input.pooling ?? 'mean',
        normalize: input.normalize ?? true,
      }
      const out = await entry.pipe(input.input, opts)
      // Prefer the tensor's own nested-array conversion; fall back to a flat copy.
      const arr = out.tolist ? out.tolist() : Array.from(out.data)
      return JSON.stringify({
        status: 'success',
        model: entry.model,
        dims: out.dims,
        count: Array.isArray(input.input) ? input.input.length : 1,
        embeddings: arr,
      })
    } catch (err: unknown) {
      // Non-Error throwables have no `.message`; stringify them instead of
      // emitting an error object without an `error` field.
      const message = err instanceof Error && err.message ? err.message : String(err)
      return JSON.stringify({ status: 'error', error: message })
    }
  },
})

/**
 * Configuration tool: writes the provided settings onto the library's global
 * `env` object and reports the five managed settings before and after.
 * Settings left undefined in the input are not touched.
 */
export const transformersConfigureTool = tool({
  name: 'transformers_configure',
  description:
    'Configure global Transformers.js env: remote host, cache, allowed model sources, backend settings. ' +
    'Affects ALL subsequent pipeline loads.',
  inputSchema: z.object({
    allowRemoteModels: z.boolean().optional(),
    allowLocalModels: z.boolean().optional(),
    remoteHost: z.string().optional().describe('Default: https://huggingface.co/'),
    remotePathTemplate: z.string().optional(),
    useBrowserCache: z.boolean().optional(),
  }),
  callback: async (input) => {
    try {
      const { env } = await getTransformers()
      const managed = [
        'allowRemoteModels',
        'allowLocalModels',
        'remoteHost',
        'remotePathTemplate',
        'useBrowserCache',
      ] as const

      // Copy the current value of each managed setting.
      const snapshot = () => {
        const values: any = {}
        for (const name of managed) values[name] = env[name]
        return values
      }

      const before = snapshot()
      for (const name of managed) {
        if (input[name] !== undefined) env[name] = input[name]
      }

      return JSON.stringify({ status: 'success', before, after: snapshot() })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/**
 * Status tool: reports every cached pipeline (key, task, model, device, dtype,
 * seconds since load), whether the library module has been imported yet, and
 * its version when available.
 */
export const transformersStatusTool = tool({
  name: 'transformers_status',
  description: 'List all currently loaded Transformers.js pipelines with task, model, device, dtype, age.',
  inputSchema: z.object({}),
  callback: async () => {
    const now = Date.now()

    const pipelines: any[] = []
    for (const [key, cached] of PIPELINE_CACHE) {
      pipelines.push({
        key,
        task: cached.task,
        model: cached.model,
        device: cached.device,
        dtype: cached.dtype ?? 'default',
        loaded_ago_sec: Math.round((now - cached.loadedAt) / 1000),
      })
    }

    // Version lookup is best-effort and only attempted once the module is loaded.
    let version: string | undefined
    if (_transformers) {
      try {
        version = _transformers.env?.version
      } catch {}
    }

    return JSON.stringify({
      status: 'success',
      transformers_loaded: _transformers !== null,
      transformers_version: version,
      pipeline_count: pipelines.length,
      pipelines,
    })
  },
})

/**
 * Unload tool: disposes matching cached pipelines and removes them from the
 * cache.
 *
 * An entry matches when `all` is true, or when every provided filter (task,
 * model, device, dtype) equals the entry's value; with no filters everything
 * matches. dtype filters are compared by content, so a per-session dtype object
 * matches an entry loaded with the same mapping regardless of key order.
 *
 * If `dispose()` throws for an entry, the message is collected under `errors`
 * and that entry stays in the cache.
 */
export const transformersUnloadTool = tool({
  name: 'transformers_unload',
  description:
    'Unload a cached pipeline AND dispose its underlying model to free WASM/WebGPU memory. ' +
    'Pass specific (task,model,device,dtype) or just task to unload all matching.',
  inputSchema: z.object({
    task: z.string().optional().describe('Omit to unload ALL pipelines'),
    model: z.string().optional(),
    device: z.enum(['wasm', 'webgpu']).optional(),
    dtype: z.union([z.string(), z.record(z.string(), z.string())]).optional(),
    all: z.boolean().optional().describe('Unload every pipeline'),
  }),
  callback: async (input) => {
    try {
      const disposed: string[] = []
      const errors: string[] = []

      // Order-insensitive signature: strings map to themselves, objects to
      // their sorted [key, value] pairs. `===` on two objects only compares
      // references and would never match a freshly parsed filter.
      const dtypeSignature = (d: unknown): string => {
        if (d == null) return 'default'
        if (typeof d !== 'object') return String(d)
        const map = d as Record<string, unknown>
        return JSON.stringify(Object.keys(map).sort().map((k) => [k, map[k]]))
      }

      const wantedTask = input.task ? resolveTask(input.task) : undefined
      const wantedDtype = input.dtype ? dtypeSignature(input.dtype) : undefined

      const matches = (e: CachedPipeline) =>
        input.all === true ||
        ((wantedTask === undefined || e.task === wantedTask) &&
          (!input.model || e.model === input.model) &&
          (!input.device || e.device === input.device) &&
          (wantedDtype === undefined || dtypeSignature(e.dtype) === wantedDtype))

      // Iterate over a copy because entries are deleted during the loop.
      for (const [key, entry] of Array.from(PIPELINE_CACHE.entries())) {
        if (!matches(entry)) continue
        try {
          if (entry.pipe?.dispose) await entry.pipe.dispose()
          PIPELINE_CACHE.delete(key)
          disposed.push(key)
        } catch (e: unknown) {
          errors.push(`${key}: ${(e as Error).message}`)
        }
      }
      return JSON.stringify({
        status: 'success',
        disposed_count: disposed.length,
        disposed,
        errors: errors.length ? errors : undefined,
        remaining: PIPELINE_CACHE.size,
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/**
 * Capability probe: reports WebGPU availability (an adapter could be
 * requested), adapter details, SharedArrayBuffer presence, cross-origin
 * isolation, core count and user agent, plus a device recommendation.
 *
 * Adapter details are read from `adapter.info` when present, falling back to
 * the older `requestAdapterInfo()` method, and copied field by field into a
 * plain object.
 * NOTE(review): the copy assumes the adapter-info fields are prototype
 * accessors that JSON.stringify would otherwise omit — confirm.
 */
export const transformersDeviceInfoTool = tool({
  name: 'transformers_device_info',
  description:
    'Probe runtime capabilities: WebGPU support, WASM threads, SharedArrayBuffer, cross-origin-isolation. ' +
    'Use this before choosing device: "webgpu".',
  inputSchema: z.object({}),
  callback: async () => {
    try {
      const nav = navigator as any
      let webgpu = false
      let adapterInfo: Record<string, unknown> | null = null
      if (nav.gpu) {
        try {
          const adapter = await nav.gpu.requestAdapter()
          if (adapter) {
            webgpu = true
            const raw =
              adapter.info ?? (await adapter.requestAdapterInfo?.().catch(() => null))
            if (raw) {
              adapterInfo = {
                vendor: raw.vendor,
                architecture: raw.architecture,
                device: raw.device,
                description: raw.description,
              }
            }
          }
        } catch {
          // Best-effort probe: any failure here just leaves the values gathered so far.
        }
      }
      return JSON.stringify({
        status: 'success',
        webgpu_supported: webgpu,
        adapter: adapterInfo,
        shared_array_buffer: typeof SharedArrayBuffer !== 'undefined',
        // globalThis so the probe also works where `window` is not defined.
        cross_origin_isolated: (globalThis as any).crossOriginIsolated ?? false,
        hardware_concurrency: navigator.hardwareConcurrency,
        user_agent: navigator.userAgent,
        recommendation: webgpu
          ? 'WebGPU available — use device: "webgpu" for ~2-10x speedup on supported models (most LLMs, embeddings, vision models).'
          : 'WebGPU not available — fallback to device: "wasm". Works everywhere but slower.',
      })
    } catch (err: unknown) {
      return JSON.stringify({ status: 'error', error: (err as Error).message })
    }
  },
})

/** All tools defined in this module, collected in one array for registration. */
export const TRANSFORMERS_TOOLS = [
  transformersListTasksTool,
  transformersDeviceInfoTool,
  transformersRunTool,
  transformersChatTool,
  transformersTtsTool,
  transformersSttTool,
  transformersEmbedTool,
  transformersConfigureTool,
  transformersStatusTool,
  transformersUnloadTool,
]