// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { type AudioBuffer, mergeFrames, normalizeLanguage, stt } from '@livekit/agents';
import type { AudioFrame } from '@livekit/rtc-node';
import { OpenAI } from 'openai';
import type { GroqAudioModels, WhisperModels } from './models.js';

/** Configuration accepted by the {@link STT} constructor. */
export interface STTOptions {
  /**
   * API key handed to the OpenAI client. The default options read it from the
   * `OPENAI_API_KEY` environment variable; the constructor throws when it is undefined.
   */
  apiKey?: string;
  /**
   * Language code sent as the `language` field of each transcription request. The
   * constructor passes it through `normalizeLanguage` before storing it.
   */
  language: string;
  /** Optional text sent as the `prompt` field of each transcription request. */
  prompt?: string;
  /**
   * NOTE(review): no code visible in this file reads this flag, so `language` is sent with
   * every request regardless of its value. Confirm the intended behavior.
   */
  detectLanguage: boolean;
  /** Model identifier sent as the `model` field of each transcription request. */
  model: WhisperModels | string;
  /** Base URL given to the OpenAI client the constructor creates; unused when `client` is set. */
  baseURL?: string;
  /**
   * Pre-configured OpenAI client. When set, the constructor uses it instead of creating one
   * from `apiKey` and `baseURL` (the `apiKey` check in the constructor still applies).
   */
  client?: OpenAI;
}

/**
 * Defaults that the `STT` constructor merges underneath caller-supplied options; also used
 * as the constructor's default argument.
 */
const defaultSTTOptions: STTOptions = {
  // Read once, when this module is evaluated.
  apiKey: process.env.OPENAI_API_KEY,
  language: 'en',
  detectLanguage: false,
  model: 'whisper-1',
};

/**
 * Non-streaming speech-to-text that sends audio to the OpenAI audio transcription endpoint,
 * or to an OpenAI-compatible endpoint (see {@link STT.withGroq} and {@link STT.withOVHcloud}).
 */
export class STT extends stt.STT {
  #opts: STTOptions;
  #client: OpenAI;
  label = 'openai.STT';

  /** Model identifier this instance was configured with. */
  get model(): string {
    return this.#opts.model;
  }

  /** Host of the client's base URL, or `'api.openai.com'` when that URL cannot be parsed. */
  get provider(): string {
    try {
      return new URL(this.#client.baseURL).host;
    } catch {
      return 'api.openai.com';
    }
  }

  /**
   * Create a new instance of OpenAI STT.
   *
   * @remarks
   * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the
   * `OPENAI_API_KEY` environment variable.
   *
   * @param opts - Overrides applied on top of the default options.
   * @throws Error when no API key is available after merging options with the defaults.
   */
  constructor(opts: Partial<STTOptions> = defaultSTTOptions) {
    super({ streaming: false, interimResults: false, alignedTranscript: false });

    this.#opts = {
      ...defaultSTTOptions,
      ...opts,
      language: normalizeLanguage(opts.language ?? defaultSTTOptions.language),
    };
    if (this.#opts.apiKey === undefined) {
      throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');
    }

    // Prefer a caller-supplied client; otherwise build one from the merged options.
    this.#client =
      this.#opts.client ??
      new OpenAI({
        baseURL: this.#opts.baseURL,
        apiKey: this.#opts.apiKey,
      });
  }

  /**
   * Create a new instance of Groq STT.
   *
   * @remarks
   * `apiKey` must be set to your Groq API key, either using the argument or by setting the
   * `GROQ_API_KEY` environment variable.
   *
   * @param opts - Overrides for the Groq defaults; the object is not modified.
   * @throws Error when neither `opts.apiKey` nor `GROQ_API_KEY` provides a key.
   */
  static withGroq(
    opts: Partial<{
      model: string | GroqAudioModels;
      apiKey?: string;
      baseURL?: string;
      client: OpenAI;
      language: string;
      detectLanguage: boolean;
    }> = {},
  ): STT {
    // Resolve into a local instead of writing the key back into the caller's object.
    const apiKey = opts.apiKey || process.env.GROQ_API_KEY;
    if (apiKey === undefined) {
      throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY');
    }

    return new STT({
      model: 'whisper-large-v3-turbo',
      baseURL: 'https://api.groq.com/openai/v1',
      ...opts,
      apiKey,
    });
  }

  /**
   * Create a new instance of OVHcloud AI Endpoints STT.
   *
   * @remarks
   * `apiKey` must be set to your OVHcloud AI Endpoints API key, either using the argument or by setting the
   * `OVHCLOUD_API_KEY` environment variable.
   *
   * @param opts - Overrides for the OVHcloud defaults; the object is not modified.
   * @throws Error when neither `opts.apiKey` nor `OVHCLOUD_API_KEY` provides a key.
   */
  static withOVHcloud(
    opts: Partial<{
      model: string;
      apiKey?: string;
      baseURL?: string;
      client: OpenAI;
      language: string;
      detectLanguage: boolean;
    }> = {},
  ): STT {
    // Resolve into a local instead of writing the key back into the caller's object.
    const apiKey = opts.apiKey || process.env.OVHCLOUD_API_KEY;
    if (apiKey === undefined) {
      throw new Error(
        'OVHcloud AI Endpoints API key is required, whether as an argument or as $OVHCLOUD_API_KEY',
      );
    }

    return new STT({
      model: 'whisper-large-v3-turbo',
      baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1',
      ...opts,
      apiKey,
    });
  }

  /**
   * Return the options to use for a request: the stored options as-is, or a copy with
   * `language` replaced by the normalized override when one is given.
   */
  #sanitizeOptions(language?: string): STTOptions {
    if (!language) {
      return this.#opts;
    }
    return { ...this.#opts, language: normalizeLanguage(language) };
  }

  /**
   * Wrap the frame's samples in a 44-byte RIFF/WAVE header declaring 16-bit PCM with the
   * frame's sample rate and channel count.
   */
  #createWav(frame: AudioFrame): Buffer {
    const bitsPerSample = 16;
    const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;
    const blockAlign = (frame.channels * bitsPerSample) / 8;
    const dataSize = frame.data.byteLength;

    const header = Buffer.alloc(44);
    header.write('RIFF', 0);
    header.writeUInt32LE(36 + dataSize, 4); // RIFF chunk size: rest of header + samples
    header.write('WAVE', 8);
    header.write('fmt ', 12);
    header.writeUInt32LE(16, 16); // size of the fmt chunk body
    header.writeUInt16LE(1, 20); // audio format 1 = PCM
    header.writeUInt16LE(frame.channels, 22);
    header.writeUInt32LE(frame.sampleRate, 24);
    header.writeUInt32LE(byteRate, 28);
    header.writeUInt16LE(blockAlign, 32);
    header.writeUInt16LE(bitsPerSample, 34);
    header.write('data', 36);
    header.writeUInt32LE(dataSize, 40);

    // `frame.data` may be a view into a larger ArrayBuffer; restrict the copy to the view's
    // own window so the payload matches the size declared in the header.
    const samples = Buffer.from(frame.data.buffer, frame.data.byteOffset, dataSize);
    return Buffer.concat([header, samples]);
  }

  /**
   * Transcribe the given audio with a single transcription request.
   *
   * @param buffer - Audio to transcribe; passed through `mergeFrames` before encoding as WAV.
   * @param abortSignal - Optional signal forwarded to the request.
   * @returns A `FINAL_TRANSCRIPT` event with one alternative holding the response text and
   * the configured language; timing and confidence fields are set to 0.
   */
  async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {
    const config = this.#sanitizeOptions();
    const frame = mergeFrames(buffer);
    const wavBuffer = this.#createWav(frame);
    const file = new File([new Uint8Array(wavBuffer)], 'audio.wav', { type: 'audio/wav' });

    // NOTE(review): `config.detectLanguage` is not consulted here, so the configured language
    // is always sent with the request. Confirm whether it should be omitted when the flag is set.
    const resp = await this.#client.audio.transcriptions.create(
      {
        file,
        model: config.model,
        language: config.language,
        prompt: config.prompt,
        response_format: 'json',
      },
      {
        signal: abortSignal,
      },
    );

    return {
      type: stt.SpeechEventType.FINAL_TRANSCRIPT,
      alternatives: [
        {
          text: resp.text || '',
          language: normalizeLanguage(config.language || ''),
          startTime: 0,
          endTime: 0,
          confidence: 0,
        },
      ],
    };
  }

  /** This method throws an error; streaming is unsupported on OpenAI STT. */
  stream(): stt.SpeechStream {
    throw new Error('Streaming is not supported on OpenAI STT');
  }
}