{"version":3,"sources":["../../src/beta/gemini_tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type * as types from '@google/genai';\nimport { GoogleGenAI } from '@google/genai';\nimport {\n  type APIConnectOptions,\n  APIConnectionError,\n  APIStatusError,\n  AudioByteStream,\n  isAPIError,\n  shortuuid,\n  tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { GeminiTTSModels } from '../models.js';\n\nexport type { GeminiTTSModels } from '../models.js';\n\nexport type GeminiVoices =\n  | 'Zephyr'\n  | 'Puck'\n  | 'Charon'\n  | 'Kore'\n  | 'Fenrir'\n  | 'Leda'\n  | 'Orus'\n  | 'Aoede'\n  | 'Callirrhoe'\n  | 'Autonoe'\n  | 'Enceladus'\n  | 'Iapetus'\n  | 'Umbriel'\n  | 'Algieba'\n  | 'Despina'\n  | 'Erinome'\n  | 'Algenib'\n  | 'Rasalgethi'\n  | 'Laomedeia'\n  | 'Achernar'\n  | 'Alnilam'\n  | 'Schedar'\n  | 'Gacrux'\n  | 'Pulcherrima'\n  | 'Achird'\n  | 'Zubenelgenubi'\n  | 'Vindemiatrix'\n  | 'Sadachbia'\n  | 'Sadaltager'\n  | 'Sulafat';\n\nconst DEFAULT_MODEL: GeminiTTSModels = 'gemini-2.5-flash-lite-preview-tts';\nconst DEFAULT_VOICE: GeminiVoices = 'Kore';\nconst DEFAULT_SAMPLE_RATE = 24000; // not configurable\nconst NUM_CHANNELS = 1;\nconst DEFAULT_INSTRUCTIONS = \"Say the text with a proper tone, don't omit or add any words\";\n\nexport interface TTSOptions {\n  model: GeminiTTSModels | string;\n  voiceName: GeminiVoices | string;\n  vertexai: boolean;\n  project?: string;\n  location?: string;\n  instructions?: string;\n  customPronunciations?: CustomPronunciations;\n}\n\nexport interface CustomPronunciationParams {\n  phrase: string;\n  pronunciation: string;\n  phoneticEncoding?: string;\n}\n\nexport interface CustomPronunciations {\n  pronunciations: CustomPronunciationParams[];\n}\n\nexport class TTS extends tts.TTS {\n  #opts: TTSOptions;\n  #client: GoogleGenAI;\n  label = 'google.gemini.TTS';\n\n  /**\n   * Create a new instance of Gemini TTS.\n   *\n   * Environment Requirements:\n   * - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.\n   * - For Google Gemini API: Set the `apiKey` argument or the `GOOGLE_API_KEY` environment variable.\n   *\n   * @param opts - Configuration options for Gemini TTS\n   */\n  constructor({\n    model = DEFAULT_MODEL,\n    voiceName = DEFAULT_VOICE,\n    apiKey,\n    vertexai,\n    project,\n    location,\n    instructions,\n    customPronunciations,\n  }: Partial<TTSOptions & { apiKey: string }> = {}) {\n    super(DEFAULT_SAMPLE_RATE, NUM_CHANNELS, { streaming: false });\n\n    const gcpProject: string | undefined = project || process.env.GOOGLE_CLOUD_PROJECT;\n    const gcpLocation: string | undefined =\n      location || process.env.GOOGLE_CLOUD_LOCATION || 'us-central1';\n    const useVertexai = vertexai ?? process.env.GOOGLE_GENAI_USE_VERTEXAI === 'true';\n    const geminiApiKey = apiKey || process.env.GOOGLE_API_KEY;\n\n    let finalProject: string | undefined = gcpProject;\n    let finalLocation: string | undefined = gcpLocation;\n    let finalApiKey: string | undefined = geminiApiKey;\n\n    if (useVertexai) {\n      if (!finalProject) {\n        throw new APIConnectionError({\n          message:\n            'Project ID is required for Vertex AI. Set via project option or GOOGLE_CLOUD_PROJECT environment variable',\n        });\n      }\n      finalApiKey = undefined;\n    } else {\n      finalProject = undefined;\n      finalLocation = undefined;\n      if (!finalApiKey) {\n        throw new APIConnectionError({\n          message:\n            'API key is required for Google API either via apiKey or GOOGLE_API_KEY environment variable',\n        });\n      }\n    }\n\n    this.#opts = {\n      model,\n      voiceName,\n      vertexai: useVertexai,\n      project: finalProject,\n      location: finalLocation,\n      instructions: instructions ?? DEFAULT_INSTRUCTIONS,\n      customPronunciations,\n    };\n\n    const clientOptions: types.GoogleGenAIOptions = useVertexai\n      ? {\n          vertexai: true,\n          project: finalProject,\n          location: finalLocation,\n        }\n      : {\n          apiKey: finalApiKey,\n        };\n\n    this.#client = new GoogleGenAI(clientOptions);\n  }\n\n  synthesize(\n    text: string,\n    connOptions?: APIConnectOptions,\n    abortSignal?: AbortSignal,\n  ): ChunkedStream {\n    return new ChunkedStream(text, this, connOptions, abortSignal);\n  }\n\n  /**\n   * Update the TTS options.\n   *\n   * @param opts - Options to update\n   */\n  updateOptions(opts: { voiceName?: GeminiVoices | string }) {\n    if (opts.voiceName !== undefined) {\n      this.#opts.voiceName = opts.voiceName;\n    }\n  }\n\n  stream(): tts.SynthesizeStream {\n    throw new Error('Streaming is not supported on Gemini TTS');\n  }\n\n  get opts(): TTSOptions {\n    return this.#opts;\n  }\n\n  get client(): GoogleGenAI {\n    return this.#client;\n  }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n  #tts: TTS;\n  label = 'google.gemini.ChunkedStream';\n\n  constructor(\n    inputText: string,\n    tts: TTS,\n    connOptions?: APIConnectOptions,\n    abortSignal?: AbortSignal,\n  ) {\n    super(inputText, tts, connOptions, abortSignal);\n    this.#tts = tts;\n  }\n\n  protected async run() {\n    const requestId = shortuuid();\n    const bstream = new AudioByteStream(this.#tts.sampleRate, this.#tts.numChannels);\n\n    const config: types.GenerateContentConfig = {\n      responseModalities: ['AUDIO'],\n      speechConfig: {\n        voiceConfig: {\n          prebuiltVoiceConfig: {\n            voiceName: this.#tts.opts.voiceName,\n          },\n        },\n      },\n      abortSignal: this.abortSignal,\n    };\n\n    let inputText = this.inputText;\n    const instructions = [\n      this.#tts.opts.instructions,\n      formatCustomPronunciations(this.#tts.opts.customPronunciations),\n    ]\n      .filter((instruction): instruction is string => !!instruction)\n      .join('\\n');\n    if (instructions) {\n      inputText = `${instructions}:\\n\"${inputText}\"`;\n    }\n\n    const contents: types.Content[] = [\n      {\n        role: 'user',\n        parts: [{ text: inputText }],\n      },\n    ];\n\n    try {\n      const responseStream = await this.#tts.client.models.generateContentStream({\n        model: this.#tts.opts.model,\n        contents,\n        config,\n      });\n\n      for await (const response of responseStream) {\n        await this.#processResponse(response, bstream, requestId);\n      }\n    } catch (error: unknown) {\n      if (error instanceof Error && error.name === 'AbortError') {\n        return;\n      }\n      if (isAPIError(error)) throw error;\n\n      const err = error as {\n        code?: number;\n        message?: string;\n        status?: string;\n        type?: string;\n      };\n\n      if (err.code && err.code >= 400 && err.code < 500) {\n        if (err.code === 429) {\n          throw new APIStatusError({\n            message: `Gemini TTS: Rate limit error - ${err.message || 'Unknown error'}`,\n            options: {\n              statusCode: 429,\n              retryable: true,\n            },\n          });\n        } else {\n          throw new APIStatusError({\n            message: `Gemini TTS: Client error (${err.code}) - ${err.message || 'Unknown error'}`,\n            options: {\n              statusCode: err.code,\n              retryable: false,\n            },\n          });\n        }\n      }\n\n      if (err.code && err.code >= 500) {\n        throw new APIStatusError({\n          message: `Gemini TTS: Server error (${err.code}) - ${err.message || 'Unknown error'}`,\n          options: {\n            statusCode: err.code,\n            retryable: true,\n          },\n        });\n      }\n\n      throw new APIConnectionError({\n        message: `Gemini TTS: Connection error - ${err.message || 'Unknown error'}`,\n        options: { retryable: true },\n      });\n    } finally {\n      this.queue.close();\n    }\n  }\n\n  async #processResponse(\n    response: types.GenerateContentResponse,\n    bstream: AudioByteStream,\n    requestId: string,\n  ) {\n    if (!response.candidates || response.candidates.length === 0) {\n      return;\n    }\n\n    const candidate = response.candidates[0];\n    if (!candidate || !candidate.content?.parts) {\n      return;\n    }\n\n    let lastFrame: AudioFrame | undefined;\n    const sendLastFrame = (final: boolean) => {\n      if (lastFrame) {\n        this.queue.put({\n          requestId,\n          frame: lastFrame,\n          segmentId: requestId,\n          final,\n        });\n        lastFrame = undefined;\n      }\n    };\n\n    for (const part of candidate.content.parts) {\n      if (part.inlineData?.data && part.inlineData.mimeType?.startsWith('audio/')) {\n        const audioBuffer = Buffer.from(part.inlineData.data, 'base64');\n\n        for (const frame of bstream.write(audioBuffer)) {\n          sendLastFrame(false);\n          lastFrame = frame;\n        }\n      }\n    }\n\n    for (const frame of bstream.flush()) {\n      sendLastFrame(false);\n      lastFrame = frame;\n    }\n\n    sendLastFrame(true);\n  }\n}\n\nfunction formatCustomPronunciations(\n  customPronunciations?: CustomPronunciations,\n): string | undefined {\n  if (!customPronunciations?.pronunciations.length) {\n    return undefined;\n  }\n\n  const rules = customPronunciations.pronunciations.map((pronunciation) => {\n    const encoding = pronunciation.phoneticEncoding\n      ? ` using ${pronunciation.phoneticEncoding}`\n      : '';\n    return `- Pronounce \"${pronunciation.phrase}\" as \"${pronunciation.pronunciation}\"${encoding}`;\n  });\n\n  return ['Use these custom pronunciations when speaking the text:', ...rules].join('\\n');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,mBAA4B;AAC5B,oBAQO;AAsCP,MAAM,gBAAiC;AACvC,MAAM,gBAA8B;AACpC,MAAM,sBAAsB;AAC5B,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAsBtB,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA;AAAA,EACA,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,YAAY;AAAA,IACV,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,IAA8C,CAAC,GAAG;AAChD,UAAM,qBAAqB,cAAc,EAAE,WAAW,MAAM,CAAC;AAE7D,UAAM,aAAiC,WAAW,QAAQ,IAAI;AAC9D,UAAM,cACJ,YAAY,QAAQ,IAAI,yBAAyB;AACnD,UAAM,cAAc,YAAY,QAAQ,IAAI,8BAA8B;AAC1E,UAAM,eAAe,UAAU,QAAQ,IAAI;AAE3C,QAAI,eAAmC;AACvC,QAAI,gBAAoC;AACxC,QAAI,cAAkC;AAEtC,QAAI,aAAa;AACf,UAAI,CAAC,cAAc;AACjB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AACA,oBAAc;AAAA,IAChB,OAAO;AACL,qBAAe;AACf,sBAAgB;AAChB,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI,iCAAmB;AAAA,UAC3B,SACE;AAAA,QACJ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,cAAc,gBAAgB;AAAA,MAC9B;AAAA,IACF;AAEA,UAAM,gBAA0C,cAC5C;AAAA,MACE,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,IACZ,IACA;AAAA,MACE,QAAQ;AAAA,IACV;AAEJ,SAAK,UAAU,IAAI,yBAAY,aAAa;AAAA,EAC9C;AAAA,EAEA,WACE,MACA,aACA,aACe;AACf,WAAO,IAAI,cAAc,MAAM,MAAM,aAAa,WAAW;AAAA,EAC/D;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6C;AACzD,QAAI,KAAK,cAAc,QAAW;AAChC,WAAK,MAAM,YAAY,KAAK;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,SAA+B;AAC7B,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AAAA,EAEA,IAAI,OAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD;AAAA,EACA,QAAQ;AAAA,EAER,YACE,WACAA,MACA,aACA,aACA;AACA,UAAM,WAAWA,MAAK,aAAa,WAAW;AAC9C,SAAK,OAAOA;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAE/E,UAAM,SAAsC;AAAA,MAC1C,oBAAoB,CAAC,OAAO;AAAA,MAC5B,cAAc;AAAA,QACZ,aAAa;AAAA,UACX,qBAAqB;AAAA,YACnB,WAAW,KAAK,KAAK,KAAK;AAAA,UAC5B;AAAA,QACF;AAAA,MACF;AAAA,MACA,aAAa,KAAK;AAAA,IACpB;AAEA,QAAI,YAAY,KAAK;AACrB,UAAM,eAAe;AAAA,MACnB,KAAK,KAAK,KAAK;AAAA,MACf,2BAA2B,KAAK,KAAK,KAAK,oBAAoB;AAAA,IAChE,EACG,OAAO,CAAC,gBAAuC,CAAC,CAAC,WAAW,EAC5D,KAAK,IAAI;AACZ,QAAI,cAAc;AAChB,kBAAY,GAAG,YAAY;AAAA,GAAO,SAAS;AAAA,IAC7C;AAEA,UAAM,WAA4B;AAAA,MAChC;AAAA,QACE,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,UAAU,CAAC;AAAA,MAC7B;AAAA,IACF;AAEA,QAAI;AACF,YAAM,iBAAiB,MAAM,KAAK,KAAK,OAAO,OAAO,sBAAsB;AAAA,QACzE,OAAO,KAAK,KAAK,KAAK;AAAA,QACtB;AAAA,QACA;AAAA,MACF,CAAC;AAED,uBAAiB,YAAY,gBAAgB;AAC3C,cAAM,KAAK,iBAAiB,UAAU,SAAS,SAAS;AAAA,MAC1D;AAAA,IACF,SAAS,OAAgB;AACvB,UAAI,iBAAiB,SAAS,MAAM,SAAS,cAAc;AACzD;AAAA,MACF;AACA,cAAI,0BAAW,KAAK,EAAG,OAAM;AAE7B,YAAM,MAAM;AAOZ,UAAI,IAAI,QAAQ,IAAI,QAAQ,OAAO,IAAI,OAAO,KAAK;AACjD,YAAI,IAAI,SAAS,KAAK;AACpB,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,YACzE,SAAS;AAAA,cACP,YAAY;AAAA,cACZ,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH,OAAO;AACL,gBAAM,IAAI,6BAAe;AAAA,YACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,YACnF,SAAS;AAAA,cACP,YAAY,IAAI;AAAA,cAChB,WAAW;AAAA,YACb;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,IAAI,QAAQ,IAAI,QAAQ,KAAK;AAC/B,cAAM,IAAI,6BAAe;AAAA,UACvB,SAAS,6BAA6B,IAAI,IAAI,OAAO,IAAI,WAAW,eAAe;AAAA,UACnF,SAAS;AAAA,YACP,YAAY,IAAI;AAAA,YAChB,WAAW;AAAA,UACb;AAAA,QACF,CAAC;AAAA,MACH;AAEA,YAAM,IAAI,iCAAmB;AAAA,QAC3B,SAAS,kCAAkC,IAAI,WAAW,eAAe;AAAA,QACzE,SAAS,EAAE,WAAW,KAAK;AAAA,MAC7B,CAAC;AAAA,IACH,UAAE;AACA,WAAK,MAAM,MAAM;AAAA,IACnB;AAAA,EACF;AAAA,EAEA,MAAM,iBACJ,UACA,SACA,WACA;AA7SJ;AA8SI,QAAI,CAAC,SAAS,cAAc,SAAS,WAAW,WAAW,GAAG;AAC5D;AAAA,IACF;AAEA,UAAM,YAAY,SAAS,WAAW,CAAC;AACvC,QAAI,CAAC,aAAa,GAAC,eAAU,YAAV,mBAAmB,QAAO;AAC3C;AAAA,IACF;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAAC,UAAmB;AACxC,UAAI,WAAW;AACb,aAAK,MAAM,IAAI;AAAA,UACb;AAAA,UACA,OAAO;AAAA,UACP,WAAW;AAAA,UACX;AAAA,QACF,CAAC;AACD,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,QAAQ,UAAU,QAAQ,OAAO;AAC1C,YAAI,UAAK,eAAL,mBAAiB,WAAQ,UAAK,WAAW,aAAhB,mBAA0B,WAAW,YAAW;AAC3E,cAAM,cAAc,OAAO,KAAK,KAAK,WAAW,MAAM,QAAQ;AAE9D,mBAAW,SAAS,QAAQ,MAAM,WAAW,GAAG;AAC9C,wBAAc,KAAK;AACnB,sBAAY;AAAA,QACd;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,oBAAc,KAAK;AACnB,kBAAY;AAAA,IACd;AAEA,kBAAc,IAAI;AAAA,EACpB;AACF;AAEA,SAAS,2BACP,sBACoB;AACpB,MAAI,EAAC,6DAAsB,eAAe,SAAQ;AAChD,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,qBAAqB,eAAe,IAAI,CAAC,kBAAkB;AACvE,UAAM,WAAW,cAAc,mBAC3B,UAAU,cAAc,gBAAgB,KACxC;AACJ,WAAO,gBAAgB,cAAc,MAAM,SAAS,cAAc,aAAa,IAAI,QAAQ;AAAA,EAC7F,CAAC;AAED,SAAO,CAAC,2DAA2D,GAAG,KAAK,EAAE,KAAK,IAAI;AACxF;","names":["tts"]}