import axios from 'axios';
import FormData from 'form-data';
import { VoiceSettings } from '../types/audio.js';

/** Result of a text-to-speech request made through {@link ElevenLabsClient}. */
export interface ElevenLabsResponse {
  /** Audio bytes exactly as received in the HTTP response body. */
  audioBuffer: ArrayBuffer;
  /** Not populated anywhere in this file; presumably filled in by callers — TODO confirm. */
  audioUrl?: string;
  /** Estimated (word-count based, not measured) playback length in seconds. */
  duration?: number;
}

/**
 * Thin client for the ElevenLabs text-to-speech HTTP API.
 *
 * Friendly voice names (e.g. `george`) are mapped to ElevenLabs voice IDs;
 * unknown names silently fall back to the default voice.
 */
export class ElevenLabsClient {
  /** Voice ID used when the requested voice name is not in {@link voiceMap}. */
  private static readonly DEFAULT_VOICE_ID = 'JBFqnCBsd6RMkjVDRZzb';

  /** Timeout applied to every HTTP request, in milliseconds (30 seconds). */
  private static readonly REQUEST_TIMEOUT_MS = 30000;

  /** Speaking rate assumed by the duration estimate at speed 1.0. */
  private static readonly BASE_WORDS_PER_MINUTE = 150;

  private readonly apiKey: string;
  private readonly baseUrl = 'https://api.elevenlabs.io/v1';

  // Voice ID mappings (keys are lower-case friendly names).
  private readonly voiceMap = new Map<string, string>([
    ['george', ElevenLabsClient.DEFAULT_VOICE_ID], // JARVIS voice
    ['sarah', '21m00Tcm4TlvDq8ikWAM'],
    ['aria', '9BWtsMINqrJLrRacOk9x'],
    ['laura', 'FGY2WhTYpPnrIDTdsKH5'],
    ['charlie', 'IKne3meq5aSn9XLyUdCD'],
  ]);

  /**
   * @param apiKey ElevenLabs API key. When omitted or empty, the
   *   `ELEVENLABS_API_KEY` environment variable is used instead.
   * @throws Error if no non-empty key can be resolved.
   */
  constructor(apiKey?: string) {
    // `||` (not `??`) is deliberate: an empty string counts as "not provided".
    this.apiKey = apiKey || process.env.ELEVENLABS_API_KEY || '';

    if (!this.apiKey) {
      throw new Error('ElevenLabs API key is required');
    }
  }

  /**
   * Converts `text` to speech and returns the audio bytes.
   *
   * @param text Text to synthesize.
   * @param voice Friendly voice name (case-insensitive). Unknown names fall
   *   back to the default voice.
   * @param settings Optional overrides; unset fields use the defaults
   *   stability 0.5, similarityBoost 0.75, style 0.0, useSpeakerBoost true.
   * @returns The audio bytes plus an estimated duration in seconds.
   * @throws Error with a specific message for HTTP 401, 429 and 422, and a
   *   generic `ElevenLabs API error: …` message for every other failure.
   */
  async synthesizeText(
    text: string,
    voice: string = 'george',
    settings?: Partial<VoiceSettings>
  ): Promise<ElevenLabsResponse> {
    const voiceId =
      this.voiceMap.get(voice.toLowerCase()) ?? ElevenLabsClient.DEFAULT_VOICE_ID;

    const voiceSettings = {
      stability: settings?.stability ?? 0.5,
      similarity_boost: settings?.similarityBoost ?? 0.75,
      style: settings?.style ?? 0.0,
      use_speaker_boost: settings?.useSpeakerBoost ?? true,
    };

    try {
      console.log(`🎙️ Synthesizing text (${text.length} chars) with voice: ${voice}`);

      const response = await axios.post<ArrayBuffer>(
        `${this.baseUrl}/text-to-speech/${voiceId}/stream`,
        {
          text: text,
          model_id: 'eleven_monolingual_v1',
          voice_settings: voiceSettings,
          // NOTE(review): the ElevenLabs reference appears to list
          // `optimize_streaming_latency` as a query parameter; confirm that
          // sending it in the JSON body has any effect before relying on it.
          optimize_streaming_latency: settings?.speed && settings.speed > 1.5 ? 4 : 3,
        },
        {
          headers: {
            // The endpoint answers with binary audio, not JSON.
            'Accept': 'audio/mpeg',
            'xi-api-key': this.apiKey,
            'Content-Type': 'application/json',
          },
          responseType: 'arraybuffer',
          timeout: ElevenLabsClient.REQUEST_TIMEOUT_MS,
        }
      );

      console.log(`✅ Synthesis successful (${response.data.byteLength} bytes)`);

      return {
        audioBuffer: response.data,
        duration: this.estimateAudioDuration(text, settings?.speed ?? 1.0),
      };
    } catch (error: unknown) {
      // Log a summary only: the full Axios error object carries the request
      // config, whose headers include the API key.
      console.error('❌ ElevenLabs synthesis error:', ElevenLabsClient.summarizeError(error));

      const status = axios.isAxiosError(error) ? error.response?.status : undefined;
      if (status === 401) {
        throw new Error('Invalid ElevenLabs API key');
      }
      if (status === 429) {
        throw new Error('ElevenLabs rate limit exceeded');
      }
      if (status === 422) {
        throw new Error('Invalid text or voice settings');
      }

      throw new Error(
        `ElevenLabs API error: ${error instanceof Error ? error.message : 'Unknown error'}`
      );
    }
  }

  /**
   * Placeholder for streaming synthesis; currently identical to
   * {@link synthesizeText}.
   */
  async synthesizeWithStreaming(
    text: string,
    voice: string = 'george',
    settings?: Partial<VoiceSettings>
  ): Promise<ElevenLabsResponse> {
    // For now, use regular synthesis
    // TODO: Implement proper streaming for real-time playback
    return this.synthesizeText(text, voice, settings);
  }

  /**
   * Roughly estimates playback length from the word count, assuming
   * ~150 words per minute at speed 1.0.
   *
   * @param text Text that was synthesized.
   * @param speed Playback speed multiplier; non-finite or non-positive
   *   values are treated as 1.0 so the result is always a finite number.
   * @returns Estimated duration in seconds (0 for blank text).
   */
  private estimateAudioDuration(text: string, speed: number = 1.0): number {
    const trimmed = text.trim();
    if (trimmed === '') {
      // ''.split(/\s+/) yields [''], which would count as one word.
      return 0;
    }

    const wordCount = trimmed.split(/\s+/).length;
    // Guard against division by zero / negative durations.
    const effectiveSpeed = Number.isFinite(speed) && speed > 0 ? speed : 1.0;
    const adjustedWPM = ElevenLabsClient.BASE_WORDS_PER_MINUTE * effectiveSpeed;

    return (wordCount / adjustedWPM) * 60; // minutes -> seconds
  }

  /**
   * Fetches the voices available to this API key.
   *
   * Best-effort: any failure is logged and an empty array is returned
   * instead of throwing.
   */
  async getVoices(): Promise<Array<{ id: string; name: string }>> {
    try {
      const response = await axios.get<{
        voices: Array<{ voice_id: string; name: string }>;
      }>(`${this.baseUrl}/voices`, {
        headers: {
          'xi-api-key': this.apiKey,
        },
        timeout: ElevenLabsClient.REQUEST_TIMEOUT_MS,
      });

      return response.data.voices.map((voice) => ({
        id: voice.voice_id,
        name: voice.name,
      }));
    } catch (error: unknown) {
      // Summary only, so the API key in the request headers is not logged.
      console.error('❌ Error fetching voices:', ElevenLabsClient.summarizeError(error));
      return [];
    }
  }

  /** Returns true when `voice` (case-insensitive) is one of the built-in voice names. */
  isValidVoice(voice: string): boolean {
    return this.voiceMap.has(voice.toLowerCase());
  }

  /** Lists the built-in friendly voice names. */
  getAvailableVoices(): string[] {
    return Array.from(this.voiceMap.keys());
  }

  /** Builds a log-safe, one-line description of a caught error. */
  private static summarizeError(error: unknown): string {
    const message = error instanceof Error ? error.message : 'Unknown error';
    if (axios.isAxiosError(error) && error.response !== undefined) {
      return `${message} (HTTP ${error.response.status})`;
    }
    return message;
  }
}