// Improved useTTS.ts with race condition fix
import { useState, useCallback, useEffect, useRef } from 'react';
import { sanitizeText } from '../sanitizer';
import { getLocalConfig, setLocalConfig } from '../configuration';
import { useViseme } from '../../context/visemeContext';
import { IAudioContext } from 'standardized-audio-context';
import { isAndroid } from '../utils';

/**
 * TTS configuration.
 */
export interface TTSConfig {
  provider: 'azure' | 'openai';
  voice?: string;
  model?: string;
  region?: string; // required for Azure
  tenant?: string; // Tenant identifier for multi-tenant applications
  layout?: 'DEFAULT' | 'ZOOMED_FULL_BODY' | 'FULLPAGE' | 'TOTEM';
}

type VisemeData = {
  visemeId: number;
  audioOffset: number;
};

/**
 * Options for the useTTS hook.
 */
export interface UseTTSOptions {
  apiUrl?: string;
  continuousSpeech?: boolean;
  onEndSpeakStartListen?: () => void;
  preview?: boolean;
  disableSpeaker?: boolean;
}

// Create our own simplified audio context interface for better typing
interface SimpleAudioWrapper {
  readonly currentTime: number;
  state: 'running' | 'suspended' | 'closed';
  onstatechange: ((this: AudioContext, ev: Event) => any) | null;
  /** Detaches the DOM listeners that keep `state` in sync with the audio element. */
  cleanup?: () => void;
}

/**
 * Splits `text` into chunks of at most roughly `maxLength` characters, breaking
 * only at sentence boundaries (`.`, `!`, `?`).
 *
 * Text that already fits is returned untouched as a single chunk. Each sentence
 * keeps its own terminator (a `.` is appended only when it has none), and
 * sentences inside a chunk are separated by a single space. A single sentence
 * longer than `maxLength` is not split further and becomes its own chunk.
 */
function splitTextIntoChunks(text: string, maxLength = 800): string[] {
  if (text.length <= maxLength) {
    return [text];
  }

  const sentences = (text.match(/[^.!?]+[.!?]*/g) ?? [])
    .map(sentence => sentence.trim())
    // Drop fragments that contain nothing but whitespace/punctuation.
    .filter(sentence => /[^\s.!?]/.test(sentence));

  const chunks: string[] = [];
  let currentChunk = '';

  for (const sentence of sentences) {
    const terminated = /[.!?]$/.test(sentence) ? sentence : `${sentence}.`;

    if (
      currentChunk.length > 0 &&
      (currentChunk + terminated).length > maxLength
    ) {
      chunks.push(currentChunk.trim());
      currentChunk = '';
    }

    // Always add the separator, including after the first sentence of a chunk.
    currentChunk += `${terminated} `;
  }

  if (currentChunk.trim().length > 0) {
    chunks.push(currentChunk.trim());
  }

  return chunks;
}

/**
 * Unified hook that handles speech synthesis: it requests audio (and optional
 * viseme data) from the TTS endpoint, plays it chunk by chunk and feeds the
 * viseme queue from the viseme context.
 *
 * @param config - Provider/voice/layout settings sent with every TTS request.
 * @param options - Hook options (endpoint URL, preview mode, continuous speech callback).
 * @param autoStart - When true, the initial default for the persisted mute flag is "muted".
 * @param defaultEnableAudio - When false, audio is never played.
 * @param defaultSpeakerActive - When false, the initial default for the mute flag is "muted".
 * @returns `speak`, `stop`, `toggleMute` plus the `isPlaying`, `speakerMuted`
 *   and `hasUserActivatedSpeak` state (with its setter).
 */
export function useTTS(
  config: TTSConfig,
  options: UseTTSOptions = {},
  autoStart: boolean = false,
  defaultEnableAudio: boolean = true,
  defaultSpeakerActive: boolean = true
) {
  // Local state
  const [isPlaying, setIsPlaying] = useState(false);
  const [speakerMuted, setSpeakerMuted] = useState(
    getLocalConfig(
      'muteSpeaker',
      !defaultEnableAudio || !defaultSpeakerActive || autoStart
    )
  );

  // Get viseme methods from the viseme context
  const { addViseme, resetVisemeQueue, startProcessing, stopProcessing } =
    useViseme();

  const [hasUserActivatedSpeak, setHasUserActivatedSpeak] = useState(false);

  // Helper function to check if audio should be played.
  // When defaultEnableAudio is false, default to muted so we never play before
  // the persisted mute flag has been written.
  const shouldPlayAudio = (text?: string): boolean => {
    const currentSpeakerMuted = getLocalConfig(
      'muteSpeaker',
      !defaultEnableAudio
    );
    return Boolean(
      text &&
        text.trim() &&
        !options.preview &&
        !currentSpeakerMuted &&
        defaultEnableAudio
    );
  };

  // References
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const audioWrapperRef = useRef<SimpleAudioWrapper | null>(null);
  const globalSpeakRef = useRef<unknown>(null);
  const visemeLoadedRef = useRef(false);
  const isSpeakingRef = useRef(false);
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const isMountedRef = useRef(true);
  const currentChunkAudioRef = useRef<HTMLAudioElement | null>(null);
  // Incremented by every speak() call that starts playback, so an older
  // in-flight request can tell it has been replaced by a newer one.
  const speakSessionRef = useRef(0);

  const apiUrl = options.apiUrl || '/api/tts';

  // Load viseme data into the queue. Returns true when at least one viseme was queued.
  const loadVisemeData = useCallback(
    (visemeData: VisemeData[]): boolean => {
      resetVisemeQueue();
      visemeLoadedRef.current = false;

      if (!Array.isArray(visemeData) || visemeData.length === 0) {
        return false;
      }

      visemeData.forEach(viseme => {
        addViseme(viseme.visemeId, viseme.audioOffset);
      });
      visemeLoadedRef.current = true;
      return true;
    },
    [addViseme, resetVisemeQueue]
  );

  // Create audio wrapper for viseme processing
  const createAudioWrapper = useCallback((): SimpleAudioWrapper | null => {
    // Capture the element now: audioRef may point at a different element (or
    // null) by the time the listeners have to be removed.
    const audio = audioRef.current;
    if (!audio) {
      return null;
    }

    // Create a clean wrapper for this audio session
    const wrapper: SimpleAudioWrapper = {
      state: 'running',
      onstatechange: null,
      get currentTime() {
        return audioRef.current ? audioRef.current.currentTime : 0;
      },
    };

    // Mirror the element's playback state onto the wrapper and notify the listener
    const notify = (state: SimpleAudioWrapper['state']) => {
      wrapper.state = state;
      wrapper.onstatechange?.call(null as any, new Event('statechange'));
    };
    const handlePause = () => notify('suspended');
    const handlePlay = () => notify('running');
    const handleEnded = () => notify('closed');

    // Attach event listeners to the audio element
    audio.addEventListener('pause', handlePause);
    audio.addEventListener('play', handlePlay);
    audio.addEventListener('ended', handleEnded);

    // Store the cleanup function on the wrapper for later use
    wrapper.cleanup = () => {
      audio.removeEventListener('pause', handlePause);
      audio.removeEventListener('play', handlePlay);
      audio.removeEventListener('ended', handleEnded);
    };

    return wrapper;
  }, []);

  /**
   * Performs a complete cleanup of audio and viseme resources
   */
  const cleanup = useCallback(() => {
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }

    audioWrapperRef.current?.cleanup?.();
    audioWrapperRef.current = null;

    stopProcessing();
    resetVisemeQueue(); // ensure viseme queue is cleared

    if (audioRef.current?.src) {
      URL.revokeObjectURL(audioRef.current.src);
      audioRef.current = null;
    }

    // Clear chunk audio reference
    currentChunkAudioRef.current = null;

    visemeLoadedRef.current = false;
    // Don't reset isSpeakingRef here - let the speak function manage it
  }, [stopProcessing, resetVisemeQueue]);

  /**
   * Stops audio playback and cleans up
   */
  const stop = useCallback((): void => {
    // Stop the main audio element
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
    }

    // Stop the current chunk audio element if it exists
    if (currentChunkAudioRef.current) {
      currentChunkAudioRef.current.pause();
      currentChunkAudioRef.current.currentTime = 0;
      currentChunkAudioRef.current = null;
    }

    setIsPlaying(false);
    cleanup();
    isSpeakingRef.current = false;

    const e = new CustomEvent('MemoriAudioEnded');
    document.dispatchEvent(e);
  }, [cleanup]);

  /**
   * Emits the end-of-speech event and, in continuous speech mode, hands
   * control back to listening.
   */
  const emitEndSpeakEvent = useCallback(() => {
    const e = new CustomEvent('MemoriEndSpeak');
    document.dispatchEvent(e);

    if (options.continuousSpeech && options.onEndSpeakStartListen) {
      options.onEndSpeakStartListen();
    }
  }, [options.continuousSpeech, options.onEndSpeakStartListen]);

  /**
   * Handles text-to-speech for a single chunk of text.
   *
   * Resolves when the chunk finished playing or when playback was skipped
   * (muted or cancelled); rejects on API or playback errors.
   * NOTE(review): if stop() pauses the element mid-playback, neither `ended`
   * nor `error` fires and the returned promise stays pending (unchanged from
   * the previous behavior).
   *
   * @param chunkText - Text to synthesize.
   * @param isActive - Returns false once the owning speak() call was cancelled or replaced.
   */
  const speakChunk = useCallback(
    async (chunkText: string, isActive: () => boolean): Promise<void> => {
      // Make API request to TTS endpoint
      const response = await fetch(apiUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          text: chunkText,
          tenant: config.tenant || 'www.aisuru.com',
          voice: config.voice,
          model: config.model || 'tts-1',
          region: config.region,
          provider: config.provider,
          // Include viseme data for layout types that need lip sync
          includeVisemes:
            config.layout === 'ZOOMED_FULL_BODY' ||
            config.layout === 'FULLPAGE' ||
            config.layout === 'DEFAULT' ||
            config.layout === 'TOTEM',
        }),
      });

      // Handle API errors
      if (!response.ok) {
        const errorData = await response.json().catch(() => ({}));
        throw new Error(errorData.error || `API error: ${response.status}`);
      }

      // Get audio blob from response and create URL
      const audioBlob = await response.blob();
      const audioUrl = URL.createObjectURL(audioBlob);

      // The speaker may have been muted, or this request cancelled/replaced,
      // while the TTS result was in flight.
      if (!shouldPlayAudio(chunkText) || !isActive()) {
        URL.revokeObjectURL(audioUrl);
        return;
      }

      // Parse viseme data from response headers if available
      let hasVisemeData = false;
      const visemeDataHeader = response.headers.get('X-Viseme-Data');
      if (visemeDataHeader) {
        try {
          const visemeData: VisemeData[] = JSON.parse(visemeDataHeader);
          hasVisemeData = loadVisemeData(visemeData);
        } catch (err) {
          console.error('[useTTS] Error parsing viseme data:', err);
        }
      }

      // Create audio element with Android optimizations
      const audio = new Audio();
      audio.preload = 'auto'; // Force preloading for Android compatibility
      audio.src = audioUrl;
      audioRef.current = audio;
      currentChunkAudioRef.current = audio;

      // Clean up previous wrapper before creating a new one
      audioWrapperRef.current?.cleanup?.();
      audioWrapperRef.current = null;

      // Create audio wrapper for viseme processing if needed
      if (hasVisemeData) {
        audioWrapperRef.current = createAudioWrapper();
      }

      // Return promise that resolves when audio finishes playing
      return new Promise<void>((resolve, reject) => {
        const releaseAudio = () => {
          URL.revokeObjectURL(audioUrl);
          if (currentChunkAudioRef.current === audio) {
            currentChunkAudioRef.current = null;
          }
        };

        const startVisemes = () => {
          if (hasVisemeData && audioWrapperRef.current) {
            startProcessing(
              audioWrapperRef.current as unknown as IAudioContext
            );
          }
        };

        const handleCanPlay = async () => {
          try {
            // Check if playback was cancelled
            if (!isActive()) {
              URL.revokeObjectURL(audioUrl);
              resolve();
              return;
            }

            try {
              // Viseme processing is started right before play() is requested
              startVisemes();
              await audio.play();
            } catch {
              // Retry once for Android compatibility
              await new Promise<void>(r => setTimeout(r, 100));

              // The request may have been cancelled during the retry delay
              if (!isActive()) {
                URL.revokeObjectURL(audioUrl);
                resolve();
                return;
              }

              startVisemes();
              await audio.play();
            }
          } catch (e) {
            // Clean up on error
            URL.revokeObjectURL(audioUrl);
            reject(e);
          }
        };

        audio.addEventListener('canplaythrough', handleCanPlay, {
          once: true,
        });

        // When audio finishes playing
        audio.onended = () => {
          releaseAudio();
          resolve();
        };

        // Handle audio errors
        audio.onerror = () => {
          releaseAudio();
          reject(new Error('Audio playback failed'));
        };

        // Start loading the audio
        audio.load();
      });
    },
    [
      apiUrl,
      config,
      options,
      loadVisemeData,
      createAudioWrapper,
      startProcessing,
      speakerMuted,
      defaultEnableAudio,
    ]
  );

  /**
   * Synthesizes the text into audio and plays it, chunk by chunk.
   */
  const speak = useCallback(
    async (text: string): Promise<void> => {
      if (!isMountedRef.current) {
        return;
      }

      // Early exit conditions before setting speaking flag
      if (!shouldPlayAudio(text)) {
        // Still set hasUserActivatedSpeak to true when audio is disabled
        // so the chat can start properly
        if (!hasUserActivatedSpeak) {
          setHasUserActivatedSpeak(true);
        }
        emitEndSpeakEvent();
        return;
      }

      // Stop any existing playback first (before checking/setting speaking flag)
      if (isPlaying) {
        stop();
      }

      // Now check if we're already processing a request
      if (isSpeakingRef.current) {
        return;
      }

      // Set the flag after all the early exits and cleanup
      isSpeakingRef.current = true;

      // Tag this call. isSpeakingRef alone cannot distinguish "still my
      // request" from "a newer request set the flag again after I was stopped".
      const session = ++speakSessionRef.current;
      const isSuperseded = () => speakSessionRef.current !== session;
      const isActive = () =>
        isMountedRef.current && isSpeakingRef.current && !isSuperseded();

      if (!hasUserActivatedSpeak) {
        setHasUserActivatedSpeak(true);
      }

      try {
        setIsPlaying(true);

        // Chunking: split the text into chunks when it is too long
        const chunks = splitTextIntoChunks(text, 500);

        // Play every chunk in sequence
        for (const [index, chunk] of chunks.entries()) {
          // Stop when the component was unmounted or playback was interrupted
          if (!isActive()) {
            break;
          }

          // Wait for the current chunk to finish before moving on
          await speakChunk(chunk, isActive);

          // Short pause between chunks when more are pending
          if (index < chunks.length - 1 && isActive()) {
            await new Promise<void>(resolve => setTimeout(resolve, 300));
          }
        }

        // A newer speak() call owns the shared state now: leave it untouched
        if (isSuperseded()) {
          return;
        }

        setIsPlaying(false);
        isSpeakingRef.current = false;
        emitEndSpeakEvent();

        // Dispatch custom event to notify MemoriWidget that audio has ended
        const e = new CustomEvent('MemoriAudioEnded');
        document.dispatchEvent(e);
      } catch (err) {
        console.error('[speak] Error during playback:', err);

        // A newer speak() call owns the shared state now: leave it untouched
        if (isSuperseded()) {
          return;
        }

        setIsPlaying(false);
        isSpeakingRef.current = false;

        if (timeoutRef.current) {
          clearTimeout(timeoutRef.current);
          timeoutRef.current = null;
        }

        cleanup();
        emitEndSpeakEvent();

        // Dispatch custom event to notify MemoriWidget that audio has ended
        const e = new CustomEvent('MemoriAudioEnded');
        document.dispatchEvent(e);
      }
    },
    [
      config,
      speakerMuted,
      options.preview,
      hasUserActivatedSpeak,
      stop,
      cleanup,
      speakChunk,
      emitEndSpeakEvent,
      isPlaying,
      defaultEnableAudio,
    ]
  );

  /**
   * Sets the mute state (toggles it when `mute` is omitted) and persists it.
   */
  const toggleMute = useCallback(
    (mute?: boolean) => {
      const newMuteState = mute !== undefined ? mute : !speakerMuted;
      setSpeakerMuted(newMuteState);

      // Update local config for persistence
      setLocalConfig('muteSpeaker', newMuteState);

      // Muting during playback stops it; stop() also resets the viseme state
      // through cleanup(), so visemes are loaded fresh on the next speak call.
      if (newMuteState && isPlaying) {
        stop();
      }
    },
    [speakerMuted, isPlaying, stop]
  );

  /**
   * Keeps the global `memoriSpeaking` flag in sync with isPlaying
   */
  useEffect(() => {
    if (typeof window !== 'undefined') {
      (window as any).memoriSpeaking = isPlaying;
    }
  }, [isPlaying]);

  /**
   * Exposes the speak function globally and restores the previous value on cleanup
   */
  useEffect(() => {
    if (typeof window === 'undefined') {
      return undefined;
    }

    globalSpeakRef.current = (window as any).speak;
    (window as any).speak = speak;

    return () => {
      (window as any).speak = globalSpeakRef.current;
    };
  }, [speak]);

  /**
   * Releases resources on unmount
   */
  useEffect(() => {
    // The cleanup below also runs whenever `stop` changes identity and during
    // React StrictMode's development-only remount; re-arm the flag so speak()
    // keeps working after such a cycle.
    isMountedRef.current = true;

    return () => {
      isSpeakingRef.current = false;
      isMountedRef.current = false;
      stop();
    };
  }, [stop]);

  return {
    speak,
    stop,
    isPlaying,
    speakerMuted,
    toggleMute,
    hasUserActivatedSpeak,
    setHasUserActivatedSpeak,
  };
}