/**
 * InputBar: chat input bar with a text mode and a press-and-hold voice mode.
 *
 * Text mode: handleSend trims the text, calls onSend and clears the field;
 * Enter without Shift sends (handleKeyDown). toggleMode switches modes and
 * focuses the text input shortly after leaving voice mode.
 *
 * Voice mode (streaming ASR), as far as the visible code shows:
 *  - beginVoicePress creates a request id with generateUUID(), resets the
 *    streaming refs and awaits startRecord(onPcmData).
 *  - onPcmData copies every PCM chunk into pcmBufferRef. Once the buffer
 *    holds TARGET_PCM_SAMPLES samples it flushes immediately, otherwise it
 *    arms a MIN_SEND_INTERVAL ms flush timer.
 *  - sendBufferedPcmData merges the buffered chunks, converts them with
 *    convertPcmToWav and sends them through sendStreamPackage with an
 *    incrementing sequence id (the package with id 1 is flagged as first).
 *    Non-final sends are deferred so they are at least MIN_SEND_INTERVAL ms
 *    apart.
 *  - handleStreamResult keeps the live transcript: 'partial' results replace
 *    partialTextRef, 'final' results are appended to recognizedTextRef.
 *  - doEndVoicePress either cancels, or merges the leftover buffer into a
 *    tail package, awaits stopRecord(), sends the tail with
 *    isLastPackage: true and passes the trimmed transcript to onSend. With no
 *    transcript it shows a voice error for 2.5 s.
 *  - endVoicePress defers to beginVoicePress through pendingStopRef when the
 *    release happens before startRecord() has resolved.
 *  - Touch: moving up by more than CANCEL_SWIPE_THRESHOLD arms the cancel
 *    gesture, and releasing then calls cancelVoicePress.
 *
 * NOTE(review): in this copy the file is collapsed onto a few long lines and
 * the JSX markup / generic type arguments appear to be missing (for example
 * `React.FC = (`, `useRef(null)`, the empty icon components). The original
 * `//` comments therefore swallow the rest of their physical line. Confirm
 * against the real file before relying on this text.
 * NOTE(review): streamErrorRef is written in two catch blocks but never read
 * in the visible code, so failed sends are silent.
 * NOTE(review): isCancelGestureRef.current is read during render; writing a
 * ref does not trigger a re-render, so the cancel hint may lag until another
 * state update happens.
 * NOTE(review): if cancelVoicePress runs while startRecord() is still
 * pending, beginVoicePress afterwards sets recordStartedRef to true and does
 * not stop the recorder. Whether recording keeps running depends on
 * useVoiceRecorder; TODO confirm.
 * NOTE(review): no unmount cleanup is visible for streamTimerRef or for the
 * setVoiceError timeouts.
 * NOTE(review): in doEndVoicePress the final
 * `!wavData || ... || !streamedText` check is always true, because the
 * non-empty streamedText case has already returned.
 */
import React, { useState, useRef, useCallback, useEffect } from 'react'; import { TriggerBarSendIcon } from '../icons/TriggerBarSendIcon'; import { useVoiceRecorder, convertPcmToWav } from '../../hooks/useVoiceRecorder'; import { sendStreamPackage, generateUUID } from '../../services/asrService'; import type { OnRecognitionResult } from '../../services/asrService'; import styles from './index.module.less'; const WAVEFORM_BAR_COUNT = 30; const WAVEFORM_BARS = Array.from({ length: WAVEFORM_BAR_COUNT }, (_, i) => ({ delay: (i * 0.04) + (i % 3) * 0.02, duration: 0.4 + (i % 5) * 0.12, })); const SAMPLE_RATE = 16000; const CHANNELS = 1; const TARGET_PCM_SAMPLES = SAMPLE_RATE; // ~1 second of audio const MIN_SEND_INTERVAL = 1000; interface InputBarProps { onSend: (text: string) => void; disabled?: boolean; sending?: boolean; onStop?: () => void; keyboardOffset?: number; initialVoiceMode?: boolean; } export const InputBar: React.FC = ({ onSend, disabled = false, sending = false, onStop, keyboardOffset = 0, initialVoiceMode = true, }) => { const [text, setText] = useState(''); const [isVoiceMode, setIsVoiceMode] = useState(initialVoiceMode); const [voicePressing, setVoicePressing] = useState(false); const [voiceError, setVoiceError] = useState(null); const [realtimeText, setRealtimeText] = useState(''); const inputRef = useRef(null); const voiceButtonRef = useRef(null); const voiceCancelledRef = useRef(false); const voicePressingRef = useRef(false); const pendingStopRef = useRef(false); const recordStartedRef = useRef(false); const touchStartYRef = useRef(0); const isCancelGestureRef = useRef(false); // Streaming ASR state const streamRequestIdRef = useRef(null); const streamSequenceIdRef = useRef(0); const isStreamingRef = useRef(false); const pcmBufferRef = useRef([]); const streamTimerRef = useRef(null); const lastSendTimeRef = useRef(0); const recognizedTextRef = useRef(''); const partialTextRef = useRef(''); const streamErrorRef = useRef(false); const 
CANCEL_SWIPE_THRESHOLD = 80; /* Mouse handlers are honoured only when window is undefined or has no 'ontouchstart'. */ const shouldHandleMouseRef = useRef( typeof window === 'undefined' ? true : !('ontouchstart' in window), ); const { isRecording, status, startRecord, stopRecord, cancelRecord } = useVoiceRecorder(); const voiceButtonDisabled = disabled || status === 'processing'; /* Re-sync the mode whenever the initialVoiceMode prop changes. */ useEffect(() => { setIsVoiceMode(initialVoiceMode); }, [initialVoiceMode]); const handleSend = () => { const trimmed = text.trim(); if (!trimmed || disabled) return; onSend(trimmed); setText(''); }; const handleKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); handleSend(); } }; const toggleMode = () => { const next = !isVoiceMode; setIsVoiceMode(next); if (!next) { setTimeout(() => inputRef.current?.focus(), 100); } }; /* Partial results replace the pending partial text; final results are appended to the recognized text. */ const handleStreamResult: OnRecognitionResult = useCallback((result) => { if (!result.text || result.text.trim().length === 0) return; if (result.textType === 'partial') { partialTextRef.current = result.text; } else if (result.textType === 'final') { recognizedTextRef.current += result.text; partialTextRef.current = ''; } setRealtimeText(recognizedTextRef.current + partialTextRef.current); }, []); /* Cancel the pending flush timer and reset every streaming ref plus the live transcript. */ const clearStreamState = useCallback(() => { if (streamTimerRef.current !== null) { clearTimeout(streamTimerRef.current); streamTimerRef.current = null; } pcmBufferRef.current = []; streamRequestIdRef.current = null; streamSequenceIdRef.current = 0; isStreamingRef.current = false; lastSendTimeRef.current = 0; recognizedTextRef.current = ''; partialTextRef.current = ''; streamErrorRef.current = false; setRealtimeText(''); }, []); /* Flush buffered PCM as one WAV package, deferring via a timer when the previous send was under MIN_SEND_INTERVAL ms ago. */ const sendBufferedPcmData = useCallback(async () => { if (!isStreamingRef.current || !streamRequestIdRef.current) { pcmBufferRef.current = []; return; } const now = Date.now(); const timeSinceLastSend = now - lastSendTimeRef.current; if (timeSinceLastSend < MIN_SEND_INTERVAL && lastSendTimeRef.current > 0) { if (streamTimerRef.current === null) { const remainingDelay = MIN_SEND_INTERVAL - 
timeSinceLastSend; streamTimerRef.current = window.setTimeout(async () => { streamTimerRef.current = null; await sendBufferedPcmData(); }, remainingDelay); } return; } const chunksToSend = [...pcmBufferRef.current]; pcmBufferRef.current = []; if (chunksToSend.length === 0) return; try { const totalLength = chunksToSend.reduce((sum, chunk) => sum + chunk.length, 0); const mergedPcm = new Float32Array(totalLength); let offset = 0; for (const chunk of chunksToSend) { mergedPcm.set(chunk, offset); offset += chunk.length; } const audioArrayBuffer = convertPcmToWav(mergedPcm, SAMPLE_RATE, CHANNELS); if (!audioArrayBuffer || audioArrayBuffer.byteLength === 0) return; lastSendTimeRef.current = Date.now(); streamSequenceIdRef.current += 1; const sequenceId = streamSequenceIdRef.current; await sendStreamPackage({ audioData: audioArrayBuffer, requestId: streamRequestIdRef.current!, sequenceId, isFirstPackage: sequenceId === 1, isLastPackage: false, onResult: handleStreamResult, }); } catch { /* NOTE(review): streamErrorRef is never read in the visible code. */ streamErrorRef.current = true; } }, [handleStreamResult]); /* Recorder callback: copy the chunk into the buffer, flush once TARGET_PCM_SAMPLES is reached, otherwise arm a flush timer. */ const onPcmData = useCallback((pcmData: Float32Array) => { if (!isStreamingRef.current || !streamRequestIdRef.current) return; pcmBufferRef.current.push(new Float32Array(pcmData)); const totalSamples = pcmBufferRef.current.reduce((sum, chunk) => sum + chunk.length, 0); if (totalSamples >= TARGET_PCM_SAMPLES) { if (streamTimerRef.current !== null) { clearTimeout(streamTimerRef.current); streamTimerRef.current = null; } void sendBufferedPcmData(); } else if (streamTimerRef.current === null) { streamTimerRef.current = window.setTimeout(async () => { streamTimerRef.current = null; await sendBufferedPcmData(); }, MIN_SEND_INTERVAL); } }, [sendBufferedPcmData]); /* Finish a press: cancel, or send the tail package with isLastPackage and submit the recognized text. */ const doEndVoicePress = useCallback(async () => { voicePressingRef.current = false; recordStartedRef.current = false; setVoicePressing(false); const wasCancelled = voiceCancelledRef.current || isCancelGestureRef.current; isCancelGestureRef.current = false; if (wasCancelled) { 
cancelRecord(); clearStreamState(); return; } // Flush remaining PCM buffer as tail package before stopping recorder let tailAudioData: ArrayBuffer = new ArrayBuffer(0); if (isStreamingRef.current && streamRequestIdRef.current && pcmBufferRef.current.length > 0) { const chunks = [...pcmBufferRef.current]; pcmBufferRef.current = []; const totalLength = chunks.reduce((sum, c) => sum + c.length, 0); if (totalLength > 0) { const merged = new Float32Array(totalLength); let off = 0; for (const c of chunks) { merged.set(c, off); off += c.length; } tailAudioData = convertPcmToWav(merged, SAMPLE_RATE, CHANNELS); } } try { const wavData = await stopRecord(); if (isStreamingRef.current && streamRequestIdRef.current) { if (streamTimerRef.current !== null) { clearTimeout(streamTimerRef.current); streamTimerRef.current = null; } try { streamSequenceIdRef.current += 1; await sendStreamPackage({ audioData: tailAudioData, requestId: streamRequestIdRef.current, sequenceId: streamSequenceIdRef.current, isFirstPackage: streamSequenceIdRef.current === 1, isLastPackage: true, onResult: handleStreamResult, }); } catch { streamErrorRef.current = true; } } const streamedText = (recognizedTextRef.current + partialTextRef.current).trim(); clearStreamState(); if (streamedText) { onSend(streamedText); return; } if (!wavData || wavData.byteLength === 0 || !streamedText) { setVoiceError('未识别到语音内容，请长按说话'); setTimeout(() => setVoiceError(null), 2500); } } catch (err: unknown) { clearStreamState(); setVoiceError((err as Error)?.message || '语音识别失败'); setTimeout(() => setVoiceError(null), 3000); } }, [stopRecord, cancelRecord, clearStreamState, handleStreamResult, onSend]); const beginVoicePress = useCallback(async () => { if (voicePressingRef.current) return; voicePressingRef.current = true; pendingStopRef.current = false; recordStartedRef.current = false; setVoicePressing(true); setVoiceError(null); setRealtimeText(''); voiceCancelledRef.current = false; isCancelGestureRef.current = false; // Init 
streaming state streamRequestIdRef.current = generateUUID(); streamSequenceIdRef.current = 0; isStreamingRef.current = true; pcmBufferRef.current = []; lastSendTimeRef.current = 0; recognizedTextRef.current = ''; partialTextRef.current = ''; streamErrorRef.current = false; try { await startRecord(onPcmData); recordStartedRef.current = true; /* A release that arrived while startRecord was pending is honoured here. */ if (pendingStopRef.current) { pendingStopRef.current = false; doEndVoicePress(); return; } } catch (err: unknown) { voicePressingRef.current = false; pendingStopRef.current = false; recordStartedRef.current = false; setVoicePressing(false); clearStreamState(); setVoiceError((err as Error)?.message || '录音启动失败'); } }, [startRecord, doEndVoicePress, onPcmData, clearStreamState]); /* If the recorder has not started yet, defer the stop to beginVoicePress via pendingStopRef. */ const endVoicePress = useCallback(async () => { if (!voicePressingRef.current) return; if (!recordStartedRef.current) { pendingStopRef.current = true; return; } doEndVoicePress(); }, [doEndVoicePress]); /* Abort the press: call cancelRecord and reset all press and streaming state. */ const cancelVoicePress = useCallback(() => { voiceCancelledRef.current = true; voicePressingRef.current = false; pendingStopRef.current = false; recordStartedRef.current = false; setVoicePressing(false); isCancelGestureRef.current = false; cancelRecord(); clearStreamState(); }, [cancelRecord, clearStreamState]); /* Remember the starting Y so handleTouchMove can measure the upward swipe. */ const handleTouchStart = useCallback((e: React.TouchEvent) => { e.preventDefault(); e.stopPropagation(); touchStartYRef.current = e.touches[0]?.clientY ?? 0; isCancelGestureRef.current = false; void beginVoicePress(); }, [beginVoicePress]); const handleTouchMove = useCallback((e: React.TouchEvent) => { if (!voicePressingRef.current) return; const currentY = e.touches[0]?.clientY ?? 
0; const deltaY = touchStartYRef.current - currentY; /* Positive deltaY means the finger moved up from the start point. */ isCancelGestureRef.current = deltaY > CANCEL_SWIPE_THRESHOLD; }, []); /* Release: cancel when the swipe gesture is armed, otherwise finish the press normally. */ const handleTouchEnd = useCallback((e: React.TouchEvent) => { e.stopPropagation(); if (isCancelGestureRef.current) { cancelVoicePress(); } else { endVoicePress(); } }, [endVoicePress, cancelVoicePress]); /* Mouse handlers do nothing when shouldHandleMouseRef is false. */ const handleMouseDown = useCallback(() => { const suppressed = !shouldHandleMouseRef.current; if (suppressed) return; void beginVoicePress(); }, [beginVoicePress]); const handleMouseUp = useCallback(() => { const suppressed = !shouldHandleMouseRef.current; if (suppressed) return; void endVoicePress(); }, [endVoicePress]); const voiceLabel = voiceError || '按住说话'; const voiceAreaClass = [ styles.voiceArea, voicePressing && (isRecording || recordStartedRef.current) ? styles.voiceRecording : '', voiceError ? styles.voiceError : '', ] .filter(Boolean) .join(' '); const showVoiceOverlay = voicePressing && (isRecording || recordStartedRef.current); return (

{showVoiceOverlay && (

{realtimeText || '正在聆听...'}

{isCancelGestureRef.current ? '松手取消' : '松手发送，上移取消'}

{WAVEFORM_BARS.map((bar, i) => ( ))}

)}

{isVoiceMode ? ( ) : ( setText(e.target.value)} onKeyDown={handleKeyDown} placeholder="请输入问题..." disabled={disabled} enterKeyHint="send" /> )} {sending ? ( ) : isVoiceMode ? ( ) : text.trim() ? ( ) : ( )}

); }; const KeyboardIcon = () => ( ); const MicIcon = () => ( ); const StopIcon = () => ( );