import {
  useCopilotContext,
  useCopilotMessagesContext,
} from "@copilotkit/react-core";
import { gqlToAGUI } from "@copilotkit/runtime-client-gql";
import { Message } from "@copilotkit/shared";
import { MutableRefObject, useEffect, useRef, useState } from "react";

/**
 * Queries the Permissions API for the current microphone permission.
 *
 * @returns `true` only when the permission state is `"granted"`; `false` for
 * any other state or when the query itself fails (the failure is logged).
 */
export const checkMicrophonePermission = async (): Promise<boolean> => {
  try {
    const permissionStatus = await navigator.permissions.query({
      name: "microphone" as PermissionName,
    });
    return permissionStatus.state === "granted";
  } catch (err) {
    console.error("Error checking microphone permission", err);
    // Always resolve to a boolean so callers never receive `undefined`.
    return false;
  }
};

/**
 * Requests an audio input stream and creates a resumed `AudioContext`.
 *
 * @returns The acquired stream and audio context, or `null` when either step
 * fails (the failure is logged).
 */
export const requestMicAndPlaybackPermission = async () => {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const audioContext = new window.AudioContext();
    await audioContext.resume();
    return { stream, audioContext };
  } catch (err) {
    console.error("Error requesting microphone and playback permissions", err);
    return null;
  }
};

/**
 * Starts a `MediaRecorder` on the microphone stream, acquiring the stream and
 * an `AudioContext` first when the refs do not already hold usable ones.
 *
 * @param mediaStreamRef - Holds the active microphone stream (filled in here if empty).
 * @param mediaRecorderRef - Receives the newly created recorder.
 * @param audioContextRef - Holds the playback context (created here if missing or closed).
 * @param recordedChunks - Array that recorded chunks are appended to.
 * @param onStop - Invoked when the recorder fires its `stop` event.
 * @param isCancelled - Optional probe checked after the asynchronous setup; when it
 * returns `true` the stream acquired here is released and no recorder is started.
 * @throws Propagates rejections from `getUserMedia` / `AudioContext.resume`.
 */
const startRecording = async (
  mediaStreamRef: MutableRefObject<MediaStream | null>,
  mediaRecorderRef: MutableRefObject<MediaRecorder | null>,
  audioContextRef: MutableRefObject<AudioContext | null>,
  recordedChunks: Blob[],
  onStop: () => void,
  isCancelled?: () => boolean,
) => {
  // Acquire the stream and the audio context independently: `stopRecording`
  // clears the stream ref after every recording, and tying both to one check
  // would create (and never close) a new AudioContext each time.
  let stream = mediaStreamRef.current;
  if (!stream) {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaStreamRef.current = stream;
  }

  let audioContext = audioContextRef.current;
  if (!audioContext || audioContext.state === "closed") {
    audioContext = new window.AudioContext();
    audioContextRef.current = audioContext;
  }
  if (audioContext.state === "suspended") {
    await audioContext.resume();
  }

  // The caller may have moved on while we were awaiting; do not leave the
  // microphone open with a recorder nobody is going to stop.
  if (isCancelled?.()) {
    stream.getTracks().forEach((track) => track.stop());
    if (mediaStreamRef.current === stream) {
      mediaStreamRef.current = null;
    }
    return;
  }

  const recorder = new MediaRecorder(stream);
  // Attach handlers before starting so no event can be missed.
  recorder.ondataavailable = (event) => {
    if (event.data.size > 0) {
      recordedChunks.push(event.data);
    }
  };
  recorder.onstop = onStop;
  mediaRecorderRef.current = recorder;
  // Emit a chunk roughly every second (timeslice in milliseconds).
  recorder.start(1000);
};

/**
 * Stops the recorder if it is active and, when a stream ref is supplied,
 * stops all of its tracks and clears the ref.
 *
 * @param mediaRecorderRef - Ref to the recorder to stop.
 * @param mediaStreamRef - Optional ref to the microphone stream to release.
 */
const stopRecording = (
  mediaRecorderRef: MutableRefObject<MediaRecorder | null>,
  mediaStreamRef?: MutableRefObject<MediaStream | null>,
) => {
  if (
    mediaRecorderRef.current &&
    mediaRecorderRef.current.state !== "inactive"
  ) {
    mediaRecorderRef.current.stop();
  }

  // Release microphone tracks to free the device
  if (mediaStreamRef?.current) {
    mediaStreamRef.current.getTracks().forEach((track) => track.stop());
    mediaStreamRef.current = null;
  }
};

/**
 * Uploads the recorded audio as multipart form data (field `file`) and returns
 * the `text` property of the JSON response.
 *
 * @param recordedChunks - Recorded blobs, concatenated in order.
 * @param transcribeAudioUrl - Endpoint that receives the POST request.
 * @param mediaType - MIME type used for the blob; its subtype becomes the file extension.
 * @returns The `text` field of the parsed response body.
 * @throws Error when the response status is not OK; also propagates fetch/JSON failures.
 */
const transcribeAudio = async (
  recordedChunks: Blob[],
  transcribeAudioUrl: string,
  mediaType: string = "audio/mp4",
): Promise<string> => {
  // Drop MIME parameters (e.g. ";codecs=opus") so they do not leak into the
  // file name; fall back to "mp4" when no subtype is present.
  const extension = mediaType.split("/")[1]?.split(";")[0]?.trim() || "mp4";
  const completeBlob = new Blob(recordedChunks, { type: mediaType });
  const formData = new FormData();
  formData.append("file", completeBlob, `recording.${extension}`);

  const response = await fetch(transcribeAudioUrl, {
    method: "POST",
    body: formData,
  });

  if (!response.ok) {
    throw new Error(`Error: ${response.statusText}`);
  }

  const transcription = await response.json();
  return transcription.text;
};

/**
 * Fetches synthesized speech for `text` (sent as the `text` query parameter),
 * decodes it and plays it through `audioContext`. Fire-and-forget: failures
 * are logged, never thrown.
 *
 * @param text - Text to speak.
 * @param textToSpeechUrl - Endpoint returning audio data.
 * @param audioContext - Context used for decoding and playback.
 */
const playAudioResponse = (
  text: string,
  textToSpeechUrl: string,
  audioContext: AudioContext,
) => {
  const encodedText = encodeURIComponent(text);
  const url = `${textToSpeechUrl}?text=${encodedText}`;

  fetch(url)
    .then((response) => {
      // Do not try to decode an error page as audio.
      if (!response.ok) {
        throw new Error(`Error: ${response.statusText}`);
      }
      return response.arrayBuffer();
    })
    .then((arrayBuffer) => audioContext.decodeAudioData(arrayBuffer))
    .then((audioBuffer) => {
      const source = audioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(audioContext.destination);
      source.start(0);
    })
    .catch((error) => {
      console.error("Error with decoding audio data", error);
    });
};

/** Phases of the push-to-talk cycle. */
export type PushToTalkState = "idle" | "recording" | "transcribing";

/** Sends the transcribed text; the resolved message's `id` marks where read-back starts. */
export type SendFunction = (text: string) => Promise<Message>;

/**
 * React hook driving a push-to-talk flow: while the state is `"recording"` the
 * microphone is recorded; on `"transcribing"` the audio is uploaded, the
 * transcription is passed to `sendFunction`, and once `inProgress` is `false`
 * the assistant messages after the sent message are played back as speech.
 *
 * @param sendFunction - Sends the transcribed text.
 * @param inProgress - Playback of the reply waits until this is `false`.
 * @param mediaType - MIME type used for the uploaded recording.
 * @returns The current state and its setter; callers drive the cycle by setting the state.
 */
export const usePushToTalk = ({
  sendFunction,
  inProgress,
  mediaType = "audio/mp4",
}: {
  sendFunction: SendFunction;
  inProgress: boolean;
  mediaType?: string;
}) => {
  const [pushToTalkState, setPushToTalkState] =
    useState<PushToTalkState>("idle");
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const recordedChunks = useRef<Blob[]>([]);
  const generalContext = useCopilotContext();
  const messagesContext = useCopilotMessagesContext();
  const context = { ...generalContext, ...messagesContext };
  const [startReadingFromMessageId, setStartReadingFromMessageId] = useState<
    string | null
  >(null);

  useEffect(() => {
    // Flipped by the cleanup so an in-flight startRecording can bail out.
    let cancelled = false;

    // Uploads the recording, returns to idle, then forwards the text.
    const transcribeAndSend = async (): Promise<void> => {
      let transcription: string;
      try {
        const transcribeAudioUrl = context.copilotApiConfig.transcribeAudioUrl;
        if (!transcribeAudioUrl) {
          throw new Error("transcribeAudioUrl is not configured");
        }
        // NOTE(review): the Blob is built immediately from the chunks
        // collected so far; if the recorder was stopped in this same tick its
        // final `dataavailable` event may not have fired yet — confirm
        // whether trailing audio can be lost.
        transcription = await transcribeAudio(
          recordedChunks.current,
          transcribeAudioUrl,
          mediaType,
        );
      } catch (err) {
        console.error("Error transcribing audio", err);
        // Recover instead of staying stuck in "transcribing" with stale chunks.
        recordedChunks.current = [];
        setPushToTalkState("idle");
        return;
      }

      recordedChunks.current = [];
      setPushToTalkState("idle");

      try {
        const message = await sendFunction(transcription);
        if (message) {
          setStartReadingFromMessageId(message.id);
        }
      } catch (err) {
        console.error("Error sending transcribed message", err);
      }
    };

    if (pushToTalkState === "recording") {
      startRecording(
        mediaStreamRef,
        mediaRecorderRef,
        audioContextRef,
        recordedChunks.current,
        () => {
          setPushToTalkState("transcribing");
        },
        () => cancelled,
      ).catch((err) => {
        // e.g. microphone permission denied or no input device available.
        console.error("Error starting recording", err);
        if (!cancelled) {
          setPushToTalkState("idle");
        }
      });
    } else {
      stopRecording(mediaRecorderRef, mediaStreamRef);
      if (pushToTalkState === "transcribing") {
        void transcribeAndSend();
      }
    }

    return () => {
      cancelled = true;
      stopRecording(mediaRecorderRef, mediaStreamRef);
    };
    // NOTE(review): intentionally keyed on the state only, as in the original;
    // other values are read from the render that triggered the transition.
  }, [pushToTalkState]);

  useEffect(() => {
    if (inProgress === false && startReadingFromMessageId) {
      // NOTE(review): the index is looked up in `context.messages` but applied
      // to the converted list — this assumes `gqlToAGUI` preserves message
      // count and order; confirm. A missing id (-1) reads from the start.
      const lastMessageIndex = context.messages.findIndex(
        (message) => message.id === startReadingFromMessageId,
      );

      const aguiMessages = gqlToAGUI(context.messages);
      const messagesAfterLast = aguiMessages
        .slice(lastMessageIndex + 1)
        .filter((message) => message.role === "assistant");

      const text = messagesAfterLast
        .map((message) => message.content)
        .join("\n");

      const textToSpeechUrl = context.copilotApiConfig.textToSpeechUrl;
      const audioContext = audioContextRef.current;
      // Skip playback when there is nothing to say or nothing to play it with.
      if (text.trim() && textToSpeechUrl && audioContext) {
        playAudioResponse(text, textToSpeechUrl, audioContext);
      }

      setStartReadingFromMessageId(null);
    }
  }, [startReadingFromMessageId, inProgress]);

  return { pushToTalkState, setPushToTalkState };
};