/** Relative endpoint that audio packages are POSTed to. */
const ASR_URL = '/mobile/jd-asr/asr';
/** Value sent in the `Application-Id` request header. */
const APPLICATION_ID = 'db5c6297-4d8e-44af-ba77-f36154447dae';
/** Value sent in the `Asr-Protocol` request header. */
const ASR_PROTOCOL = 3;
const NET_STATE = 2; // WIFI
const APPLICATOR = 1; // external business party

/** JSON body returned by the ASR endpoint. */
interface ASRResponse {
  request_id: string;
  /** `0` means success; any other value is treated as an error code. */
  status: number;
  index: number;
  message: string;
  content?: Array<{
    text: string;
    text_type?: 'final' | 'partial';
  }>;
}

/** Callback invoked once per non-empty recognized text item of a response. */
export type OnRecognitionResult = (result: {
  text: string;
  textType: 'partial' | 'final';
  isComplete: boolean;
}) => void;

/** Arguments accepted by {@link sendStreamPackage}. */
export interface StreamPackageParams {
  audioData: ArrayBuffer;
  requestId: string;
  /** Positive package number; it is negated on the wire for the last package. */
  sequenceId: number;
  isFirstPackage?: boolean;
  isLastPackage?: boolean;
  onResult?: OnRecognitionResult;
}

/** Messages for the status codes this client knows about (user-facing, kept verbatim). */
const ERROR_MESSAGES: Record<number, string> = {
  0: '成功',
  31001: '语音数据为空',
  31002: '语音数据过长,一次请求音频不能超过一分钟',
  31003: '请求参数出错',
  31004: '音频头部格式解析错误',
  31005: '音频采样率或通道数错误',
  31006: '音频格式错误',
  32001: '服务内部音频解码错误',
  32002: '服务内部模块错误',
  32003: '服务内部连接错误',
  33001: '服务内部模块错误',
  33002: '语音识别解码失败',
};

/**
 * Produce a version-4 style UUID string.
 *
 * Uses `crypto.randomUUID()` when the runtime exposes it and otherwise falls
 * back to a `Math.random()`-based template fill.
 *
 * @returns A 36-character lowercase UUID.
 */
export function generateUUID(): string {
  if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {
    return crypto.randomUUID();
  }
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
    const r = (Math.random() * 16) | 0;
    // 'y' positions are forced into the 8..b range (UUID variant bits).
    const v = c === 'x' ? r : (r & 0x3) | 0x8;
    return v.toString(16);
  });
}

/**
 * Derive the `platform` string (`<os>&<device>&<version>`) from the user agent.
 *
 * @returns `'Web&Browser&Unknown'` when `navigator` is unavailable or the user
 *   agent matches none of the known platforms.
 */
function getPlatformInfo(): string {
  if (typeof navigator === 'undefined') return 'Web&Browser&Unknown';

  const ua = navigator.userAgent;

  // iOS must be tested before macOS: iOS user agents also contain "like Mac OS X".
  if (ua.includes('iPhone') || ua.includes('iPad')) {
    const m = ua.match(/OS (\d+[._]\d+)/);
    const ver = m ? m[1].replace(/_/g, '.') : 'Unknown';
    const dev = ua.includes('iPad') ? 'iPad' : 'iPhone';
    return `iOS&${dev}&${ver}`;
  }

  if (ua.includes('Android')) {
    // The minor version is optional: modern user agents report e.g. "Android 10".
    const m = ua.match(/Android (\d+(?:\.\d+)?)/);
    return `Android&Mobile&${m ? m[1] : 'Unknown'}`;
  }

  if (ua.includes('Mac OS')) {
    // The patch component is optional: some browsers report only "Mac OS X 10.15".
    const m = ua.match(/Mac OS X (\d+[._]\d+(?:[._]\d+)?)/);
    return `macOS&Mac&${m ? m[1].replace(/_/g, '.') : 'Unknown'}`;
  }

  if (ua.includes('Windows')) {
    const m = ua.match(/Windows NT (\d+\.\d+)/);
    return `Windows&PC&${m ? m[1] : 'Unknown'}`;
  }

  return 'Web&Browser&Unknown';
}

/**
 * Serialize the `Property` header value.
 *
 * Streaming and single-shot requests differ only in `longspeech`,
 * `partial_result` and `punc_partial_process`; key order is kept stable so the
 * serialized string is the same as the hand-written literals it replaces.
 *
 * @param streaming - `true` for multi-package streaming, `false` for one-shot upload.
 */
function buildProperty(streaming: boolean): string {
  const partialFlag = streaming ? 1 : -1;
  return JSON.stringify({
    autoend: false,
    platform: getPlatformInfo(),
    version: '1.0.0',
    longspeech: streaming,
    encode: {
      channel: 1,
      format: 'wav',
      sample_rate: 16000,
      post_process: -1,
      partial_result: partialFlag,
      punc_end_process: 1,
      punc_partial_process: partialFlag,
      dynamic_lm: 1,
    },
  });
}

/** Serialize the `Property` header value used by streaming requests. */
function buildStreamingProperty(): string {
  return buildProperty(true);
}

/**
 * Assemble the request headers shared by every ASR call.
 *
 * @param requestId - Value of the `Request-Id` header.
 * @param sequenceId - Already-formatted value of the `Sequence-Id` header.
 * @param property - Optional `Property` header value; omitted when `undefined`.
 */
function buildHeaders(
  requestId: string,
  sequenceId: string,
  property?: string,
): Record<string, string> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/octet-stream',
    Domain: 'general',
    'Application-Id': APPLICATION_ID,
    'Request-Id': requestId,
    'Sequence-Id': sequenceId,
    'Asr-Protocol': String(ASR_PROTOCOL),
    'Net-State': String(NET_STATE),
    Applicator: String(APPLICATOR),
  };
  if (property !== undefined) {
    headers.Property = property;
  }
  return headers;
}

/**
 * POST one audio payload and return the parsed response.
 *
 * @throws Error when the HTTP status is not OK, or when the response body
 *   carries a non-zero `status`.
 */
async function postAudio(
  headers: Record<string, string>,
  audioData: ArrayBuffer,
): Promise<ASRResponse> {
  const response = await fetch(ASR_URL, {
    method: 'POST',
    headers,
    body: audioData,
  });

  if (!response.ok) {
    throw new Error(`语音识别请求失败 (HTTP ${response.status})`);
  }

  // NOTE(review): the payload is trusted to match ASRResponse; it is not validated.
  const result: ASRResponse = await response.json();

  if (result.status !== 0) {
    // `||` (not `??`) so that an empty server message also falls through.
    const msg = ERROR_MESSAGES[result.status] || result.message || '未知错误';
    throw new Error(`语音识别失败: ${msg}`);
  }

  return result;
}

/**
 * Send a single streaming audio package to JD ASR.
 * Sequence-Id protocol: first=1, middle=N, last=-N.
 * Property header is included on first and last packages.
 *
 * An empty `audioData` on a package that is not the last one is not sent at
 * all; a synthetic success response with empty `content` is returned instead.
 *
 * @param params - Package payload, identifiers, position flags and optional
 *   result callback. `onResult` is called once per response item whose `text`
 *   is non-empty.
 * @returns The parsed server response (or the synthetic one described above).
 * @throws Error on a non-OK HTTP status or a non-zero response `status`.
 */
export async function sendStreamPackage(params: StreamPackageParams): Promise<ASRResponse> {
  const {
    audioData,
    requestId,
    sequenceId,
    isFirstPackage = false,
    isLastPackage = false,
    onResult,
  } = params;

  if ((!audioData || audioData.byteLength === 0) && !isLastPackage) {
    return { request_id: requestId, status: 0, index: 0, message: '成功', content: [] };
  }

  const headers = buildHeaders(
    requestId,
    isLastPackage ? `-${sequenceId}` : String(sequenceId),
    isFirstPackage || isLastPackage ? buildStreamingProperty() : undefined,
  );

  const result = await postAudio(headers, audioData);

  if (onResult && result.content && result.content.length > 0) {
    for (const item of result.content) {
      if (!item.text) continue;
      // When the server omits text_type, infer it from the package position.
      const textType: 'partial' | 'final' =
        item.text_type || (isLastPackage ? 'final' : 'partial');
      onResult({
        text: item.text,
        textType,
        isComplete: isLastPackage && textType === 'final',
      });
    }
  }

  return result;
}

/**
 * Send WAV audio data to JD ASR API and return the recognized text.
 * Uses single-package upload (Sequence-Id: -1).
 *
 * @param audioData - Complete audio payload; empty input short-circuits to `''`
 *   without issuing a request.
 * @returns The `text` of every response item concatenated, or `''` when the
 *   response has no content.
 * @throws Error on a non-OK HTTP status or a non-zero response `status`.
 */
export async function recognizeSpeech(audioData: ArrayBuffer): Promise<string> {
  if (!audioData || audioData.byteLength === 0) {
    return '';
  }

  const headers = buildHeaders(generateUUID(), '-1', buildProperty(false));
  const result = await postAudio(headers, audioData);

  if (result.content && result.content.length > 0) {
    return result.content.map((c) => c.text).join('');
  }
  return '';
}