{"version":3,"sources":["src/common.speech/SynthesisTurn.ts"],"names":[],"mappings":"AAGA,OAAO,EAIH,iBAAiB,EACpB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAE1E,OAAO,EACH,kBAAkB,EAGlB,qBAAqB,EACxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,kBAAkB,EAAgB,MAAM,6CAA6C,CAAC;AAC/F,OAAO,EAEH,oBAAoB,EAGvB,MAAM,sBAAsB,CAAC;AAE9B,MAAM,WAAW,yBAAyB;IACtC,UAAU,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,uBAAuB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IAC/B,OAAO,EAAE,yBAAyB,CAAC;IACnC,KAAK,EAAE,uBAAuB,CAAC;IAC/B,MAAM,EAAE;QACJ,gBAAgB,EAAE,MAAM,CAAC;KAC5B,CAAC;CACL;AAED,qBAAa,aAAa;IAEtB,IAAW,SAAS,IAAI,MAAM,CAE7B;IAED,IAAW,QAAQ,IAAI,MAAM,CAE5B;IAED,IAAW,QAAQ,CAAC,KAAK,EAAE,MAAM,EAEhC;IAED,IAAW,iBAAiB,IAAI,qBAAqB,CAEpD;IAED,IAAW,iBAAiB,CAAC,MAAM,EAAE,qBAAqB,EAEzD;IAED,IAAW,qBAAqB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEhD;IAED,IAAW,gBAAgB,IAAI,OAAO,CAErC;IAED,IAAW,cAAc,IAAI,OAAO,CAEnC;IAED,IAAW,iBAAiB,IAAI,MAAM,CAErC;IAED,IAAW,qBAAqB,IAAI,MAAM,CAEzC;IAGD,IAAW,aAAa,IAAI,MAAM,CAEjC;IAED,IAAW,aAAa,IAAI,MAAM,CAEjC;IAED,IAAW,eAAe,IAAI,kBAAkB,CAQ/C;IAED,OAAO,CAAC,cAAc,CAAkB;IACxC,OAAO,CAAC,oBAAoB,CAAS;IACrC,OAAO,CAAC,kBAAkB,CAAkB;IAC5C,OAAO,CAAC,oBAAoB,CAAkB;IAC9C,OAAO,CAAC,iBAAiB,CAAa;IACtC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,gBAAgB,CAAiB;IACzC,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,qBAAqB,CAA4B;IACzD,OAAO,CAAC,iBAAiB,CAAc;IACvC,OAAO,CAAC,2BAA2B,CAAc;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,uBAAuB,CAAa;IAC5C,OAAO,CAAC,kBAAkB,CAAa;IACvC,OAAO,CAAC,2BAA2B,CAAa;IAChD,OAAO,CAAC,0BAA0B,CAAS;IAC3C,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,UAAU,CAAU;IAC5B,OAAO,CAAC,wBAAwB,CAAoB;IACpD,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,aAAa,CAAS;;IAUjB,mBAAmB,IAAI,OAAO,CAAC,WAAW,CAAC;IAW3C,6BAA6B,IAAI,OAAO,CAAC,WAAW,CAAC;IAgB3D,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,CAAC,EAAE,iBAAiB,GAAG,IAAI;IAwBlH,oBAAoB,CAAC,gBAAgB,EAAE,MAAM,GAAG,IAAI;IAKpD,eAAe,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAMvC,8BAA8B,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAUxD,wBAAwB,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAKpD,wBAAwB,IAAI,IAAI;IAMhC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAgBtD,oBAAoB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAU7C,mBAAmB,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAIvD,wBAAwB,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAM5D,YAAY,CAAC,QAAQ,EAAE,kBAAkB,GAAG,IAAI;IAI1C,wBAAwB,IAAI,OAAO,CAAC,qBAAqB,CAAC;IAYhE,OAAO,IAAI,IAAI;IAOf,kBAAkB,IAAI,IAAI;IAIjC;;;OAGG;IACI,0BAA0B,IAAI,MAAM;IAM3C,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,oBAAoB,GAAG,IAAI;IAIpD;;;;OAIG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ;IAIvB,OAAO,CAAC,gBAAgB;IAwBxB,OAAO,CAAC,UAAU;YAaJ,sBAAsB;IAWpC;;;;OAIG;IACH,OAAO,CAAC,YAAY;CAGvB","file":"SynthesisTurn.d.ts","sourcesContent":["// Copyright (c) Microsoft Corporation. All rights reserved.\n// Licensed under the MIT license.\n\nimport {\n    createNoDashGuid,\n    Deferred,\n    Events,\n    IAudioDestination\n} from \"../common/Exports.js\";\nimport { AudioOutputFormatImpl } from \"../sdk/Audio/AudioOutputFormat.js\";\nimport { PullAudioOutputStreamImpl } from \"../sdk/Audio/AudioOutputStream.js\";\nimport {\n    PropertyCollection,\n    PropertyId,\n    ResultReason,\n    SpeechSynthesisResult,\n} from \"../sdk/Exports.js\";\nimport { ISynthesisMetadata, MetadataType } from \"./ServiceMessages/SynthesisAudioMetadata.js\";\nimport {\n    ConnectingToSynthesisServiceEvent,\n    SpeechSynthesisEvent,\n    SynthesisStartedEvent,\n    SynthesisTriggeredEvent,\n} from \"./SynthesisEvents.js\";\n\nexport interface ISynthesisResponseContext {\n    serviceTag: string;\n}\n\nexport interface ISynthesisResponseAudio {\n    type: string;\n    streamId: string;\n}\n\nexport interface ISynthesisResponse {\n    context: ISynthesisResponseContext;\n    audio: ISynthesisResponseAudio;\n    webrtc: {\n        connectionString: string;\n    };\n}\n\nexport class SynthesisTurn {\n\n    public get requestId(): string {\n        return this.privRequestId;\n    }\n\n    public get streamId(): string {\n        return this.privStreamId;\n    }\n\n    public set streamId(value: string) {\n        this.privStreamId = value;\n    }\n\n    public get audioOutputFormat(): AudioOutputFormatImpl {\n        return this.privAudioOutputFormat;\n    }\n\n    public set audioOutputFormat(format: AudioOutputFormatImpl) {\n        this.privAudioOutputFormat = format;\n    }\n\n    public get turnCompletionPromise(): Promise<void> {\n        return this.privTurnDeferral.promise;\n    }\n\n    public get isSynthesisEnded(): boolean {\n        return this.privIsSynthesisEnded;\n    }\n\n    public get isSynthesizing(): boolean {\n        return this.privIsSynthesizing;\n    }\n\n    public get currentTextOffset(): number {\n        return this.privTextOffset;\n    }\n\n    public get currentSentenceOffset(): number {\n        return this.privSentenceOffset;\n    }\n\n    // The number of bytes received for current turn\n    public get bytesReceived(): number {\n        return this.privBytesReceived;\n    }\n\n    public get audioDuration(): number {\n        return this.privAudioDuration;\n    }\n\n    public get extraProperties(): PropertyCollection {\n        if (!!this.privWebRTCSDP) {\n            const properties = new PropertyCollection();\n            properties.setProperty(PropertyId.TalkingAvatarService_WebRTC_SDP, this.privWebRTCSDP);\n            return properties;\n        }\n\n        return undefined;\n    }\n\n    private privIsDisposed: boolean = false;\n    private privAuthFetchEventId: string;\n    private privIsSynthesizing: boolean = false;\n    private privIsSynthesisEnded: boolean = false;\n    private privBytesReceived: number = 0;\n    private privRequestId: string;\n    private privStreamId: string;\n    private privTurnDeferral: Deferred<void>;\n    private privInTurn: boolean = false;\n    private privAudioOutputFormat: AudioOutputFormatImpl;\n    private privAudioOutputStream: PullAudioOutputStreamImpl;\n    private privReceivedAudio: ArrayBuffer;\n    private privReceivedAudioWithHeader: ArrayBuffer;\n    private privTextOffset: number = 0;\n    private privNextSearchTextIndex: number = 0;\n    private privSentenceOffset: number = 0;\n    private privNextSearchSentenceIndex: number = 0;\n    private privPartialVisemeAnimation: string;\n    private privRawText: string;\n    private privIsSSML: boolean;\n    private privTurnAudioDestination: IAudioDestination;\n    private privAudioDuration: number;\n    private privWebRTCSDP: string;\n\n    public constructor() {\n        this.privRequestId = createNoDashGuid();\n        this.privTurnDeferral = new Deferred<void>();\n\n        // We're not in a turn, so resolve.\n        this.privTurnDeferral.resolve();\n    }\n\n    public async getAllReceivedAudio(): Promise<ArrayBuffer> {\n        if (!!this.privReceivedAudio) {\n            return Promise.resolve(this.privReceivedAudio);\n        }\n        if (!this.privIsSynthesisEnded) {\n            return null;\n        }\n        await this.readAllAudioFromStream();\n        return Promise.resolve(this.privReceivedAudio);\n    }\n\n    public async getAllReceivedAudioWithHeader(): Promise<ArrayBuffer> {\n        if (!!this.privReceivedAudioWithHeader) {\n            return this.privReceivedAudioWithHeader;\n        }\n        if (!this.privIsSynthesisEnded) {\n            return null;\n        }\n        if (this.audioOutputFormat.hasHeader) {\n            const audio: ArrayBuffer = await this.getAllReceivedAudio();\n            this.privReceivedAudioWithHeader = this.audioOutputFormat.addHeader(audio);\n            return this.privReceivedAudioWithHeader;\n        } else {\n            return this.getAllReceivedAudio();\n        }\n    }\n\n    public startNewSynthesis(requestId: string, rawText: string, isSSML: boolean, audioDestination?: IAudioDestination): void {\n        this.privIsSynthesisEnded = false;\n        this.privIsSynthesizing = true;\n        this.privRequestId = requestId;\n        this.privRawText = rawText;\n        this.privIsSSML = isSSML;\n        this.privAudioOutputStream = new PullAudioOutputStreamImpl();\n        this.privAudioOutputStream.format = this.privAudioOutputFormat;\n        this.privReceivedAudio = null;\n        this.privReceivedAudioWithHeader = null;\n        this.privBytesReceived = 0;\n        this.privTextOffset = 0;\n        this.privNextSearchTextIndex = 0;\n        this.privSentenceOffset = 0;\n        this.privNextSearchSentenceIndex = 0;\n        this.privPartialVisemeAnimation = \"\";\n        this.privWebRTCSDP = \"\";\n        if (audioDestination !== undefined) {\n            this.privTurnAudioDestination = audioDestination;\n            this.privTurnAudioDestination.format = this.privAudioOutputFormat;\n        }\n        this.onEvent(new SynthesisTriggeredEvent(this.requestId, undefined, audioDestination === undefined ? undefined : audioDestination.id()));\n    }\n\n    public onPreConnectionStart(authFetchEventId: string): void {\n        this.privAuthFetchEventId = authFetchEventId;\n        this.onEvent(new ConnectingToSynthesisServiceEvent(this.privRequestId, this.privAuthFetchEventId));\n    }\n\n    public onAuthCompleted(isError: boolean): void {\n        if (isError) {\n            this.onComplete();\n        }\n    }\n\n    public onConnectionEstablishCompleted(statusCode: number): void {\n        if (statusCode === 200) {\n            this.onEvent(new SynthesisStartedEvent(this.requestId, this.privAuthFetchEventId));\n            this.privBytesReceived = 0;\n            return;\n        } else if (statusCode === 403) {\n            this.onComplete();\n        }\n    }\n\n    public onServiceResponseMessage(responseJson: string): void {\n        const response: ISynthesisResponse = JSON.parse(responseJson) as ISynthesisResponse;\n        this.streamId = response.audio.streamId;\n    }\n\n    public onServiceTurnEndResponse(): void {\n        this.privInTurn = false;\n        this.privTurnDeferral.resolve();\n        this.onComplete();\n    }\n\n    public onServiceTurnStartResponse(responseJson: string): void {\n        if (!!this.privTurnDeferral && !!this.privInTurn) {\n            // What? How are we starting a turn with another not done?\n            this.privTurnDeferral.reject(\"Another turn started before current completed.\");\n            // Avoid UnhandledPromiseRejection if privTurnDeferral is not being awaited\n            // eslint-disable-next-line @typescript-eslint/no-empty-function\n            this.privTurnDeferral.promise.then().catch((): void => { });\n        }\n        this.privInTurn = true;\n        this.privTurnDeferral = new Deferred<void>();\n        const response: ISynthesisResponse = JSON.parse(responseJson) as ISynthesisResponse;\n        if (!!response.webrtc) {\n            this.privWebRTCSDP = response.webrtc.connectionString;\n        }\n    }\n\n    public onAudioChunkReceived(data: ArrayBuffer): void {\n        if (this.isSynthesizing) {\n            this.privAudioOutputStream.write(data);\n            this.privBytesReceived += data.byteLength;\n            if (this.privTurnAudioDestination !== undefined) {\n                this.privTurnAudioDestination.write(data);\n            }\n        }\n    }\n\n    public onTextBoundaryEvent(metadata: ISynthesisMetadata): void {\n        this.updateTextOffset(metadata.Data.text.Text, metadata.Type);\n    }\n\n    public onVisemeMetadataReceived(metadata: ISynthesisMetadata): void {\n        if (metadata.Data.AnimationChunk !== undefined) {\n            this.privPartialVisemeAnimation += metadata.Data.AnimationChunk;\n        }\n    }\n\n    public onSessionEnd(metadata: ISynthesisMetadata): void {\n        this.privAudioDuration = metadata.Data.Offset;\n    }\n\n    public async constructSynthesisResult(): Promise<SpeechSynthesisResult> {\n        const audioBuffer: ArrayBuffer = await this.getAllReceivedAudioWithHeader();\n        return new SpeechSynthesisResult(\n            this.requestId,\n            ResultReason.SynthesizingAudioCompleted,\n            audioBuffer,\n            undefined,\n            this.extraProperties,\n            this.audioDuration\n        );\n    }\n\n    public dispose(): void {\n        if (!this.privIsDisposed) {\n            // we should have completed by now. If we did not its an unknown error.\n            this.privIsDisposed = true;\n        }\n    }\n\n    public onStopSynthesizing(): void {\n        this.onComplete();\n    }\n\n    /**\n     * Gets the viseme animation string (merged from animation chunk), and clears the internal\n     * partial animation.\n     */\n    public getAndClearVisemeAnimation(): string {\n        const animation: string = this.privPartialVisemeAnimation;\n        this.privPartialVisemeAnimation = \"\";\n        return animation;\n    }\n\n    protected onEvent(event: SpeechSynthesisEvent): void {\n        Events.instance.onEvent(event);\n    }\n\n    /**\n     * Check if the text is an XML(SSML) tag\n     * @param text\n     * @private\n     */\n    private static isXmlTag(text: string): boolean {\n        return text.length >= 2 && text[0] === \"<\" && text[text.length - 1] === \">\";\n    }\n\n    private updateTextOffset(text: string, type: MetadataType): void {\n        if (type === MetadataType.WordBoundary) {\n            this.privTextOffset = this.privRawText.indexOf(text, this.privNextSearchTextIndex);\n            if (this.privTextOffset >= 0) {\n                this.privNextSearchTextIndex = this.privTextOffset + text.length;\n                if (this.privIsSSML) {\n                    if (this.withinXmlTag(this.privTextOffset) && !SynthesisTurn.isXmlTag(text)) {\n                        this.updateTextOffset(text, type);\n                    }\n                }\n            }\n        } else {\n            this.privSentenceOffset = this.privRawText.indexOf(text, this.privNextSearchSentenceIndex);\n            if (this.privSentenceOffset >= 0) {\n                this.privNextSearchSentenceIndex = this.privSentenceOffset + text.length;\n                if (this.privIsSSML) {\n                    if (this.withinXmlTag(this.privSentenceOffset) && !SynthesisTurn.isXmlTag(text)) {\n                        this.updateTextOffset(text, type);\n                    }\n                }\n            }\n        }\n    }\n\n    private onComplete(): void {\n        if (this.privIsSynthesizing) {\n            this.privIsSynthesizing = false;\n            this.privIsSynthesisEnded = true;\n            this.privAudioOutputStream.close();\n            this.privInTurn = false;\n            if (this.privTurnAudioDestination !== undefined) {\n                this.privTurnAudioDestination.close();\n                this.privTurnAudioDestination = undefined;\n            }\n        }\n    }\n\n    private async readAllAudioFromStream(): Promise<void> {\n        if (this.privIsSynthesisEnded) {\n            this.privReceivedAudio = new ArrayBuffer(this.bytesReceived);\n            try {\n                await this.privAudioOutputStream.read(this.privReceivedAudio);\n            } catch (e) {\n                this.privReceivedAudio = new ArrayBuffer(0);\n            }\n        }\n    }\n\n    /**\n     * Check if current idx is in XML(SSML) tag\n     * @param idx\n     * @private\n     */\n    private withinXmlTag(idx: number): boolean {\n        return this.privRawText.indexOf(\"<\", idx + 1) > this.privRawText.indexOf(\">\", idx + 1);\n    }\n}\n"]}