/* tslint:disable */
/* eslint-disable */

/**
 * The result of analyzing a signal with an `Analyzer`.
 */
export class AnalysisResult {
    private constructor();
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Measure of interference from additional speakers present in audio.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly interferingSpeech: number;
    /**
     * Measure of interfering speech content from media devices,
     * e.g. from TVs, radios, phones or other devices.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly mediaSpeech: number;
    /**
     * Measure of ambient or environmental noise.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly noise: number;
    /**
     * Measure of audio dropouts or discontinuities in the stream,
     * e.g. from packet loss, frame erasure, jitter or CPU overload.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly packetLoss: number;
    /**
     * Headline audio score.
     *
     * Predicts likelihood of failure of downstream models including speech-to-text,
     * voice activity detection or turn-taking or speech-to-speech models.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly riskScore: number;
    /**
     * Measure of speaker loudness.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly speakerLoudness: number;
    /**
     * Measure of speaker distance and reverberance.
     * Lower indicates less problematic audio.
     *
     * **Range:** 0.0 to 1.0
     */
    readonly speakerReverb: number;
}

/**
 * Analyzer for non-real-time analysis.
 *
 * The buffering methods (`bufferInterleaved`, `bufferSequential`, `bufferPlanar`) are
 * designed to be placed in the audio thread, buffering audio chunks for later analysis.
 *
 * `analyze` is designed to be run separately. Analysis models are computationally
 * expensive and cannot run in the audio thread.
 *
 * The analyzer retains a span of audio determined by the analysis model. As more samples
 * get buffered, old audio is discarded.
 */
export class Analyzer {
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Analyze the buffered signal.
     *
     * The analyzer runs a forward-pass of the analysis model with a fixed length of audio,
     * determined by the model.
     *
     * If this function is called before that length of audio has been buffered (via
     * `bufferInterleaved`/`bufferSequential`/`bufferPlanar`), the analyzer will run the
     * analysis with silence (zeros) in the tail of the input.
     *
     * # Note
     * When buffering, all channels are mixed down to mono. To analyze channels
     * independently, create separate analyzers.
     *
     * # Returns
     * An `AnalysisResult` containing the analysis scores.
     *
     * # Throws
     * - `Error` (`EnhancementNotAllowed`): SDK key was not authorized or process failed to report usage. Check if you have internet connection.
     */
    analyze(): AnalysisResult;
    /**
     * Buffers audio with interleaved channels in a single buffer for later analysis.
     *
     * **Memory Layout:**
     * - Single contiguous buffer with channels interleaved
     * - Buffer size: `num_channels` * `num_frames` floats
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch1_f0, ch0_f1, ch1_f1, ch0_f2, ch1_f2, ch0_f3, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Single buffer containing interleaved audio data of size `num_channels` * `num_frames`.
     * - `num_channels`: Number of channels (must match initialization).
     * - `num_frames`: Number of samples per channel (must match initialization value, or if `allow_variable_frames` was enabled, must be ≤ initialization value).
     *
     * # Note
     * Input audio is read-only and is not modified.
     *
     * All channels are mixed and buffered in mono. To analyze channels
     * independently, create separate analyzers.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Analyzer has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     */
    bufferInterleaved(audio: Float32Array, num_channels: number, num_frames: number): void;
    /**
     * Buffers audio with separate buffers for each channel (planar layout) for later analysis.
     *
     * **Memory Layout:**
     * - The input `audio` slice is treated as a flat array containing concatenated channel buffers.
     * - Each channel must have `num_frames` samples.
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch0_f1, ch0_f2, ch0_f3, ch1_f0, ch1_f1, ch1_f2, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Flat buffer of `num_channels` per-channel sub-buffers, each `num_frames` floats long.
     * - `num_channels`: Number of channels (must match initialization).
     *
     * # Note
     * Input audio is read-only and is not modified.
     *
     * All channels are mixed and buffered in mono. To analyze channels
     * independently, create separate analyzers.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Analyzer has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     */
    bufferPlanar(audio: Float32Array, num_channels: number): void;
    /**
     * Buffers audio with sequential channel data in a single buffer for later analysis.
     *
     * **Memory Layout:**
     * - Single contiguous buffer with all samples for each channel stored sequentially
     * - Buffer size: `num_channels` * `num_frames` floats
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch0_f1, ch0_f2, ch0_f3, ch1_f0, ch1_f1, ch1_f2, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Single buffer containing sequential audio data of size `num_channels` * `num_frames`.
     * - `num_channels`: Number of channels (must match initialization).
     * - `num_frames`: Number of samples per channel (must match initialization value, or if `allow_variable_frames` was enabled, must be ≤ initialization value).
     *
     * # Note
     * Input audio is read-only and is not modified.
     *
     * All channels are mixed and buffered in mono. To analyze channels
     * independently, create separate analyzers.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Analyzer has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     */
    bufferSequential(audio: Float32Array, num_channels: number, num_frames: number): void;
    /**
     * Configures the analyzer for a specific audio format.
     *
     * This function must be called before buffering any audio.
     * For the lowest delay use the sample rate and frame size returned by
     * `Model.getOptimalSampleRate` and `Model.getOptimalNumFrames`.
     *
     * # Parameters
     * - `sample_rate`: Audio sample rate in Hz (8000 - 192000).
     * - `num_channels`: Number of audio channels (1 for mono, 2 for stereo, etc.).
     * - `num_frames`: Number of samples per channel in each buffer call.
     * - `allow_variable_frames`: Allows varying frame counts per buffer call (up to `num_frames`), but increases delay.
     *
     * # Note
     * All channels are mixed to mono for buffering. To analyze channels
     * independently, create separate analyzers.
     *
     * # Throws
     * - `Error` (`AudioConfigUnsupported`): Configuration is not supported.
     */
    initialize(sample_rate: number, num_channels: number, num_frames: number, allow_variable_frames: boolean): void;
    /**
     * Creates a new analyzer for an analysis model.
     *
     * # Parameters
     * - `model`: Model to analyze with.
     * - `license_key`: String containing your license key.
     *
     * # Throws
     * - `Error` (`ModelTypeUnsupported`): `model` is not an analysis model.
     * - `Error` (`LicenseFormatInvalid`): License key format is incorrect.
     * - `Error` (`LicenseVersionUnsupported`): License version is not compatible with the SDK version.
     * - `Error` (`LicenseExpired`): License key has expired.
     */
    constructor(model: Model, license_key: string);
    /**
     * Clears all internal state and buffered audio.
     *
     * Call this when the audio stream is interrupted or when seeking
     * to prevent mispredictions from previous audio content.
     *
     * The analyzer stays initialized to the configured settings.
     */
    reset(): void;
    /**
     * Replaces the bearer token on a running analyzer.
     *
     * Use this when your license key is a JWT and needs to be refreshed
     * before it expires. Calling this with a renewed token lets you stay authenticated
     * without tearing down and recreating the analyzer: the analyzer handle stays valid,
     * buffered spectra stay available, and the new token is used for all
     * subsequent authentication against the ai-coustics backend.
     *
     * In-place updates are only supported when both the originally configured key and
     * the new token are JWTs. Other license types cannot be swapped in this way.
     *
     * On any error the call is a no-op: the previously active token remains in use and
     * the telemetry session is unaffected (no backoff, no interruption to processing).
     *
     * On success the swap is applied immediately and is **not** gated on backend
     * acceptance. The token is validated locally for format only; if the backend later
     * rejects it (e.g. expired or revoked), the SDK retries it under backoff rather than
     * rolling back to the prior token, and analysis calls may be rejected if no
     * accepted token arrives in time. Supplying a known-good token via this call
     * during that window recovers the session.
     *
     * # Parameters
     * - `token`: New JWT.
     *
     * # Throws
     * - `Error` (`LicenseFormatInvalid`): New token could not be parsed; the existing token stays in use.
     * - `Error` (`TokenUpdateUnsupported`): The original or new key does not support in-place updates; the existing token stays in use.
     */
    updateBearerToken(token: string): void;
}

/**
 * Loaded model data.
 */
export class Model {
    private constructor();
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Creates a new model instance from a memory buffer.
     *
     * A single model instance can be used to create multiple processors.
     *
     * # Note
     * Processor instances retain a shared reference to the model data.
     * It is safe to release the model handle after creating the desired processors.
     * The memory used by the model will be automatically freed after all processors
     * using that model have been released.
     *
     * # Parameters
     * - `bytes`: The contents of the `.aicmodel` file as a Uint8Array.
     *
     * # Throws
     * - `Error`: Model buffer is invalid or corrupted.
     * - `Error`: Model version is not compatible with the SDK version.
     */
    static fromBytes(bytes: Uint8Array): Model;
    /**
     * Returns the model identifier.
     *
     * The returned string is UTF-8 encoded.
     */
    getId(): string;
    /**
     * Retrieves the optimal number of frames for the model at a given sample rate.
     *
     * Using the optimal number of frames minimizes latency by avoiding internal buffering.
     *
     * **When you use a different frame count than the optimal value, the processor will
     * introduce additional buffering latency on top of its base processing delay.**
     *
     * The optimal frame count varies based on the sample rate. Each model operates on a
     * fixed time window length, so the required number of frames changes with sample rate.
     * For example, a model designed for 10 ms processing windows requires 480 frames at
     * 48 kHz, but only 160 frames at 16 kHz to capture the same duration of audio.
     *
     * Call this function with your intended sample rate before calling `Processor.initialize`
     * to determine the best frame count for minimal latency.
     *
     * # Parameters
     * - `sample_rate`: The sample rate in Hz for which to calculate the optimal frame count.
     *
     * # Returns
     * The optimal frame count.
     */
    getOptimalNumFrames(sample_rate: number): number;
    /**
     * Retrieves the optimal sample rate of the model.
     *
     * Each model is optimized for a specific sample rate, which determines the frequency
     * range of the enhanced audio output. While you can process audio at any sample rate,
     * understanding the model's native rate helps predict the enhancement quality.
     *
     * **How sample rate affects enhancement:**
     *
     * - Models trained at lower sample rates (e.g., 8 kHz) can only enhance frequencies
     *   up to their Nyquist limit (4 kHz for 8 kHz models)
     * - When processing higher sample rate input (e.g., 48 kHz) with a lower-rate model,
     *   only the lower frequency components will be enhanced
     *
     * **Enhancement blending:**
     *
     * When enhancement strength is set below 1.0, the enhanced signal is blended with
     * the original, maintaining the full frequency spectrum of your input while adding
     * the model's noise reduction capabilities to the lower frequencies.
     *
     * **Sample rate and optimal frames relationship:**
     *
     * When using different sample rates than the model's native rate, the optimal number
     * of frames (returned by `getOptimalNumFrames`) will change. The processor's output
     * delay remains constant regardless of sample rate as long as you use the optimal frame
     * count for that rate.
     *
     * **Recommendation:**
     *
     * For maximum enhancement quality across the full frequency spectrum, match your
     * input sample rate to the model's native rate when possible.
     *
     * # Returns
     * The optimal sample rate in Hz.
     */
    getOptimalSampleRate(): number;
}

/**
 * Audio processor for speech enhancement.
 *
 * Multiple processors can be created to process different audio streams simultaneously
 * or to switch between different enhancement algorithms during runtime.
 */
export class Processor {
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Creates a processor context handle for control APIs.
     *
     * Use the returned handle to reset the processor, set/get parameters, query
     * output delay, and update the bearer token.
     */
    getProcessorContext(): ProcessorContext;
    /**
     * Creates a VAD context handle for control APIs.
     *
     * The voice activity detection works automatically using the enhanced audio output
     * of this processor. All handles created from a given processor reference the same
     * VAD instance.
     *
     * **Important:** If the backing processor is destroyed, the VAD instance will stop
     * producing new data. It is safe to destroy the processor without destroying the VAD.
     */
    getVadContext(): VadContext;
    /**
     * Configures the processor for a specific audio format.
     *
     * This function must be called before processing any audio.
     * For the lowest delay use the sample rate and frame size returned by
     * `Model.getOptimalSampleRate` and `Model.getOptimalNumFrames`.
     *
     * # Parameters
     * - `sample_rate`: Audio sample rate in Hz (8000 - 192000).
     * - `num_channels`: Number of audio channels (1 for mono, 2 for stereo, etc.).
     * - `num_frames`: Number of samples per channel in each process call.
     * - `allow_variable_frames`: Allows varying frame counts per process call (up to `num_frames`), but increases delay.
     *
     * # Note
     * All channels are mixed to mono for processing. To process channels
     * independently, create separate processor instances.
     *
     * # Throws
     * - `Error` (`AudioConfigUnsupported`): Configuration is not supported.
     */
    initialize(sample_rate: number, num_channels: number, num_frames: number, allow_variable_frames: boolean): void;
    /**
     * Creates a new audio processor instance.
     *
     * Multiple processors can be created to process different audio streams simultaneously
     * or to switch between different enhancement algorithms during runtime.
     *
     * # Parameters
     * - `model`: Model instance to process with.
     * - `license_key`: String containing your license key.
     *
     * # Throws
     * - `Error` (`LicenseFormatInvalid`): License key format is incorrect.
     * - `Error` (`LicenseVersionUnsupported`): License version is not compatible with the SDK version.
     * - `Error` (`LicenseExpired`): License key has expired.
     * - `Error` (`ModelTypeUnsupported`): The model type is not supported by the processor.
     */
    constructor(model: Model, license_key: string);
    /**
     * Processes audio with interleaved channels in a single buffer.
     *
     * Enhances speech in the provided audio buffer in-place.
     *
     * **Memory Layout:**
     * - Single contiguous buffer with channels interleaved
     * - Buffer size: `num_channels` * `num_frames` floats
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch1_f0, ch0_f1, ch1_f1, ch0_f2, ch1_f2, ch0_f3, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Single buffer containing interleaved audio data of size `num_channels` * `num_frames`.
     * - `num_channels`: Number of channels (must match initialization).
     * - `num_frames`: Number of samples per channel (must match initialization value, or if `allow_variable_frames` was enabled, must be ≤ initialization value).
     *
     * # Note
     * All channels are mixed to mono for processing. To process channels
     * independently, create separate processor instances.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Processor has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     * - `Error` (`EnhancementNotAllowed`): SDK key was not authorized or process failed to report usage. Check if you have internet connection.
     */
    processInterleaved(audio: Float32Array, num_channels: number, num_frames: number): void;
    /**
     * Processes audio with separate buffers for each channel (planar layout).
     *
     * Enhances speech in the provided audio buffers in-place.
     *
     * **Memory Layout:**
     * - The input `audio` slice is treated as a flat array containing concatenated channel buffers.
     * - Each channel must have `num_frames` samples.
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch0_f1, ch0_f2, ch0_f3, ch1_f0, ch1_f1, ch1_f2, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Flat buffer of `num_channels` per-channel sub-buffers, each `num_frames` floats long.
     * - `num_channels`: Number of channels (must match initialization).
     *
     * # Note
     * All channels are mixed to mono for processing. To process channels
     * independently, create separate processor instances.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Processor has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     * - `Error` (`EnhancementNotAllowed`): SDK key was not authorized or process failed to report usage. Check if you have internet connection.
     */
    processPlanar(audio: Float32Array, num_channels: number): void;
    /**
     * Processes audio with sequential channel data in a single buffer.
     *
     * Enhances speech in the provided audio buffer in-place.
     *
     * **Memory Layout:**
     * - Single contiguous buffer with all samples for each channel stored sequentially
     * - Buffer size: `num_channels` * `num_frames` floats
     * - Example for 2 channels, 4 frames:
     *   `audio -> [ch0_f0, ch0_f1, ch0_f2, ch0_f3, ch1_f0, ch1_f1, ch1_f2, ch1_f3]`
     *
     * # Parameters
     * - `audio`: Single buffer containing sequential audio data of size `num_channels` * `num_frames`.
     * - `num_channels`: Number of channels (must match initialization).
     * - `num_frames`: Number of samples per channel (must match initialization value, or if `allow_variable_frames` was enabled, must be ≤ initialization value).
     *
     * # Note
     * All channels are mixed to mono for processing. To process channels
     * independently, create separate processor instances.
     *
     * # Throws
     * - `Error` (`ProcessorNotInitialized`): Processor has not been initialized.
     * - `Error` (`AudioConfigMismatch`): Channel or frame count mismatch.
     * - `Error` (`EnhancementNotAllowed`): SDK key was not authorized or process failed to report usage. Check if you have internet connection.
     */
    processSequential(audio: Float32Array, num_channels: number, num_frames: number): void;
}

/**
 * Processor context handle for control APIs.
 *
 * Created via `Processor.getProcessorContext()`. Use the returned handle to
 * reset the processor, set/get parameters, query output delay, and update the
 * bearer token. The handle operates on the processor it was created from.
 */
export class ProcessorContext {
    private constructor();
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Returns the total output delay in samples for the current audio configuration.
     *
     * This function provides the complete end-to-end latency introduced by the processor,
     * which includes both algorithmic processing delay and any buffering overhead.
     * Use this value to synchronize enhanced audio with other streams or to implement
     * delay compensation in your application.
     *
     * This queries the processor associated with this context handle.
     *
     * **Enhancement vs. VAD models:**
     * - For an enhancement model this is the latency of the enhanced audio: the number of
     *   samples by which the processed output lags behind the input.
     * - For a dedicated VAD model, the audio buffer is input-only and passes through unchanged.
     *   This delay is the VAD prediction latency: how many samples a speech decision from
     *   `VadContext.isSpeechDetected` lags behind the input it describes.
     *   Use this value to line up VAD decisions with the input timeline.
     *
     * **Delay behavior:**
     * - **Before initialization:** Returns the base processing delay using the processor's
     *   optimal frame size at its native sample rate
     * - **After initialization:** Returns the actual delay for your specific configuration,
     *   including any additional buffering introduced by non-optimal frame sizes
     *
     * **Important:** The delay value is always expressed in samples at the sample rate
     * you configured during `Processor.initialize`. To convert to time units:
     * `delay_ms = (delay_samples * 1000) / sample_rate`
     *
     * **Note:** Using frame sizes different from the optimal value returned by
     * `Model.getOptimalNumFrames` will increase the delay beyond the processor's base latency.
     *
     * # Returns
     * The delay in samples.
     */
    getOutputDelay(): number;
    /**
     * Retrieves the current value of a parameter.
     *
     * This queries the processor associated with this context handle.
     *
     * # Parameters
     * - `parameter`: Parameter to query.
     *
     * # Returns
     * The current parameter value.
     */
    getParameter(parameter: ProcessorParameter): number;
    /**
     * Clears all internal state and buffers. This also resets the VAD state associated with this processor.
     *
     * Call this when the audio stream is interrupted or when seeking
     * to prevent artifacts from previous audio content.
     *
     * This operates on the processor associated with this context handle.
     *
     * The processor stays initialized to the configured settings.
     */
    reset(): void;
    /**
     * Modifies an enhancement parameter.
     *
     * All parameters can be changed during audio processing.
     *
     * This operates on the processor associated with this context handle.
     *
     * # Parameters
     * - `parameter`: Parameter to modify.
     * - `value`: New parameter value. See parameter documentation for ranges.
     *
     * # Throws
     * - `RangeError`: Value outside valid range.
     */
    setParameter(parameter: ProcessorParameter, value: number): void;
    /**
     * Replaces the bearer token on a running processor.
     *
     * Use this when your license key is a JWT and needs to be refreshed
     * before it expires. Calling this with a renewed token lets you stay authenticated
     * without tearing down and recreating the processor: audio processing continues
     * uninterrupted, the context handle stays valid, and the new token is used for all
     * subsequent authentication against the ai-coustics backend.
     *
     * In-place updates are only supported when both the originally configured key and
     * the new token are JWTs. Other license types cannot be swapped in this way.
     *
     * On any error the call is a no-op: the previously active token remains in use and
     * the telemetry session is unaffected (no backoff, no interruption to processing).
     *
     * On success the swap is applied immediately and is **not** gated on backend
     * acceptance. The token is validated locally for format only; if the backend later
     * rejects it (e.g. expired or revoked), the SDK retries it under backoff rather than
     * rolling back to the prior token, and audio processing is eventually disabled if no
     * accepted token arrives in time. Supplying a known-good token via this call during
     * that window recovers the session.
     *
     * # Parameters
     * - `token`: New JWT.
     *
     * # Throws
     * - `Error` (`LicenseFormatInvalid`): New token could not be parsed; the existing token stays in use.
     * - `Error` (`TokenUpdateUnsupported`): The original or new key does not support in-place updates; the existing token stays in use.
     */
    updateBearerToken(token: string): void;
}

/**
 * Configurable parameters for audio processing.
 */
export enum ProcessorParameter {
    /**
     * Controls whether audio processing is bypassed while preserving algorithmic delay.
     *
     * When enabled, the input audio passes through unmodified, but the output is still
     * delayed by the same amount as during normal processing. This ensures seamless
     * transitions when toggling enhancement on/off without audible clicks or timing shifts.
     *
     * **Range:** 0.0 to 1.0
     * - **0.0:** Enhancement active (normal processing)
     * - **1.0:** Bypass enabled (latency-compensated passthrough)
     *
     * **Default:** 0.0
     */
    Bypass = 0,
    /**
     * A tunable parameter to optimize for specific STT engines, deployment environments,
     * and user experience requirements.
     *
     * The exact behavior depends on the active model:
     * - **Quail Models:** Controls how aggressively the model suppresses noise. When used
     *   with Quail Voice Focus, it also suppresses background and competing speech.
     * - **Rook Models:** Controls the mixback and therefore the intensity of the
     *   enhancement.
     *
     * **Range:** 0.0 to 1.0
     */
    EnhancementLevel = 1,
}

/**
 * VAD context handle for control APIs.
 *
 * Created via `Processor.getVadContext()`. The voice activity detection works
 * automatically using the enhanced audio output of the backing processor. All
 * handles created from a given processor reference the same VAD instance.
 *
 * **Important:** If the backing processor is destroyed, the VAD instance will stop
 * producing new data. It is safe to destroy the processor without destroying the VAD.
 */
export class VadContext {
    private constructor();
    free(): void;
    [Symbol.dispose](): void;
    /**
     * Retrieves the current value of a parameter.
     *
     * # Parameters
     * - `parameter`: Parameter to query.
     *
     * # Returns
     * The current parameter value.
     */
    getParameter(parameter: VadParameter): number;
    /**
     * Returns the raw prediction of the VAD, without any post-processing.
     *
     * In contrast to `isSpeechDetected`, this is the model's direct prediction
     * without going through the SDK's VAD post-processing (i.e. speech hold
     * duration, sensitivity thresholding, etc.). This value may be used to build
     * other abstractions on top of this data.
     *
     * **Note:** This value is only useful when using a VAD model. When using an
     * energy-based VAD, the raw prediction is set to `1.0` or `0.0` depending on
     * whether `isSpeechDetected` is true or false.
     *
     * The latency notes from `isSpeechDetected` apply identically: the prediction
     * lags its input by the backing processor's processing latency, and it stops
     * updating if the backing processor stops being processed.
     *
     * # Returns
     * The raw VAD prediction in the range 0.0 to 1.0.
     */
    getRawVadProbability(): number;
    /**
     * Returns the VAD's prediction.
     *
     * **Important:**
     * - The latency of the VAD prediction is equal to
     *   the backing processor's processing latency, reported by
     *   `ProcessorContext.getOutputDelay`. The prediction lags its input by that
     *   many samples even for a dedicated VAD model whose audio buffer passes through
     *   untouched. Align speech decisions to the input timeline using that delay.
     * - If the backing processor stops being processed,
     *   the VAD will not update its speech detection prediction.
     *
     * # Returns
     * `true` if speech is currently detected, `false` otherwise.
     */
    isSpeechDetected(): boolean;
    /**
     * Modifies a VAD parameter.
     *
     * All parameters can be changed during audio processing.
     *
     * # Parameters
     * - `parameter`: Parameter to modify.
     * - `value`: New parameter value. See parameter documentation for ranges.
     *
     * # Throws
     * - `RangeError`: Value outside valid range.
     */
    setParameter(parameter: VadParameter, value: number): void;
}

/**
 * Configurable parameters for Voice Activity Detection.
 */
export enum VadParameter {
    /**
     * Controls for how long the VAD continues to detect speech after the audio signal
     * no longer contains speech.
     *
     * This affects the stability of speech detected -> not detected transitions.
     *
     * The VAD reports speech detected if the audio signal contained speech in at least 50%
     * of the frames processed in the last `speech_hold_duration * 2` seconds.
     *
     * For example, if `speech_hold_duration` is set to 0.5 seconds and the VAD stops detecting speech
     * in the audio signal, the VAD will continue to report speech for 0.5 seconds assuming the
     * VAD does not detect speech again during that period. If a few frames of speech are detected
     * during that period, those frames will be included in the 50% calculation, which will extend
     * the speech detection period until the 50% threshold is no longer met.
     *
     * NOTE: The VAD returns a value per processed buffer, so this duration is rounded
     * to the closest model window length. For example, if the model has a processing window
     * length of 10 ms, the VAD will round up/down to the closest multiple of 10 ms.
     * Because of this, this parameter may return a different value than the one it was last set to.
     *
     * **Range:** 0.0 to 300x model window length (value in seconds)
     *
     * **Default:** 0.03 (30 ms)
     */
    SpeechHoldDuration = 0,
    /**
     * Controls the sensitivity of the VAD.
     *
     * There are two kinds of VADs offered by the SDK:
     *
     * - VAD models (e.g. Quail VAD): These are models specifically trained for voice activity detection.
     *   They output a probability of speech presence for each processed audio buffer, 1.0 being the model
     *   is certain speech is present and 0.0 being the model is certain speech is not present.
     *   The probability is compared against the sensitivity threshold to determine if speech is detected.
     *
     * - Energy-based VAD of speech enhancement models (e.g. Quail, Rook): These models filter out
     *   background noise and enhance speech, but they do not explicitly output a VAD decision.
     *   To provide VAD functionality, the SDK determines whether speech is present based on how much
     *   energy is left in the signal after enhancement, since the model suppresses non-speech components.
     *   For these models, the sensitivity parameter controls the energy threshold for detecting speech presence.
     *   The formula for the energy threshold is `10 ^ (-sensitivity)`, so higher sensitivity values result in
     *   less energy being required in the signal, therefore resulting in more aggressive speech detection.
     *
     * A value above the threshold will trigger a speech detected decision.
     *
     * **Range:**
     * - On VAD models: 0.0 to 1.0
     * - On energy-based VADs: 1.0 to 15.0
     *
     * **Default:** model-specific
     */
    Sensitivity = 1,
    /**
     * Controls for how long speech needs to be present in the audio signal before
     * the VAD considers it speech.
     *
     * This affects the stability of speech not detected -> detected transitions.
     *
     * NOTE: The VAD returns a value per processed buffer, so this duration is rounded
     * to the closest model window length. For example, if the model has a processing window
     * length of 10 ms, the VAD will round up/down to the closest multiple of 10 ms.
     * Because of this, this parameter may return a different value than the one it was last set to.
     *
     * **Range:** 0.0 to 1.0 (value in seconds)
     *
     * **Default:** 0.0
     */
    MinimumSpeechDuration = 2,
}

/**
 * Module initializer. Registers the SDK wrapper id and installs the panic hook
 * so any future panics surface as readable JS console errors.
 */
export function __aic_sdk_wasm_init(): void;

/**
 * Returns the model version compatible with the SDK.
 *
 * # Returns
 * Model version compatible with this version of the SDK.
 */
export function getCompatibleModelVersion(): number;

/**
 * Returns the version of the SDK.
 *
 * # Returns
 * A string containing the version (e.g., "1.2.3").
 */
export function getVersion(): string;

/**
 * Inputs accepted by the asynchronous initializer `__wbg_init`.
 */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;

/**
 * Raw exports of the instantiated WebAssembly module, as returned by
 * `initSync` and (asynchronously) by `__wbg_init`.
 */
export interface InitOutput {
    readonly memory: WebAssembly.Memory;
    readonly __wbg_analysisresult_free: (a: number, b: number) => void;
    readonly __wbg_analyzer_free: (a: number, b: number) => void;
    readonly __wbg_model_free: (a: number, b: number) => void;
    readonly __wbg_processor_free: (a: number, b: number) => void;
    readonly __wbg_processorcontext_free: (a: number, b: number) => void;
    readonly __wbg_vadcontext_free: (a: number, b: number) => void;
    readonly analysisresult_interferingSpeech: (a: number) => number;
    readonly analysisresult_mediaSpeech: (a: number) => number;
    readonly analysisresult_noise: (a: number) => number;
    readonly analysisresult_packetLoss: (a: number) => number;
    readonly analysisresult_riskScore: (a: number) => number;
    readonly analysisresult_speakerLoudness: (a: number) => number;
    readonly analysisresult_speakerReverb: (a: number) => number;
    readonly analyzer_analyze: (a: number) => [number, number, number];
    readonly analyzer_bufferInterleaved: (a: number, b: number, c: number, d: number, e: number) => [number, number];
    readonly analyzer_bufferPlanar: (a: number, b: number, c: number, d: number) => [number, number];
    readonly analyzer_bufferSequential: (a: number, b: number, c: number, d: number, e: number) => [number, number];
    readonly analyzer_initialize: (a: number, b: number, c: number, d: number, e: number) => [number, number];
    readonly analyzer_new: (a: number, b: number, c: number) => [number, number, number];
    readonly analyzer_reset: (a: number) => void;
    readonly analyzer_updateBearerToken: (a: number, b: number, c: number) => [number, number];
    readonly getCompatibleModelVersion: () => number;
    readonly getVersion: () => [number, number];
    readonly model_fromBytes: (a: number, b: number) => [number, number, number];
    readonly model_getId: (a: number) => [number, number];
    readonly model_getOptimalNumFrames: (a: number, b: number) => number;
    readonly model_getOptimalSampleRate: (a: number) => number;
    readonly processor_getProcessorContext: (a: number) => number;
    readonly processor_getVadContext: (a: number) => number;
    readonly processor_initialize: (a: number, b: number, c: number, d: number, e: number) => [number, number];
    readonly processor_new: (a: number, b: number, c: number) => [number, number, number];
    readonly processor_processInterleaved: (a: number, b: number, c: number, d: any, e: number, f: number) => [number, number];
    readonly processor_processPlanar: (a: number, b: number, c: number, d: any, e: number) => [number, number];
    readonly processor_processSequential: (a: number, b: number, c: number, d: any, e: number, f: number) => [number, number];
    readonly processorcontext_getOutputDelay: (a: number) => number;
    readonly processorcontext_getParameter: (a: number, b: number) => number;
    readonly processorcontext_reset: (a: number) => void;
    readonly processorcontext_setParameter: (a: number, b: number, c: number) => [number, number];
    readonly processorcontext_updateBearerToken: (a: number, b: number, c: number) => [number, number];
    readonly vadcontext_getParameter: (a: number, b: number) => number;
    readonly vadcontext_getRawVadProbability: (a: number) => number;
    readonly vadcontext_isSpeechDetected: (a: number) => number;
    readonly vadcontext_setParameter: (a: number, b: number, c: number) => [number, number];
    readonly __aic_sdk_wasm_init: () => void;
    readonly wasm_bindgen__convert__closures_____invoke__h32aad1ae9b36601a: (a: number, b: number, c: any) => [number, number];
    readonly wasm_bindgen__convert__closures_____invoke__ha6938de6b49a4f0d: (a: number, b: number) => void;
    readonly wasm_bindgen__convert__closures_____invoke__h557542718049044a: (a: number, b: number) => void;
    readonly __wbindgen_malloc: (a: number, b: number) => number;
    readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
    readonly __wbindgen_exn_store: (a: number) => void;
    readonly __externref_table_alloc: () => number;
    readonly __wbindgen_externrefs: WebAssembly.Table;
    readonly __wbindgen_free: (a: number, b: number, c: number) => void;
    readonly __wbindgen_destroy_closure: (a: number, b: number) => void;
    readonly __externref_table_dealloc: (a: number) => void;
    readonly __wbindgen_start: () => void;
}

/**
 * Inputs accepted by the synchronous initializer `initSync`.
 */
export type SyncInitInput = BufferSource | WebAssembly.Module;

/**
 * Instantiates the given `module`, which can either be bytes or
 * a precompiled `WebAssembly.Module`.
 *
 * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
 *
 * @returns {InitOutput}
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;

/**
 * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
 * for everything else, calls `WebAssembly.instantiate` directly.
 *
 * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
 *
 * @returns {Promise<InitOutput>}
 */
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;