/**
 * A source object obtained by passing a local asset through `require`
 * or `import` (e.g. `require('./nlu.tflite')`). It is typed as a number.
 */
export type RequireSource = number;

/**
 * A pipeline profile configures the speech pipeline for a common use case.
 */
export declare enum PipelineProfile {
  /**
   * Wakeword activation followed by the local Apple/Android ASR.
   * Any wakeword profile requires wakeword.filter, wakeword.encode,
   * and wakeword.detect to be set.
   */
  TFLITE_WAKEWORD_NATIVE_ASR = 0,
  /**
   * Apple/Android Automatic Speech Recognition starts
   * when Voice Activity Detection triggers it.
   */
  VAD_NATIVE_ASR = 1,
  /**
   * Apple/Android Automatic Speech Recognition runs
   * whenever the speech pipeline is active.
   * Likely the most common profile when wakeword is not used.
   */
  PTT_NATIVE_ASR = 2,
  /**
   * Wakeword activation followed by the remote Spokestack ASR.
   * Any wakeword profile requires wakeword.filter, wakeword.encode,
   * and wakeword.detect to be set.
   */
  TFLITE_WAKEWORD_SPOKESTACK_ASR = 3,
  /**
   * Spokestack Automatic Speech Recognition starts
   * when Voice Activity Detection triggers it.
   */
  VAD_SPOKESTACK_ASR = 4,
  /**
   * Spokestack Automatic Speech Recognition runs
   * whenever the speech pipeline is active.
   * Likely the most common profile when wakeword is not used
   * but Spokestack ASR is preferred over the native ASR.
   */
  PTT_SPOKESTACK_ASR = 5,
  /**
   * A VAD-sensitive TFLite wakeword activates the TFLite keyword recognizer.
   */
  TFLITE_WAKEWORD_KEYWORD = 6,
  /**
   * The TFLite keyword recognizer is triggered by VAD.
   */
  VAD_KEYWORD_ASR = 7
}

/**
 * Logging verbosity.
 * The lower the number, the more logs are shown.
 */
export declare enum TraceLevel {
  DEBUG = 10,
  PERF = 20,
  INFO = 30,
  NONE = 100
}

/**
 * Input formats supported by Spokestack TTS:
 * raw text, SSML, and Speech Markdown.
 * See https://www.speechmarkdown.org/ for an introduction to Speech Markdown.
 * IPA is expected when using SSML or Speech Markdown.
 */
export declare enum TTSFormat {
  TEXT = 0,
  SSML = 1,
  SPEECHMARKDOWN = 2
}

/** Event payload carrying a recognized transcript. */
export interface SpokestackRecognizeEvent {
  transcript: string;
}

/** Event payload carrying an error description. */
export interface SpokestackErrorEvent {
  error: string;
}

/** Event payload carrying a trace (log) message. */
export interface SpokestackTraceEvent {
  message: string;
}

/** Event payload carrying the current playback flag. */
export interface SpokestackPlayEvent {
  playing: boolean;
}

/** Union of every event payload declared above. */
export type SpokestackEvent =
  | SpokestackRecognizeEvent
  | SpokestackErrorEvent
  | SpokestackTraceEvent
  | SpokestackPlayEvent;

/** A single slot extracted from a user utterance by the NLU model. */
export interface SpokestackNLUSlot {
  /** The type of the slot, as declared in the model metadata */
  type: string;
  /** The parsed (typed) slot value recognized in the user utterance */
  value: any;
  /** The slot value exactly as it appeared (as a string) in the user utterance */
  rawValue: string;
}

/** Slots of an NLU result, indexed by string key. */
export interface SpokestackNLUSlots {
  [key: string]: SpokestackNLUSlot;
}

/** The outcome of an NLU classification. */
export interface SpokestackNLUResult {
  /** The intent matched by the NLU model */
  intent: string;
  /**
   * The NLU model's confidence in the recognized intent,
   * from 0 to 1, where 1 means absolute confidence.
   */
  confidence: number;
  /** Data associated with the intent, as provided by the NLU model */
  slots: SpokestackNLUSlots;
}

/** Speech pipeline options. */
export interface PipelineConfig {
  /**
   * Profiles are collections of common configurations for Pipeline stages.
   *
   * When no profile is given explicitly, Spokestack picks
   * a sensible default from the config passed to `Spokestack.initialize()`:
   *
   * - Wakeword config files set (keyword config not set):
   *   `TFLITE_WAKEWORD_NATIVE_ASR`.
   * - Keyword config files set (wakeword config not set):
   *   `VAD_KEYWORD_ASR`.
   * - Both wakeword and keyword config files set:
   *   `TFLITE_WAKEWORD_KEYWORD`.
   * - Anything else: `PTT_NATIVE_ASR`.
   */
  profile?: PipelineProfile;
  /**
   * Audio sampling rate, in Hz
   */
  sampleRate?: number;
  /**
   * @advanced
   *
   * Width of a speech frame, in ms
   */
  frameWidth?: number;
  /**
   * @advanced
   *
   * Buffer width; combined with frameWidth to determine the buffer size
   */
  bufferWidth?: number;
  /**
   * Mode of the voice activity detector
   */
  vadMode?: 'quality' | 'low-bitrate' | 'aggressive' | 'very-aggressive';
  /**
   * @advanced
   *
   * Run length for falling-edge detection, in ms; determines how many
   * negative samples must be received to flip the detector to negative
   */
  vadFallDelay?: number;
  /**
   * @advanced
   *
   * Android-only
   *
   * Run length for rising-edge detection, in ms; determines how many
   * positive samples must be received to flip the detector to positive
   */
  vadRiseDelay?: number;
  /**
   * @advanced
   *
   * Android-only for AcousticNoiseSuppressor
   *
   * Noise policy
   */
  ansPolicy?: 'mild' | 'medium' | 'aggressive' | 'very-aggressive';
  /**
   * @advanced
   *
   * Android-only for AcousticGainControl
   *
   * Target peak audio level, in -dB,
   * to maintain a peak of -9dB, configure a value of 9
   *
   * NOTE(review): judging by the field names, this description and the
   * one on `agcTargetLevelDbfs` may be swapped - confirm against the
   * spokestack-android reference.
   */
  agcCompressionGainDb?: number;
  /**
   * @advanced
   *
   * Android-only for AcousticGainControl
   *
   * Dynamic range compression rate, in dBFS
   *
   * NOTE(review): judging by the field names, this description and the
   * one on `agcCompressionGainDb` may be swapped - confirm against the
   * spokestack-android reference.
   */
  agcTargetLevelDbfs?: number;
}

/** Files needed by the NLU: model, metadata, and vocabulary. */
export interface NLUSourceConfig {
  /**
   * The NLU Tensorflow-Lite model. If specified, metadata and vocab are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `model: require('./nlu.tflite')`)
   */
  model: string | RequireSource;
  /**
   * The JSON file for NLU metadata. If specified, model and vocab are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `metadata: require('./metadata.sjson')`).
   *
   * **IMPORTANT: local metadata JSON files use a special extension (`.sjson`) with `require` or `import`
   * so that the file is imported as a source object rather than parsed on inclusion.
   * The underlying native libraries then read and parse the file themselves.**
   */
  metadata: string | RequireSource;
  /**
   * A txt file containing the NLU vocabulary. If specified, model and metadata are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `vocab: require('./vocab.txt')`)
   */
  vocab: string | RequireSource;
}

/** Advanced NLU options. */
export interface NLUAdvancedConfig {
  /**
   * NOTE(review): undocumented in the original; presumably the length of
   * the model's input sequences - confirm against the native libraries.
   */
  inputLength?: number;
}

/** Complete NLU configuration: source files plus advanced options. */
export type NLUConfig = NLUSourceConfig & NLUAdvancedConfig;

/**
 * Model files shared by the wakeword and keyword configurations
 * (filter, detect, and encode).
 */
export interface CommandModelSourceConfig {
  /**
   * The "filter" Tensorflow-Lite model. If specified, detect and encode are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `filter: require('./filter.tflite')`)
   *
   * The filter model calculates a mel spectrogram frame from the linear STFT;
   * its inputs should be shaped [fft-width], and its outputs [mel-width]
   */
  filter: string | RequireSource;
  /**
   * The "detect" Tensorflow-Lite model. If specified, filter and encode are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `detect: require('./detect.tflite')`)
   *
   * Its inputs should be shaped [encode-length, encode-width].
   *
   * NOTE(review): the original listed this shape description under `encode`
   * and the encode model's description here; the two appear to have been
   * swapped. The original sentence was also cut off after "and its outputs",
   * so the output shape is not stated - confirm against the native library
   * reference.
   */
  detect: string | RequireSource;
  /**
   * The "encode" Tensorflow-Lite model. If specified, filter and detect are also required.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `encode: require('./encode.tflite')`)
   *
   * The encode model performs each autoregressive step over the mel frames;
   * its inputs should be shaped [mel-length, mel-width], and its outputs [encode-width],
   * with an additional state input/output shaped [state-width]
   *
   * NOTE(review): the original listed this description under `detect`;
   * it names the encode model, so it is placed here - confirm.
   */
  encode: string | RequireSource;
}

/** Advanced tuning options shared by the wakeword and keyword configurations. */
export interface CommandModelAdvancedConfig {
  /**
   * @advanced
   *
   * Length, in milliseconds, of the sliding window of encoder output
   * that is fed to the classifier
   */
  encodeLength?: number;
  /**
   * @advanced
   *
   * Size of the encoder output, in vector units
   */
  encodeWidth?: number;
  /**
   * @advanced
   *
   * Size of the signal window used to calculate the STFT,
   * in number of samples - should be a power of 2 for maximum efficiency
   */
  fftWindowSize?: number;
  /**
   * @advanced
   *
   * Android-only
   *
   * Name of the windowing function applied to each audio frame
   * before the STFT is calculated; currently the "hann" window is supported
   */
  fftWindowType?: string;
  /**
   * @advanced
   *
   * Length of time to skip each time the
   * overlapping STFT is calculated, in milliseconds
   */
  fftHopLength?: number;
  /**
   * @advanced
   *
   * The length of time to skip each time the
   * overlapping STFT is calculated, in milliseconds
   *
   * NOTE(review): this text is identical to the description of
   * `fftHopLength` and was probably copied by mistake; presumably this is
   * the length of the mel spectrogram window, in milliseconds - confirm.
   */
  melFrameLength?: number;
  /**
   * @advanced
   *
   * Size of each mel spectrogram frame,
   * in number of filterbank components
   */
  melFrameWidth?: number;
  /**
   * @advanced
   *
   * Weight of the pre-emphasis filter applied to
   * the normalized audio signal (0 for no pre-emphasis)
   */
  preEmphasis?: number;
  /**
   * @advanced
   *
   * Size of the encoder state, in vector units (defaults to wake-encode-width)
   */
  stateWidth?: number;
  /**
   * @advanced
   *
   * Threshold of the classifier's posterior output,
   * above which the trigger activates the pipeline, in the range [0, 1]
   */
  threshold?: number;
}

/** Options that apply to wakeword only. */
export interface WakewordOnlyConfig {
  /**
   * @advanced
   *
   * Minimum length of an activation, in milliseconds,
   * used to ignore a VAD deactivation after the wakeword
   */
  activeMin?: number;
  /**
   * @advanced
   *
   * Maximum length of an activation, in milliseconds,
   * used to time out the activation
   */
  activeMax?: number;
  /**
   * iOS-only
   *
   * How long an Apple ASR request is allowed to run, in milliseconds.
   * Apple has an undocumented limit of 60000ms per request.
   */
  requestTimeout?: number;
  /**
   * @advanced
   * Android-only
   *
   * The desired linear Root Mean Squared (RMS) signal energy,
   * used for signal normalization; it should be tuned
   * to the RMS target used during training
   */
  rmsTarget?: number;
  /**
   * @advanced
   * Android-only
   *
   * The Exponentially-Weighted Moving Average (EWMA) update
   * rate for the current RMS signal energy (0 for no RMS normalization)
   */
  rmsAlpha?: number;
  /**
   * iOS-only
   *
   * An ordered array or comma-separated list of wakeword keywords.
   * Only necessary when the filter, detect, and encode paths are not passed.
   */
  wakewords?: string | string[];
}

/** Complete wakeword configuration. */
export type WakewordConfig = CommandModelSourceConfig &
  CommandModelAdvancedConfig &
  WakewordOnlyConfig;

/** Keyword configuration part that supplies a metadata file. */
export interface KeywordMetadataConfig {
  /**
   * The JSON file for Keyword metadata.
   * Required if `keyword.classes` is not specified.
   *
   * Two kinds of values are accepted:
   * 1. A string with a remote URL from which the file is downloaded and cached (presumably a CDN).
   * 2. A source object retrieved by a `require` or `import` (e.g. `metadata: require('./metadata.sjson')`).
   *
   * **IMPORTANT: local metadata JSON files use a special extension (`.sjson`) with `require` or `import`
   * so that the file is imported as a source object rather than parsed on inclusion.
   * The underlying native libraries then read and parse the file themselves.**
   */
  metadata: string | RequireSource;
}

/** Keyword configuration part that supplies class names directly. */
export interface KeywordClassesConfig {
  /**
   * A comma-separated list or an ordered array of class names for the keywords.
   * When the recognition event is raised, the name of the most likely class
   * is returned in the transcript field.
   * Required if `keyword.metadata` is not specified.
   */
  classes: string | string[];
}

/** Keyword configuration that uses a metadata file. */
export type KeywordWithMetadataConfig = CommandModelSourceConfig &
  CommandModelAdvancedConfig &
  KeywordMetadataConfig;

/** Keyword configuration that uses explicit class names. */
export type KeywordWithClassesConfig = CommandModelSourceConfig &
  CommandModelAdvancedConfig &
  KeywordClassesConfig;

/** Keyword configuration: either the metadata variant or the classes variant. */
export type KeywordConfig = KeywordWithMetadataConfig | KeywordWithClassesConfig;

/**
 * Spokestack-iOS reference: https://spokestack.github.io/spokestack-ios/index.html
 * spokestack-android reference: https://javadoc.io/doc/io.spokestack/spokestack-android/latest/index.html
 */
export interface SpokestackConfig {
  /**
   * Only used when remote URLs are passed to fields such as `wakeword.filter`.
   *
   * Set this to true to allow models to be downloaded over cellular.
   * Note that `Spokestack.initialize()` will still reject the promise if
   * models need to be downloaded but there is no network at all.
   *
   * Ideally, the app handles networking itself and
   * informs the user about file downloads.
   *
   * Default: false
   */
  allowCellularDownloads?: boolean;
  /**
   * Wakeword, Keyword, and NLU model files are cached internally.
   * Set this to true whenever a model is changed
   * during development to refresh the internal model cache.
   *
   * This affects models passed with `require()` as well
   * as models downloaded from remote URLs.
   *
   * Default: true in dev mode, false otherwise
   *
   * **Important:** By default, apps in production
   * cache models so they are not downloaded every time
   * the app is launched. The side effect of this optimization
   * is that if models change on the CDN, apps will
   * not pick up those changes unless the app is reinstalled.
   * We think this is a fair trade-off, but set this to `true`
   * if you prefer to download the models every time the app
   * is launched.
   */
  refreshModels?: boolean;
  /**
   * Log level for the underlying native
   * iOS and Android libraries.
   * Also add a `"trace"` event listener to get trace events.
   * See the TraceLevel enum for values.
   */
  traceLevel?: TraceLevel;
  /**
   * Most of these options are advanced aside from "profile"
   */
  pipeline?: PipelineConfig;
  /** Only needed if using Spokestack.classify */
  nlu?: NLUConfig;
  /**
   * Only required for wakeword.
   * Most options are advanced aside from
   * filter, detect, and encode for specifying config files.
   */
  wakeword?: WakewordConfig;
  /**
   * Only required for the keyword recognizer.
   * Most options are advanced aside from
   * filter, detect, encode, metadata, and classes.
   */
  keyword?: KeywordConfig;
}