// Type definitions for @piper-plus/g2p
// Multilingual G2P (Grapheme-to-Phoneme) for TTS -- eSpeak-ng free, MIT licensed

// ---------------------------------------------------------------------------
// Basic types
// ---------------------------------------------------------------------------

/** Supported language codes. */
export type Language = 'ja' | 'en' | 'zh' | 'ko' | 'es' | 'fr' | 'pt' | 'sv';

/**
 * Prosody features extracted from OpenJTalk full-context labels.
 *
 * These correspond to the A1/A2/A3 accent features used by Piper TTS
 * for duration prediction and naturalness improvement.
 */
export interface ProsodyInfo {
  /** Accent type -- relative position to accent nucleus. */
  a1: number;
  /** Position within accent phrase (1-indexed from start). */
  a2: number;
  /** Position within accent phrase (1-indexed from end). */
  a3: number;
}

/**
 * Result of a phonemize operation.
 *
 * Contains IPA token arrays and optional per-token prosody information.
 * Tokens are returned as IPA strings (no PUA encoding).
 */
export interface PhonemizeResult {
  /** IPA phoneme tokens (e.g. ["k", "o", "N_n", "n", "i", "ch", "i", "h", "a"]). */
  tokens: string[];
  /**
   * Parallel array of prosody info per token.
   * ProsodyInfo for actual phonemes, null for prosody markers and pauses.
   * Only populated for Japanese; other languages return null for all entries.
   *
   * NOTE(review): the per-language classes EnglishG2P, SwedishG2P and KoreanG2P
   * document non-null ProsodyInfo from their phonemizeWithProsody(); confirm
   * which statement reflects the current implementation.
   */
  prosody: (ProsodyInfo | null)[];
  /** Language of the phonemized text. */
  language: Language;
}

/**
 * Result of encoding IPA tokens to Piper-compatible phoneme IDs.
 *
 * Includes BOS/EOS/inter-phoneme padding inserted by the encoder.
 */
export interface EncodeResult {
  /** Piper-compatible phoneme ID sequence with BOS/EOS/padding. */
  phonemeIds: number[];
  /**
   * Flattened prosody features aligned to phonemeIds.
   * Each group of 3 values represents [a1, a2, a3] for the corresponding phoneme ID.
   * null when the source PhonemizeResult has no prosody data.
   */
  prosodyFlat: number[] | null;
}

// ---------------------------------------------------------------------------
// G2P (high-level unified API)
// ---------------------------------------------------------------------------

/** Options for G2P.create(). */
export interface G2POptions {
  /** Languages to load. Defaults to all supported languages. */
  languages?: Language[];
  /**
   * Pre-loaded OpenJTalk WASM module instance.
   * When omitted, the module is loaded automatically (requires WASM files).
   */
  openjtalkModule?: any;
  /**
   * Pre-loaded Japanese dictionary data.
   * When provided, skips dictionary download during initialization.
   */
  jaDict?: JaDictData;
  /** Custom dictionaries for pronunciation overrides. */
  customDicts?: CustomDictionary[];
}

/** Options for phonemize and encode methods. */
export interface PhonemizeOptions {
  /** Target language. When omitted, language is auto-detected from text. */
  language?: Language;
}

/**
 * High-level G2P API that orchestrates language detection, phonemization,
 * and encoding across all supported languages.
 *
 * @example
 * ```js
 * const g2p = await G2P.create({ languages: ['ja', 'en'] });
 *
 * const result = g2p.phonemize('Hello, world!');
 * // result.tokens => ["h", "ʌ", "l", "oʊ", ",", " ", "w", "ɜː", "l", "d", "!"]
 *
 * const jaResult = g2p.phonemize('こんにちは');
 * // jaResult.tokens => ["k", "o", "N_n", "n", "i", "ch", "i", "h", "a"]
 *
 * g2p.dispose();
 * ```
 */
export declare class G2P {
  /**
   * Create and initialize a G2P instance.
   *
   * Loads the required language modules and, for Japanese, initializes the
   * OpenJTalk WASM module and downloads the dictionary if needed.
   *
   * @param options - Configuration options.
   * @returns Fully initialized G2P instance.
   */
  static create(options?: G2POptions): Promise<G2P>;

  /**
   * Convert text to IPA phoneme tokens.
   *
   * Language is auto-detected from text when not specified in options.
   *
   * @param text - Input text to phonemize.
   * @param options - Optional language specification.
   * @returns Phonemize result with IPA tokens and language.
   * @throws {Error} If the instance has been disposed via `dispose()`.
   * @throws {Error} If the detected/specified language was not initialised in `G2P.create()`.
   */
  phonemize(text: string, options?: PhonemizeOptions): PhonemizeResult;

  /**
   * Convert text to IPA phoneme tokens with prosody features.
   *
   * For Japanese, returns A1/A2/A3 accent features per token.
   * For other languages, prosody entries are null.
   *
   * @param text - Input text to phonemize.
   * @param options - Optional language specification.
   * @returns Phonemize result with IPA tokens, prosody info, and language.
   * @throws {Error} If the instance has been disposed via `dispose()`.
   * @throws {Error} If the detected/specified language was not initialised in `G2P.create()`.
   */
  phonemizeWithProsody(text: string, options?: PhonemizeOptions): PhonemizeResult;

  /**
   * Convert text to Piper-compatible phoneme IDs.
   *
   * Combines phonemization and encoding in a single call:
   * phonemize -> PUA mapping -> ID lookup -> BOS/EOS/padding insertion.
   *
   * @param text - Input text.
   * @param phonemeIdMap - Phoneme-to-ID mapping from Piper model config.
   *   NOTE(review): typed as phoneme -> ID list, the shape of Piper's
   *   `phoneme_id_map`; confirm against the implementation.
   * @param options - Optional language specification.
   * @returns Encoded phoneme IDs and optional flattened prosody features.
   * @throws {Error} If the instance has been disposed via `dispose()`.
   * @throws {Error} If the detected/specified language was not initialised in `G2P.create()`.
   */
  encode(
    text: string,
    phonemeIdMap: Record<string, number[]>,
    options?: PhonemizeOptions,
  ): EncodeResult;

  /**
   * Detect the language of the given text using Unicode script analysis.
   *
   * Detection priority: JA (Hiragana/Katakana) > ZH (CJK without Kana) >
   * language-specific scripts > EN (default fallback).
   *
   * @param text - Text to analyze.
   * @returns Detected language code.
   * @throws {Error} If the instance has been disposed via `dispose()`.
   */
  detectLanguage(text: string): Language;

  /**
   * Segment text into language-homogeneous chunks.
   *
   * Splits mixed-language text (e.g. "Hello, こんにちは") into segments
   * with consistent language labels.
   *
   * @param text - Mixed-language text to segment.
   * @returns Array of segments with language and text.
   * @throws {Error} If the instance has been disposed via `dispose()`.
   */
  segmentText(text: string): Array<{ language: Language; text: string }>;

  /**
   * Release all resources held by this instance.
   *
   * Disposes OpenJTalk WASM module and per-language G2P instances.
   * The instance must not be used after calling dispose().
   */
  dispose(): void;
}

// ---------------------------------------------------------------------------
// DictLoader
// ---------------------------------------------------------------------------

/**
 * Japanese dictionary data loaded by DictLoader.
 *
 * Contains the 8 MeCab dictionary files required by OpenJTalk.
 */
export interface JaDictData {
  /**
   * MeCab dictionary files keyed by filename.
   * Required files: char.bin, matrix.bin, sys.dic, unk.dic,
   * left-id.def, pos-id.def, rewrite.def, right-id.def.
   *
   * NOTE(review): file contents are assumed to be raw bytes (Uint8Array);
   * confirm against DictLoader's actual output.
   */
  dictFiles: Record<string, Uint8Array>;
}

/** Options for DictLoader.loadJaDict(). */
export interface DictLoadOptions {
  /**
   * Custom tar.gz URL for the dictionary archive.
   * Defaults to the official GitHub Releases URL.
   */
  dictUrl?: string;
  /**
   * Progress callback invoked during download.
   * Called with `{ loaded, total }` in bytes.
   */
  onProgress?: (info: { loaded: number; total: number }) => void;
}

/**
 * Dictionary loader for Japanese G2P.
 *
 * Downloads OpenJTalk MeCab dictionary files from GitHub Releases, verifies
 * the SHA-256 hash, and caches them in IndexedDB for instant subsequent loads.
 *
 * @example
 * ```js
 * const loader = new DictLoader();
 * const { dictFiles } = await loader.loadJaDict({
 *   onProgress: ({ loaded, total }) => console.log(`${loaded}/${total}`),
 * });
 * ```
 */
export declare class DictLoader {
  /**
   * @param options - Configuration options.
   * @param options.cachePrefix - IndexedDB database name. Default: 'piper-g2p-dict'.
   */
  constructor(options?: { cachePrefix?: string });

  /**
   * Download (or retrieve from cache) Japanese dictionary files.
   *
   * On the first call the full tar.gz is downloaded, its SHA-256 is verified,
   * and individual dictionary files are cached in IndexedDB. Subsequent calls
   * return instantly from the cache.
   *
   * @param options - Download options.
   * @returns Dictionary data with MeCab files.
   */
  loadJaDict(options?: DictLoadOptions): Promise<JaDictData>;

  /**
   * Check whether all dictionary files are already cached in IndexedDB.
   *
   * @returns true if all 8 MeCab dictionary files are in the cache.
   */
  isCached(): Promise<boolean>;

  /**
   * Remove all cached dictionary data from IndexedDB.
   */
  clearCache(): Promise<void>;

  /**
   * Close the IndexedDB connection and release resources.
   *
   * Safe to call multiple times. After calling `destroy()`, subsequent
   * operations (e.g. `loadJaDict()`) will re-open the database connection
   * as needed.
   */
  destroy(): void;
}

// ---------------------------------------------------------------------------
// Per-language G2P classes
// ---------------------------------------------------------------------------

/**
 * Japanese G2P using OpenJTalk WASM.
 *
 * Provides high-quality Japanese phonemization with prosody features,
 * context-dependent N variants (N_m, N_n, N_ng, N_uvular), and
 * Kurihara-method prosody markers.
 *
 * @example
 * ```js
 * import { JapaneseG2P } from '@piper-plus/g2p/ja';
 * import { DictLoader } from '@piper-plus/g2p/dict';
 *
 * const loader = new DictLoader();
 * const jaDict = await loader.loadJaDict();
 * const ja = new JapaneseG2P({ jaDict });
 * await ja.initialize();
 *
 * const result = ja.phonemize('こんにちは');
 * // result.tokens => ["k", "o", "N_n", "n", "i", "ch", "i", "h", "a"]
 * ```
 */
export declare class JapaneseG2P {
  /**
   * @param options - Configuration options.
   * @param options.openjtalkModule - Pre-loaded OpenJTalk WASM module.
   * @param options.jaDict - Pre-loaded dictionary data from DictLoader.
   */
  constructor(options?: { openjtalkModule?: any; jaDict?: JaDictData });

  /**
   * Initialize the OpenJTalk WASM module and load dictionaries.
   *
   * Must be called before phonemize(). No-op if already initialized.
   *
   * @param options - Reserved for future use.
   */
  initialize(options?: {}): Promise<void>;

  /**
   * Convert Japanese text to IPA phoneme tokens.
   *
   * @param text - Japanese text to phonemize.
   * @returns Phonemize result with IPA tokens (no PUA, no BOS/EOS).
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Japanese text to IPA phoneme tokens with A1/A2/A3 prosody features.
   *
   * @param text - Japanese text to phonemize.
   * @returns Phonemize result with IPA tokens and per-token ProsodyInfo.
   */
  phonemizeWithProsody(text: string): PhonemizeResult;

  /** Whether the OpenJTalk module has been initialized and is ready to use. */
  readonly initialized: boolean;

  /**
   * Release the OpenJTalk WASM module resources.
   * The instance must not be used after calling dispose().
   */
  dispose(): void;
}

/**
 * English G2P using rule-based CMU-style conversion.
 *
 * Converts English text to IPA phoneme sequences with stress markers.
 * Function word stress reduction (97 words) is applied automatically.
 * No external dependencies required.
 *
 * @example
 * ```js
 * import { EnglishG2P } from '@piper-plus/g2p/en';
 *
 * const en = new EnglishG2P();
 * const result = en.phonemize('Hello, world!');
 * // result.tokens => ["h", "ʌ", "l", "oʊ", ...]
 * ```
 */
export declare class EnglishG2P {
  constructor(options?: {});

  /**
   * Convert English text to IPA phoneme tokens.
   *
   * @param text - English text to phonemize.
   * @returns Phonemize result with IPA tokens and stress markers.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert English text to IPA phoneme tokens with prosody features.
   *
   * Returns ProsodyInfo(a1=0, a2=stress_level, a3=word_phoneme_count)
   * for each phoneme token.
   *
   * @param text - English text to phonemize.
   * @returns Phonemize result with IPA tokens and prosody info.
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * Chinese G2P using pinyin-based phonemization.
 *
 * Converts Chinese text to phoneme sequences based on pinyin decomposition.
 * No external dependencies required.
 *
 * @example
 * ```js
 * import { ChineseG2P } from '@piper-plus/g2p/zh';
 *
 * const zh = new ChineseG2P();
 * const result = zh.phonemize('你好世界');
 * ```
 */
export declare class ChineseG2P {
  /**
   * @param options - Configuration options.
   * @param options.phonemeIdMap - Phoneme-to-ID mapping for character-based fallback.
   */
  constructor(options?: { phonemeIdMap?: Record<string, number[]> });

  /**
   * Convert Chinese text to phoneme tokens.
   *
   * @param text - Chinese text to phonemize.
   * @returns Phonemize result with phoneme tokens.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Chinese text to phoneme tokens with prosody features.
   *
   * @param text - Chinese text to phonemize.
   * @returns Phonemize result with phoneme tokens (prosody entries are null).
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * Spanish G2P using rule-based phonemization.
 *
 * Converts Spanish text to IPA phoneme sequences.
 * No external dependencies required.
 */
export declare class SpanishG2P {
  /**
   * @param options - Configuration options.
   * @param options.phonemeIdMap - Phoneme-to-ID mapping for character-based fallback.
   */
  constructor(options?: { phonemeIdMap?: Record<string, number[]> });

  /**
   * Convert Spanish text to phoneme tokens.
   *
   * @param text - Spanish text to phonemize.
   * @returns Phonemize result with phoneme tokens.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Spanish text to phoneme tokens with prosody features.
   *
   * @param text - Spanish text to phonemize.
   * @returns Phonemize result with phoneme tokens (prosody entries are null).
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * French G2P using rule-based phonemization.
 *
 * Converts French text to IPA phoneme sequences.
 * No external dependencies required.
 */
export declare class FrenchG2P {
  /**
   * @param options - Configuration options.
   * @param options.phonemeIdMap - Phoneme-to-ID mapping for character-based fallback.
   */
  constructor(options?: { phonemeIdMap?: Record<string, number[]> });

  /**
   * Convert French text to phoneme tokens.
   *
   * @param text - French text to phonemize.
   * @returns Phonemize result with phoneme tokens.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert French text to phoneme tokens with prosody features.
   *
   * @param text - French text to phonemize.
   * @returns Phonemize result with phoneme tokens (prosody entries are null).
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * Portuguese G2P using rule-based phonemization.
 *
 * Converts Portuguese text to IPA phoneme sequences.
 * No external dependencies required.
 */
export declare class PortugueseG2P {
  /**
   * @param options - Configuration options.
   * @param options.phonemeIdMap - Phoneme-to-ID mapping for character-based fallback.
   */
  constructor(options?: { phonemeIdMap?: Record<string, number[]> });

  /**
   * Convert Portuguese text to phoneme tokens.
   *
   * @param text - Portuguese text to phonemize.
   * @returns Phonemize result with phoneme tokens.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Portuguese text to phoneme tokens with prosody features.
   *
   * @param text - Portuguese text to phonemize.
   * @returns Phonemize result with phoneme tokens (prosody entries are null).
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * Swedish G2P using rule-based phonemization.
 *
 * Converts Swedish text to IPA phoneme sequences with stress markers.
 * Features: complementary quantity (9 long/short vowel pairs), soft k/g rules,
 * retroflex assimilation (rt/rd/rs/rn/rl), loanword suffix rules, and
 * stress detection with prosody (A1=0, A2=stress, A3=syllable count).
 * No external dependencies required.
 *
 * @example
 * ```js
 * import { SwedishG2P } from '@piper-plus/g2p/sv';
 *
 * const sv = new SwedishG2P();
 * const result = sv.phonemize('Hej, hur m\u00e5r du?');
 * // result.tokens => ["\u02c8", "h", "e\u02d0", "j", ...]
 * ```
 */
export declare class SwedishG2P {
  constructor(options?: {});

  /**
   * Convert Swedish text to phoneme tokens.
   *
   * @param text - Swedish text to phonemize.
   * @returns Phonemize result with IPA tokens and stress markers.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Swedish text to phoneme tokens with prosody features.
   *
   * Returns ProsodyInfo(a1=0, a2=stress_level, a3=word_phoneme_count)
   * for each phoneme token. a2=2 for primary stress markers.
   *
   * @param text - Swedish text to phonemize.
   * @returns Phonemize result with IPA tokens and prosody info.
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

/**
 * Korean G2P using Hangul decomposition and IPA mapping.
 *
 * Decomposes Hangul syllables into jamo, maps to IPA phonemes, and applies
 * basic liaison rules (연음법칙). Supports tense consonants, aspirated
 * consonants, and unreleased finals with PUA codepoints.
 * No external dependencies required.
 *
 * @example
 * ```js
 * import { KoreanG2P } from '@piper-plus/g2p/ko';
 *
 * const ko = new KoreanG2P();
 * const result = ko.phonemize('안녕하세요');
 * ```
 */
export declare class KoreanG2P {
  constructor(options?: {});

  /** Language code for this G2P instance. */
  readonly languageCode: string;

  /**
   * Convert Korean text to phoneme tokens.
   *
   * @param text - Korean text to phonemize.
   * @returns Phonemize result with IPA tokens.
   */
  phonemize(text: string): PhonemizeResult;

  /**
   * Convert Korean text to phoneme tokens with prosody features.
   *
   * Korean prosody: A1=0, A2=0, A3=0 (fixed).
   *
   * @param text - Korean text to phonemize.
   * @returns Phonemize result with IPA tokens and prosody info.
   */
  phonemizeWithProsody(text: string): PhonemizeResult;
}

// ---------------------------------------------------------------------------
// UnicodeLanguageDetector
// ---------------------------------------------------------------------------

/**
 * Language detector using Unicode script-based heuristics.
 *
 * Detects language by analyzing Unicode code points in the text.
 * Detection priority: JA (Hiragana/Katakana) > ZH (CJK without Kana) >
 * language-specific scripts > EN (default fallback).
 *
 * Also supports segmenting mixed-language text into homogeneous chunks
 * for per-segment phonemization.
 *
 * @example
 * ```js
 * import { UnicodeLanguageDetector } from '@piper-plus/g2p/detect';
 *
 * const detector = new UnicodeLanguageDetector(['ja', 'en', 'zh']);
 * detector.detectLanguage('こんにちは'); // => 'ja'
 * detector.detectLanguage('Hello'); // => 'en'
 *
 * detector.segmentText('Hello, こんにちは');
 * // => [{ language: 'en', text: 'Hello, ' }, { language: 'ja', text: 'こんにちは' }]
 * ```
 */
export declare class UnicodeLanguageDetector {
  /**
   * @param languages - Supported language codes. Defaults to all supported languages.
   */
  constructor(languages?: Language[]);

  /**
   * Detect the primary language of the given text.
   *
   * @param text - Text to analyze.
   * @returns Detected language code.
   */
  detectLanguage(text: string): Language;

  /**
   * Segment text into language-homogeneous chunks.
   *
   * Adjacent characters of the same detected language are grouped together.
   *
   * @param text - Mixed-language text to segment.
   * @returns Array of segments, each with a language code and text content.
   */
  segmentText(text: string): Array<{ language: Language; text: string }>;
}

// ---------------------------------------------------------------------------
// Encoder
// ---------------------------------------------------------------------------

/**
 * Encodes IPA phoneme tokens to Piper-compatible phoneme ID sequences.
 *
 * Handles PUA mapping, phoneme-to-ID lookup, and BOS/EOS/inter-phoneme
 * padding insertion required by Piper TTS models.
 *
 * @example
 * ```js
 * import { Encoder } from '@piper-plus/g2p/encode';
 *
 * const encoder = new Encoder(config.phoneme_id_map);
 * const { phonemeIds } = encoder.encode(tokens);
 * const { phonemeIds, prosodyFlat } = encoder.encodeWithProsody(tokens, prosody);
 * ```
 */
export declare class Encoder {
  /**
   * @param phonemeIdMap - Phoneme-to-ID mapping from Piper model config (config.json).
   *   NOTE(review): typed as phoneme -> ID list, the shape of Piper's
   *   `phoneme_id_map`; confirm against the implementation.
   */
  constructor(phonemeIdMap: Record<string, number[]>);

  /**
   * Encode IPA tokens to phoneme IDs.
   *
   * Applies PUA mapping for multi-character tokens, looks up IDs from the
   * phoneme_id_map, and inserts BOS/EOS/inter-phoneme padding.
   *
   * @param tokens - IPA phoneme token array from a phonemize() call.
   * @returns Object containing the phoneme ID sequence.
   */
  encode(tokens: string[]): { phonemeIds: number[] };

  /**
   * Encode IPA tokens to phoneme IDs with aligned prosody features.
   *
   * Inserts null/zero prosody entries at padding positions to maintain
   * alignment between phonemeIds and prosody data.
   *
   * @param tokens - IPA phoneme token array.
   * @param prosody - Parallel prosody info array from phonemizeWithProsody().
   * @returns Encoded phoneme IDs and flattened prosody features.
   */
  encodeWithProsody(tokens: string[], prosody: (ProsodyInfo | null)[]): EncodeResult;
}

// ---------------------------------------------------------------------------
// CustomDictionary
// ---------------------------------------------------------------------------

/**
 * Custom dictionary for pronunciation overrides.
 *
 * Supports JSON v1.0/v2.0 format (compatible with Rust/C++ implementations)
 * for user-defined pronunciation rules. Entries are applied as text
 * replacements before phonemization.
 *
 * @example
 * ```js
 * import { CustomDictionary } from '@piper-plus/g2p/custom-dict';
 *
 * // From inline entries
 * const dict = new CustomDictionary({ 'GUI': ['g', 'uː', 'iː'] });
 *
 * // From JSON file
 * const dict = await CustomDictionary.fromJSON('/path/to/dict.json');
 *
 * const { text, replacements } = dict.apply('Open the GUI');
 * ```
 */
export declare class CustomDictionary {
  /**
   * @param entries - Dictionary entries mapping surface forms to phoneme token arrays.
   */
  constructor(entries?: Record<string, string[]>);

  /**
   * Load a custom dictionary from a JSON file (v1.0 or v2.0 format).
   *
   * Fetches the JSON from the given path or URL and parses entries.
   *
   * @param pathOrUrl - Path or URL to the JSON dictionary file.
   * @returns Loaded CustomDictionary instance.
   */
  static fromJSON(pathOrUrl: string): Promise<CustomDictionary>;

  /**
   * Apply dictionary entries to the input text.
   *
   * Matches surface forms in the text and records their phoneme replacements.
   *
   * @param text - Input text to process.
   * @returns Object with the (possibly modified) text and a map of replacements applied.
   *   NOTE(review): the map is typed as string key -> phoneme token array;
   *   confirm the key/value shape against the implementation.
   * @deprecated Use {@link applyToText} instead for cross-language API consistency.
   */
  apply(text: string): { text: string; replacements: Map<string, string[]> };

  /**
   * Apply dictionary entries to the input text.
   *
   * Recommended method name, consistent with Python (`apply_to_text()`)
   * and Rust (`apply_to_text()`).
   *
   * @param text - Input text to process.
   * @returns Object with the (possibly modified) text and a map of replacements applied.
   *   NOTE(review): the map is typed as string key -> phoneme token array;
   *   confirm the key/value shape against the implementation.
   */
  applyToText(text: string): { text: string; replacements: Map<string, string[]> };
}

// ---------------------------------------------------------------------------
// PUA mapping utilities
// ---------------------------------------------------------------------------

/**
 * PUA (Private Use Area) mapping table.
 *
 * Maps multi-character IPA tokens to single Unicode PUA codepoints for
 * compatibility with Piper TTS phoneme_id_map. Must match
 * token_mapper.py FIXED_PUA_MAPPING exactly.
 *
 * @example
 * ```js
 * PUA_MAP['a:'] // => '\ue000'
 * PUA_MAP['ch'] // => '\ue00e'
 * PUA_MAP['N_uvular'] // => '\ue01c'
 * ```
 */
export declare const PUA_MAP: Record<string, string>;

/**
 * Map a multi-character IPA token to its PUA single codepoint.
 *
 * Returns the original token unchanged if no PUA mapping exists.
 *
 * @param token - IPA token (e.g. "ch", "N_m", "a:").
 * @returns PUA codepoint or original token.
 */
export declare function mapToken(token: string): string;

/**
 * Reverse-map a PUA codepoint back to its multi-character IPA token.
 *
 * Returns the original character unchanged if it is not a PUA codepoint.
 *
 * @param puaChar - PUA codepoint character.
 * @returns Original multi-character IPA token or the input character.
 */
export declare function unmapToken(puaChar: string): string;

// ---------------------------------------------------------------------------
// phoneme-extract utilities (Japanese)
// ---------------------------------------------------------------------------

/**
 * Extract phonemes and prosody info from OpenJTalk full-context labels.
 *
 * Replicates the Python phonemize_japanese() logic:
 * - Kurihara-method prosody markers: `[`, `]`, `#`
 * - `pau` to `_` conversion
 * - Context-dependent N variants (N_m, N_n, N_ng, N_uvular)
 * - PUA mapping for multi-character tokens
 * - A1/A2/A3 prosody feature extraction
 *
 * @param labels - Full-context labels from OpenJTalk (newline-separated).
 * @returns Object with PUA-mapped token array and parallel prosody array.
 */
export declare function extractPhonemesFromLabels(labels: string): {
  tokens: string[];
  prosody: (ProsodyInfo | null)[];
};

/**
 * Apply context-dependent N phoneme rules.
 *
 * Replaces generic "N" tokens with context-specific variants based on
 * the following phoneme:
 * - N_m: before bilabials (m, my, b, by, p, py)
 * - N_n: before alveolars (n, ny, t, ty, d, dy, ts, ch)
 * - N_ng: before velars (k, ky, kw, g, gy, gw)
 * - N_uvular: word-final or before vowels
 *
 * Matches _apply_n_phoneme_rules() in japanese.py.
 *
 * @param tokens - Array of phoneme tokens potentially containing "N".
 * @returns New array with "N" replaced by contextual variants.
 */
export declare function applyNPhonemeRules(tokens: string[]): string[];

/**
 * Map multi-character IPA tokens to PUA single codepoints.
 *
 * Applies the PUA_MAP to each token in the array. Single-character tokens
 * and tokens without a PUA mapping are passed through unchanged.
 *
 * Matches map_sequence() in token_mapper.py.
 *
 * @param tokens - Array of phoneme tokens.
 * @returns New array with multi-character tokens replaced by PUA codepoints.
 */
export declare function mapToPUA(tokens: string[]): string[];