import {describe, it, expect} from 'vitest';
import {FormantVisemeMapper, type AudioFeatures} from './FormantVisemeMapper';

/**
 * Builds an unvoiced feature frame in which every numeric field is zero.
 *
 * @returns A fresh `AudioFeatures` object representing digital silence.
 */
function silence(): AudioFeatures {
  return {
    rms: 0,
    centroid: 0,
    low: 0,
    mid: 0,
    high: 0,
    f1Hz: 0,
    f2Hz: 0,
    voiced: false,
  };
}

/**
 * Builds a voiced feature frame with the given formants. Loudness, spectral
 * centroid and band energies are fixed so that only F1/F2 vary between tests.
 *
 * @param f1Hz First formant frequency, in Hz.
 * @param f2Hz Second formant frequency, in Hz.
 * @returns A fresh voiced `AudioFeatures` object.
 */
function vowel(f1Hz: number, f2Hz: number): AudioFeatures {
  return {
    rms: 0.15,
    centroid: 1500,
    low: 0.6,
    mid: 0.3,
    high: 0.05,
    f1Hz,
    f2Hz,
    voiced: true,
  };
}

/**
 * Settles the mapper into steady state by feeding it the same features for
 * many frames at a constant `dt`.
 *
 * The mapper is always updated at least once, even when `frames` is below 1,
 * so there is always a result to return.
 *
 * @param mapper Mapper instance to drive; its internal state is mutated.
 * @param features Feature frame passed to every update.
 * @param dt Time step per frame, in seconds.
 * @param frames Total number of updates to run.
 * @returns The viseme weights produced by the final update.
 */
function settle(
  mapper: FormantVisemeMapper,
  features: AudioFeatures,
  dt = 0.016,
  frames = 200
) {
  let out = mapper.update(features, dt);
  for (let i = 0; i < frames - 1; i++) {
    out = mapper.update(features, dt);
  }
  return out;
}

describe('FormantVisemeMapper', () => {
  it('silent input → all zero visemes', () => {
    const m = new FormantVisemeMapper();
    const out = settle(m, silence());

    expect(out.jawOpen).toBeLessThan(0.05);
    expect(out.aa).toBe(0);
    expect(out.oo).toBe(0);
    expect(out.ee).toBe(0);
  });

  it('low F1, low F2 (oo-like) → oo dominates aa and ee', () => {
    const out = settle(new FormantVisemeMapper(), vowel(350, 900));

    expect(out.oo).toBeGreaterThan(out.aa);
    expect(out.oo).toBeGreaterThan(out.ee);
  });

  it('low F1, high F2 (ee-like) → ee dominates aa and oo', () => {
    const out = settle(new FormantVisemeMapper(), vowel(350, 2400));

    expect(out.ee).toBeGreaterThan(out.aa);
    expect(out.ee).toBeGreaterThan(out.oo);
  });

  it('high F1 (aa-like) → aa dominates oo and ee', () => {
    const out = settle(new FormantVisemeMapper(), vowel(800, 1300));

    expect(out.aa).toBeGreaterThan(out.oo);
    expect(out.aa).toBeGreaterThan(out.ee);
  });

  it('a single frame of full input does not fully transfer (smoothing on)', () => {
    const m = new FormantVisemeMapper();
    const out = m.update(vowel(800, 1300), 0.016);

    // After one ~16 ms frame, jawOpen should still be well below 1.
    expect(out.jawOpen).toBeLessThan(0.8);
  });

  it('frame-rate-independent: 60 Hz and 120 Hz converge to same value at same wall-clock time', () => {
    const features = vowel(800, 1300);

    // 30 frames at 16.67 ms = ~500 ms wall clock.
    const v60 = settle(new FormantVisemeMapper(), features, 0.01667, 30);

    // 60 frames at 8.33 ms = ~500 ms wall clock.
    const v120 = settle(new FormantVisemeMapper(), features, 0.00833, 60);

    // Both should converge to nearly the same values because the time
    // constant is expressed in seconds, not in frames.
    expect(Math.abs(v60.jawOpen - v120.jawOpen)).toBeLessThan(0.05);
    expect(Math.abs(v60.aa - v120.aa)).toBeLessThan(0.05);
  });

  it('clears cached F1/F2 after sustained silence so the next vowel starts fresh', () => {
    const m = new FormantVisemeMapper();

    // Settle on /oo/ (low F1, low F2).
    settle(m, vowel(350, 900));
    const ooLocked = m.update(vowel(350, 900), 0.016);
    expect(ooLocked.oo).toBeGreaterThan(ooLocked.aa);
    expect(ooLocked.oo).toBeGreaterThan(ooLocked.ee);

    // 30 frames x 16 ms = 480 ms of contiguous silence: well past the
    // 250 ms cache reset.
    for (let i = 0; i < 30; i++) {
      m.update(silence(), 0.016);
    }

    // Now resume with /ee/ formants. If smoothF1/F2 had been retained
    // at the /oo/ values, the first few frames would smooth /ee/'s F2
    // ~2400 toward 900 and the mouth would briefly look like /oo/.
    // With the cache cleared, smoothF1/F2 reinit from the new frame's
    // raw values and /ee/ leads from frame one.
    const firstEe = m.update(vowel(300, 2400), 0.016);
    expect(firstEe.ee).toBeGreaterThan(firstEe.oo);
    expect(firstEe.ee).toBeGreaterThan(firstEe.aa);
  });

  it('reset() returns to zero state', () => {
    const m = new FormantVisemeMapper();
    settle(m, vowel(800, 1300));

    m.reset();

    const out = m.update(silence(), 0.016);
    expect(out.jawOpen).toBeLessThan(0.05);
    expect(out.aa).toBe(0);
  });
});