/**
 * Mock LLM provider for Iranti testing.
 *
 * Deterministic, scenario-driven provider that simulates realistic LLM
 * behavior for Staff component tests. Prompt classification (S3 architecture)
 * routes each Staff prompt to a specific canned response branch without regex
 * scanning at dispatch time, making test assertions on branch coverage possible.
 *
 * Features:
 * - Scenarios: default | disagreement | unreliable | collaborative | noisy
 * - Failure modes: malformed_json | wrong_shape | truncated | empty | throw
 * - Scheduled failures (failBeforeSuccessCount) for retry-path testing
 * - Probabilistic failures (failureModeRate) for fuzz-style testing
 * - Seeded randomness for deterministic test runs
 * - Response delay simulation (fixed or range-based)
 * - strictFallthrough mode to catch unhandled Staff prompt shapes
 * - Kind-count tracking so tests can assert which branches were exercised
 *
 * Key exports:
 * - configureMock() — reconfigure the singleton for a new test
 * - classifyPromptKind() — pure classifier for unit testing (no state)
 * - getMockKindCounts() — read per-kind call counts
 * - reset*Tracking() — clear counters between test cases
 * - default export — singleton MockProvider instance
 */
import { CompleteOptions, LLMMessage, LLMProvider, LLMResponse } from '../llm';

/** Named behavior profiles the mock can be configured with. */
export type MockScenario = 'default' | 'disagreement' | 'unreliable' | 'collaborative' | 'noisy';

/** Payload handed to `onFallthrough` and returned by the fallthrough getters. */
export interface MockFallthroughEvent {
    prompt: string;
    promptSnippet: string;
    callCount: number;
    at: number;
}

/**
 * Post-dispatch failure modes used to simulate a misbehaving LLM. These fire
 * AFTER a prompt branch has computed its normal answer, then corrupt it on the
 * way out. Staff code paths (Librarian, Attendant, Archivist) should degrade
 * gracefully when they see these shapes.
 *
 * - 'malformed_json': Returns truncated/invalid JSON where valid JSON is expected.
 *   Exercises JSON.parse try/catch fallback paths in Staff prompt consumers.
 * - 'wrong_shape': Returns syntactically valid JSON with the wrong schema.
 *   Exercises schema validation / type-guard fallbacks.
 * - 'truncated': Returns the first half of the normal response. Exercises
 *   incomplete-output handling for both JSON and free-text responses.
 * - 'empty': Returns an empty string. Exercises the "LLM gave us nothing"
 *   fallback paths which are often missing entirely.
 * - 'throw': Throws a provider error. Stronger than 'failureRate' because it
 *   can be scheduled (see failBeforeSuccessCount) instead of random.
 */
export type MockFailureMode = 'malformed_json' | 'wrong_shape' | 'truncated' | 'empty' | 'throw';

/** Payload handed to `onFailureMode` and returned by the failure-mode getters. */
export interface MockFailureEvent {
    mode: MockFailureMode;
    callCount: number;
    at: number;
    /** Whether this failure was scheduled (failBeforeSuccessCount) or sampled (failureModeRate). */
    scheduled: boolean;
}

/** Full configuration accepted by the mock provider. Only `scenario` is required. */
export interface MockConfig {
    scenario: MockScenario;
    agentId?: string;
    failureRate?: number;
    confidenceRange?: [number, number];
    seed?: number;
    responseDelayMs?: number;
    /**
     * When set, each call sleeps a random duration in this [min, max] ms range
     * BEFORE dispatch. Takes precedence over responseDelayMs. Useful for
     * simulating a flaky provider where latency varies wildly call to call.
     */
    responseDelayRangeMs?: [number, number];
    /**
     * When true, the mock throws instead of returning the canned researcher-profile
     * JSON when no prompt-branch matches. Use this in tests to catch silently
     * unhandled Staff prompt shapes. Default: false (backward compatible).
     */
    strictFallthrough?: boolean;
    /**
     * Optional callback invoked every time the mock falls through to its canned
     * response. Fires regardless of strictFallthrough. Useful for asserting
     * expected vs. unexpected fallthroughs in tests.
     */
    onFallthrough?: (event: MockFallthroughEvent) => void;
    /**
     * Primary failure mode applied to the normal response on the way out.
     * If omitted, no failure mode is applied and the mock behaves exactly as
     * S1 specified. Combined with failureModeRate (probabilistic) or
     * failBeforeSuccessCount (scheduled) to decide when it fires.
     */
    failureMode?: MockFailureMode;
    /**
     * Probability (0..1) of firing failureMode on any given call. Ignored
     * when failBeforeSuccessCount > 0 — scheduled failures win over random ones
     * until the schedule is exhausted.
     */
    failureModeRate?: number;
    /**
     * Force the first N calls to fail with failureMode, then succeed from
     * call N+1 onward. Models the "timeout-then-success" pattern needed to
     * exercise retry-with-backoff paths in Staff consumers.
     */
    failBeforeSuccessCount?: number;
    /**
     * Optional callback invoked every time a failure mode fires. Fires for
     * both scheduled and sampled failures. Useful for asserting failure
     * counts and shapes in tests.
     */
    onFailureMode?: (event: MockFailureEvent) => void;
}

/**
 * S3: deterministic prompt kinds the mock knows how to answer. Every Staff
 * prompt gets classified into exactly one of these kinds before any response
 * is computed. The flat substring-based if/else chain that used to drive
 * complete() is now a single switch on the classifier output. Benefits:
 *
 * - The classifier is a pure function: easy to unit-test in isolation, no
 *   provider state required. Tests can assert which kind a given Staff
 *   prompt classifies to WITHOUT running the mock.
 * - Kind counts are tracked so tests can verify that a scenario exercised
 *   every expected branch instead of guessing from fallthrough counts alone.
 * - Adding a new Staff prompt means adding one kind + one case, not hunting
 *   for a spot in a regex chain.
 *
 * 'unknown' is the deliberate fallthrough — a prompt the classifier did not
 * recognize. The existing fallthrough machinery (strictFallthrough,
 * onFallthrough, canned researcher JSON) still handles this case so behavior
 * is exactly preserved across the refactor.
 */
export type PromptKind =
    | 'memory_need'
    | 'entity_extraction'
    | 'task_inference'
    | 'relevance_filter'
    | 'conflict_resolution'
    | 'fact_extraction'
    | 'compression'
    | 'unknown';

/**
 * S3: classify a Staff prompt into its deterministic kind. Pure function —
 * no side effects, no provider state, no randomness. The matching rules are
 * intentionally the same substrings the flat dispatch used, so this refactor
 * preserves behavior bit-for-bit; only the structure changes.
 */
export declare function classifyPromptKind(message: string): PromptKind;

/**
 * Mock implementation of the LLMProvider contract. The class itself is not
 * exported; consumers reach it through the default-exported singleton and the
 * module-level helper functions declared below.
 */
declare class MockProvider implements LLMProvider {
    private config;
    private rand;
    private callCount;
    private fallthroughCount;
    private lastFallthrough;
    private failureModeCount;
    private lastFailureMode;
    private scheduledFailuresRemaining;
    private unreliableRotationIndex;
    private kindCounts;
    constructor(config?: MockConfig);
    /** Reconfigure this instance; takes a partial config so callers may pass only the fields they want to set. */
    configure(config: Partial<MockConfig>): void;
    private resetKindCounts;
    /** LLMProvider entry point: answers the given messages with a mock response. */
    complete(messages: LLMMessage[], options?: CompleteOptions): Promise<LLMResponse>;
    private respond;
    private resolveActiveFailureMode;
    private nextUnreliableMode;
    private recordFailureMode;
    private applyFailureMode;
    getCallCount(): number;
    resetCallCount(): void;
    getFallthroughCount(): number;
    /** Most recent fallthrough event, or null (presumably when none has been recorded — implementation not visible here). */
    getLastFallthrough(): MockFallthroughEvent | null;
    resetFallthroughTracking(): void;
    getFailureModeCount(): number;
    /** Most recent failure-mode event, or null (presumably when none has been recorded — implementation not visible here). */
    getLastFailureMode(): MockFailureEvent | null;
    getScheduledFailuresRemaining(): number;
    resetFailureModeTracking(): void;
    /** Per-kind call counts, keyed by PromptKind. */
    getKindCounts(): Record<PromptKind, number>;
    resetKindCountsExternal(): void;
}

/** Singleton MockProvider instance (the module's default export). */
declare const mockProvider: MockProvider;

/** Reconfigure the singleton for a new test. */
export declare function configureMock(config: Partial<MockConfig>): void;

// Fallthrough tracking helpers. NOTE(review): presumably thin wrappers over the
// singleton's same-named methods — confirm against the implementation file.
export declare function getMockFallthroughCount(): number;
export declare function getLastMockFallthrough(): MockFallthroughEvent | null;
export declare function resetMockFallthroughTracking(): void;

// Failure-mode tracking helpers (same caveat as above).
export declare function getMockFailureModeCount(): number;
export declare function getLastMockFailureMode(): MockFailureEvent | null;
export declare function getMockScheduledFailuresRemaining(): number;
export declare function resetMockFailureModeTracking(): void;

/** Read per-kind call counts. */
export declare function getMockKindCounts(): Record<PromptKind, number>;
export declare function resetMockKindCounts(): void;

export default mockProvider;
//# sourceMappingURL=mock.d.ts.map