/**
 * Team composer for auto-routing.
 *
 * Maps a TaskAssessment to a concrete TeamSpec using a routing table derived
 * from lifecycle eval data (s01–s04). The routing table is a pure data
 * structure — update it as eval pass rates change.
 *
 * Key invariants (empirically verified by eval suite):
 * - Haiku is worker-only. It is never selected as lead.
 * - Sonnet + one-liner onboarding = 100% lifecycle reliability → default lead.
 * - Opus lead only for high complexity (capable but verbose/expensive).
 * - Worker model matches subtask complexity, not overall task complexity.
 * - For codex workers, prefer gpt-5.5 or gpt-5.4-mini. See CODEX_MODEL_TIERS.
 */
import type { TaskAssessment } from './classifier.js';

/** Claude model tier used for leads and workers. */
export type ModelTier = 'haiku' | 'sonnet' | 'opus';

/** Onboarding prompt variants referenced by the eval results in this file. */
export type OnboardingVariant = 'bare' | 'one-liner' | 'brief' | 'skill';

/**
 * Which CLI harness to use for an agent.
 * Extend as opencode model evals complete and confirm role fitness.
 */
export type WorkerCli = 'claude' | 'codex' | 'opencode' | 'gemini' | 'droid';

/**
 * Roles from the choosing-swarm-patterns skill.
 * Each role slot in a pattern is filled by a harness+model with confirmed fitness.
 */
export type AgentRole =
  | 'lead'
  | 'coordinator'
  | 'worker'
  | 'planner'
  | 'reviewer'
  | 'critic'
  | 'verifier'
  | 'judge'
  | 'mapper'
  | 'reducer'
  | 'supervisor'
  | 'debater';

/**
 * Role fitness for a harness+model combination.
 * Derived from lifecycle eval pass rates (s01–s06).
 *
 * Fitness levels:
 *   'confirmed'   — s03+s04 ≥5 full-repeat runs pass reliably
 *   'provisional' — passes but with caveats (phantom rate, onboarding dependency)
 *   'not-viable'  — fails lifecycle or not relay-native
 *   'untested'    — eval not yet run
 */
export type RoleFitness = 'confirmed' | 'provisional' | 'not-viable' | 'untested';

/** Fitness verdict for one role, with optional free-form notes. */
export interface RoleFitEntry {
  fitness: RoleFitness;
  notes?: string;
}

/** Per-harness record of role fitness and onboarding preference. */
export interface HarnessRoleMap {
  harness: string;
  defaultModel?: string;
  /**
   * Fitness per role; roles without an entry are simply absent.
   * NOTE(review): the generic arguments were missing from this declaration
   * (`Partial>`); reconstructed as AgentRole → RoleFitEntry — confirm against
   * the implementation source.
   */
  roles: Partial<Record<AgentRole, RoleFitEntry>>;
  bestOnboarding: OnboardingVariant;
  relayNative: boolean;
}

/** One worker slot in a composed team. */
export interface WorkerSpec {
  role: AgentRole | string;
  model: ModelTier;
  task: string;
  /** Override CLI harness. Defaults to 'claude'. */
  cli?: WorkerCli;
  /** For codex workers: the OpenAI model name to pass via --model. */
  codexModel?: string;
  /** For opencode workers: the opencode model suffix. */
  opencodeModel?: string;
}

/** A composed team: one lead plus zero or more workers. */
export interface TeamSpec {
  lead: {
    model: ModelTier;
    onboarding: OnboardingVariant;
  };
  workers: WorkerSpec[];
}

/** Codex (OpenAI) model names grouped by eval-derived recommendation tier. */
export declare const CODEX_MODEL_TIERS: {
  /**
   * gpt-5.5 — best. 16/16 scenarios PASS, 100% s03 one-liner+, 100% s04 all.
   * Recommended default for codex workers.
   */
  readonly recommended: "gpt-5.5";
  /**
   * gpt-5.4-mini — viable budget tier. 15/16 scenarios PASS (only s03:skill fails).
   * 100% s03 bare/one-liner, 80% brief/skill. 80–100% s04. Use bare or one-liner
   * onboarding for best results. phantom=31% (slightly noisy).
   */
  readonly budget: "gpt-5.4-mini";
  /**
   * gpt-5.4 — avoid. 16/16 scenarios PASS on majority-vote but phantom=52% (14
   * phantom agents) and per-run reliability is 60% across s03/s04 variants.
   * The config migration alias `gpt-5.4 → gpt-5.5` does NOT apply at runtime —
   * these are distinct models with significantly different behaviour.
   */
  readonly avoid: "gpt-5.4";
  /**
   * gpt-5.3-codex-spark — not viable. 6/16 scenarios PASS. Fails s03 one-liner/
   * brief/skill, s04 one-liner/brief. Ultra-fast but sacrifices relay reliability.
   */
  readonly notViable: "gpt-5.3-codex-spark";
};

/**
 * Onboarding variant lookup table.
 * NOTE(review): the generic arguments were missing from this declaration
 * (`Record;`); reconstructed as harness name → OnboardingVariant, presumably
 * mirroring HarnessRoleMap.harness / bestOnboarding — confirm the key and
 * value types against the implementation source.
 */
export declare const HARNESS_ONBOARDING: Record<string, OnboardingVariant>;

/** Role-fitness records, one entry per harness. */
export declare const HARNESS_ROLE_MAP: HarnessRoleMap[];

/**
 * Look up which harnesses can fill a given role at 'confirmed' or better fitness.
 * Use to populate role slots when composing teams for specific swarm patterns.
 *
 * @param role - The role slot to fill.
 * @returns The harness records that qualify for `role`.
 */
export declare function harnessesForRole(role: AgentRole): HarnessRoleMap[];

/**
 * Map a TaskAssessment to a TeamSpec using the routing table.
 * Pure function — no I/O, no LLM call.
 *
 * @param assessment - Classifier output describing the task.
 * @param originalTask - The original task text (presumably carried into the
 *   worker task descriptions — confirm against the implementation).
 * @returns The lead and worker configuration for the task.
 */
export declare function composeTeam(assessment: TaskAssessment, originalTask: string): TeamSpec;
//# sourceMappingURL=composer.d.ts.map