/**
 * Judge Prompt — heuristic detectors + system prompt for hidden-session judge.
 *
 * Pattern sourced from dzianisv/opencode-plugins (reflection-3.ts).
 * The heuristic detectors flag common premature-stop patterns before the LLM
 * is involved. The full judge runs in a hidden session so that it does not
 * bias the main conversation.
 *
 * Judge rubric mined from 227 real agent stops (78% premature).
 * Keep this minimal — not the 2000-line reflection-3.ts.
 */

/**
 * Snapshot of a task that the heuristic detectors inspect.
 *
 * NOTE(review): units and ranges of the numeric fields are not visible in
 * these declarations — confirm against the implementation.
 */
export interface TaskContext {
    /** Tool-call count (a high count is one half of the planning-loop pattern). */
    toolCalls: number;
    /** Write ratio (a low ratio is the other half of the planning-loop pattern). */
    writeRatio: number;
    /** Text of the last message. */
    lastMessage: string;
    /** Optional count of consecutive identical commands (action-loop signal). */
    consecutiveIdenticalCommands?: number;
}

/**
 * Outcome of a heuristic detector.
 *
 * NOTE(review): `null` presumably means "no verdict"; verify against the
 * implementation.
 */
export type HeuristicVerdict =
    | 'premature'
    | 'suspicious'
    | 'normal'
    | null;

/**
 * PLANNING_LOOP detector.
 *
 * Targets an agent that issues many tool calls while writing very little:
 * high `toolCalls` combined with a low `writeRatio` suggests it is stuck
 * planning/investigating without making progress.
 *
 * @param ctx - Task snapshot to inspect.
 * @param writeRatioThreshold - Optional write-ratio cutoff; the default is
 *   defined by the implementation and is not visible here.
 * @returns The heuristic verdict for this pattern.
 */
export declare function detectPlanningLoop(
    ctx: TaskContext,
    writeRatioThreshold?: number,
): HeuristicVerdict;

/**
 * ACTION_LOOP detector.
 *
 * Targets an agent that keeps re-running the same command: 3+ consecutive
 * identical commands are treated as a loop.
 *
 * @param ctx - Task snapshot to inspect.
 * @returns The heuristic verdict for this pattern.
 */
export declare function detectActionLoop(ctx: TaskContext): HeuristicVerdict;

/**
 * PERMISSION-SEEKING detector.
 *
 * Targets a final turn that asks a yes/no question about something the agent
 * could do itself, which counts as a premature stop. The signal is the
 * presence of permission-asking phrases in the message.
 *
 * @param lastMessage - Text of the last message.
 * @returns The heuristic verdict for this pattern.
 */
export declare function detectPermissionSeeking(
    lastMessage: string,
): HeuristicVerdict;

/**
 * STOPPED-WITH-TODOS detector.
 *
 * Targets a response that enumerates outstanding work and then stops: it
 * mentions "remaining tasks", "next steps" or "todo" while work remains.
 *
 * @param lastMessage - Text of the last message.
 * @returns The heuristic verdict for this pattern.
 */
export declare function detectStoppedWithTodos(
    lastMessage: string,
): HeuristicVerdict;

/**
 * FALSE-COMPLETE detector.
 *
 * Targets a response that claims completion ("done", "complete", "finished")
 * without any test/verification commands backing the claim.
 *
 * @param lastMessage - Text of the last message.
 * @returns The heuristic verdict for this pattern.
 */
export declare function detectFalseComplete(
    lastMessage: string,
): HeuristicVerdict;

/**
 * Runs every heuristic detector and reports the most severe verdict.
 *
 * @param ctx - Task snapshot to inspect.
 * @returns The most severe verdict together with a reason, which may be
 *   `null`. NOTE(review): the severity ordering and the conditions under
 *   which `reason` is `null` are not visible here — confirm against the
 *   implementation.
 */
export declare function runHeuristicDetectors(ctx: TaskContext): {
    verdict: HeuristicVerdict;
    reason: string | null;
};

/**
 * Builds the system prompt used for the hidden-session judge LLM call.
 * Intended to stay short and focused — not the 2000-line reflection-3.ts.
 *
 * @returns The system prompt text.
 */
export declare function buildJudgeSystemPrompt(): string;

/**
 * Parses the judge LLM's response into a structured verdict.
 *
 * @param response - Raw text returned by the judge LLM.
 * @returns A `{ verdict, reason }` pair, or `null`. NOTE(review): `null`
 *   presumably signals an unparseable response — verify against the
 *   implementation.
 */
export declare function parseJudgeResponse(response: string): {
    verdict: 'complete' | 'incomplete';
    reason: string;
} | null;