{
  "name": "agent-evaluation",
  "version": "1.0.0",
  "description": "\"Testing and benchmarking LLM agents including behavioral testing, capability assessment, reliability metrics, and production monitoring—where even top agents achieve less than 50% on real-world benchmarks Use when: agent testing, agent evaluation, benchmark agents, agent reliability, test agent.\"",
  "author": "local",
  "license": "MIT",
  "tools": [],
  "trigger_phrases": [],
  "compatible_agents": [
    "aiden"
  ],
  "min_agent_version": "3.0.0",
  "tags": [
    "code",
    "files"
  ],
  "created": "2026-04-27T17:11:42.834Z"
}