/**
 * Spec Verification Engine
 *
 * Tests whether generated specs accurately describe the codebase by using
 * the specs to predict code behavior and comparing against actual files.
 */
import type { LLMService } from '../services/llm-service.js';
import type { DependencyGraphResult } from '../analyzer/dependency-graph.js';

/**
 * A file selected as a candidate for verification against the specs.
 */
export interface VerificationCandidate {
  /** Path of the file (presumably relative to the project root — confirm against the implementation). */
  path: string;
  /** Absolute path of the same file. */
  absolutePath: string;
  /** Spec domain the file was assigned to. */
  domain: string;
  /** Whether the file was already used as context when the specs were generated. */
  usedInGeneration: boolean;
  complexity: number;
  lines: number;
  imports: number;
  exports: number;
}

/**
 * What the LLM predicted for a single file.
 */
export interface FilePrediction {
  predictedPurpose: string;
  predictedImports: string[];
  predictedExports: string[];
  predictedLogic: string[];
  relatedRequirements: string[];
  confidence: number;
  /** LLM-as-judge score: how accurately does the spec describe this file (0.0–1.0) */
  specAccuracyScore?: number;
  /** LLM-as-judge score: fraction of this file's behavior covered by spec requirements (0.0–1.0) */
  requirementCoverageScore?: number;
  reasoning: string;
}

/**
 * Outcome of comparing the predicted purpose with the actual one.
 */
export interface PurposeMatch {
  predicted: string;
  actual: string;
  similarity: number;
}

/**
 * Outcome of comparing a predicted set (imports or exports) with the actual
 * set, expressed as precision, recall and F1.
 */
export interface SetMatch {
  predicted: string[];
  actual: string[];
  precision: number;
  recall: number;
  f1Score: number;
}

/**
 * Requirement coverage analysis for a file.
 */
export interface RequirementCoverage {
  relatedRequirements: string[];
  actuallyImplements: string[];
  coverage: number;
}

/**
 * Verification result for one file, combining the individual match results
 * with an overall score and textual feedback.
 */
export interface VerificationResult {
  filePath: string;
  domain: string;
  purposeMatch: PurposeMatch;
  importMatch: SetMatch;
  exportMatch: SetMatch;
  requirementCoverage: RequirementCoverage;
  overallScore: number;
  llmConfidence: number;
  feedback: string[];
}

/**
 * Per-domain summary entry in the verification report.
 */
export interface DomainBreakdown {
  domain: string;
  specPath: string;
  filesVerified: number;
  averageScore: number;
  weakestArea: string;
}

/**
 * Suggested improvement
 */
export interface SuggestedImprovement {
  domain: string;
  issue: string;
  suggestion: string;
}

/**
 * Complete verification report
 */
export interface VerificationReport {
  timestamp: string;
  specVersion: string;
  sampledFiles: number;
  passedFiles: number;
  overallConfidence: number;
  domainBreakdown: DomainBreakdown[];
  commonGaps: string[];
  recommendation: 'ready' | 'needs-review' | 'regenerate';
  suggestedImprovements: SuggestedImprovement[];
  results: VerificationResult[];
}

/**
 * Engine options
 */
export interface VerificationEngineOptions {
  /** Root directory of the project */
  rootPath: string;
  /** Path to openspec directory */
  openspecPath: string;
  /** Output directory for reports */
  outputDir: string;
  /** Minimum complexity (lines) for candidate files */
  minComplexity?: number;
  /** Maximum complexity (lines) for candidate files */
  maxComplexity?: number;
  /** Number of files to sample per domain */
  filesPerDomain?: number;
  /** Passing threshold for overall score */
  passThreshold?: number;
  /** Files used in generation (to exclude) */
  generationContext?: string[];
}

/**
 * Spec Verification Engine
 */
export declare class SpecVerificationEngine {
  private llm;
  private options;
  private specs;
  private fileDomainMap;
  private parser;

  constructor(llm: LLMService, options: VerificationEngineOptions);

  /**
   * Run full verification
   *
   * @returns The complete verification report.
   */
  verify(depGraph: DependencyGraphResult, specVersion: string): Promise<VerificationReport>;

  /**
   * Load all specs from openspec directory
   */
  private loadSpecs;

  /**
   * Load file→domain mapping from .openlore/analysis/mapping.json.
   * Falls back silently if the file doesn't exist (e.g. before first analysis run).
   */
  private loadFileDomainMap;

  /**
   * Select verification candidate files
   */
  selectCandidates(depGraph: DependencyGraphResult): VerificationCandidate[];

  /**
   * Resolve the spec domain for a file.
   *
   * Priority:
   *   1. mapping.json lookup — deterministic, built from the analysis run.
   *   2. Path heuristic — walk segments, match against known spec domain names
   *      (exact, then prefix ≥4 chars to handle utils→utilities etc.).
   *   3. Fallback — first meaningful non-structural segment.
   */
  private inferDomain;

  /**
   * Verify a single file
   *
   * @returns The verification result for the given candidate.
   */
  verifyFile(candidate: VerificationCandidate): Promise<VerificationResult>;

  /**
   * Build specs context string capped at maxChars to avoid silent LLM token overflow.
   * Specs are included in order; the last spec may be truncated if the budget is tight.
   */
  private buildSpecsContext;

  /**
   * Get prediction from LLM.
   *
   * When fileContent is provided the prompt uses an LLM-as-judge approach:
   * the model sees both the spec and the actual file content, and returns a
   * specAccuracyScore (0–1) measuring how well the spec describes the file.
   * This replaces the brittle Jaccard keyword-overlap used for purposeMatch.
   */
  private getPrediction;

  /**
   * Compare predicted purpose to actual file content.
   *
   * When specAccuracyScore is provided (LLM-as-judge), it is used directly as
   * the similarity score — this is far more reliable than keyword overlap because
   * the LLM has seen the actual file and can assess whether the spec describes it.
   * Falls back to Jaccard keyword overlap when no LLM score is available.
   */
  private comparePurpose;

  /**
   * Extract purpose from file content (comments, docstrings)
   */
  private extractPurpose;

  /**
   * Calculate text similarity using keyword overlap
   */
  private calculateSimilarity;

  /**
   * Normalize a word for similarity comparison by truncating to its first 5
   * characters. This is more robust than suffix-stripping for technical
   * English: "generate/generates/generating/generation" all share the prefix
   * "gener", "verify/verification/verifies" share "verif", etc.
   * Tested against 26 word pairs: 18/26 correct matches, 0 false positives.
   */
  private normalize;

  /**
   * Extract keywords from text
   */
  private extractKeywords;

  /**
   * Analyze import coverage using spec content rather than LLM predictions.
   * For each actual import (normalized to module name), checks whether it is
   * mentioned in the domain's spec text (exact name or hyphen→space variant).
   * This is a spec-completeness check: are the modules the file depends on
   * actually described in the spec?
   *
   * Returns a SetMatch where:
   *   - actual    = all normalized actual import module names
   *   - predicted = subset of actual imports that appear in the spec text
   *   - f1Score   = recall = fraction of actual imports covered by spec
   */
  private analyzeImportCoverage;

  /**
   * Normalize import path for comparison.
   * Strips file extensions and the first leading `./` or `../` prefix,
   * then extracts the final path segment (module name) in lowercase.
   * Deeply-nested relative paths (e.g. `../../foo`) are handled correctly
   * because only the last segment is used for comparison.
   */
  private normalizeImport;

  /**
   * Compare predicted exports to actual
   */
  private compareExports;

  /**
   * Calculate precision, recall, F1 for set comparison
   */
  private calculateSetMatch;

  /**
   * Parse requirements from a spec's markdown content.
   * Returns an array of { name, description } extracted from
   * "### Requirement: Name\n\nThe system SHALL ..." blocks.
   */
  private parseSpecRequirements;

  /**
   * Analyze requirement coverage.
   *
   * When llmScore is provided (LLM-as-judge), it is used directly — the LLM
   * has seen both the spec and the file and scores only the requirements
   * relevant to this specific file, avoiding the false penalty of a domain
   * spec covering many files where each file implements only a small subset.
   *
   * Falls back to keyword matching when no LLM score is available.
   */
  private analyzeRequirementCoverage;

  /**
   * Calculate overall score (weighted combination)
   */
  private calculateOverallScore;

  /**
   * Generate feedback for gaps
   */
  private generateFeedback;

  /**
   * Generate verification report
   */
  private generateReport;

  /**
   * Save verification report
   */
  private saveReport;

  /**
   * Generate markdown report
   */
  private generateMarkdownReport;

  /**
   * Get list of domains from loaded specs.
   * If specs have not been loaded yet (i.e., verify() has not been called),
   * triggers an eager load so callers can preview domains without a full LLM run.
   *
   * @returns The domain names found in the loaded specs.
   */
  getDomains(): Promise<string[]>;
}

/**
 * Run verification on a project
 *
 * @returns The complete verification report.
 */
export declare function verifySpecs(
  llm: LLMService,
  depGraph: DependencyGraphResult,
  options: VerificationEngineOptions,
  specVersion: string
): Promise<VerificationReport>;
//# sourceMappingURL=verification-engine.d.ts.map