/**
 * Performance Regression Detection
 *
 * Tracks response times across tool executions and detects performance regressions.
 * Provides percentile-based metrics (p50, p95, p99) for comprehensive latency analysis.
 */
import type { BehavioralBaseline, ChangeSeverity, PerformanceConfidence } from './types.js';
/**
 * Latency trend direction.
 *
 * One of `'improving'`, `'stable'`, or `'degrading'`. Used as the per-tool
 * `trend` on {@link PerformanceComparison} and as the `overallTrend` on
 * {@link PerformanceReport}.
 */
export type LatencyTrend = 'improving' | 'stable' | 'degrading';
/**
 * Performance metrics for a single tool.
 *
 * This is the shape returned by {@link calculateMetrics} and stored as the
 * map values returned by {@link aggregateSamplesByTool}. All fields with an
 * `Ms` suffix are expressed in milliseconds.
 */
export interface ToolPerformanceMetrics {
    /** Tool name */
    toolName: string;
    /** 50th percentile (median) latency in milliseconds */
    p50Ms: number;
    /** 95th percentile latency in milliseconds */
    p95Ms: number;
    /** 99th percentile latency in milliseconds */
    p99Ms: number;
    /** Success rate as a fraction in the range 0-1 (not a percentage) */
    successRate: number;
    /** Total number of executions */
    sampleCount: number;
    /** Average latency in milliseconds */
    avgMs: number;
    /** Minimum latency in milliseconds */
    minMs: number;
    /** Maximum latency in milliseconds */
    maxMs: number;
    /** Standard deviation of latency in milliseconds */
    stdDevMs: number;
    /** Timestamp of when metrics were collected */
    collectedAt: Date;
    /** Statistical confidence metrics; optional, so consumers must handle its absence */
    confidence?: PerformanceConfidence;
}
/**
 * Performance baseline for a tool (stored in baseline file).
 *
 * Instances are produced by {@link createPerformanceBaseline} and
 * {@link extractPerformanceBaselines}, and consumed by
 * {@link comparePerformance}.
 *
 * NOTE(review): the baseline latencies are presumably in milliseconds, matching
 * the `*Ms` fields of {@link ToolPerformanceMetrics} - confirm against the
 * implementation.
 */
export interface PerformanceBaseline {
    /** Tool name */
    toolName: string;
    /** Baseline 50th percentile latency */
    baselineP50: number;
    /** Baseline 95th percentile latency */
    baselineP95: number;
    /** Baseline 99th percentile latency */
    baselineP99: number;
    /** Baseline success rate */
    baselineSuccessRate: number;
    /**
     * Maximum allowed regression percentage (default from config).
     * NOTE(review): the scale (e.g. `10` vs `0.1` for ten percent) is not
     * visible in this declaration - confirm before comparing values.
     */
    maxAllowedRegression: number;
    /** When the baseline was established */
    establishedAt: Date;
}
/**
 * Performance comparison result for a single tool.
 *
 * Returned by {@link comparePerformance} and collected in
 * {@link PerformanceReport.toolComparisons}.
 */
export interface PerformanceComparison {
    /** Tool name */
    toolName: string;
    /** Current metrics */
    current: ToolPerformanceMetrics;
    /** Baseline metrics (if available); absent when there is nothing to compare against */
    baseline?: PerformanceBaseline;
    /** Latency trend */
    trend: LatencyTrend;
    /**
     * Regression percentage for p50 (positive = slower, negative = faster).
     * May be `null`; presumably when no baseline is available - TODO confirm.
     */
    p50RegressionPercent: number | null;
    /** Regression percentage for p95; may be `null` (see `p50RegressionPercent`) */
    p95RegressionPercent: number | null;
    /** Regression percentage for p99; may be `null` (see `p50RegressionPercent`) */
    p99RegressionPercent: number | null;
    /** Whether this tool has regressed beyond threshold */
    hasRegression: boolean;
    /** Severity of the regression */
    severity: ChangeSeverity;
    /** Human-readable summary */
    summary: string;
    /** Current confidence level; optional, so consumers must handle its absence */
    confidence?: PerformanceConfidence;
    /** Whether the regression is statistically reliable (based on confidence) */
    isReliable: boolean;
}
/**
 * Overall performance report for a baseline comparison.
 *
 * Returned by {@link generatePerformanceReport}. Aggregates the per-tool
 * {@link PerformanceComparison} results into counts, an overall trend and
 * severity, and confidence bookkeeping.
 */
export interface PerformanceReport {
    /** Individual tool comparisons */
    toolComparisons: PerformanceComparison[];
    /** Number of tools with performance regressions */
    regressionCount: number;
    /** Number of tools with improved performance */
    improvementCount: number;
    /** Number of tools with stable performance */
    stableCount: number;
    /** Overall performance trend */
    overallTrend: LatencyTrend;
    /** Overall severity */
    overallSeverity: ChangeSeverity;
    /** Human-readable summary */
    summary: string;
    /** Number of tools with low confidence */
    lowConfidenceCount: number;
    /** Names of the tools with low confidence */
    lowConfidenceTools: string[];
    /** Number of reliable regressions (regressions with good confidence) */
    reliableRegressionCount: number;
}
/**
 * Raw latency sample for calculating metrics.
 *
 * Arrays of samples are the input to {@link calculateMetrics},
 * {@link calculatePerformanceConfidence} and {@link aggregateSamplesByTool}.
 */
export interface LatencySample {
    /** Name of the tool this sample was recorded for */
    toolName: string;
    /** Measured duration in milliseconds */
    durationMs: number;
    /** Whether the tool execution succeeded */
    success: boolean;
    /** Time associated with the sample (NOTE(review): start vs. end of the call is not specified here) */
    timestamp: Date;
    /**
     * Expected outcome of this test.
     * - 'success': Happy path test, expects tool to succeed
     * - 'error': Validation test, expects tool to reject/fail
     * - 'either': Edge case, either outcome is acceptable
     */
    expectedOutcome?: 'success' | 'error' | 'either';
    /**
     * Whether the outcome was correct based on expectations.
     * True if: (expected success && got success) OR (expected error && got error)
     * NOTE(review): how `'either'` (or a missing `expectedOutcome`) maps onto
     * this flag is not stated in this declaration - confirm with the producer.
     */
    outcomeCorrect?: boolean;
}
/**
 * Calculate statistical confidence for performance metrics.
 *
 * Confidence is determined by:
 * 1. Sample count - more samples = higher confidence
 * 2. Coefficient of variation (CV) - lower variability = higher confidence
 *
 * Key insight: For confidence calculation, only happy-path tests that expect
 * success are counted. Validation tests (`expectedOutcome: 'error'`) are
 * tracked separately because their failure doesn't indicate tool problems.
 *
 * Note: The first sample is excluded from the variance calculation because it
 * includes cold-start overhead (JIT compilation, connection establishment,
 * cache warming). This gives more accurate confidence scores for steady-state
 * performance.
 *
 * NOTE(review): `options.excludeWarmup` presumably toggles the warm-up
 * exclusion described above; its default value is not visible in this
 * declaration - confirm against the implementation.
 *
 * @param samples - The latency samples to analyze
 * @param options - Optional configuration; may be omitted entirely
 * @returns Performance confidence metrics
 */
export declare function calculatePerformanceConfidence(samples: LatencySample[], options?: {
    excludeWarmup?: boolean;
}): PerformanceConfidence;
/**
 * Calculate performance confidence from ToolPerformanceMetrics.
 * Use this when you already have calculated metrics but need confidence.
 *
 * Note: This function assumes the metrics are from happy path tests only.
 * For full validation/success separation, use
 * {@link calculatePerformanceConfidence} with raw samples.
 *
 * @param metrics - Previously calculated metrics for a single tool
 * @param options - Optional extra counts; both fields may be omitted.
 *   NOTE(review): `validationSamples` and `totalTests` presumably supply the
 *   validation-test and overall test counts that cannot be derived from
 *   `metrics` alone - confirm against the implementation.
 * @returns Performance confidence metrics
 */
export declare function calculateConfidenceFromMetrics(metrics: ToolPerformanceMetrics, options?: {
    validationSamples?: number;
    totalTests?: number;
}): PerformanceConfidence;
/**
 * Format confidence level for display.
 *
 * @param confidence - The confidence metrics to format
 * @param includeIndicator - Optional flag; presumably controls whether a
 *   visual indicator is included in the output. The default is not visible in
 *   this declaration - TODO confirm.
 * @returns The formatted confidence level as a string
 */
export declare function formatConfidenceLevel(confidence: PerformanceConfidence, includeIndicator?: boolean): string;
/**
 * Check if performance data has sufficient confidence for reliable comparisons.
 *
 * @param confidence - The confidence metrics to evaluate
 * @returns `true` when the confidence is considered sufficient, otherwise
 *   `false`. The exact criteria live in the implementation and are not visible
 *   in this declaration.
 */
export declare function hasReliableConfidence(confidence: PerformanceConfidence): boolean;
/**
 * Calculate performance metrics from raw latency samples.
 *
 * @param samples - Raw latency samples to summarize
 * @returns The calculated metrics, or `null` when no metrics can be produced.
 *   NOTE(review): presumably `null` is returned for an empty `samples` array -
 *   confirm against the implementation. Callers must handle the `null` case.
 */
export declare function calculateMetrics(samples: LatencySample[]): ToolPerformanceMetrics | null;
/**
 * Create a performance baseline from metrics.
 *
 * @param metrics - Metrics to derive the baseline from
 * @param maxAllowedRegression - Optional maximum allowed regression
 *   percentage for the baseline. The default used when omitted is not visible
 *   in this declaration - TODO confirm.
 * @returns A new {@link PerformanceBaseline}
 */
export declare function createPerformanceBaseline(metrics: ToolPerformanceMetrics, maxAllowedRegression?: number): PerformanceBaseline;
/**
 * Extract performance baselines from a behavioral baseline.
 * Uses the performance metrics stored in tool fingerprints.
 *
 * @param baseline - The behavioral baseline to read performance data from
 * @param regressionThreshold - Optional regression threshold; the default used
 *   when omitted is not visible in this declaration - TODO confirm.
 * @returns A map of {@link PerformanceBaseline} entries with string keys
 *   (presumably tool names - verify against the implementation)
 */
export declare function extractPerformanceBaselines(baseline: BehavioralBaseline, regressionThreshold?: number): Map<string, PerformanceBaseline>;
/**
 * Compare current metrics against baseline.
 *
 * @param current - Current metrics for the tool
 * @param baseline - Baseline to compare against; the parameter is required but
 *   may be passed as `undefined` when no baseline exists for the tool
 * @param regressionThreshold - Optional regression threshold; the default used
 *   when omitted is not visible in this declaration - TODO confirm.
 * @returns The comparison result for the tool
 */
export declare function comparePerformance(current: ToolPerformanceMetrics, baseline: PerformanceBaseline | undefined, regressionThreshold?: number): PerformanceComparison;
/**
 * Generate a complete performance report comparing current and baseline.
 *
 * @param currentMetrics - Current metrics in a string-keyed map
 * @param baselines - Baselines in a string-keyed map
 *   (NOTE(review): both maps are presumably keyed by tool name so entries can
 *   be matched up - confirm against the implementation)
 * @param regressionThreshold - Optional regression threshold; the default used
 *   when omitted is not visible in this declaration - TODO confirm.
 * @returns The aggregated {@link PerformanceReport}
 */
export declare function generatePerformanceReport(currentMetrics: Map<string, ToolPerformanceMetrics>, baselines: Map<string, PerformanceBaseline>, regressionThreshold?: number): PerformanceReport;
/**
 * Format performance metrics for display.
 *
 * @param metrics - The metrics to format
 * @returns A display string for the metrics
 */
export declare function formatMetrics(metrics: ToolPerformanceMetrics): string;
/**
 * Format performance comparison for display.
 *
 * @param comparison - The comparison result to format
 * @returns A display string for the comparison
 */
export declare function formatComparison(comparison: PerformanceComparison): string;
/**
 * Check if metrics indicate acceptable performance.
 *
 * @param comparison - The comparison result to evaluate
 * @param failOnRegression - Optional flag; presumably determines whether a
 *   detected regression makes the result unacceptable. Its default and exact
 *   effect are not visible in this declaration - TODO confirm.
 * @returns `true` when performance is considered acceptable, otherwise `false`
 */
export declare function isPerformanceAcceptable(comparison: PerformanceComparison, failOnRegression?: boolean): boolean;
/**
 * Aggregate multiple samples into metrics grouped by tool.
 *
 * @param samples - Raw latency samples, possibly covering several tools
 * @returns A string-keyed map of {@link ToolPerformanceMetrics}, one entry per
 *   tool group (keys are presumably the samples' `toolName` values - verify
 *   against the implementation)
 */
export declare function aggregateSamplesByTool(samples: LatencySample[]): Map<string, ToolPerformanceMetrics>;
//# sourceMappingURL=performance-tracker.d.ts.map