# Error Handling Playbook

## Error Handling Checklist

- [ ] All errors classified by category and severity ([Error Classification System](#error-classification-system))
- [ ] Custom error classes extend base ApplicationError ([Error Categories](#error-categories))
- [ ] Error context automatically collected ([Error Context Collection](#error-context-collection))
- [ ] Sensitive data sanitized from error logs ([Error Context Collection](#error-context-collection))
- [ ] Retry logic implemented with exponential backoff ([Retry Mechanisms](#retry-mechanisms))
- [ ] Circuit breakers protect external dependencies ([Retry Mechanisms](#retry-mechanisms))
- [ ] Graceful degradation for non-critical features ([Graceful Degradation](#graceful-degradation))
- [ ] User-friendly error messages for all error types ([Error Message Design](#error-message-design))
- [ ] Error messages localized for supported languages ([Error Message Design](#error-message-design))
- [ ] Error boundaries catch React component errors ([Error UI Components](#error-ui-components))
- [ ] Consistent error UI components used throughout ([Error UI Components](#error-ui-components))
- [ ] Error reporting configured for production ([Error Tracking](#error-tracking))
- [ ] Error aggregation identifies patterns ([Error Tracking](#error-tracking))
- [ ] Alerting rules configured for critical errors ([Error Monitoring and Alerting](#error-monitoring-and-alerting))
- [ ] Chaos testing validates error handling ([Chaos Engineering](#chaos-engineering))
- [ ] Error recovery documented in runbooks ([Error Monitoring and Alerting](#error-monitoring-and-alerting))
- [ ] Support team trained on error ID lookup ([Error UI Components](#error-ui-components))
- [ ] Error metrics tracked in dashboards ([Error Monitoring and Alerting](#error-monitoring-and-alerting))
- [ ] Regular error handling drills conducted ([Error Recovery Testing](#error-recovery-testing))

## Core Philosophy

Errors are not exceptions, they're expectations. Every system fails; great systems fail gracefully. Handle errors at the right level, provide actionable feedback, and always leave the system in a valid state. Errors should help developers debug and help users recover.

## Error Classification System

### Error Categories
Classify errors by type and severity.

**Why:** Different errors require different handling strategies. Classification enables appropriate responses, from automatic retry to user intervention to emergency escalation.

```typescript
/**
 * Error severity levels, listed from least to most severe.
 *
 * NOTE: the members are string-valued, so relational operators (`>=`, `<`)
 * compare the underlying strings lexicographically, not by severity.
 * Compare against explicit members instead.
 */
enum ErrorSeverity {
  LOW = 'low',        // Log only, no user impact
  MEDIUM = 'medium',  // Degraded experience
  HIGH = 'high',      // Feature unavailable
  CRITICAL = 'critical' // System-wide impact
}

/**
 * Error categories, grouped by where the failure originates.
 *
 * Each member's value is identical to its name.
 */
enum ErrorCategory {
  // User errors
  VALIDATION = 'VALIDATION',
  AUTHENTICATION = 'AUTHENTICATION',
  AUTHORIZATION = 'AUTHORIZATION',
  NOT_FOUND = 'NOT_FOUND',

  // System errors
  NETWORK = 'NETWORK',
  DATABASE = 'DATABASE',
  EXTERNAL_SERVICE = 'EXTERNAL_SERVICE',

  // Application errors
  BUSINESS_LOGIC = 'BUSINESS_LOGIC',
  STATE_CORRUPTION = 'STATE_CORRUPTION',
  CONFIGURATION = 'CONFIGURATION',

  // Client errors
  BROWSER = 'BROWSER',
  PARSING = 'PARSING',
  RENDERING = 'RENDERING'
}

/**
 * Base error class with classification.
 *
 * Every instance carries a generated id, a creation timestamp, a category,
 * a severity and an HTTP-style status code, plus optional debugging context
 * and the underlying error that triggered it.
 */
class ApplicationError extends Error {
  public readonly id: string;
  public readonly timestamp: Date;
  public readonly category: ErrorCategory;
  public readonly severity: ErrorSeverity;
  public readonly statusCode: number;
  public readonly isOperational: boolean;
  public readonly context?: Record<string, any>;
  public readonly originalError?: Error;

  /**
   * @param message       Human-readable description of the failure.
   * @param category      Classification bucket for the error.
   * @param severity      Impact level of the error.
   * @param statusCode    Status code associated with the error (defaults to 500).
   * @param isOperational Flag stored on the instance (defaults to true).
   * @param context       Optional key/value data describing the failure.
   * @param originalError Optional underlying error being wrapped.
   */
  constructor(
    message: string,
    category: ErrorCategory,
    severity: ErrorSeverity,
    statusCode: number = 500,
    isOperational: boolean = true,
    context?: Record<string, any>,
    originalError?: Error
  ) {
    super(message);

    // When compiled to ES5, `extends Error` leaves instances with
    // Error.prototype; restoring it keeps `instanceof` and `constructor.name`
    // correct for this class and every subclass.
    Object.setPrototypeOf(this, new.target.prototype);

    this.name = this.constructor.name;
    this.id = generateErrorId();
    this.timestamp = new Date();
    this.category = category;
    this.severity = severity;
    this.statusCode = statusCode;
    this.isOperational = isOperational;
    this.context = context;
    this.originalError = originalError;

    // Error.captureStackTrace is a V8 extension and is undefined on other
    // engines; calling it unguarded would make construction itself throw.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }

  /**
   * Plain-object representation used when the error is serialized.
   * `isOperational` and `originalError` are not included.
   */
  toJSON() {
    return {
      id: this.id,
      name: this.name,
      message: this.message,
      category: this.category,
      severity: this.severity,
      statusCode: this.statusCode,
      timestamp: this.timestamp,
      context: this.context,
      stack: this.stack
    };
  }
}

// Specific error classes

/**
 * Raised when user input fails validation.
 * Always low severity, status 422, with the fixed message "Validation failed".
 */
class ValidationError extends ApplicationError {
  /** Per-field messages, keyed by field name. */
  public readonly fields: Record<string, string>;

  constructor(fields: Record<string, string>, context?: Record<string, any>) {
    super('Validation failed', ErrorCategory.VALIDATION, ErrorSeverity.LOW, 422, true, context);
    this.fields = fields;
  }
}

/**
 * Raised when the caller cannot be authenticated.
 * Medium severity, status 401; the message defaults to "Authentication failed".
 */
class AuthenticationError extends ApplicationError {
  constructor(message = 'Authentication failed', context?: Record<string, any>) {
    super(message, ErrorCategory.AUTHENTICATION, ErrorSeverity.MEDIUM, 401, true, context);
  }
}

/**
 * Raised when a database operation fails.
 * High severity, status 500. The query and operation are stored both on the
 * instance and in the error context.
 */
class DatabaseError extends ApplicationError {
  constructor(
    message: string,
    public readonly query?: string,
    public readonly operation?: string,
    originalError?: Error
  ) {
    const details = { query, operation };
    super(message, ErrorCategory.DATABASE, ErrorSeverity.HIGH, 500, true, details, originalError);
  }
}

/**
 * Raised when a call to an external service fails.
 * Medium severity, status 502. Service name, endpoint and response time are
 * stored both on the instance and in the error context.
 */
class ExternalServiceError extends ApplicationError {
  constructor(
    public readonly service: string,
    message: string,
    public readonly endpoint?: string,
    public readonly responseTime?: number,
    originalError?: Error
  ) {
    const details = { service, endpoint, responseTime };
    super(message, ErrorCategory.EXTERNAL_SERVICE, ErrorSeverity.MEDIUM, 502, true, details, originalError);
  }
}
```

### Error Context Collection
Gather relevant context for debugging.

**Why:** Context makes errors actionable. Without context, errors are just noise. Good context reduces debugging time from hours to minutes.

```typescript
/**
 * Error context collector.
 *
 * Gathers request, user, system and timing information for an error report,
 * redacting sensitive headers and body fields along the way.
 */
class ErrorContext {
  /** Value written in place of anything redacted. */
  private static readonly REDACTED = '[REDACTED]';

  /** Header names to redact, lower-cased for case-insensitive matching. */
  private static readonly SENSITIVE_HEADERS = ['authorization', 'cookie', 'x-api-key'];

  /** Body field names to redact, lower-cased for case-insensitive matching. */
  private static readonly SENSITIVE_FIELDS = ['password', 'token', 'secret', 'creditcard', 'ssn'];

  /**
   * Collects a context snapshot.
   *
   * @param req Optional incoming request; request and user fields are
   *            `undefined` when it is omitted.
   * @returns A flat record of request, user, system and timing context.
   */
  static collect(req?: Request): Record<string, any> {
    return {
      // Request context
      url: req?.url,
      method: req?.method,
      headers: this.sanitizeHeaders(req?.headers),
      query: req?.query,
      params: req?.params,
      body: this.sanitizeBody(req?.body),

      // User context
      userId: req?.user?.id,
      userRole: req?.user?.role,
      sessionId: req?.session?.id,

      // System context
      hostname: os.hostname(),
      environment: process.env.NODE_ENV,
      version: process.env.APP_VERSION,
      memory: process.memoryUsage(),
      uptime: process.uptime(),

      // Timing
      timestamp: new Date().toISOString(),
      timezone: Intl.DateTimeFormat().resolvedOptions().timeZone
    };
  }

  /**
   * Returns a copy of `headers` with sensitive values replaced by
   * `[REDACTED]`. Header names are matched case-insensitively, so
   * `Authorization` and `authorization` are both redacted. Empty values are
   * left untouched.
   *
   * @param headers Header map; `undefined` yields an empty object.
   */
  static sanitizeHeaders(headers?: Record<string, string>): Record<string, string> {
    if (!headers) return {};

    const sanitized: Record<string, string> = { ...headers };

    for (const name of Object.keys(sanitized)) {
      const isSensitive = ErrorContext.SENSITIVE_HEADERS.includes(name.toLowerCase());
      if (isSensitive && sanitized[name]) {
        sanitized[name] = ErrorContext.REDACTED;
      }
    }

    return sanitized;
  }

  /**
   * Returns a deep copy of `body` with sensitive fields replaced by
   * `[REDACTED]`.
   *
   * - Field names are matched case-insensitively.
   * - `Date` values are passed through unchanged instead of collapsing to `{}`.
   * - A reference back to an ancestor object is replaced by `[Circular]`
   *   rather than recursing without end.
   *
   * @param body Arbitrary request body; any falsy body yields `undefined`.
   */
  static sanitizeBody(body?: any): any {
    if (!body) return undefined;

    // Objects currently being walked; entries are removed on the way back up
    // so the same object may legitimately appear in two sibling positions.
    const ancestors = new WeakSet<object>();

    const sanitize = (value: any): any => {
      if (typeof value !== 'object' || value === null) return value;
      if (value instanceof Date) return value;
      if (ancestors.has(value)) return '[Circular]';

      ancestors.add(value);

      let copy: unknown;
      if (Array.isArray(value)) {
        copy = value.map(sanitize);
      } else {
        const record: Record<string, unknown> = {};
        for (const [key, child] of Object.entries(value)) {
          record[key] = ErrorContext.SENSITIVE_FIELDS.includes(key.toLowerCase())
            ? ErrorContext.REDACTED
            : sanitize(child);
        }
        copy = record;
      }

      ancestors.delete(value);
      return copy;
    };

    return sanitize(body);
  }
}

// Async context tracking
import { AsyncLocalStorage } from 'async_hooks';

// Async-local storage whose store is a Map of context entries; populated by
// withContext and read by addContext / getContext.
const asyncContext = new AsyncLocalStorage<Map<string, any>>();

/**
 * Runs `fn` inside a fresh async context seeded with a `requestId` and a
 * `startTime`, and returns whatever `fn` returns.
 */
export function withContext<T>(fn: () => T): T {
  const seed: Array<[string, any]> = [
    ['requestId', generateRequestId()],
    ['startTime', Date.now()]
  ];

  return asyncContext.run(new Map(seed), fn);
}

/**
 * Stores `value` under `key` in the active async context.
 * Does nothing when there is no active store.
 */
export function addContext(key: string, value: any): void {
  const activeStore = asyncContext.getStore();
  if (activeStore === undefined || activeStore === null) {
    return;
  }
  activeStore.set(key, value);
}

/**
 * Returns the active async context as a plain object, or an empty object
 * when there is no active store.
 */
export function getContext(): Record<string, any> {
  const snapshot: Record<string, any> = {};
  const activeStore = asyncContext.getStore();

  if (activeStore) {
    for (const [name, entry] of activeStore) {
      snapshot[name] = entry;
    }
  }

  return snapshot;
}
```

## Error Recovery Strategies

### Retry Mechanisms
Implement intelligent retry logic.

**Why:** Many errors are transient - network hiccups, temporary overload, race conditions. Smart retries can recover from these automatically without user intervention.

```typescript
/**
 * Exponential backoff with jitter.
 *
 * Decides whether a failed operation should be retried and how long to wait
 * before the next attempt.
 */
class RetryStrategy {
  /**
   * Maximum number of attempts. Public (read-only) because retry loops need
   * to read it to bound their iteration and to report progress.
   */
  public readonly maxAttempts: number;
  private readonly baseDelay: number;
  private readonly maxDelay: number;
  private readonly jitter: boolean;

  /**
   * @param maxAttempts Maximum number of attempts (default 3).
   * @param baseDelay   Delay for attempt 0 in milliseconds (default 1000).
   * @param maxDelay    Upper bound for any delay in milliseconds (default 30000).
   * @param jitter      Whether to randomize each delay (default true).
   */
  constructor(
    maxAttempts = 3,
    baseDelay = 1000,
    maxDelay = 30000,
    jitter = true
  ) {
    this.maxAttempts = maxAttempts;
    this.baseDelay = baseDelay;
    this.maxDelay = maxDelay;
    this.jitter = jitter;
  }

  /**
   * Delay before the next attempt, in whole milliseconds.
   *
   * @param attempt Zero-based index of the attempt that just failed.
   * @returns `baseDelay * 2^attempt` capped at `maxDelay`; with jitter enabled
   *          the result is scaled by a random factor in [0.5, 1).
   */
  calculateDelay(attempt: number): number {
    // Exponential backoff: 2^attempt * baseDelay
    let delay = Math.min(
      this.baseDelay * Math.pow(2, attempt),
      this.maxDelay
    );

    // Add jitter to prevent thundering herd
    if (this.jitter) {
      delay = delay * (0.5 + Math.random() * 0.5);
    }

    return Math.floor(delay);
  }

  /**
   * Whether another attempt should be made after `error`.
   *
   * @param error   The failure from the last attempt.
   * @param attempt Number of attempts made so far (one-based).
   * @returns `false` once `attempt` reaches `maxAttempts`, for
   *          non-operational application errors, and for validation,
   *          authentication and authorization errors; `true` for network and
   *          timeout errors and for errors whose numeric `statusCode` is
   *          408, 429, 500, 502, 503 or 504; `false` otherwise.
   */
  shouldRetry(error: Error, attempt: number): boolean {
    // Don't retry if max attempts reached
    if (attempt >= this.maxAttempts) {
      return false;
    }

    // Don't retry non-operational errors
    if (error instanceof ApplicationError && !error.isOperational) {
      return false;
    }

    // Retry based on error type
    if (error instanceof ValidationError) return false;
    if (error instanceof AuthenticationError) return false;
    if (error instanceof AuthorizationError) return false;

    // Retry network errors
    if (error instanceof NetworkError) return true;
    if (error instanceof TimeoutError) return true;

    // Retry specific HTTP status codes. The property is read through a
    // narrowed view and checked to be a number, so a missing or non-numeric
    // statusCode is simply "not retryable".
    const statusCode = (error as { statusCode?: unknown } | null | undefined)?.statusCode;
    if (typeof statusCode === 'number') {
      const retryableCodes = [408, 429, 500, 502, 503, 504];
      return retryableCodes.includes(statusCode);
    }

    return false;
  }
}

/**
 * Retry wrapper.
 *
 * Calls `fn` until it resolves or the strategy declines another attempt,
 * sleeping for the strategy's delay between attempts.
 *
 * @param fn       Operation to run; invoked once per attempt.
 * @param strategy Retry policy (defaults to `new RetryStrategy()`).
 * @returns The value `fn` resolves with.
 * @throws The value thrown by the last attempt, unchanged, once the strategy
 *         declines to retry. If the loop ends without an attempt being made
 *         (e.g. `maxAttempts` is 0) a generic "Failed after N attempts" error
 *         is thrown instead.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  strategy = new RetryStrategy()
): Promise<T> {
  let lastError: Error | undefined;

  for (let attempt = 0; attempt < strategy.maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (thrown: unknown) {
      // Anything can be thrown in JavaScript. The strategy and the log line
      // expect an Error, so wrap other values; passing a primitive through
      // would make the strategy's own checks throw and hide the real failure.
      const error = thrown instanceof Error ? thrown : new Error(String(thrown));
      lastError = error;

      if (!strategy.shouldRetry(error, attempt + 1)) {
        // Rethrow exactly what the operation threw.
        throw thrown;
      }

      const delay = strategy.calculateDelay(attempt);

      logger.warn('Retry attempt', {
        attempt: attempt + 1,
        maxAttempts: strategy.maxAttempts,
        delay,
        error: error.message
      });

      await sleep(delay);
    }
  }

  throw new Error(`Failed after ${strategy.maxAttempts} attempts`, {
    cause: lastError
  });
}

/**
 * Circuit breaker pattern.
 *
 * CLOSED: calls pass through; `threshold` consecutive failures open the circuit.
 * OPEN: calls are rejected immediately until `timeout` ms have passed since
 *       the last failure, after which the next call is let through (HALF_OPEN).
 * HALF_OPEN: `successThreshold` successes close the circuit again; any
 *       failure re-opens it.
 */
class CircuitBreaker {
  private state: 'CLOSED' | 'OPEN' | 'HALF_OPEN' = 'CLOSED';
  private failures = 0;
  private lastFailureTime?: Date;
  private successCount = 0;

  /**
   * @param threshold        Consecutive failures that open the circuit (default 5).
   * @param timeout          Milliseconds to stay OPEN before probing (default 60000).
   * @param successThreshold Successes needed in HALF_OPEN to close (default 2).
   */
  constructor(
    private readonly threshold = 5,
    private readonly timeout = 60000,
    private readonly successThreshold = 2
  ) {}

  /**
   * Runs `fn` through the breaker.
   *
   * @returns Whatever `fn` resolves with.
   * @throws `Error('Circuit breaker is OPEN')` while the circuit is open and
   *         the timeout has not elapsed; otherwise rethrows whatever `fn` threw.
   */
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    if (this.state === 'OPEN') {
      const lastFailureAt = this.lastFailureTime?.getTime() ?? 0;
      if (Date.now() - lastFailureAt > this.timeout) {
        this.state = 'HALF_OPEN';
        this.successCount = 0;
      } else {
        throw new Error('Circuit breaker is OPEN');
      }
    }

    try {
      const result = await fn();
      this.recordSuccess();
      return result;
    } catch (error) {
      this.recordFailure();
      throw error;
    }
  }

  /** Snapshot of the breaker's current state for monitoring. */
  getState() {
    return {
      state: this.state,
      failures: this.failures,
      lastFailureTime: this.lastFailureTime
    };
  }

  /** Updates counters after a successful call. */
  private recordSuccess(): void {
    if (this.state === 'HALF_OPEN') {
      this.successCount++;
      if (this.successCount >= this.successThreshold) {
        this.state = 'CLOSED';
        this.failures = 0;
      }
      return;
    }

    // While CLOSED a success ends the current failure streak. Without this
    // reset, unrelated failures spread over the breaker's whole lifetime
    // would eventually add up to the threshold and open the circuit.
    this.failures = 0;
  }

  /** Updates counters after a failed call and opens the circuit if needed. */
  private recordFailure(): void {
    this.failures++;
    this.lastFailureTime = new Date();

    // A failed probe in HALF_OPEN re-opens immediately, independent of the count.
    if (this.state === 'HALF_OPEN' || this.failures >= this.threshold) {
      this.state = 'OPEN';
      logger.error('Circuit breaker opened', {
        failures: this.failures,
        threshold: this.threshold
      });
    }
  }
}
```

### Graceful Degradation
Provide fallback functionality when services fail.

**Why:** Users prefer degraded functionality over no functionality. Graceful degradation keeps critical features working even when non-essential services fail.

```typescript
/**
 * Fallback strategies.
 *
 * Helpers that keep a feature working in reduced form when its primary
 * dependency fails.
 */
class FallbackService {
  /**
   * Cache fallback: fetch fresh data and cache it; if the fetch fails, serve
   * the last cached value instead.
   *
   * Cache problems never hide the fetch outcome: a failed cache write still
   * returns the fresh data, and a failed cache read rethrows the original
   * fetch error.
   *
   * @param key     Cache key.
   * @param fetcher Produces the fresh value.
   * @param ttl     TTL passed through to `cache.set` (default 3600).
   * @throws The fetcher's error when no cached value is available.
   */
  async getWithCacheFallback<T>(
    key: string,
    fetcher: () => Promise<T>,
    ttl = 3600
  ): Promise<T> {
    let data: T;

    try {
      data = await fetcher();
    } catch (error) {
      logger.warn('Using cache fallback', { key, error });

      let cached: T | null | undefined;
      try {
        cached = await cache.get(key);
      } catch (cacheError) {
        logger.warn('Cache read failed during fallback', { key, error: cacheError });
      }

      // Compare against null/undefined so legitimately falsy cached values
      // (0, '', false) are still served.
      if (cached !== undefined && cached !== null) {
        return cached;
      }

      throw error;
    }

    try {
      await cache.set(key, data, ttl);
    } catch (cacheError) {
      // The fetch succeeded; a cache outage must not turn that into a failure.
      logger.warn('Failed to refresh cache', { key, error: cacheError });
    }

    return data;
  }

  /**
   * Default value fallback: returns `defaultValue` when the fetcher fails.
   */
  async getWithDefault<T>(
    fetcher: () => Promise<T>,
    defaultValue: T
  ): Promise<T> {
    try {
      return await fetcher();
    } catch (error) {
      logger.warn('Using default value', { error });
      return defaultValue;
    }
  }

  /**
   * Service degradation: ML recommendations, then collaborative filtering,
   * then popular products. A failure of the final fallback propagates.
   */
  async getRecommendations(userId: string): Promise<Product[]> {
    try {
      // Try ML-powered recommendations
      return await mlService.getPersonalizedRecommendations(userId);
    } catch (mlError) {
      logger.warn('ML service failed, using simple recommendations', { error: mlError });

      try {
        // Fallback to collaborative filtering
        return await this.getCollaborativeRecommendations(userId);
      } catch (collaborativeError) {
        logger.warn('Collaborative filtering failed, using popular items', {
          error: collaborativeError
        });

        // Final fallback to popular items
        return await this.getPopularProducts();
      }
    }
  }

  /**
   * Feature flags for degradation. Flags are checked in order: payment
   * processing, fraud detection, real-time processing.
   *
   * @throws `Error('Payment processing temporarily disabled')` when the
   *         `paymentProcessing` flag is off.
   */
  async processPayment(payment: Payment): Promise<PaymentResult> {
    const features = await featureFlags.get();

    if (!features.paymentProcessing) {
      throw new Error('Payment processing temporarily disabled');
    }

    if (!features.fraudDetection) {
      logger.warn('Fraud detection disabled, processing without checks');
      return await this.processWithoutFraudCheck(payment);
    }

    if (!features.realTimeProcessing) {
      logger.info('Real-time processing disabled, queueing payment');
      await queue.add('payments', payment);
      return { status: 'pending', message: 'Payment queued for processing' };
    }

    return await this.processNormally(payment);
  }
}

/**
 * Progressive enhancement.
 *
 * Tries renderers from richest to simplest and returns the output of the
 * first one that does not throw.
 */
class ProgressiveFeature {
  /**
   * Renders `data` with the best renderer that currently works.
   * Each failure is logged as a warning before the next level is tried; the
   * generic error markup is returned only if every level fails.
   */
  async render(data: any): Promise<string> {
    const ladder: Array<(input: any) => Promise<string>> = [
      (input) => this.renderInteractive(input),
      (input) => this.renderStatic(input),
      (input) => this.renderBasic(input),
      (input) => this.renderFallback(input)
    ];

    for (let level = 0; level < ladder.length; level += 1) {
      const attempt = ladder[level];
      try {
        return await attempt(data);
      } catch (error) {
        logger.warn(`Renderer failed, trying next level`, { error });
      }
    }

    return this.renderError();
  }

  // Full interactive experience
  private async renderInteractive(data: any): Promise<string> {
    const enriched = await enrichData(await heavyProcessing(data));
    return renderFullUI(enriched);
  }

  // Static but complete
  private async renderStatic(data: any): Promise<string> {
    return renderStaticUI(data);
  }

  // Basic HTML only
  private async renderBasic(data: any): Promise<string> {
    return renderBasicHTML(data);
  }

  // Absolute minimum
  private async renderFallback(data: any): Promise<string> {
    return `<div>Content temporarily unavailable</div>`;
  }

  private renderError(): string {
    return `<div>An error occurred. Please try again later.</div>`;
  }
}
```

## User-Facing Error Messages

### Error Message Design
Create helpful, actionable error messages.

**Why:** Good error messages help users recover from errors independently, reduce support tickets, and improve user satisfaction. Bad error messages frustrate users and damage trust.

```typescript
/**
 * Error message formatter.
 *
 * Converts an ApplicationError into the title, message, suggested actions
 * and support information shown to the user.
 */
class UserErrorFormatter {
  /**
   * Builds the user-facing representation of `error`.
   * Technical details are included only when `NODE_ENV` is `development`.
   */
  static format(error: ApplicationError): UserFacingError {
    const baseMessage = this.getBaseMessage(error);
    const details = this.getDetails(error);
    const actions = this.getSuggestedActions(error);
    const support = this.getSupportInfo(error);

    return {
      title: this.getTitle(error),
      message: baseMessage,
      details: process.env.NODE_ENV === 'development' ? details : undefined,
      actions,
      support,
      errorId: error.id,
      timestamp: error.timestamp
    };
  }

  /** Short heading for the error's category, with a generic fallback. */
  private static getTitle(error: ApplicationError): string {
    const titles: Partial<Record<ErrorCategory, string>> = {
      [ErrorCategory.VALIDATION]: 'Invalid Input',
      [ErrorCategory.AUTHENTICATION]: 'Authentication Required',
      [ErrorCategory.AUTHORIZATION]: 'Access Denied',
      [ErrorCategory.NOT_FOUND]: 'Not Found',
      [ErrorCategory.NETWORK]: 'Connection Problem',
      [ErrorCategory.DATABASE]: 'System Error',
      [ErrorCategory.EXTERNAL_SERVICE]: 'Service Unavailable',
      [ErrorCategory.BUSINESS_LOGIC]: 'Operation Failed',
      [ErrorCategory.STATE_CORRUPTION]: 'System Error',
      [ErrorCategory.CONFIGURATION]: 'Configuration Error'
    };

    return titles[error.category] || 'Something Went Wrong';
  }

  /**
   * User-friendly message for the error's category. Business-logic errors
   * surface their own message when they have one.
   */
  private static getBaseMessage(error: ApplicationError): string {
    // User-friendly messages by category
    const messages: Partial<Record<ErrorCategory, string>> = {
      [ErrorCategory.VALIDATION]: 'Please check your input and try again.',
      [ErrorCategory.AUTHENTICATION]: 'Please sign in to continue.',
      [ErrorCategory.AUTHORIZATION]: 'You don\'t have permission to perform this action.',
      [ErrorCategory.NOT_FOUND]: 'The requested resource could not be found.',
      [ErrorCategory.NETWORK]: 'We\'re having trouble connecting. Please check your internet connection.',
      [ErrorCategory.DATABASE]: 'We\'re experiencing technical difficulties. Please try again later.',
      [ErrorCategory.EXTERNAL_SERVICE]: 'One of our services is temporarily unavailable.',
      [ErrorCategory.BUSINESS_LOGIC]: error.message || 'The operation could not be completed.',
      [ErrorCategory.STATE_CORRUPTION]: 'An unexpected error occurred. Please refresh and try again.',
      [ErrorCategory.CONFIGURATION]: 'System configuration error. Please contact support.'
    };

    return messages[error.category] || 'An unexpected error occurred.';
  }

  /**
   * Technical details for developers. `format` calls this on every error,
   * so it must exist; the exact shape expected by `UserFacingError.details`
   * is not visible here — NOTE(review): confirm against that type.
   */
  private static getDetails(error: ApplicationError): Record<string, unknown> {
    return {
      name: error.name,
      message: error.message,
      category: error.category,
      severity: error.severity,
      statusCode: error.statusCode,
      context: error.context,
      stack: error.stack
    };
  }

  /** Concrete next steps the user can take, chosen by category. */
  private static getSuggestedActions(error: ApplicationError): string[] {
    const actions: string[] = [];

    switch (error.category) {
      case ErrorCategory.VALIDATION:
        if (error instanceof ValidationError) {
          Object.entries(error.fields).forEach(([field, message]) => {
            actions.push(`${field}: ${message}`);
          });
        }
        break;

      case ErrorCategory.AUTHENTICATION:
        actions.push('Sign in to your account');
        actions.push('Check your credentials');
        actions.push('Reset your password if needed');
        break;

      case ErrorCategory.NETWORK:
        actions.push('Check your internet connection');
        actions.push('Try refreshing the page');
        actions.push('Disable VPN or proxy if active');
        break;

      case ErrorCategory.DATABASE:
      case ErrorCategory.EXTERNAL_SERVICE:
        actions.push('Wait a few moments and try again');
        actions.push('Refresh the page');
        if (error.severity === ErrorSeverity.HIGH) {
          actions.push('Check our status page for updates');
        }
        break;

      default:
        actions.push('Try refreshing the page');
        actions.push('Contact support if the problem persists');
    }

    return actions;
  }

  /**
   * Support information for the error. The contact prompt is shown for HIGH
   * and CRITICAL severities only.
   */
  private static getSupportInfo(error: ApplicationError): SupportInfo {
    // Severity values are strings, so `>=` would compare them alphabetically
    // ('critical' < 'high' < 'low' < 'medium'). Check membership explicitly.
    const needsContact =
      error.severity === ErrorSeverity.HIGH ||
      error.severity === ErrorSeverity.CRITICAL;

    return {
      showContact: needsContact,
      errorId: error.id,
      timestamp: error.timestamp,
      contactMessage: `If you need help, please contact support with error ID: ${error.id}`
    };
  }
}

/**
 * Localized error messages.
 *
 * Looks up message templates by key and locale and fills in `{{name}}`
 * placeholders.
 */
class LocalizedErrorMessages {
  private static messages: Record<string, Record<string, string>> = {
    en: {
      'validation.required': 'This field is required',
      'validation.email': 'Please enter a valid email address',
      'validation.min': 'Must be at least {{min}} characters',
      'validation.max': 'Must be no more than {{max}} characters',
      'auth.invalid_credentials': 'Invalid email or password',
      'auth.account_locked': 'Your account has been locked. Please contact support.',
      'network.timeout': 'The request timed out. Please try again.',
      'network.offline': 'You appear to be offline. Please check your connection.'
    },
    es: {
      'validation.required': 'Este campo es obligatorio',
      'validation.email': 'Por favor ingrese un correo electrónico válido',
      // ... more translations
    }
  };

  /**
   * Resolves a message.
   *
   * @param key    Message key, e.g. `validation.min`.
   * @param locale Locale code (default `en`).
   * @param params Values for `{{name}}` placeholders; each value is converted
   *               with `String()` and inserted literally.
   * @returns The message for `locale`, falling back to English and then to
   *          `key` itself, with every occurrence of each placeholder replaced.
   */
  static get(key: string, locale = 'en', params?: Record<string, any>): string {
    let message =
      LocalizedErrorMessages.lookup(locale, key) ||
      LocalizedErrorMessages.lookup('en', key) ||
      key;

    // Replace parameters. split/join substitutes every occurrence and, unlike
    // String.replace with a string replacement, does not give `$&`, `$1`, …
    // in the value any special meaning.
    if (params) {
      for (const [name, value] of Object.entries(params)) {
        message = message.split(`{{${name}}}`).join(String(value));
      }
    }

    return message;
  }

  /**
   * Own-property lookup, so locales or keys such as `constructor` cannot
   * resolve to members inherited from Object.prototype.
   */
  private static lookup(locale: string, key: string): string | undefined {
    const tables = LocalizedErrorMessages.messages;
    if (!Object.prototype.hasOwnProperty.call(tables, locale)) return undefined;

    const table = tables[locale];
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  }
}
```

### Error UI Components
Display errors consistently in the UI.

**Why:** Consistent error presentation reduces cognitive load, helps users recognize and understand errors quickly, and maintains professional appearance even during failures.

```tsx
/**
 * React error boundary.
 *
 * Catches errors thrown while rendering its children, reports them, and
 * shows ErrorFallback until the user asks to try again.
 */
class ErrorBoundary extends Component<ErrorBoundaryProps, ErrorBoundaryState> {
  state: ErrorBoundaryState = {
    hasError: false,
    error: null,
    errorId: null
  };

  /** Switches to the fallback UI and assigns an id to the caught error. */
  static getDerivedStateFromError(error: Error) {
    const errorId = generateErrorId();

    return {
      hasError: true,
      error,
      errorId
    };
  }

  /** Sends the caught error, its id and component details to the reporter. */
  componentDidCatch(error: Error, errorInfo: ErrorInfo) {
    // Log to error reporting service
    errorReporter.report({
      error,
      errorInfo,
      errorId: this.state.errorId,
      context: {
        component: this.props.name,
        props: this.props,
        ...errorInfo
      }
    });
  }

  /**
   * Returns the boundary to its initial state. The error and its id are
   * cleared together with the flag so no stale failure lingers in state once
   * the children render again.
   */
  private resetError = (): void => {
    this.setState({ hasError: false, error: null, errorId: null });
  };

  render() {
    if (this.state.hasError) {
      return (
        <ErrorFallback
          error={this.state.error}
          errorId={this.state.errorId}
          resetError={this.resetError}
        />
      );
    }

    return this.props.children;
  }
}

/**
 * Error fallback component.
 *
 * Full-page replacement shown by the error boundary. In production it shows
 * a generic apology; otherwise it shows the error message and stack trace.
 */
const ErrorFallback = ({ error, errorId, resetError }: ErrorFallbackProps) => {
  const isProduction = process.env.NODE_ENV === 'production';

  const goHome = () => window.location.href = '/';

  return (
    <div className="error-fallback">
      <div className="error-icon">
        <ExclamationTriangleIcon />
      </div>

      <h1>Something went wrong</h1>

      <p className="error-message">
        {isProduction ? "We're sorry, but something unexpected happened." : error.message}
      </p>

      {isProduction ? null : (
        <details className="error-details">
          <summary>Error details</summary>
          <pre>{error.stack}</pre>
        </details>
      )}

      <div className="error-actions">
        <button onClick={resetError} className="btn-primary">
          Try again
        </button>

        <button onClick={goHome} className="btn-secondary">
          Go to homepage
        </button>
      </div>

      {errorId && (
        <p className="error-id">
          Error ID: <code>{errorId}</code>
        </p>
      )}
    </div>
  );
};

/**
 * Toast notifications for errors.
 *
 * Returns `showError`, which formats an ApplicationError and shows it as a
 * toast. Low-severity toasts use a 5000 duration; all others pass `null`.
 */
const useErrorToast = () => {
  const { addToast } = useToast();

  const showError = useCallback((error: ApplicationError) => {
    const { title, message, actions } = UserErrorFormatter.format(error);
    const duration = error.severity === ErrorSeverity.LOW ? 5000 : null;

    const toastActions = actions?.map(action => ({
      label: action,
      onClick: () => handleAction(action)
    }));

    addToast({
      id: error.id,
      type: 'error',
      title,
      message,
      duration,
      actions: toastActions
    });
  }, [addToast]);

  return { showError };
};

/**
 * Inline error display.
 *
 * Renders an alert whose variant follows the error's severity (MEDIUM when
 * none is set), with optional suggested actions and a retry button.
 * Renders nothing when there is no error.
 */
const InlineError = ({ error, onRetry }: InlineErrorProps) => {
  if (!error) return null;

  const variantBySeverity = {
    [ErrorSeverity.LOW]: 'info',
    [ErrorSeverity.MEDIUM]: 'warning',
    [ErrorSeverity.HIGH]: 'error',
    [ErrorSeverity.CRITICAL]: 'error'
  };
  const variant = variantBySeverity[error.severity || ErrorSeverity.MEDIUM];

  const suggestions = error.actions && (
    <ul className="error-actions">
      {error.actions.map((suggestion, position) => (
        <li key={position}>{suggestion}</li>
      ))}
    </ul>
  );

  const retryButton = onRetry && (
    <Button size="sm" onClick={onRetry}>
      Try again
    </Button>
  );

  return (
    <Alert variant={variant} className="inline-error">
      <AlertTitle>{error.title}</AlertTitle>
      <AlertDescription>
        {error.message}
        {suggestions}
      </AlertDescription>
      {retryButton}
    </Alert>
  );
};

/**
 * Form field errors.
 *
 * Shows a field-level message with an icon, announced through an alert
 * role; renders nothing when there is no message.
 */
const FormFieldError = ({ error }: { error?: string }) =>
  error ? (
    <span className="form-field-error" role="alert" aria-live="polite">
      <ExclamationCircleIcon className="error-icon" />
      {error}
    </span>
  ) : null;
```

## Error Monitoring and Alerting

### Error Tracking
Monitor and track errors in production.

**Why:** You can't fix what you don't know about. Error tracking provides visibility into production issues, helps prioritize fixes, and measures system health.

```typescript
/**
 * Error reporter service.
 *
 * Queues error reports and POSTs them to `endpoint` in batches: when the
 * queue reaches `batchSize`, when a critical error is reported, and every
 * `flushInterval` milliseconds.
 */
class ErrorReporter {
  private queue: ErrorReport[] = [];
  private batchSize = 10;
  private flushInterval = 5000;
  // Upper bound on queued reports so a long endpoint outage cannot grow the
  // queue without limit; the oldest reports are dropped first.
  private maxQueueSize = 1000;
  private timer?: ReturnType<typeof setInterval>;

  // Breadcrumb tracking
  private breadcrumbs: Breadcrumb[] = [];
  private maxBreadcrumbs = 50;

  /**
   * @param endpoint URL that receives the batched reports.
   * @param apiKey   Value sent in the `X-API-Key` header.
   */
  constructor(
    private readonly endpoint: string,
    private readonly apiKey: string
  ) {
    this.startBatchTimer();
  }

  /**
   * Queues a report for `error`.
   *
   * The report is sent right away when the error is critical or the queue
   * has reached `batchSize`; otherwise it waits for the next timed flush.
   * Each report is sent once: a critical report leaves with the queue it was
   * just added to rather than through a separate second request.
   *
   * @param error   The error to report.
   * @param context Extra context merged over the system context.
   */
  report(error: Error | ApplicationError, context?: any): void {
    const report: ErrorReport = {
      id: generateErrorId(),
      timestamp: new Date(),
      error: this.serializeError(error),
      context: {
        ...this.getSystemContext(),
        ...context
      },
      user: this.getUserContext(),
      session: this.getSessionContext(),
      breadcrumbs: this.getBreadcrumbs()
    };

    // Add to queue
    this.queue.push(report);
    this.trimQueue();

    const isCritical =
      error instanceof ApplicationError &&
      error.severity === ErrorSeverity.CRITICAL;

    // Flush if queue is full, and report critical errors immediately
    if (isCritical || this.queue.length >= this.batchSize) {
      void this.flush();
    }
  }

  /** Stops the periodic flush timer. Queued reports are left untouched. */
  stop(): void {
    if (this.timer !== undefined) {
      clearInterval(this.timer);
      this.timer = undefined;
    }
  }

  /** Converts an error into a plain object, adding classification fields for ApplicationError. */
  private serializeError(error: Error): SerializedError {
    return {
      name: error.name,
      message: error.message,
      stack: error.stack,
      ...(error instanceof ApplicationError && {
        id: error.id,
        category: error.category,
        severity: error.severity,
        statusCode: error.statusCode,
        context: error.context
      })
    };
  }

  /**
   * Sends every queued report in one request. On a network failure or a
   * non-2xx response the reports are put back at the front of the queue.
   * Never rejects; failures are logged to the console.
   */
  private async flush(): Promise<void> {
    if (this.queue.length === 0) return;

    const reports = [...this.queue];
    this.queue = [];

    try {
      const response = await fetch(this.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-API-Key': this.apiKey
        },
        body: JSON.stringify({ reports })
      });

      // fetch only rejects on network errors; an HTTP error status still
      // means the reports were not accepted.
      if (!response.ok) {
        throw new Error(`Error report endpoint responded with HTTP ${response.status}`);
      }
    } catch (error) {
      // Re-queue on failure
      this.queue.unshift(...reports);
      this.trimQueue();
      console.error('Failed to send error reports:', error);
    }
  }

  /** Drops the oldest reports once the queue exceeds `maxQueueSize`. */
  private trimQueue(): void {
    const overflow = this.queue.length - this.maxQueueSize;
    if (overflow > 0) {
      this.queue.splice(0, overflow);
    }
  }

  private startBatchTimer(): void {
    this.timer = setInterval(() => {
      void this.flush();
    }, this.flushInterval);
  }

  /**
   * Records a breadcrumb stamped with the current time, keeping only the
   * most recent `maxBreadcrumbs` entries.
   */
  addBreadcrumb(breadcrumb: Breadcrumb): void {
    this.breadcrumbs.push({
      ...breadcrumb,
      timestamp: new Date()
    });

    // Limit breadcrumbs
    if (this.breadcrumbs.length > this.maxBreadcrumbs) {
      this.breadcrumbs.shift();
    }
  }

  /** Returns a copy of the recorded breadcrumbs. */
  getBreadcrumbs(): Breadcrumb[] {
    return [...this.breadcrumbs];
  }
}

/**
 * Error aggregation.
 *
 * Groups errors that differ only in variable data (ids, addresses, counts)
 * under one fingerprint and tracks how often each group occurs.
 */
class ErrorAggregator {
  private errors = new Map<string, AggregatedError>();

  /**
   * Adds `error` to its group, creating the group on first sight.
   * Updates the count and last-seen time, records `context.userId` when
   * present, and keeps up to 10 sample errors per group.
   */
  aggregate(error: ApplicationError): void {
    const key = this.getErrorKey(error);

    let aggregated = this.errors.get(key);
    if (!aggregated) {
      aggregated = {
        fingerprint: key,
        firstSeen: new Date(),
        lastSeen: new Date(),
        count: 0,
        severity: error.severity,
        category: error.category,
        message: error.message,
        affectedUsers: new Set(),
        samples: []
      };
      this.errors.set(key, aggregated);
    }

    aggregated.count++;
    aggregated.lastSeen = new Date();

    if (error.context?.userId) {
      aggregated.affectedUsers.add(error.context.userId);
    }

    // Keep sample of errors
    if (aggregated.samples.length < 10) {
      aggregated.samples.push(error);
    }
  }

  /** Fingerprint for grouping similar errors: category, name and normalized message. */
  private getErrorKey(error: ApplicationError): string {
    return `${error.category}:${error.name}:${this.normalizeMessage(error.message)}`;
  }

  /**
   * Replaces the variable parts of a message with placeholders.
   *
   * Order matters: UUIDs and IP addresses are made of digits, so they must
   * be replaced before the generic number rule runs. Otherwise their digits
   * turn into `N` first and the more specific patterns can never match.
   */
  private normalizeMessage(message: string): string {
    return message
      .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, 'UUID') // Replace UUIDs
      .replace(/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g, 'IP') // Replace IPs
      .replace(/\d+/g, 'N'); // Replace remaining numbers
  }

  /**
   * The most frequent error groups, highest count first.
   *
   * @param limit Maximum number of groups to return (default 10).
   */
  getTopErrors(limit = 10): AggregatedError[] {
    return Array.from(this.errors.values())
      .sort((a, b) => b.count - a.count)
      .slice(0, limit);
  }
}

// Alert rules
/**
 * Evaluates a fixed set of alert rules against error metrics and delivers an
 * alert to each of a rule's channels when its condition holds.
 *
 * Delivery is failure-isolated: a failing channel or rule never prevents the
 * remaining channels and rules from being attempted.
 */
class ErrorAlertManager {
  private rules: AlertRule[] = [
    {
      name: 'High Error Rate',
      condition: (metrics) => metrics.errorRate > 0.05, // 5% error rate
      severity: 'warning',
      channels: ['slack', 'email']
    },
    {
      name: 'Critical Error',
      condition: (metrics) => metrics.criticalErrors > 0,
      severity: 'critical',
      channels: ['slack', 'pagerduty', 'email']
    },
    {
      name: 'Service Down',
      condition: (metrics) => metrics.successRate < 0.5, // 50% success rate
      severity: 'critical',
      channels: ['pagerduty']
    },
    {
      name: 'Database Errors Spike',
      condition: (metrics) => metrics.databaseErrors > 10,
      severity: 'warning',
      channels: ['slack']
    }
  ];

  /**
   * Evaluates every rule and sends alerts for those whose condition is met.
   *
   * All rules are always evaluated. Failures are collected and re-thrown
   * only after every rule has been attempted, so one broken channel cannot
   * suppress an unrelated (possibly more severe) alert.
   *
   * @param metrics - Current error metrics passed to each rule's condition.
   * @throws The original error when exactly one delivery failed; otherwise an
   * `Error` summarizing all failures.
   */
  async checkAlerts(metrics: ErrorMetrics): Promise<void> {
    const failures: unknown[] = [];

    for (const rule of this.rules) {
      try {
        if (rule.condition(metrics)) {
          await this.sendAlert(rule, metrics);
        }
      } catch (error: unknown) {
        failures.push(error);
      }
    }

    this.throwCollected(failures, 'Failed to process alert rules');
  }

  /**
   * Builds the alert payload for a rule and delivers it to all of the rule's
   * channels concurrently.
   *
   * @throws After every channel has settled, if at least one delivery failed.
   */
  private async sendAlert(rule: AlertRule, metrics: ErrorMetrics): Promise<void> {
    const alert = {
      rule: rule.name,
      severity: rule.severity,
      metrics,
      timestamp: new Date(),
      environment: process.env.NODE_ENV
    };

    // allSettled (not all) so that every channel is attempted even if one rejects.
    const outcomes = await Promise.allSettled(
      rule.channels.map((channel) => this.sendToChannel(channel, alert))
    );

    const failures = outcomes
      .filter((outcome): outcome is PromiseRejectedResult => outcome.status === 'rejected')
      .map((outcome) => outcome.reason as unknown);

    this.throwCollected(failures, `Failed to deliver alert "${rule.name}"`);
  }

  /**
   * Dispatches an alert to the sender for the named channel.
   *
   * @throws Error when the channel name is not one of the known channels;
   * a misconfigured rule should be visible rather than silently dropped.
   */
  // The payload stays `any` because the channel senders' parameter types are
  // declared outside this block.
  private async sendToChannel(channel: string, alert: any): Promise<void> {
    switch (channel) {
      case 'slack':
        await this.sendSlackAlert(alert);
        break;
      case 'email':
        await this.sendEmailAlert(alert);
        break;
      case 'pagerduty':
        await this.sendPagerDutyAlert(alert);
        break;
      default:
        throw new Error(`Unknown alert channel: ${channel}`);
    }
  }

  /**
   * Re-throws collected failures: nothing when empty, the original value when
   * there is exactly one (preserving its type), or a summary `Error` otherwise.
   */
  private throwCollected(failures: unknown[], summary: string): void {
    if (failures.length === 0) {
      return;
    }
    if (failures.length === 1) {
      throw failures[0];
    }
    const details = failures
      .map((failure) => (failure instanceof Error ? failure.message : String(failure)))
      .join('; ');
    throw new Error(`${summary}: ${details}`);
  }
}
```

## Error Recovery Testing

### Chaos Engineering
Test error handling with controlled failures.

**Why:** The best way to ensure error handling works is to trigger errors intentionally. Chaos engineering builds confidence in system resilience and uncovers hidden failure modes.

```typescript
// Chaos monkey for testing
/**
 * Injects random failures, delays and memory growth so that error handling
 * can be exercised under controlled conditions.
 *
 * Active only when `CHAOS_MONKEY_ENABLED` is exactly `'true'`. Each hook then
 * fires with the probability given by `CHAOS_PROBABILITY` (default 0.01).
 */
class ChaosMonkey {
  /** Probability used when `CHAOS_PROBABILITY` is unset or unparsable. */
  private static readonly DEFAULT_PROBABILITY = 0.01;

  private enabled = process.env.CHAOS_MONKEY_ENABLED === 'true';
  private probability = ChaosMonkey.parseProbability(process.env.CHAOS_PROBABILITY);

  /**
   * Parses a probability setting.
   *
   * An unset, empty or non-numeric value falls back to the default instead of
   * yielding `NaN`, which would make every `Math.random() < probability`
   * check false and silently disable an explicitly enabled monkey. The result
   * is clamped to the range [0, 1].
   */
  private static parseProbability(raw: string | undefined): number {
    const parsed = parseFloat(raw || String(ChaosMonkey.DEFAULT_PROBABILITY));
    if (Number.isNaN(parsed)) {
      return ChaosMonkey.DEFAULT_PROBABILITY;
    }
    return Math.min(1, Math.max(0, parsed));
  }

  /** Rolls the dice: true when chaos is enabled and this call should misbehave. */
  private shouldTrigger(): boolean {
    return this.enabled && Math.random() < this.probability;
  }

  /**
   * Random failures: may throw an induced `ApplicationError`.
   *
   * @param category - Category assigned to the induced error.
   * @throws ApplicationError when the roll succeeds.
   */
  maybeThrow(category: ErrorCategory): void {
    if (!this.shouldTrigger()) return;

    throw new ApplicationError(
      'Chaos Monkey induced failure',
      category,
      ErrorSeverity.MEDIUM,
      500,
      true,
      { induced: true }
    );
  }

  /**
   * Network chaos: may pause the caller.
   *
   * @param ms - Delay passed to `sleep` when the roll succeeds (default 5000).
   */
  async maybeDelay(ms = 5000): Promise<void> {
    if (this.shouldTrigger()) {
      await sleep(ms);
    }
  }

  /**
   * Database chaos: may reject instead of running the operation.
   *
   * @param operation - The operation to run when no failure is induced.
   * @returns The operation's result.
   * @throws DatabaseError when the roll succeeds; `operation` is then not called.
   */
  async maybeFail<T>(operation: () => Promise<T>): Promise<T> {
    if (this.shouldTrigger()) {
      throw new DatabaseError('Chaos Monkey: Database unavailable');
    }

    return operation();
  }

  /**
   * Memory chaos: may allocate a buffer and retain it forever.
   *
   * @param size - Bytes to allocate when the roll succeeds (default 10 MiB).
   */
  maybeLeakMemory(size = 10 * 1024 * 1024): void {
    if (!this.shouldTrigger()) return;

    // Intentionally retained on the global object so it is never collected.
    const store = globalThis as typeof globalThis & { chaosLeaks?: Buffer[] };
    store.chaosLeaks = store.chaosLeaks || [];
    store.chaosLeaks.push(Buffer.alloc(size));
  }
}

// Error injection for testing
/**
 * Test helper that makes selected request paths fail with a pre-registered
 * error, via a middleware that forwards the error to `next`.
 */
class ErrorInjector {
  private injections = new Map<string, Error>();

  /**
   * Registers `error` to be raised for requests whose path equals `path`.
   * A later registration for the same path replaces the earlier one.
   */
  inject(path: string, error: Error): void {
    this.injections.set(path, error);
  }

  /**
   * Removes the injection for `path`, or every injection when `path` is
   * omitted (or empty).
   */
  clear(path?: string): void {
    if (!path) {
      this.injections.clear();
      return;
    }
    this.injections.delete(path);
  }

  /**
   * Creates the middleware that applies the registered injections.
   *
   * @returns A handler that calls `next(error)` when the request path has an
   * injection, and plain `next()` otherwise.
   */
  middleware() {
    return (req: Request, res: Response, next: Next) => {
      const planned = this.injections.get(req.path);
      if (!planned) {
        next();
        return;
      }
      return next(planned);
    };
  }
}

// Error scenario testing
describe('Error Recovery', () => {
  // One injector for the whole suite: registering a new middleware before
  // every test would keep stacking handlers on the shared app, and injections
  // from earlier tests would stay active in the older handlers.
  const errorInjector = new ErrorInjector();

  beforeAll(() => {
    // NOTE(review): this only intercepts requests if it is registered ahead of
    // the route handlers — confirm against how `app` is assembled.
    app.use(errorInjector.middleware());
  });

  afterEach(() => {
    // Reset injected failures so tests cannot influence each other.
    errorInjector.clear();
  });

  test('handles database connection failure', async () => {
    errorInjector.inject('/api/users', new DatabaseError('Connection lost'));

    const response = await request(app).get('/api/users');

    expect(response.status).toBe(503);
    expect(response.body.error).toContain('temporarily unavailable');
  });

  test('retries on transient failures', async () => {
    // Fails twice with a transient error, then succeeds on the third call.
    let attempts = 0;
    const handler = jest.fn(() => {
      attempts++;
      if (attempts < 3) {
        throw new NetworkError('Timeout');
      }
      return { success: true };
    });

    const result = await withRetry(handler);

    expect(attempts).toBe(3);
    expect(result.success).toBe(true);
  });

  test('circuit breaker opens on repeated failures', async () => {
    const breaker = new CircuitBreaker(3, 1000);
    const failingOperation = jest.fn().mockRejectedValue(new Error('Fail'));

    // Trigger failures
    for (let i = 0; i < 3; i++) {
      await expect(breaker.execute(failingOperation)).rejects.toThrow();
    }

    // Circuit should be open: the fourth call is rejected by the breaker
    // itself, so the operation is not invoked a fourth time.
    await expect(breaker.execute(failingOperation)).rejects.toThrow('Circuit breaker is OPEN');
    expect(failingOperation).toHaveBeenCalledTimes(3);
  });
});
```

