import { CloudWatchClient, PutMetricDataCommand, StandardUnit } from '@aws-sdk/client-cloudwatch';
import { getLogger } from './logging.js';
import { getConfig } from '../config/env.js';

const logger = getLogger();
const config = getConfig();

// Whether to enable CloudWatch metrics (only the exact string 'true' enables them)
const METRICS_ENABLED = config.ENABLE_CLOUDWATCH_METRICS === 'true';

// Initialize CloudWatch client only if metrics are enabled
const cloudWatchClient = METRICS_ENABLED ? new CloudWatchClient() : null;

// Namespace for CloudWatch metrics; `||` is deliberate so an empty string also falls back
const METRIC_NAMESPACE = config.CLOUDWATCH_METRICS_NAMESPACE || 'AwsLogsMcp';

// Batching window in milliseconds: how long metrics accumulate before a flush
const BATCHING_WINDOW_MS = 60000; // 1 minute

// Number of data points sent per PutMetricData request
const MAX_METRICS_PER_REQUEST = 20;

// How often process-level metrics are sampled, in milliseconds
const SYSTEM_METRICS_INTERVAL_MS = 60000; // 1 minute

// Log metrics configuration status on startup
logger.info('Metrics configuration', {
  enabled: METRICS_ENABLED,
  namespace: METRIC_NAMESPACE,
  batchingWindow: `${BATCHING_WINDOW_MS}ms`
});

/**
 * A single metric observation held in memory until the next flush.
 */
interface MetricDataPoint {
  name: string;
  unit: StandardUnit;
  value: number;
  /** Dimension name -> dimension value; every caller in this module passes strings. */
  dimensions?: Record<string, string>;
  timestamp?: Date;
}

// Batched metrics to reduce API calls
const metricBatch: MetricDataPoint[] = [];

// Tracks if a flush is scheduled
let flushTimeout: NodeJS.Timeout | null = null;

/**
 * Schedule a flush of the metric batch to CloudWatch.
 *
 * At most one timer is pending at a time; calls made while a timer is
 * pending are no-ops.
 */
function scheduleFlush(): void {
  if (flushTimeout) return;

  flushTimeout = setTimeout(() => {
    flushMetrics().catch(err => {
      logger.error('Failed to flush metrics to CloudWatch', {
        error: err instanceof Error ? err.message : String(err)
      });
    });
    // Cleared without awaiting the flush, so metrics recorded while the
    // flush is still in flight can schedule the next one.
    flushTimeout = null;
  }, BATCHING_WINDOW_MS);
}

/**
 * Flush the metric batch to CloudWatch.
 *
 * When metrics are disabled the batch is logged at debug level (first five
 * entries only) and discarded. When enabled, the batch is emptied up front
 * and sent in chunks; a send failure is logged and not rethrown, so the
 * data points of a failed flush are dropped rather than retried.
 */
async function flushMetrics(): Promise<void> {
  if (metricBatch.length === 0) return;

  // If metrics are disabled, just clear the batch and return
  if (!METRICS_ENABLED || !cloudWatchClient) {
    if (logger.debug) {
      logger.debug('Metrics collected (not sent to CloudWatch)', {
        metricCount: metricBatch.length,
        metrics: metricBatch.slice(0, 5) // Log a sample of the metrics
      });
    }
    metricBatch.length = 0;
    return;
  }

  try {
    // Copy current batch and clear it so new metrics land in a fresh batch
    const batchToSend = [...metricBatch];
    metricBatch.length = 0;

    // Convert to CloudWatch format
    const metricData = batchToSend.map(metric => ({
      MetricName: metric.name,
      Unit: metric.unit,
      Value: metric.value,
      Dimensions: metric.dimensions
        ? Object.entries(metric.dimensions).map(([name, value]) => ({ Name: name, Value: value }))
        : undefined,
      Timestamp: metric.timestamp ?? new Date()
    }));

    // Send sequentially, one chunk per PutMetricData request
    for (let i = 0; i < metricData.length; i += MAX_METRICS_PER_REQUEST) {
      const chunk = metricData.slice(i, i + MAX_METRICS_PER_REQUEST);
      const command = new PutMetricDataCommand({
        Namespace: METRIC_NAMESPACE,
        MetricData: chunk
      });
      await cloudWatchClient.send(command);
    }

    logger.debug('Flushed metrics to CloudWatch', { count: batchToSend.length });
  } catch (error) {
    // Log error but don't rethrow - metrics shouldn't break the application
    logger.error('Error sending metrics to CloudWatch', {
      error: error instanceof Error
        ? { name: error.name, message: error.message, stack: error.stack }
        : error
    });
  }
}

/**
 * Add a metric to the batch, stamped with the current time, and make sure a
 * flush is scheduled.
 *
 * @param name Metric name
 * @param value Metric value, expressed in `unit`
 * @param unit CloudWatch unit of `value`
 * @param dimensions Optional dimension name/value pairs
 */
function addToBatch(
  name: string,
  value: number,
  unit: StandardUnit,
  dimensions?: Record<string, string>
): void {
  metricBatch.push({ name, value, unit, dimensions, timestamp: new Date() });
  scheduleFlush();
}

/**
 * Track API request
 * @param tool Tool name
 * @param responseTime Response time in ms
 * @param isSuccess Whether the request was successful
 */
export function trackToolRequest(tool: string, responseTime: number, isSuccess: boolean): void {
  // Record request count
  addToBatch('ToolRequests', 1, StandardUnit.Count, {
    Tool: tool,
    Status: isSuccess ? 'Success' : 'Failure'
  });

  // Record response time
  addToBatch('ToolResponseTime', responseTime, StandardUnit.Milliseconds, { Tool: tool });
}

/**
 * Track AWS API request
 * @param service AWS service name
 * @param operation Operation name
 * @param responseTime Response time in ms
 * @param isSuccess Whether the request was successful
 */
export function trackAwsRequest(
  service: string,
  operation: string,
  responseTime: number,
  isSuccess: boolean
): void {
  // Record request count
  addToBatch('AwsApiRequests', 1, StandardUnit.Count, {
    Service: service,
    Operation: operation,
    Status: isSuccess ? 'Success' : 'Failure'
  });

  // Record response time
  addToBatch('AwsApiResponseTime', responseTime, StandardUnit.Milliseconds, {
    Service: service,
    Operation: operation
  });
}

/**
 * Track HTTP request
 * @param method HTTP method
 * @param path Request path
 * @param statusCode Response status code
 * @param responseTime Response time in ms
 */
export function trackHttpRequest(
  method: string,
  path: string,
  statusCode: number,
  responseTime: number
): void {
  // Simplify path for metric dimensions - group by first path segment.
  // `||` (not `??`) so that '/' (empty first segment) also maps to 'root'.
  const simplePath = path.split('/')[1] || 'root';

  // Track request count
  addToBatch('HttpRequests', 1, StandardUnit.Count, {
    Method: method,
    Path: simplePath,
    StatusCode: String(statusCode)
  });

  // Track response time
  addToBatch('HttpResponseTime', responseTime, StandardUnit.Milliseconds, {
    Method: method,
    Path: simplePath
  });
}

/**
 * Sample process-level metrics (CPU time, memory, uptime) into the batch.
 *
 * Any error during collection is logged and swallowed.
 */
function reportSystemMetrics(): void {
  try {
    // process.cpuUsage() reports microseconds; dividing by 1000 yields
    // milliseconds, so the values are published as Milliseconds.
    const cpuUsage = process.cpuUsage();
    addToBatch('CpuUserTime', cpuUsage.user / 1000, StandardUnit.Milliseconds);
    addToBatch('CpuSystemTime', cpuUsage.system / 1000, StandardUnit.Milliseconds);

    // Memory usage, converted from bytes to megabytes
    const bytesPerMegabyte = 1024 * 1024;
    const memUsage = process.memoryUsage();
    addToBatch('MemoryRss', memUsage.rss / bytesPerMegabyte, StandardUnit.Megabytes);
    addToBatch('MemoryHeapTotal', memUsage.heapTotal / bytesPerMegabyte, StandardUnit.Megabytes);
    addToBatch('MemoryHeapUsed', memUsage.heapUsed / bytesPerMegabyte, StandardUnit.Megabytes);

    // Process uptime
    addToBatch('Uptime', process.uptime(), StandardUnit.Seconds);
  } catch (error) {
    logger.error('Error collecting system metrics', {
      error: error instanceof Error ? error.message : String(error)
    });
  }
}

// Report system metrics every minute, but only if metrics are enabled
let systemMetricsInterval: NodeJS.Timeout | null = null;

if (METRICS_ENABLED) {
  systemMetricsInterval = setInterval(reportSystemMetrics, SYSTEM_METRICS_INTERVAL_MS);

  // Initial report of system metrics
  reportSystemMetrics();
}

/**
 * Force flush metrics immediately.
 * Useful for graceful shutdown.
 *
 * Cancels any pending scheduled flush and also stops the periodic system
 * metrics sampling; the sampling interval is not restarted afterwards.
 *
 * @returns A promise that settles once the flush attempt has finished
 */
export async function flushMetricsNow(): Promise<void> {
  if (flushTimeout) {
    clearTimeout(flushTimeout);
    flushTimeout = null;
  }

  // Also clear the interval if it exists
  if (systemMetricsInterval) {
    clearInterval(systemMetricsInterval);
    systemMetricsInterval = null;
  }

  return flushMetrics();
}

/**
 * Check if CloudWatch metrics are enabled
 */
export function isMetricsEnabled(): boolean {
  return METRICS_ENABLED;
}

/**
 * Get all collected metrics.
 *
 * Currently a stub: every category is returned empty.
 *
 * NOTE(review): the value types below were not recoverable from the source
 * this was restored from; they are assumed - confirm against callers.
 *
 * @returns Object containing all metrics
 */
export function getAllMetrics(): {
  counters: Record<string, number>,
  gauges: Record<string, number>,
  histograms: Record<string, number[]>
} {
  return {
    counters: {}, // Implement with actual counters if needed
    gauges: {}, // Implement with actual gauges if needed
    histograms: {} // Implement with actual histograms if needed
  };
}

/**
 * Reset all metrics.
 *
 * Currently a stub: it only logs and does not clear the pending batch.
 */
export function resetMetrics(): void {
  // Implement reset functionality if needed
  logger.info('Metrics reset');
}

/**
 * Middleware for tracking HTTP requests.
 *
 * The returned handler records the start time, then reports method, path,
 * status code and elapsed milliseconds when the response emits 'finish'.
 *
 * @returns A `(req, res, next)` handler (presumably for an Express-style
 * server - confirm against the caller)
 */
export function createMetricsMiddleware() {
  return (req: any, res: any, next: any) => {
    const startTime = Date.now();

    // Record metrics when response is finished
    res.on('finish', () => {
      const responseTime = Date.now() - startTime;
      trackHttpRequest(req.method, req.path, res.statusCode, responseTime);
    });

    next();
  };
}