/**
 * Content-level secrets scanner
 *
 * Scans file content for potential secret patterns before write.
 * Works on all file types via regex matching.
 *
 * Detected patterns:
 * - Stripe keys (sk_live_*, sk_test_*) and OpenAI keys (sk-*)
 * - GitHub tokens (ghp_*, gho_*, github_pat_*)
 * - AWS keys (AKIA*)
 * - Slack tokens (xoxp-*, xoxb-*)
 * - Private keys (BEGIN [RSA|EC|DSA|OPENSSH|ENCRYPTED|PGP] PRIVATE KEY)
 * - Generic API key/password patterns
 */

import { isTestFile } from "./file-utils.js";

/** One detection rule: the regex to run, a stable rule id, and the user-facing message. */
interface SecretPattern {
  /** Must carry the `g` flag so every occurrence on a line can be inspected. */
  pattern: RegExp;
  name: string;
  message: string;
}

/** Lower-cased HTTP header names whose literal values are never treated as secrets. */
const SAFE_HEADER_KEYS = new Set([
  "user-agent",
  "accept",
  "accept-language",
  "accept-encoding",
  "content-type",
  "content-length",
  "origin",
  "referer",
  "host",
  "connection",
  "cache-control",
  "pragma",
  "x-requested-with",
]);

/**
 * Rule ids whose matches are dropped when the quoted value is just an
 * UPPER_SNAKE_CASE env var name. `env-file-secret` is included because a line
 * such as `api_key = "FIREWORKS_API_KEY"` at column 0 matches that rule too;
 * filtering only `hardcoded-secret` would let it be reported anyway.
 */
const ENV_VAR_NAME_FILTERED_PATTERNS = new Set([
  "hardcoded-secret",
  "env-file-secret",
]);

/**
 * Find the first `"key": "value"` style pair on the line and return the key,
 * lower-cased. Returns null when the line contains no such pair.
 */
function extractHeaderKey(line: string): string | null {
  const m = line.match(/["']([A-Za-z][A-Za-z0-9-]{0,63})["']\s*:\s*["'][^"']+/);
  return m?.[1]?.toLowerCase() ?? null;
}

/**
 * Decide whether a match of the given rule on this line should be discarded.
 * Only the generic quoted-credential rules are eligible, and only when the
 * line's first quoted key is a known harmless HTTP header such as "User-Agent".
 */
function shouldIgnorePatternMatch(line: string, patternName: string): boolean {
  if (
    patternName !== "hardcoded-secret" &&
    patternName !== "hardcoded-password"
  ) {
    return false;
  }
  const key = extractHeaderKey(line);
  return key !== null && SAFE_HEADER_KEYS.has(key);
}

/**
 * Check if a string value looks like an environment variable name.
 * Env var names typically use UPPERCASE_SNAKE_CASE.
 * Used to filter false positives like: api_key = "FIREWORKS_API_KEY"
 * where the value is just referencing the env var name, not a secret.
 */
function looksLikeEnvVarName(value: string): boolean {
  // Upper-case letters, digits and underscores only, starting with a letter
  // and containing at least one underscore.
  return /^[A-Z][A-Z0-9_]*$/.test(value) && value.includes("_");
}

/**
 * Extract the first quoted value that follows `:` or `=` in the given text.
 * Returns null if no quoted value found.
 */
function extractQuotedValue(line: string): string | null {
  const match = line.match(/[:=]\s*["']([^"']+)["']/);
  return match?.[1] ?? null;
}

// Patterns ordered by specificity - first match wins per line
const SECRET_PATTERNS: SecretPattern[] = [
  // High-confidence: specific key prefixes
  {
    // `\b` keeps kebab-case identifiers such as "task-..." or "desk-..." from
    // matching. OpenAI keys use `sk-` and may contain `_`; Stripe secret keys
    // use `sk_live_` / `sk_test_`.
    pattern: /\bsk-[a-zA-Z0-9_-]{20,}|\bsk_(?:live|test)_[a-zA-Z0-9]{20,}/g,
    name: "stripe-openai-key",
    message: "Possible Stripe or OpenAI API key (sk-*)",
  },
  {
    pattern: /ghp_[a-zA-Z0-9]{36}/g,
    name: "github-personal-token",
    message: "GitHub personal access token (ghp_*)",
  },
  {
    pattern: /gho_[a-zA-Z0-9]{36}/g,
    name: "github-oauth-token",
    message: "GitHub OAuth token (gho_*)",
  },
  {
    // Fine-grained PAT bodies contain digits as well as letters and "_".
    pattern: /github_pat_[a-zA-Z0-9_]{82}/g,
    name: "github-fine-grained-pat",
    message: "GitHub fine-grained PAT (github_pat_*)",
  },
  {
    pattern: /AKIA[0-9A-Z]{16}/g,
    name: "aws-access-key",
    message: "AWS access key ID (AKIA*)",
  },
  {
    pattern: /xox[bp]-[a-zA-Z0-9]{10,}/g,
    name: "slack-token",
    message: "Slack token (xoxb-*/xoxp-*)",
  },
  {
    // Covers PKCS#8, RSA, EC, DSA, OpenSSH, encrypted PKCS#8 and PGP armor.
    pattern:
      /-----BEGIN\s+(?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED|PGP)\s+)?PRIVATE KEY(?: BLOCK)?-----/g,
    name: "private-key",
    message: "Private key material detected",
  },
  // Medium-confidence: quoted credentials
  {
    pattern: /password\s*[:=]\s*["'][^"']{4,}["']/gi,
    name: "hardcoded-password",
    message: "Possible hardcoded password",
  },
  {
    pattern:
      /\b(secret|api_?key|token|access_?key)\b\s*[:=]\s*["']([a-zA-Z0-9_./-]{8,})["']/gi,
    name: "hardcoded-secret",
    message: "Possible hardcoded secret or API key",
  },
  // .env format: KEY=VALUE (no quotes)
  {
    pattern:
      /^(?:API_?KEY|SECRET|TOKEN|PASSWORD|AWS_?ACCESS_?KEY)\s*=\s*\S{8,}/gim,
    name: "env-file-secret",
    message: "Possible secret in .env format",
  },
];

export interface SecretFinding {
  /** 1-based line number within the scanned content. */
  line: number;
  /** Human-readable description of the rule that fired. */
  message: string;
}

/**
 * Report whether the rule has at least one match on the line that survives the
 * env-var-name filter. The filter inspects the quoted value inside each
 * individual match rather than the first quoted value on the line, so an
 * unrelated earlier literal can neither hide a real secret nor cause a
 * harmless env var reference to be flagged.
 */
function hasReportableMatch(line: string, candidate: SecretPattern): boolean {
  const regex = candidate.pattern;
  const filterEnvVarNames = ENV_VAR_NAME_FILTERED_PATTERNS.has(candidate.name);

  // The regex objects are shared across calls; global regexes keep state in
  // lastIndex, so always start each line from the beginning.
  regex.lastIndex = 0;

  let match: RegExpExecArray | null;
  while ((match = regex.exec(line)) !== null) {
    const text = match[0];
    if (text === "") {
      // Defensive: never loop forever on a zero-length match.
      regex.lastIndex++;
    } else {
      const value = filterEnvVarNames ? extractQuotedValue(text) : null;
      if (value === null || !looksLikeEnvVarName(value)) {
        return true;
      }
    }
    // A non-global regex would return the same match again; stop after one.
    if (!regex.global) break;
  }
  return false;
}

/**
 * Scan content for potential secrets.
 * Skips test files to avoid false positives.
 *
 * @param content - Full text to scan; it is split on "\n" and checked line by line.
 * @param filePath - Optional path; when given and `isTestFile` accepts it, nothing is scanned.
 * @returns At most one finding per line (the first rule in `SECRET_PATTERNS`
 *   order that fires and is not filtered out), with 1-based line numbers.
 */
export function scanForSecrets(
  content: string,
  filePath?: string,
): SecretFinding[] {
  // Skip test files — secrets in tests are usually fake/test values
  if (filePath && isTestFile(filePath)) {
    return [];
  }

  const findings: SecretFinding[] = [];

  content.split("\n").forEach((line, index) => {
    for (const candidate of SECRET_PATTERNS) {
      if (!hasReportableMatch(line, candidate)) continue;
      if (shouldIgnorePatternMatch(line, candidate.name)) continue;

      findings.push({ line: index + 1, message: candidate.message });
      break; // One finding per line
    }
  });

  return findings;
}

/**
 * Format secrets findings for terminal output.
 *
 * @param findings - Findings as returned by `scanForSecrets`.
 * @param filePath - Path shown in the heading line.
 * @returns An empty string when there are no findings; otherwise a heading,
 *   up to five finding lines, an overflow count when there are more, and a
 *   remediation hint, joined with newlines.
 */
export function formatSecrets(
  findings: SecretFinding[],
  filePath: string,
): string {
  if (findings.length === 0) return "";

  const maxShown = 5;
  const output = [
    `🔴 STOP — ${findings.length} potential secret(s) in ${filePath}:`,
    ...findings.slice(0, maxShown).map((f) => ` L${f.line}: ${f.message}`),
  ];

  if (findings.length > maxShown) {
    output.push(` ... and ${findings.length - maxShown} more`);
  }
  output.push(" → Remove before continuing. Use env vars instead.");

  return output.join("\n");
}