/**
 * Content-level secrets scanner
 *
 * Scans file content for potential secret patterns before write.
 * Works on all file types via regex matching.
 *
 * Detected patterns:
 * - Stripe/OpenAI keys (sk-*)
 * - GitHub tokens (ghp_*, gho_*, github_pat_*)
 * - AWS keys (AKIA*)
 * - Slack tokens (xoxp-*, xoxb-*)
 * - Private keys (BEGIN PRIVATE KEY)
 * - Generic API key/password patterns
 */

import { isTestFile } from "./file-utils.js";

/** A single detection rule that the scanner applies to each line of content. */
interface SecretPattern {
	/** Regular expression executed against one line at a time. */
	pattern: RegExp;
	/** Identifier for the rule; the false-positive filters are keyed on it. */
	name: string;
	/** Human-readable description copied into the reported finding. */
	message: string;
}

/**
 * Lower-cased HTTP header names that are treated as non-secret.
 *
 * A line whose first quoted `"key": "value"` pair uses one of these keys is
 * exempt from the hardcoded-secret / hardcoded-password rules.
 */
const SAFE_HEADER_KEYS = new Set([
	"user-agent",
	"accept",
	"accept-language",
	"accept-encoding",
	"content-type",
	"content-length",
	"origin",
	"referer",
	"host",
	"connection",
	"cache-control",
	"pragma",
	"x-requested-with",
]);

/**
 * Find the first quoted `"key": "value"` pair on a line and return its key.
 *
 * The key must start with a letter, may contain letters, digits and hyphens
 * (64 characters at most), and must be followed by a non-empty quoted value.
 *
 * @param line - A single line of source text.
 * @returns The key in lower case, or null when the line has no such pair.
 */
function extractHeaderKey(line: string): string | null {
	const pair = /["']([A-Za-z][A-Za-z0-9-]{0,63})["']\s*:\s*["'][^"']+/.exec(
		line,
	);
	return pair === null ? null : pair[1].toLowerCase();
}

/**
 * Decide whether a match on `line` is a known false positive for a rule.
 *
 * Only the generic credential rules ("hardcoded-secret" and
 * "hardcoded-password") are filtered: a match is ignored when the line's
 * first quoted key is a harmless HTTP header name such as "User-Agent".
 *
 * @param line - The full line on which the rule matched.
 * @param patternName - The `name` of the rule that matched.
 * @returns true when the match should be discarded.
 */
function shouldIgnorePatternMatch(line: string, patternName: string): boolean {
	const isGenericCredentialRule =
		patternName === "hardcoded-secret" || patternName === "hardcoded-password";
	if (!isGenericCredentialRule) {
		return false;
	}

	const headerKey = extractHeaderKey(line);
	return headerKey !== null && SAFE_HEADER_KEYS.has(headerKey);
}

/**
 * Tell whether a string has the shape of an environment variable name.
 *
 * The accepted shape is UPPERCASE_SNAKE_CASE: the first character is an
 * uppercase letter, the rest are uppercase letters, digits or underscores,
 * and at least one underscore is present. This filters false positives such
 * as `api_key = "FIREWORKS_API_KEY"`, where the value only names an
 * environment variable rather than holding a secret.
 *
 * @param value - The candidate string (without surrounding quotes).
 * @returns true when the value looks like an env var name.
 */
function looksLikeEnvVarName(value: string): boolean {
	// A bare uppercase word such as "APIKEY" is not enough; require a separator.
	if (!value.includes("_")) {
		return false;
	}
	return /^[A-Z][A-Z0-9_]*$/.test(value);
}

/**
 * Pull out the first quoted value that follows a `:` or `=` in the text.
 *
 * @param line - Text to search.
 * @returns The characters between the quotes, or null when there is no
 *   non-empty quoted value after a `:` or `=`.
 */
function extractQuotedValue(line: string): string | null {
	const assignment = /[:=]\s*["']([^"']+)["']/.exec(line);
	if (assignment === null) {
		return null;
	}
	return assignment[1];
}

// Patterns ordered by specificity - first match wins per line.
// Every pattern carries the global flag; keep it when adding new entries.
const SECRET_PATTERNS: SecretPattern[] = [
	// High-confidence: specific key prefixes
	{
		// OpenAI keys start with "sk-"; Stripe secret keys start with
		// "sk_live_" or "sk_test_". The leading \b stops kebab-case identifiers
		// such as "task-list-item-container-wrapper" from matching on their
		// embedded "sk-" tail.
		pattern: /\bsk(?:-[a-zA-Z0-9-]{20,}|_(?:live|test)_[a-zA-Z0-9]{20,})/g,
		name: "stripe-openai-key",
		message: "Possible Stripe or OpenAI API key (sk-*)",
	},
	{
		pattern: /ghp_[a-zA-Z0-9]{36}/g,
		name: "github-personal-token",
		message: "GitHub personal access token (ghp_*)",
	},
	{
		pattern: /gho_[a-zA-Z0-9]{36}/g,
		name: "github-oauth-token",
		message: "GitHub OAuth token (gho_*)",
	},
	{
		// The 82-character body is alphanumeric plus underscore; digits must be
		// allowed or real tokens never match.
		pattern: /github_pat_[a-zA-Z0-9_]{82}/g,
		name: "github-fine-grained-pat",
		message: "GitHub fine-grained PAT (github_pat_*)",
	},
	{
		pattern: /AKIA[0-9A-Z]{16}/g,
		name: "aws-access-key",
		message: "AWS access key ID (AKIA*)",
	},
	{
		pattern: /xox[bp]-[a-zA-Z0-9]{10,}/g,
		name: "slack-token",
		message: "Slack token (xoxb-*/xoxp-*)",
	},
	{
		// Accepts any uppercase qualifier words before "PRIVATE KEY" (RSA, EC,
		// DSA, OPENSSH, ENCRYPTED, ...) and the PGP "PRIVATE KEY BLOCK" form.
		pattern: /-----BEGIN\s+(?:[A-Z0-9]+\s+)*PRIVATE KEY(?:\s+BLOCK)?-----/g,
		name: "private-key",
		message: "Private key material detected",
	},
	// Medium-confidence: quoted credentials
	{
		pattern: /password\s*[:=]\s*["'][^"']{4,}["']/gi,
		name: "hardcoded-password",
		message: "Possible hardcoded password",
	},
	{
		// Capture group 2 is the quoted credential value.
		pattern:
			/\b(secret|api_?key|token|access_?key)\b\s*[:=]\s*["']([a-zA-Z0-9_./-]{8,})["']/gi,
		name: "hardcoded-secret",
		message: "Possible hardcoded secret or API key",
	},
	// .env format: KEY=VALUE (no quotes)
	{
		pattern:
			/^(?:API_?KEY|SECRET|TOKEN|PASSWORD|AWS_?ACCESS_?KEY)\s*=\s*\S{8,}/gim,
		name: "env-file-secret",
		message: "Possible secret in .env format",
	},
];

/** One potential secret reported by the scanner. */
export interface SecretFinding {
	/** 1-based line number within the scanned content. */
	line: number;
	/** Description taken from the rule that matched. */
	message: string;
}

/**
 * Return the quoted value carried by a credential-style match, or null when
 * the rule has no such value (prefix rules, unquoted .env values).
 *
 * Only the matched text is inspected, never the whole line, so an unrelated
 * quoted string elsewhere on the line cannot be mistaken for the credential.
 */
function quotedValueOfMatch(
	patternName: string,
	match: RegExpExecArray,
): string | null {
	const matchedText = match[0];
	if (patternName === "hardcoded-secret") {
		return extractQuotedValue(matchedText);
	}
	if (patternName === "env-file-secret") {
		// The value must be the quoted string directly after the first "=".
		const quoted = /^[^=]*=\s*["']([^"']+)["']/.exec(matchedText);
		return quoted ? quoted[1] : null;
	}
	return null;
}

/**
 * Tell whether `line` contains at least one match of `regex` that should be
 * reported for the rule called `patternName`.
 *
 * Every match on the line is examined, so a benign first match (for example
 * a value that only names an environment variable) cannot hide a real secret
 * that appears later on the same line.
 */
function hasReportableMatch(
	line: string,
	patternName: string,
	regex: RegExp,
): boolean {
	// The regex is global and reused across lines: always restart at column 0.
	regex.lastIndex = 0;
	let match = regex.exec(line);
	if (match === null) {
		return false;
	}
	if (shouldIgnorePatternMatch(line, patternName)) {
		return false;
	}

	while (match !== null) {
		// Skip values such as api_key = "FIREWORKS_API_KEY": they reference an
		// env var by name and are not secrets themselves.
		const value = quotedValueOfMatch(patternName, match);
		if (value === null || !looksLikeEnvVarName(value)) {
			return true;
		}
		// Guard against a zero-length match stalling the loop.
		if (match[0] === "") {
			regex.lastIndex++;
		}
		match = regex.exec(line);
	}
	return false;
}

/**
 * Scan content for potential secrets.
 *
 * Each line is tested against the rules in SECRET_PATTERNS in order, and at
 * most one finding is reported per line (the first rule with a reportable
 * match). Matches that are known false positives are dropped: safe HTTP
 * header literals, and quoted values that merely name an environment
 * variable.
 *
 * @param content - Full text to scan.
 * @param filePath - Optional path of the file; when it is a test file the
 *   scan is skipped, since secrets in tests are usually fake/test values.
 * @returns Findings with 1-based line numbers, in line order.
 */
export function scanForSecrets(
	content: string,
	filePath?: string,
): SecretFinding[] {
	// Skip test files — secrets in tests are usually fake/test values
	if (filePath && isTestFile(filePath)) {
		return [];
	}

	// Private regex copies, built once per scan instead of once per line.
	// The global flag is forced so the match loop always advances.
	const rules = SECRET_PATTERNS.map((rule) => {
		const flags = rule.pattern.flags.includes("g")
			? rule.pattern.flags
			: `${rule.pattern.flags}g`;
		return {
			name: rule.name,
			message: rule.message,
			regex: new RegExp(rule.pattern.source, flags),
		};
	});

	const findings: SecretFinding[] = [];
	const lines = content.split("\n");

	for (let i = 0; i < lines.length; i++) {
		const line = lines[i];
		for (const rule of rules) {
			if (hasReportableMatch(line, rule.name, rule.regex)) {
				findings.push({
					line: i + 1,
					message: rule.message,
				});
				break; // One finding per line
			}
		}
	}

	return findings;
}

/**
 * Render secret findings as a multi-line message for terminal output.
 *
 * The message has a header with the total count and file path, up to five
 * findings (one per line), an "... and N more" line when findings were
 * omitted, and a closing remediation hint.
 *
 * @param findings - Findings to report.
 * @param filePath - Path shown in the header.
 * @returns The formatted message, or an empty string when there are no findings.
 */
export function formatSecrets(
	findings: SecretFinding[],
	filePath: string,
): string {
	const total = findings.length;
	if (total === 0) {
		return "";
	}

	const header = `🔴 STOP — ${findings.length} potential secret(s) in ${filePath}:`;
	const shown = findings
		.slice(0, 5)
		.map((finding) => `  L${finding.line}: ${finding.message}`);
	const overflow = total > 5 ? [`  ... and ${findings.length - 5} more`] : [];
	const footer = "  → Remove before continuing. Use env vars instead.";

	return [header, ...shown, ...overflow, footer].join("\n");
}