export class ContentCleaner {
/**
* Cleans an email body by removing noise, quoted replies, and footers.
* Optimized for LLM processing.
*/
static cleanEmailBody(text: string): string {
if (!text) return "";
const originalText = text;
// 1. Detect if content is actually HTML
const isHtml = /<[a-z][\s\S]*>/i.test(text);
if (isHtml) {
// Lightweight HTML -> Markdown Conversion
// Structure: <br>, <p> -> Newlines
text = text.replace(/
/gi, '\n');
text = text.replace(/<\/p>/gi, '\n\n');
text = text.replace(/