/**
 * Candidate http(s) URL: runs up to the first whitespace, angle bracket,
 * quote, backtick, or closing square/curly bracket.
 *
 * ")" is deliberately allowed in the candidate so that URLs containing
 * balanced parentheses (e.g. `.../wiki/Foo_(bar)`) are not truncated; an
 * unmatched ")" is removed afterwards by `cutAtUnmatchedCloseParen`.
 */
const URL_RE = /\bhttps?:\/\/[^\s<>"'`\]}]+/i;

/**
 * Trailing punctuation that's almost always sentence/markup, not the URL.
 * ")" is intentionally absent: by the time this is applied every remaining
 * ")" is balanced by a "(" inside the URL and therefore belongs to it.
 */
const TRAILING = /[.,;:!?\]}'"»]+$/;

/** Inputs longer than this are not scanned at all (guards huge blobs). */
const MAX_TEXT_LENGTH = 50_000;

/**
 * Truncate `candidate` at the first ")" that has no matching "(" before it.
 *
 * Such a ")" belongs to surrounding text (a markdown `](url)` wrapper or a
 * parenthesised sentence), not to the URL. When the candidate contains no
 * "(", this cuts at the first ")", i.e. the URL ends at the closing bracket.
 */
function cutAtUnmatchedCloseParen(candidate: string): string {
  let depth = 0;
  for (let i = 0; i < candidate.length; i++) {
    const ch = candidate[i];
    if (ch === '(') {
      depth++;
    } else if (ch === ')') {
      if (depth === 0) return candidate.slice(0, i);
      depth--;
    }
  }
  return candidate;
}

/**
 * Extract the FIRST previewable http(s) URL from a message's plain text.
 *
 * Telegram/WhatsApp rule: at most one preview per message — we surface the
 * first link only.
 *
 * Deliberately conservative: only absolute http/https URLs (no bare
 * `example.com`, no mailto/ftp), trailing punctuation trimmed, unmatched
 * closing parentheses dropped, and a length cap to avoid pathological inputs.
 *
 * @param text - Plain message text; `null`, `undefined` and `''` yield `null`.
 * @returns The first URL, normalised through `URL#toString()`, or `null` when
 *   there is nothing to unfurl: no match, text longer than 50,000 characters,
 *   or a first match that does not parse as an http(s) URL.
 */
export function extractFirstUrl(text: string | undefined | null): string | null {
  if (!text) return null;
  // Code-heavy content is not special-cased; the cap alone guards huge blobs.
  if (text.length > MAX_TEXT_LENGTH) return null;

  const match = URL_RE.exec(text);
  if (!match) return null;

  // Drop any wrapper ")" first, then sentence punctuation left at the end.
  const url = cutAtUnmatchedCloseParen(match[0]).replace(TRAILING, '');

  // Validate — a malformed match resolves to no preview.
  try {
    const parsed = new URL(url);
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;
    return parsed.toString();
  } catch {
    return null;
  }
}