import * as fs from 'fs';
import { resolveProjectPath } from '../runtime';

/** A single scene produced by the script parser. */
export interface Scene {
  /** 1-based position of the scene within the parsed script. */
  sceneNumber: number;
  /** Estimated length in seconds, derived from the narration length (never below 3). */
  duration: number;
  /** Human-readable description of the visual; always starts with "Visual for: ". */
  visualDescription: string;
  /** Narration text with every `[Visual: ...]` tag stripped. */
  voiceoverText: string;
  /** Lower-cased words taken from the visual cue, or from the narration when there is no cue. */
  searchKeywords: string[];
  /** The visual cue itself, set only when it names an existing entry in the local asset folder. */
  localAsset?: string;
}

/** Result of parsing a whole script. */
export interface ParsedScript {
  scenes: Scene[];
  /** Sum of all scene durations. */
  totalDuration: number;
  /** The local parser always reports 'professional'. */
  videoStyle: 'professional' | 'casual' | 'energetic';
}

// Common stop words to filter out for better keyword extraction
const STOP_WORDS = new Set([
  'about', 'after', 'again', 'also', 'back', 'been', 'before', 'being',
  'could', 'each', 'every', 'first', 'from', 'give', 'have', 'here',
  'into', 'just', 'know', 'like', 'look', 'make', 'many', 'more',
  'most', 'much', 'need', 'only', 'other', 'over', 'same', 'should',
  'some', 'such', 'take', 'than', 'that', 'their', 'them', 'then',
  'there', 'these', 'they', 'thing', 'this', 'those', 'through', 'time',
  'very', 'want', 'well', 'what', 'when', 'where', 'which', 'while',
  'will', 'with', 'work', 'would', 'year', 'your', 'free', 'today',
  'already', 'towards', 'specifically', 'question'
]);

/** Fragments of this many characters or fewer are ignored as noise. */
const MIN_LINE_LENGTH = 5;

/** Narration characters that count as one second of scene time. */
const CHARS_PER_SECOND = 15;

/** No scene is shorter than this many seconds. */
const MIN_SCENE_SECONDS = 3;

/** Maximum number of narration keywords kept per scene. */
const MAX_KEYWORDS = 4;

/** Bounds (in trimmed characters) enforced by validateScript. */
const MIN_SCRIPT_LENGTH = 10;
const MAX_SCRIPT_LENGTH = 5000;

/** Captures the text of the first `[Visual: ...]` tag on a line. */
const VISUAL_TAG = /\[Visual:?\s*(.*?)\]/i;

/** Matches every `[Visual: ...]` tag on a line so they can be stripped from the narration. */
const ALL_VISUAL_TAGS = /\[Visual:?\s*.*?\]/gi;

/**
 * Whitespace that follows sentence-ending punctuation.
 * - The lookbehind keeps the punctuation attached to its sentence and avoids
 *   splitting things like "example.com" (no whitespace after the dot).
 * - The negative lookahead refuses to split while inside an open `[...]`
 *   group, so a tag such as `[Visual: city at night. Drone shot]` stays whole.
 */
const SENTENCE_BOUNDARY = /(?<=[.?!])\s+(?![^\[\]]*\])/;

/**
 * Parse a script into scenes using local parsing.
 * Simple, fast, no API calls required.
 *
 * @param script Raw script text.
 * @returns The parsed scenes together with their total duration.
 */
export async function parseScript(script: string): Promise<ParsedScript> {
  return parseScriptLocally(script);
}

/** Breaks the script into trimmed, non-empty sentence fragments (paragraph -> line -> sentence). */
function splitIntoSentences(script: string): string[] {
  const sentences: string[] = [];
  for (const paragraph of script.split(/\n\s*\n/)) {
    for (const line of paragraph.split('\n')) {
      for (const piece of line.split(SENTENCE_BOUNDARY)) {
        const trimmed = piece.trim();
        if (trimmed.length > 0) {
          sentences.push(trimmed);
        }
      }
    }
  }
  return sentences;
}

/** Picks up to MAX_KEYWORDS meaningful words from narration text, with a generic fallback. */
function extractKeywords(text: string): string[] {
  const keywords = text
    .toLowerCase()
    .replace(/[.,?!#+'%]/g, '')
    .split(/\s+/)
    .filter(word => word.length > 3 && !STOP_WORDS.has(word))
    .slice(0, MAX_KEYWORDS);
  return keywords.length > 0 ? keywords : ['business', 'professional'];
}

/**
 * Reports whether a visual cue names an existing entry in the local asset folder.
 * The cue comes straight from script text, so absolute paths and `..` segments
 * are rejected rather than being allowed to probe outside that folder.
 */
function isLocalAsset(cue: string): boolean {
  const isAbsolute = /^([\\/]|[A-Za-z]:[\\/])/.test(cue);
  const climbsOut = cue.split(/[\\/]/).includes('..');
  if (isAbsolute || climbsOut) {
    return false;
  }
  // NOTE(review): 'input-assests' looks misspelled; it is a runtime path and is
  // kept verbatim — confirm against the actual directory name.
  return fs.existsSync(resolveProjectPath('input', 'input-assests', cue));
}

/**
 * Simple local parser that doesn't need AI.
 * Breaks text by newlines/sentence punctuation and turns each fragment into a scene.
 *
 * A `[Visual: ...]` tag may appear inline with the narration, or alone on the
 * line before it; in both cases the tag text drives the scene's keywords.
 */
function parseScriptLocally(script: string): ParsedScript {
  const lines = splitIntoSentences(script).filter(s => s.length > MIN_LINE_LENGTH);

  const scenes: Scene[] = [];
  let pendingVisualCue = '';

  for (const line of lines) {
    const inlineCue = line.match(VISUAL_TAG)?.[1]?.trim() ?? '';
    const cleanText = line.replace(ALL_VISUAL_TAGS, '').trim();

    // Support the common Claude-style format where a visual tag is on its own
    // line and the narration appears on the following line.
    if (!cleanText) {
      if (inlineCue) {
        pendingVisualCue = inlineCue;
      }
      continue;
    }

    // An inline cue wins over a pending one. Either way the pending cue is
    // spent here, so it cannot leak onto a later, unrelated scene.
    const visualCue = inlineCue || pendingVisualCue;
    pendingVisualCue = '';

    // Strategy: use the visual cue if present, otherwise narration keywords.
    let keywords: string[];
    let visualDescription: string;
    let localAsset: string | undefined = undefined;

    if (visualCue) {
      keywords = visualCue.toLowerCase().split(/\s+/).filter(Boolean);
      visualDescription = `Visual for: ${visualCue}`;
      if (isLocalAsset(visualCue)) {
        localAsset = visualCue;
      }
    } else {
      keywords = extractKeywords(cleanText);
      visualDescription = `Visual for: ${keywords.join(' ')}`;
    }

    const duration = Math.max(
      MIN_SCENE_SECONDS,
      Math.ceil(cleanText.length / CHARS_PER_SECOND)
    );

    scenes.push({
      sceneNumber: scenes.length + 1,
      duration,
      visualDescription,
      voiceoverText: cleanText,
      searchKeywords: keywords,
      localAsset
    });
  }

  const totalDuration = scenes.reduce((acc, s) => acc + s.duration, 0);

  return {
    scenes,
    totalDuration,
    videoStyle: 'professional'
  };
}

/**
 * Validate that a script has the minimum required content.
 *
 * @param script Raw script text.
 * @throws Error when the script is empty or whitespace-only, shorter than
 *   10 trimmed characters, or longer than 5000 trimmed characters.
 */
export function validateScript(script: string): void {
  // `script` is typed as string, but a missing value is still treated as empty.
  const trimmedLength = script ? script.trim().length : 0;

  if (trimmedLength === 0) {
    throw new Error('Script cannot be empty');
  }

  if (trimmedLength < MIN_SCRIPT_LENGTH) {
    throw new Error(`Script is too short (minimum ${MIN_SCRIPT_LENGTH} characters)`);
  }

  if (trimmedLength > MAX_SCRIPT_LENGTH) {
    throw new Error(`Script is too long (maximum ${MAX_SCRIPT_LENGTH} characters)`);
  }
}