/**
 * One block produced by {@link parseMarkdownAndCode}.
 */
export interface ParsedBlock {
  /** `"code"` for a fenced code block, `"markdown"` for a heading or paragraph. */
  type: "markdown" | "code";
  /**
   * Block text. Markdown paragraphs are their lines joined with `"\n"` with
   * trailing whitespace removed; a heading is its line verbatim. Code blocks
   * hold every line between the fences, trimmed and terminated by `"\n"`.
   */
  content: string;
  /** Trimmed text following the opening fence (`""` if none). Set on code blocks only. */
  language?: string;
  /** `false` only for a code block whose closing fence has not been seen. */
  isComplete?: boolean;
  /** Absolute offset in the input where the block starts (for code: the opening fence line). */
  startIndex: number;
  /**
   * Absolute offset where the block ends (exclusive). A closed code block ends
   * right after its closing fence line; a paragraph terminated by a following
   * line ends at that line's start offset; a trailing block ends at the input length.
   */
  endIndex: number;
}

/**
 * Splits (possibly still streaming) text into markdown and fenced-code blocks.
 *
 * The input is scanned line by line:
 * - a line starting with three or more backticks (after optional leading
 *   whitespace) opens a code block; the remainder of the line is its language;
 * - inside a code block, a line consisting only of backticks, at least as many
 *   as the opening fence, closes it;
 * - an ATX heading (`#` to `######` followed by whitespace or end of line)
 *   becomes its own markdown block;
 * - a blank line ends the current paragraph;
 * - anything else is appended to the current paragraph.
 *
 * A code block that is still open when the input ends is returned with
 * `isComplete: false`, even if no body line has arrived yet.
 *
 * NOTE: lines inside a code block are stored trimmed, so leading indentation
 * of the code is not preserved.
 *
 * @param streamedText - The text received so far.
 * @returns The blocks in document order, with absolute offsets into `streamedText`.
 */
export function parseMarkdownAndCode(streamedText: string): ParsedBlock[] {
  const blocks: ParsedBlock[] = [];

  // Patterns are built once rather than on every loop iteration.
  const openingFencePattern = /^(\s*)(```+)(.*)$/;
  const closingFencePattern = /^`{3,}$/;
  const headingPattern = /^(#{1,6})(\s|$)/;

  // Parser state.
  let inCodeBlock = false;
  let currentContent = "";
  let currentBlockStart = 0; // absolute offset where the block being built began
  let codeLanguage = "";
  let openingFenceLength = 0; // number of backticks in the fence that opened the block

  /**
   * Splits the input on "\n", recording the absolute start offset of each line.
   * The newline itself is not part of the line text; a trailing "\r" is.
   */
  function splitLinesWithOffsets(): Array<{ text: string; startPos: number }> {
    const result: Array<{ text: string; startPos: number }> = [];
    let cursor = 0;
    for (;;) {
      const newlineAt = streamedText.indexOf("\n", cursor);
      if (newlineAt === -1) {
        // No more newlines: the remainder (possibly empty) is the last line.
        result.push({ text: streamedText.slice(cursor), startPos: cursor });
        return result;
      }
      result.push({ text: streamedText.slice(cursor, newlineAt), startPos: cursor });
      cursor = newlineAt + 1;
    }
  }

  /**
   * Emits the accumulated paragraph (if it has any non-whitespace content)
   * ending at `endPos`, then clears the accumulator.
   */
  function finalizeMarkdownBlock(endPos: number): void {
    const trimmed = currentContent.trimEnd();
    if (trimmed.length > 0) {
      blocks.push({
        type: "markdown",
        content: trimmed,
        startIndex: currentBlockStart,
        endIndex: endPos,
        isComplete: true,
      });
    }
    currentContent = "";
  }

  for (const { text: line, startPos: lineStartPos } of splitLinesWithOffsets()) {
    const trimmedLine = line.trim();
    const lineEndPos = lineStartPos + line.length;

    // ---------------- inside a fenced code block ----------------
    if (inCodeBlock) {
      // The closing fence must be at least as long as the opening one, so a
      // block opened with ```` can contain literal ``` lines.
      const isClosingFence =
        closingFencePattern.test(trimmedLine) && trimmedLine.length >= openingFenceLength;

      if (isClosingFence) {
        blocks.push({
          type: "code",
          content: currentContent,
          startIndex: currentBlockStart,
          // lineEndPos already points just past the closing fence line.
          endIndex: lineEndPos,
          language: codeLanguage,
          isComplete: true,
        });
        inCodeBlock = false;
        currentContent = "";
        codeLanguage = "";
        openingFenceLength = 0;
        currentBlockStart = lineEndPos + 1;
      } else {
        currentContent += trimmedLine + "\n";
      }
      continue;
    }

    // ---------------- markdown mode ----------------

    // 1) Opening fence. Trailing whitespace is stripped first because `.` does
    //    not match "\r", which would otherwise defeat the pattern on CRLF input.
    const fenceMatch = openingFencePattern.exec(line.trimEnd());
    if (fenceMatch) {
      const [, , fence = "", info = ""] = fenceMatch;
      finalizeMarkdownBlock(lineStartPos);
      codeLanguage = info.trim();
      openingFenceLength = fence.length;
      inCodeBlock = true;
      currentBlockStart = lineStartPos; // the fence line is part of the block
      continue;
    }

    // 2) ATX heading: always a block of its own.
    if (headingPattern.test(trimmedLine)) {
      finalizeMarkdownBlock(lineStartPos);
      blocks.push({
        type: "markdown",
        content: line,
        startIndex: lineStartPos,
        endIndex: lineEndPos,
        isComplete: true,
      });
      currentBlockStart = lineEndPos + 1;
      continue;
    }

    // 3) Blank line: ends the current paragraph and produces no block itself.
    if (trimmedLine === "") {
      finalizeMarkdownBlock(lineStartPos);
      currentBlockStart = lineEndPos + 1;
      continue;
    }

    // 4) Ordinary text: extend the current paragraph.
    currentContent += line + "\n";
  }

  // ---------------- end of input ----------------
  if (inCodeBlock) {
    // No closing fence yet: report a partial block, even when its body is
    // still empty, so a consumer can see the block as soon as the fence arrives.
    blocks.push({
      type: "code",
      content: currentContent,
      startIndex: currentBlockStart,
      endIndex: streamedText.length,
      language: codeLanguage,
      isComplete: false,
    });
  } else {
    finalizeMarkdownBlock(streamedText.length);
  }

  return blocks;
}