"title"` (brackets are stripped)
* And three title-quote forms: `"..."`, `'...'`, `(...)`.
*/
function parseDestinationAndTitle(raw: string): {
  url: string;
  title?: string;
} {
  const input = raw.trim();
  let url: string;
  let rest: string;

  if (input.startsWith("<")) {
    // Angle-bracket form: the destination is everything up to the first `>`.
    const closeIdx = input.indexOf(">");
    if (closeIdx === -1) {
      // No closing `>`: keep everything after the `<` as the destination.
      url = input.slice(1);
      rest = "";
    } else {
      url = input.slice(1, closeIdx);
      rest = input.slice(closeIdx + 1).trim();
    }
  } else {
    // Bare form: the destination ends at the first space, tab or newline
    // that is not preceded by a backslash.
    let cut = 0;
    while (cut < input.length) {
      const ch = input[cut];
      if (ch === "\\" && cut + 1 < input.length) {
        // Step over the backslash and the character it escapes.
        cut += 2;
        continue;
      }
      if (ch === " " || ch === "\t" || ch === "\n") {
        break;
      }
      cut++;
    }
    url = input.slice(0, cut);
    rest = input.slice(cut).trim();
  }

  // Whatever follows the destination must be exactly one quoted title
  // ("...", '...' or (...)); anything else leaves the title undefined.
  let title: string | undefined;
  if (rest.length > 0) {
    const quoted = /^"([^"]*)"$|^'([^']*)'$|^\(([^)]*)\)$/.exec(rest);
    if (quoted) {
      title = quoted[1] ?? quoted[2] ?? quoted[3];
    }
  }
  return { url, title };
}
/**
 * Tries to parse a delimited inline span whose opening delimiter begins at
 * `text[start]`.
 *
 * @param text      The inline text being scanned.
 * @param start     Index of the first character of the opening delimiter.
 * @param delimiter The delimiter string used for both opener and closer.
 * @param openTag   Markup placed before the rendered inner content.
 * @param closeTag  Markup placed after the rendered inner content.
 * @returns `html` (open tag + inline-parsed inner text + close tag) and `end`
 *          (index just past the closing delimiter), or `null` when no
 *          acceptable closing delimiter exists.
 */
function parseDelimited(
  text: string,
  start: number,
  delimiter: string,
  openTag: string,
  closeTag: string
): { html: string; end: number } | null {
  const width = delimiter.length;
  const contentStart = start + width;
  const isSpaceOrTab = (ch: string | undefined): boolean =>
    ch === " " || ch === "\t";

  // Nothing after the opener, or the opener is followed by a space/tab.
  if (contentStart >= text.length || isSpaceOrTab(text[contentStart])) {
    return null;
  }

  for (let pos = contentStart; pos < text.length; pos++) {
    // A backslash escapes the next character; the extra increment here plus
    // the loop's own increment steps over both.
    if (text[pos] === "\\" && pos + 1 < text.length) {
      pos++;
      continue;
    }
    if (text.substring(pos, pos + width) !== delimiter) {
      continue;
    }

    // Candidate closer found — it is rejected when any of the following hold.
    const precededByBlank = isSpaceOrTab(text[pos - 1]);

    // Single-character delimiters must not be part of a longer run of the same
    // character (e.g. the `*` inside `**`), unless the neighbour on the left
    // is itself escaped.
    const precededBySame =
      pos > 0 &&
      text[pos - 1] === delimiter[0] &&
      !(pos >= 2 && text[pos - 2] === "\\");
    const followedBySame =
      pos + width < text.length && text[pos + width] === delimiter[0];
    const insideRun = width === 1 && (precededBySame || followedBySame);

    const inner = text.substring(contentStart, pos);
    if (precededByBlank || insideRun || inner.length === 0) {
      continue;
    }

    return {
      html: openTag + parseInline(inner) + closeTag,
      end: pos + width,
    };
  }
  return null;
}
// ─── Block-Level Types ───────────────────────────────────────────────────────
/**
 * Common shape shared by every block-level token. `type` is the discriminator
 * that each concrete token interface below narrows to a string literal.
 */
interface BlockToken {
type: string;
}
/** A heading block. */
interface HeadingToken extends BlockToken {
type: "heading";
// Heading depth — presumably 1–6; confirm against the tokenizer.
level: number;
// Inline markdown source; passed through parseInline() when rendered.
content: string;
}
/** A paragraph block. */
interface ParagraphToken extends BlockToken {
type: "paragraph";
// Inline markdown source; passed through parseInline() when rendered.
content: string;
}
/** A code block. */
interface CodeBlockToken extends BlockToken {
type: "codeBlock";
// When non-empty, the emitter builds a `data-language` attribute from it.
language: string;
// Code text; HTML-escaped by the emitter rather than inline-parsed.
code: string;
}
/** A blockquote block. */
interface BlockquoteToken extends BlockToken {
type: "blockquote";
// Markdown source of the quote; the emitter re-tokenizes it recursively.
content: string;
}
/** A horizontal rule; carries no payload beyond its `type`. */
interface HorizontalRuleToken extends BlockToken {
type: "hr";
}
/**
 * A single list item. Consecutive list-item tokens are grouped into lists by
 * the emitter.
 */
interface ListItemToken extends BlockToken {
type: "listItem";
listType: "bullet" | "ordered" | "task";
// Presumably the item's leading indentation width — confirm against the tokenizer.
indent: number;
content: string;
start?: number; // for ordered lists
checked?: boolean; // for task lists
childContent?: string; // recursively parsed content within this item
}
/** A pipe table. */
interface TableToken extends BlockToken {
type: "table";
// Trimmed cells of the header row.
headers: string[];
// Trimmed cells of each body row, in source order.
rows: string[][];
// Alignment derived from the colons in each separator-row cell;
// `null` when the cell has no colon.
alignments: ("left" | "center" | "right" | null)[];
}
/** A run of lines that the emitter appends to the output verbatim. */
interface RawHtmlToken extends BlockToken {
type: "rawHtml";
content: string;
}
/** Union of every block-level token, discriminated on `type`. */
type Token =
| HeadingToken
| ParagraphToken
| CodeBlockToken
| BlockquoteToken
| HorizontalRuleToken
| ListItemToken
| TableToken
| RawHtmlToken;
/**
 * Tag names that open a raw HTML block.
 *
 * The list is the CommonMark "type 6" set of block-level tags, extended with
 * `audio` because BlockNote serializes audio as raw HTML (markdown has no
 * shorthand for it). A line beginning with `<` followed by one of these names
 * starts a run of non-blank lines that is emitted verbatim instead of being
 * wrapped in a paragraph.
 */
const HTML_BLOCK_TAGS = new Set([
  // a – c
  "address", "article", "aside", "audio",
  "base", "basefont", "blockquote", "body",
  "caption", "center", "col", "colgroup",
  // d – f
  "dd", "details", "dialog", "dir", "div", "dl", "dt",
  "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
  // h – l
  "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
  "iframe",
  "legend", "li", "link",
  // m – p
  "main", "menu", "menuitem",
  "nav", "noframes",
  "ol", "optgroup", "option",
  "p", "param",
  // s – u
  "section", "source", "summary",
  "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
  "ul",
]);
function isHtmlBlockStart(line: string): boolean {
// `, `...?>`, ``, or ``.
// Lines are emitted verbatim until the next blank line.
if (isHtmlBlockStart(line)) {
const htmlLines: string[] = [];
while (i < lines.length && lines[i].trim() !== "") {
htmlLines.push(lines[i]);
i++;
}
tokens.push({
type: "rawHtml",
content: htmlLines.join("\n"),
});
prevLineWasBlank = false;
continue;
}
// Paragraph (default)
const paraLines: string[] = [line];
i++;
while (i < lines.length) {
const nextLine = lines[i];
// Stop paragraph on blank line
if (nextLine.trim() === "") {break;}
// Stop on block-level element
if (/^(#{1,6})\s/.test(nextLine)) {break;}
if (/^(`{3,}|~{3,})/.test(nextLine)) {break;}
if (/^\s{0,3}>/.test(nextLine)) {break;}
if (/^(\s{0,3})([-*_])\s*(\2\s*){2,}$/.test(nextLine)) {break;}
if (/^\s*([-*+]|\d+[.)])\s+/.test(nextLine)) {break;}
if (/^\s*\|(.+\|)+\s*$/.test(nextLine)) {break;}
if (isHtmlBlockStart(nextLine)) {break;}
// Check if next-next line is setext marker
if (
i + 1 < lines.length &&
/^[=-]+\s*$/.test(lines[i + 1]) &&
nextLine.trim().length > 0
) {
break;
}
paraLines.push(nextLine);
i++;
}
// CommonMark allows up to 3 leading spaces of indent on paragraph lines.
// Also strip trailing whitespace from the final line so a trailing
// hard-break sequence (` \n` at end of paragraph) doesn't leak as
// literal trailing spaces in the rendered output.
tokens.push({
type: "paragraph",
content: paraLines
.map((l) => l.replace(/^ {1,3}/, ""))
.join("\n")
.replace(/[ \t]+$/, ""),
});
prevLineWasBlank = false;
}
return tokens;
}
/**
 * Attempts to read a pipe table whose header row is `lines[start]`.
 *
 * @param lines All source lines.
 * @param start Index of the candidate header row.
 * @returns The table token plus the index of the first line after the table,
 *          or `null` when `lines[start]` / `lines[start + 1]` do not form a
 *          header row followed by a separator row.
 */
function tryParseTable(
  lines: string[],
  start: number
): { token: TableToken; nextLine: number } | null {
  // Both a header row and a separator row are required.
  const separatorIdx = start + 1;
  if (separatorIdx >= lines.length) {
    return null;
  }
  const headerLine = lines[start];
  const separatorLine = lines[separatorIdx];

  // The separator must contain a pipe and consist solely of dash runs with
  // optional alignment colons, pipes and whitespace (outer pipes optional).
  const isSeparator =
    separatorLine.includes("|") &&
    /^\s*\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?\s*$/.test(separatorLine);
  // The header needs a pipe too, otherwise it is indistinguishable from text.
  if (!isSeparator || !headerLine.includes("|")) {
    return null;
  }

  const headers = parsePipeCells(headerLine);
  const alignments = parseAlignments(separatorLine);

  // Body rows continue until the first line that contains no pipe.
  const rows: string[][] = [];
  let cursor = start + 2;
  for (; cursor < lines.length; cursor++) {
    const candidate = lines[cursor];
    if (!candidate.includes("|")) {
      break;
    }
    rows.push(parsePipeCells(candidate));
  }

  return {
    token: { type: "table", headers, rows, alignments },
    nextLine: cursor,
  };
}
/**
 * Splits one table row into its trimmed cell strings.
 *
 * Surrounding whitespace and one optional border pipe on each side are
 * removed, then the remainder is split on `|`. A backslash-escaped pipe
 * (`\|`) never acts as a separator or border; it becomes a literal `|` in the
 * cell text.
 *
 * @param line A single table row (header, separator or body).
 * @returns The cells in order, each trimmed. Always contains at least one
 *          element (an empty string for an empty row).
 */
function parsePipeCells(line: string): string[] {
  let content = line.trim();
  if (content.startsWith("|")) {
    content = content.substring(1);
  }
  // Only strip a trailing pipe when it is a real border. A row ending in `\|`
  // ends with an escaped literal pipe that belongs to the last cell; removing
  // it would leave a stray backslash behind.
  if (content.endsWith("|") && !content.endsWith("\\|")) {
    content = content.substring(0, content.length - 1);
  }

  const cells: string[] = [];
  let current = "";
  for (let i = 0; i < content.length; i++) {
    const ch = content[i];
    if (ch === "\\" && i + 1 < content.length && content[i + 1] === "|") {
      // Escaped pipe: emit a literal `|` and skip the pipe itself.
      current += "|";
      i++;
    } else if (ch === "|") {
      cells.push(current.trim());
      current = "";
    } else {
      current += ch;
    }
  }
  cells.push(current.trim());
  return cells;
}
/**
 * Derives per-column alignment from a table separator row.
 *
 * Each cell is inspected for a leading and/or trailing colon:
 * both → "center", trailing only → "right", leading only → "left",
 * neither → `null`.
 *
 * @param separatorLine The separator row of a pipe table.
 * @returns One alignment entry per separator cell, in column order.
 */
function parseAlignments(
  separatorLine: string
): ("left" | "center" | "right" | null)[] {
  const alignments: ("left" | "center" | "right" | null)[] = [];
  for (const cell of parsePipeCells(separatorLine)) {
    const spec = cell.trim();
    const leadingColon = spec.startsWith(":");
    const trailingColon = spec.endsWith(":");
    if (leadingColon) {
      alignments.push(trailingColon ? "center" : "left");
    } else {
      alignments.push(trailingColon ? "right" : null);
    }
  }
  return alignments;
}
// ─── HTML Emitter ────────────────────────────────────────────────────────────
/**
 * Renders a sequence of block tokens to a single HTML string.
 *
 * Walks `tokens` left to right, appending the output for each token to an
 * accumulator. Every case advances the cursor by one except `listItem`, which
 * hands a whole run of list items to `emitListItems` and resumes at the index
 * it reports.
 *
 * NOTE(review): as this block appears here, the template literals contain no
 * element tags and `langAttr` (code-block case) is built but never
 * interpolated. The surrounding markup looks like it was lost from this copy —
 * confirm against the canonical source before relying on the exact output.
 *
 * @param tokens Block tokens to render, in document order.
 * @returns The concatenated HTML for all tokens.
 */
function tokensToHtml(tokens: Token[]): string {
let html = "";
let i = 0;
while (i < tokens.length) {
const token = tokens[i];
switch (token.type) {
// Heading: the content is inline-parsed.
case "heading": {
const t = token as HeadingToken;
html += `${parseInline(t.content)}`;
i++;
break;
}
// Paragraph: the content is inline-parsed and followed by a newline.
case "paragraph": {
const t = token as ParagraphToken;
html += `${parseInline(t.content)}
`;
i++;
break;
}
// Code block: the code is HTML-escaped, never inline-parsed.
case "codeBlock": {
const t = token as CodeBlockToken;
// Attribute text is only built when a language was given.
const langAttr = t.language
? ` data-language="${escapeHtml(t.language)}"`
: "";
html += `${escapeHtml(t.code)}
`;
i++;
break;
}
case "blockquote": {
const t = token as BlockquoteToken;
// Recursively parse blockquote content as markdown
const innerTokens = tokenize(t.content);
const innerHtml = tokensToHtml(innerTokens);
html += `${innerHtml}
`;
i++;
break;
}
// Horizontal rule: no payload to render.
case "hr":
html += `
`;
i++;
break;
case "listItem": {
// Collect consecutive list items and build nested list structure
const listHtml = emitListItems(tokens, i);
html += listHtml.html;
// Resume at the index emitListItems reports instead of stepping by one.
i = listHtml.nextIndex;
break;
}
// Table: rendering is delegated to emitTable.
case "table": {
const t = token as TableToken;
html += emitTable(t);
i++;
break;
}
// Raw HTML: appended verbatim, with no escaping or inline parsing.
case "rawHtml": {
const t = token as RawHtmlToken;
html += t.content;
i++;
break;
}
// Unrecognized token types produce no output; still advance so the loop
// cannot stall.
default:
i++;
}
}
return html;
}
function emitListItems(
tokens: Token[],
startIdx: number
): { html: string; nextIndex: number } {
let html = "";
let i = startIdx;
let currentListType: "bullet" | "ordered" | null = null;
while (i < tokens.length && tokens[i].type === "listItem") {
const item = tokens[i] as ListItemToken;
const effectiveType = getEffectiveListType(item.listType);
// Check if we need to switch list type
if (currentListType !== null && currentListType !== effectiveType) {
// Close current list, open new one
html += `${currentListType === "ordered" ? "ol" : "ul"}>`;
currentListType = null;
}
// Open list if needed
if (currentListType === null) {
if (effectiveType === "ordered") {
const startAttr =
item.start !== undefined && item.start !== 1
? ` start="${item.start}"`
: "";
html += ``;
} else {
html += `