import fs from "node:fs";
import path from "node:path";
import chalk from "chalk";
import * as cheerio from "cheerio";
import * as Sentry from "@sentry/node";
import {
MacroInvocationError,
MacroLiveSampleError,
MacroRedirectedLinkError,
} from "../kumascript/src/errors.js";
import { Doc } from "../libs/types/document.js";
import { Document, execGit, slugToFolder } from "../content/index.js";
import { CONTENT_ROOT, REPOSITORY_URLS } from "../libs/env/index.js";
import * as kumascript from "../kumascript/index.js";
import { DEFAULT_LOCALE, FLAW_LEVELS } from "../libs/constants/index.js";
import { extractSections } from "./extract-sections.js";
import { extractSidebar } from "./extract-sidebar.js";
import { extractSummary } from "./extract-summary.js";
import { addBreadcrumbData } from "./document-utils.js";
import {
fixFixableFlaws,
injectFlaws,
injectSectionFlaws,
} from "./flaws/index.js";
import { checkImageReferences, checkImageWidths } from "./check-images.js";
import { getPageTitle } from "./page-title.js";
import { wrapCodeExamples } from "./code-headers.js";
import { formatNotecards } from "./format-notecards.js";
import buildOptions from "./build-options.js";
import LANGUAGES_RAW from "../libs/languages/index.js";
import { safeDecodeURIComponent } from "../kumascript/src/api/util.js";
import { wrapTables } from "./wrap-tables.js";
import {
getAdjacentFileAttachments,
injectLoadingLazyAttributes,
injectNoTranslate,
makeTOC,
postLocalFileLinks,
postProcessExternalLinks,
postProcessSmallerHeadingIDs,
} from "./utils.js";
import { addBaseline } from "./web-features.js";
export { default as SearchIndex } from "./search-index.js";
export { gather as gatherGitHistory } from "./git-history.js";
export { buildSPAs } from "./spas.js";
// Language data keyed by lower-cased locale code, so lookups can be done
// case-insensitively by lower-casing the locale first.
const LANGUAGES = new Map(
  Object.entries(LANGUAGES_RAW).map(([localeCode, languageInfo]) => [
    localeCode.toLowerCase(),
    languageInfo,
  ])
);

// Branch name assumed when no better one can be determined; it is what
// github.com/mdn/content uses.
const DEFAULT_BRANCH_NAME = "main";

// Module-level cache: content root -> git branch name resolved for it.
const rootToGitBranchMap = new Map();
/**
 * Resolve the git branch name to use for `root`, computing it at most once
 * per root and remembering the answer in `rootToGitBranchMap`.
 *
 * Only `CONTENT_ROOT` gets a real lookup; any other root resolves to
 * `DEFAULT_BRANCH_NAME`. For `CONTENT_ROOT` the name is taken from the
 * `GITHUB_REF` environment variable (everything after its second "/") when
 * that variable is set and `GITHUB_REPOSITORY` is not "mdn/yari". Otherwise
 * it is whatever `git branch --show-current` prints inside the root.
 *
 * @param root - the content root whose branch name is wanted.
 * @returns the branch name cached for `root`.
 */
function getCurrentGitBranch(root: string) {
  if (rootToGitBranchMap.has(root)) {
    return rootToGitBranchMap.get(root);
  }

  let branch = DEFAULT_BRANCH_NAME;
  if (root === CONTENT_ROOT) {
    const { GITHUB_REF: ref, GITHUB_REPOSITORY: repository } = process.env;
    if (ref && repository !== "mdn/yari") {
      // Drop the first two "/"-separated segments and keep the remainder,
      // e.g. "refs/heads/my/topic" -> "my/topic".
      branch = ref.split("/").slice(2).join("/");
    } else {
      // Most probably someone is previewing content from a topic branch, so
      // ask git directly. An empty answer leaves the default in place.
      try {
        branch = execGit(["branch", "--show-current"], { cwd: root }) || branch;
      } catch (_error) {
        /* allowed to fail for non git content root */
      }
    }
  }

  rootToGitBranchMap.set(root, branch);
  return rootToGitBranchMap.get(root);
}
/**
 * Guard against malformed slugs coming from the content's metadata, so that
 * faulty data breaks the build instead of slipping through.
 *
 * A slug is rejected when it is empty, starts or ends with a "/", or
 * contains "//". The checks run in that order and the first failing one
 * decides the error message.
 *
 * @param slug - the slug to check.
 * @throws Error describing the first rule the slug violates.
 */
function validateSlug(slug: string) {
  if (!slug) {
    throw new Error("slug is empty");
  }

  // Each entry pairs "is this rule violated?" with the message to raise.
  const violations: Array<[boolean, string]> = [
    [slug.startsWith("/"), `Slug '${slug}' starts with a /`],
    [slug.endsWith("/"), `Slug '${slug}' ends with a /`],
    [slug.includes("//"), `Slug '${slug}' contains a double /`],
  ];

  const firstViolation = violations.find(([violated]) => violated);
  if (firstViolation) {
    throw new Error(firstViolation[1]);
  }
}
/**
 * Mark callout blocks as notecards.
 *
 * Selects every `div.warning`, `div.note` and `div.blockIndicator` element,
 * adds the `notecard` class to the selection and removes the
 * `blockIndicator` class from it. The document is modified in place.
 *
 * NOTE(review): elements that already carry `notecard` (fixed by hand) are
 * expected not to get the class a second time — this relies on Cheerio's
 * `addClass` semantics; confirm.
 *
 * @param $ - the Cheerio document instance to update.
 */
function injectNotecardOnWarnings($: cheerio.CheerioAPI) {
  const callouts = $("div.warning, div.note, div.blockIndicator");
  callouts.addClass("notecard");
  callouts.removeClass("blockIndicator");
}
/**
 * Build the full GitHub URL of a source file.
 *
 * The URL has the shape
 * `https://github.com/<repository>/blob/<branch>/files/<folder>/<filename>`,
 * where the repository is looked up in `REPOSITORY_URLS` by `root` and the
 * branch comes from `getCurrentGitBranch(root)`.
 *
 * @param root - the content root the file belongs to.
 * @param folder - the folder currently being processed.
 * @param filename - the file's name within that folder.
 * @returns the URL pointing at the file on GitHub.
 */
function getGitHubURL(root: string, folder: string, filename: string) {
  const repository = REPOSITORY_URLS[root];
  const branch = getCurrentGitBranch(root);
  return `https://github.com/${repository}/blob/${branch}/files/${folder}/${filename}`;
}
/**
 * Build the full GitHub URL of a commit, intended for linking to the last
 * commit that affected a file.
 *
 * The repository is looked up in `REPOSITORY_URLS` by `root`.
 *
 * @param root - the content root the commit belongs to.
 * @param hash - the full commit hash to point to.
 * @returns the URL of that commit on GitHub.
 */
export function getLastCommitURL(root: string, hash: string) {
  return `https://github.com/${REPOSITORY_URLS[root]}/commit/${hash}`;
}
/**
 * Attach a `source` record to `doc`, describing where the document's file
 * lives and how to reach it on GitHub.
 *
 * The record holds the folder, the GitHub URL of the file, the GitHub URL of
 * the commit identified by `metadata.hash`, and the file's base name.
 *
 * @param doc - the object that receives the `source` property (mutated).
 * @param document - the source document; its `fileInfo` supplies `folder`,
 *   `root` and `path`.
 * @param metadata - the document's metadata; only `hash` is read here.
 */
function injectSource(doc, document, metadata) {
  const { folder, root, path: filePath } = document.fileInfo;
  const filename = path.basename(filePath);
  const github_url = getGitHubURL(root, folder, filename);
  const last_commit_url = getLastCommitURL(root, metadata.hash);

  doc.source = { folder, github_url, last_commit_url, filename };
}
/**
 * Shape of a built document together with the artifacts produced alongside
 * it.
 */
export interface BuiltDocument {
  /** The built document itself. */
  doc: Doc;
  /** Live samples belonging to the document (loosely typed). */
  liveSamples: any;
  /**
   * File attachments associated with the document.
   * NOTE(review): `Map` must be given type arguments to compile; string keys
   * and string values are assumed here — confirm against the code that
   * builds this map.
   */
  fileAttachmentMap: Map<string, string>;
  /** Optional pointer to the document's source on GitHub. */
  source?: {
    github_url: string;
  };
  /** Optional plain HTML string for the document. */
  plainHTML?: string;
}
/**
 * Per-document options. All fields are optional, so an empty object is a
 * valid value.
 */
interface DocumentOptions {
  /** Whether fixable flaws should be fixed. */
  fixFlaws?: boolean;
  /** Dry-run switch for flaw fixing. */
  fixFlawsDryRun?: boolean;
  /**
   * The flaw types to fix.
   * NOTE(review): `Iterable` must be given a type argument to compile;
   * string flaw names are assumed here — confirm against the flaw fixer.
   */
  fixFlawsTypes?: Iterable<string>;
  /** Verbosity switch for flaw fixing. */
  fixFlawsVerbose?: boolean;
  /** Whether plain HTML is requested. */
  plainHTML?: boolean;
}
export async function buildDocument(
document,
documentOptions: DocumentOptions = {}
): Promise {
Sentry.setContext("doc", {
path: document?.fileInfo?.path,
title: document?.metadata?.title,
url: document?.url,
});
Sentry.setTags({
doc_slug: document?.metadata?.slug,
doc_locale: document?.metadata?.locale,
});
// Important that the "local" document options come last.
// And use object spread to create a new object instead of mutating the
// global one.
const options = {
...buildOptions,
...documentOptions,
};
const { metadata, fileInfo } = document;
const expectedFolderPath = Document.urlToFolderPath(document.url);
if (expectedFolderPath !== document.fileInfo.folder) {
throw new Error(
`The document's slug (${metadata.slug}) doesn't match its disk folder name (${document.fileInfo.folder}): expected path (${expectedFolderPath})`
);
}
const doc = {
isMarkdown: document.isMarkdown,
isTranslated: document.isTranslated,
isActive: document.isActive,
flaws: {},
} as Partial;
interface LiveSample {
id: string;
html: string;
slug?: string;
}
let flaws: any[] = [];
let $: cheerio.CheerioAPI = null;
const liveSamples: LiveSample[] = [];
// this will get populated with the parent's frontmatter by kumascript if the document is localized:
let allMetadata = metadata;
try {
let kumascriptMetadata;
[$, flaws, kumascriptMetadata] = await kumascript.render(document.url);
allMetadata = { ...allMetadata, ...kumascriptMetadata };
} catch (error) {
if (
error instanceof MacroInvocationError &&
error.name === "MacroInvocationError"
) {
// The source HTML couldn't even be parsed! There's no point allowing
// anything else move on.
// But considering that this might just be one of many documents you're
// building, let's at least help by setting a more user-friendly error
// message.
error.updateFileInfo(document.fileInfo);
throw new Error(
`MacroInvocationError trying to parse file.\n\nFile: ${error.filepath}\nMessage: ${error.error.message}\n\n${error.sourceContext}`
);
}
// Any other unexpected error re-thrown.
throw error;
}
const liveSamplePages = await kumascript.buildLiveSamplePages(
document.url,
document.metadata.title,
$,
document.rawBody
);
for (const liveSamplePage of liveSamplePages) {
const { id, flaw, slug } = liveSamplePage;
let { html } = liveSamplePage;
if (flaw) {
flaw.updateFileInfo(fileInfo);
if (flaw.name === "MacroLiveSampleError") {
// As of April 2021 there are 0 pages in mdn/content that trigger
// a MacroLiveSampleError. So we can be a lot more strict with en-US
// until the translated-content has had a chance to clean up all
// their live sample errors.
// See https://github.com/mdn/yari/issues/2489
if (document.metadata.locale === "en-US") {
throw new Error(
`MacroLiveSampleError within ${flaw.filepath}, line ${flaw.line} column ${flaw.column} (${flaw.error.message})`
);
} else {
console.warn(
`MacroLiveSampleError within ${flaw.filepath}, line ${flaw.line} column ${flaw.column} (${flaw.error.message})`
);
}
}
flaws.push(flaw);
html = `
Live sample failed!
Live sample failed!
An error occurred trying to render this live sample.
Consider filing an issue or trying your hands at a fix of your own.
Error details:
${flaw.error.toString()}
`;
}
liveSamples.push({ id: id.toLowerCase(), html, slug });
}
if (flaws.length) {
if (options.flawLevels.get("macros") === FLAW_LEVELS.ERROR) {
// Report and exit immediately on the first document with flaws.
console.error(
chalk.red.bold(
`Flaws (${flaws.length}) within ${document.metadata.slug} while rendering macros:`
)
);
flaws.forEach((flaw, i) => {
console.error(chalk.bold.red(`${i + 1}: ${flaw.name}`));
console.error(chalk.red(`${flaw}\n`));
});
// // XXX This is probably the wrong way to bubble up.
// process.exit(1);
throw new Error("Flaw error encountered");
} else if (options.flawLevels.get("macros") === FLAW_LEVELS.WARN) {
// doc.flaws.macros = flaws;
// The 'flaws' array don't have everything we need from the
// kumascript rendering, so we "beef it up" to have convenient
// attributes needed.
doc.flaws = doc.flaws ?? {};
doc.flaws.macros = flaws.map((flaw: any, i) => {
let fixable = false;
let suggestion: string | null = null;
if (flaw.name === "MacroDeprecatedError") {
fixable = true;
suggestion = "";
} else if (
flaw.name === "MacroRedirectedLinkError" &&
(!(flaw as MacroRedirectedLinkError).filepath ||
(flaw as MacroRedirectedLinkError).filepath ===
document.fileInfo.path)
) {
fixable = true;
suggestion = (flaw as MacroRedirectedLinkError).macroSource.replace(
(flaw as MacroRedirectedLinkError).redirectInfo.current,
(flaw as MacroRedirectedLinkError).redirectInfo.suggested
);
}
const id = `macro${i}`;
const explanation = flaw.error.message;
return Object.assign({ id, fixable, suggestion, explanation }, flaw);
});
}
}
// TODO: The slug should always match the folder name.
// If you edit the slug but don't correctly edit the folder it's in
// it's going to lead to confusion.
// We can use the utils.slugToFolder() function and compare
// its output with the `folder`.
validateSlug(metadata.slug);
// EmbedLiveSamples carry their token information to enrich flaw error
// messages, these should not be in the final output
$("[data-token]").removeAttr("data-token");
// Kumascript rendering can't know about FLAW_LEVELS when it's building,
// because injecting it there would cause a circular dependency.
// So, let's post-process the rendered HTML now afterwards.
// If the flaw levels for `macros` was to ignore, we can delete all the
// injected `data-flaw-src="..."` attributes.
if (options.flawLevels.get("macros") === FLAW_LEVELS.IGNORE) {
// This helps the final production built HTML since there `data-flaw-src`
// attributes on the HTML is useless.
$("[data-flaw-src]").removeAttr("data-flaw-src");
}
doc.title = metadata.title || "";
doc.mdn_url = document.url;
doc.locale = metadata.locale;
doc.native = LANGUAGES.get(doc.locale.toLowerCase())?.native;
// metadata doesn't have a browser-compat key on translated docs:
const browserCompat = allMetadata["browser-compat"];
doc.browserCompat =
browserCompat &&
(Array.isArray(browserCompat) ? browserCompat : [browserCompat]);
doc.baseline = addBaseline(doc);
// If the document contains