import { spawnSync } from "node:child_process";
import fs from "node:fs";
import { readFile } from "node:fs/promises";
import path from "node:path";
import { cwd } from "node:process";

import * as cheerio from "cheerio";
import { fileTypeFromBuffer } from "file-type";
import imagemin from "imagemin";
import imageminPngquant from "imagemin-pngquant";
import imageminMozjpeg from "imagemin-mozjpeg";
import imageminGifsicle from "imagemin-gifsicle";
import imageminSvgo from "imagemin-svgo";
import sanitizeFilename from "sanitize-filename";

import {
  ANY_ATTACHMENT_REGEXP,
  VALID_MIME_TYPES,
} from "../libs/constants/index.js";
import { FileAttachment } from "../content/index.js";
import { BLOG_ROOT } from "../libs/env/index.js";

/**
 * Escape every character that has a special meaning in a regular expression
 * so that `str` can be embedded in a `RegExp` and matched literally.
 *
 * @param str - Arbitrary text.
 * @returns `str` with each of `\ ^ $ . * + ? ( ) [ ] { } |` prefixed by a
 *   backslash.
 */
export function escapeRegExp(str: string): string {
  const specialChars = /[\\^$.*+?()[\]{}|]/g;
  // `$&` re-inserts the matched character right after the added backslash.
  return str.replace(specialChars, "\\$&");
}

/**
 * Format a byte count as a short human-readable string using 1024-based
 * units (B, KB, MB, ... YB).
 *
 * Values below 1024 are printed as-is followed by ` B`. Larger values are
 * scaled and printed with roughly three significant digits: two decimals
 * below 10, one decimal below 100, none otherwise.
 *
 * @param size - Size in bytes.
 * @returns The formatted size, e.g. `"1.50 KB"` or `"100 MB"`.
 */
export function humanFileSize(size: number): string {
  // Written as a negated `>=` so that NaN also takes this branch instead of
  // falling through to the unit lookup.
  if (!(size >= 1024)) return `${size} B`;

  const units = "KMGTPEZY";
  const lastUnit = units.length - 1;

  // Repeated division by 1024 is exact for binary floating point, which
  // avoids picking the wrong unit because of rounding in a logarithm ratio.
  // The unit index is capped so that huge values stay in the largest unit.
  let value = size;
  let unit = -1;
  while (value >= 1024 && unit < lastUnit) {
    value /= 1024;
    unit += 1;
  }

  // A value such as 1023.9996 would be displayed as "1024"; promote it to
  // the next unit (when there is one) so it reads "1.00" there instead.
  if (Math.round(value) >= 1024 && unit < lastUnit) {
    value /= 1024;
    unit += 1;
  }

  const rounded = Math.round(value);
  let str: string;
  if (rounded < 10) {
    str = value.toFixed(2);
  } else if (rounded < 100) {
    str = value.toFixed(1);
  } else {
    str = String(rounded);
  }
  return `${str} ${units[unit]}B`;
}

// We have a lot of images that *should* be external, at least for the sake
// of cleaning up, but aren't. E.g. `/@api/deki/files/247/=HTMLBlinkElement.gif`
// These get logged as external images by the flaw detection, but to actually
// be able to process them and fix the problem we need to "temporarily"
// pretend they were hosted on a remote working full domain.
// See https://github.com/mdn/yari/issues/1103 export function forceExternalURL(url: string) { if (url.startsWith("/")) { return `https://mdn.mozillademos.org${url}`; } return url; } export async function downloadAndResizeImage( src: string, out: string, basePath: string ) { // eslint-disable-next-line n/no-unsupported-features/node-builtins const response = await fetch(forceExternalURL(src)); const arrayBuffer = await response.arrayBuffer(); const imageBuffer = Buffer.from(arrayBuffer); let fileType = await fileTypeFromBuffer(imageBuffer); if ( !fileType && src.toLowerCase().endsWith(".svg") && response.headers["content-type"].toLowerCase().startsWith("image/svg+xml") ) { // If the SVG doesn't have the `` // and/or the ` { // This needs to match what we do in filecheck/checker.py return !dirent.isDirectory() && ANY_ATTACHMENT_REGEXP.test(dirent.name); }) .map((dirent) => path.join(documentDirectory, dirent.name)); } /** * Find all tags that we need to change to tell tools like Google Translate * to not translate. * * @param {Cheerio document instance} $ */ export function injectNoTranslate($) { $("pre").addClass("notranslate"); } /** * For every image and iframe, where appropriate add the `loading="lazy"` attribute. * * @param {Cheerio document instance} $ */ export function injectLoadingLazyAttributes($) { $("img:not([loading]), iframe:not([loading])").attr("loading", "lazy"); } /** * For every `` make it * `` * * * @param {Cheerio document instance} $ */ export function postProcessExternalLinks($) { $("a[href^=http]").each((i, element) => { const $a = $(element); if ($a.attr("href").startsWith("https://developer.mozilla.org")) { // This should have been removed since it's considered a flaw. // But we haven't applied all fixable flaws yet and we still have to // support translated content which is quite a long time away from // being entirely treated with the fixable flaws cleanup. 
$a.attr(
        "href",
        $a.attr("href").replace("https://developer.mozilla.org", "") || "/"
      );
      return;
    }
    $a.addClass("external");
    $a.attr("target", "_blank");
  });
}

/**
 * Normalize curriculum links in a rendered document.
 *
 * Four passes run in order, and later passes rely on the output of earlier
 * ones:
 *  1. hrefs starting with `.` are passed through `toUrl`;
 *  2. hrefs starting with `/en-US/curriculum` get a trailing `.md`
 *     (optionally followed by a `#fragment`) replaced by `/`;
 *  3. hrefs starting with `/curriculum` that end in `.md` get the same
 *     treatment and are prefixed with `/en-US`;
 *  4. hrefs starting with `/en-US/curriculum` have every `<digits>-` run
 *     removed from the part before the first `#`.
 *
 * @param $ - Cheerio document, mutated in place.
 * @param toUrl - Maps a relative href to the URL it should point at.
 */
export function postProcessCurriculumLinks(
  $: cheerio.CheerioAPI,
  toUrl: (x?: string) => string
) {
  // expand relative links
  $("a[href^=.]").each((_, node) => {
    const $link = $(node);
    const expanded = toUrl($link.attr("href"));
    $link.attr("href", expanded);
  });

  // remove trailing .md for /en-US/curriculum/*
  $("a[href^=/en-US/curriculum]").each((_, node) => {
    const $link = $(node);
    const withoutMd = $link.attr("href")?.replace(/(.*)\.md(#.*|$)/, "$1/$2");
    $link.attr("href", withoutMd);
  });

  // remove trailing .md and add locale for /curriculum/*
  $("a[href^=/curriculum]").each((_, node) => {
    const $link = $(node);
    const localized = $link
      .attr("href")
      ?.replace(/(.*)\.md(#.*|$)/, "/en-US$1/$2");
    $link.attr("href", localized);
  });

  // remove leading numbers for /en-US/curriculum/*
  // /en-US/curriculum/2-core/ -> /en-US/curriculum/core/
  $("a[href^=/en-US/curriculum]").each((_, node) => {
    const $link = $(node);
    const [head, hash] = $link.attr("href")?.split("#") || [];
    const fragment = hash ? `#${hash}` : "";
    $link.attr("href", `${head.replace(/\d+-/g, "")}${fragment}`);
  });
}

/**
 * Rewrite links that point at a file attachment of the current document.
 *
 * For every anchor whose href is not absolute, parent-relative, an in-page
 * fragment or one of a few known schemes, the href is appended to
 * `doc.mdn_url`. If `FileAttachment.findByURLWithFallback` returns a truthy
 * result for that URL, the anchor's href is replaced with it.
 *
 * @param $ - Cheerio document, mutated in place.
 * @param doc - Document object; only `doc.mdn_url` is read here.
 */
export function postLocalFileLinks($, doc) {
  // Built once rather than once per link. The alternatives are the same as
  // before, minus a duplicated `ftp:` entry.
  const notLocalFile = /^(\/|\.\.|http|#|mailto:|about:|ftp:|news:|irc:)/i;

  $("a[href]").each((i, element) => {
    const href = element.attribs.href;

    // This test is merely here to quickly bail if there's no hope to find the
    // file attachment as a local file link. There are a LOT of hyperlinks
    // throughout the content and this simple check means we can skip 99%
    // of the links, so it's presumed to be worth it.
    if (!href || notLocalFile.test(href)) {
      return;
    }

    // Plenty of the remaining hrefs are not file attachments either (for
    // example a bare relative link to another page), so many of these
    // look-ups are "false positives". Thankfully, this lookup is fast.
    const url = `${doc.mdn_url}/${href}`;
    if (FileAttachment.findByURLWithFallback(url)) {
      $(element).attr("href", url);
    }
  });
}

/**
 * Lower-case the `id` attribute of every `h4`, `h5` and `h6` element that
 * has one.
 *
 * @param $ - Cheerio document, mutated in place.
 */
export function postProcessSmallerHeadingIDs($) {
  $("h4[id], h5[id], h6[id]").each((i, heading) => {
    const currentID = heading.attribs.id;
    const lowerCased = currentID.toLowerCase();
    // Only touch the attribute when it would actually change.
    if (lowerCased !== currentID) {
      $(heading).attr("id", lowerCased);
    }
  });
}

/**
 * Build the "Table of Contents" entries for a document.
 *
 * A section of `doc.body` contributes an entry when its `type` is `prose`,
 * `browser_compatibility` or `specifications`, it has both a `value.id` and
 * a `value.title`, and it is either not flagged `value.isH3` or `withH3` is
 * set.
 *
 * @param doc - Document whose `body` is an array of sections.
 * @param withH3 - Also include sections flagged as `isH3`.
 * @returns An array of `{ text, id }` objects, in document order.
 */
export function makeTOC(doc, withH3 = false) {
  const tocSectionTypes = new Set([
    "prose",
    "browser_compatibility",
    "specifications",
  ]);

  return doc.body
    .filter(
      (section) =>
        tocSectionTypes.has(section.type) &&
        section.value.id &&
        section.value.title &&
        (!section.value.isH3 || withH3)
    )
    .map((section) => ({ text: section.value.title, id: section.value.id }));
}

/**
 * Locate the blog post source file that declares the given slug.
 *
 * Runs `rg -il "slug: <slug>" BLOG_ROOT` synchronously and returns the first
 * line of its output. Failures are logged to `console.error` and reported as
 * `null` rather than thrown.
 *
 * NOTE(review): `slug` is handed to `rg` as a search pattern, not as a fixed
 * string, and the match is not anchored, so a slug that is a prefix of
 * another one may match the other post's file. Confirm whether callers rely
 * on this.
 *
 * @param slug - Slug to search for.
 * @returns Path of the first matching file, or `null` when `BLOG_ROOT` is
 *   unset, nothing matched, or `rg` could not be run.
 */
export function findPostFileBySlug(slug: string): string | null {
  if (!BLOG_ROOT) {
    return null;
  }

  try {
    const { stdout, stderr, status, error } = spawnSync("rg", [
      "-il",
      `slug: ${slug}`,
      BLOG_ROOT,
    ]);

    // spawnSync does not throw when the process cannot be started (e.g. the
    // binary is missing); it reports the failure through `error`.
    if (error) {
      console.error(`error running rg: ${error.message}`);
      return null;
    }

    if (status === 0) {
      return stdout.toString("utf-8").split("\n")[0];
    }

    const message = stderr.toString();
    if (message) {
      console.error(`error running rg: ${message}`);
    } else {
      console.error(`Post ${slug} not found in ${BLOG_ROOT}`);
    }
  } catch {
    console.error("rg failed");
  }

  return null;
}

// Matches `/en-US/blog/<slug>` with an optional trailing slash and captures
// the slug.
const POST_URL_RE = /^\/en-US\/blog\/([^/]+)\/?$/;

/**
 * Extract the post slug from an en-US blog post URL.
 *
 * @param url - URL path such as `/en-US/blog/some-post/`.
 * @returns The slug, or `null` when `url` is not a blog post URL.
 */
export function getSlugByBlogPostUrl(url: string): string | null {
  return url.match(POST_URL_RE)?.[1] ?? null;
}

/**
 * Read and parse a JSON file.
 *
 * Paths starting with `.` are used as given. Any other path is resolved
 * inside `node_modules` under the current working directory.
 *
 * @typeParam T - Shape the caller expects; the parsed value is not validated.
 * @param jsonPath - Relative file path, or a path inside `node_modules`.
 * @returns The parsed JSON value.
 */
export async function importJSON<T>(jsonPath: string): Promise<T> {
  const resolvedPath = jsonPath.startsWith(".")
    ? jsonPath
    : path.join(cwd(), "node_modules", jsonPath);
  const json = await readFile(resolvedPath, "utf-8");
  return JSON.parse(json);
}

/**
 * Lazily split `array` into consecutive slices of at most `size` elements.
 * The last slice is shorter when the length is not a multiple of `size`.
 *
 * @typeParam T - Element type of the array.
 * @param array - Source array; it is not modified.
 * @param size - Maximum number of elements per chunk; must be greater than 0.
 * @returns A generator yielding the chunks in order.
 * @throws RangeError on first iteration when `size` is not greater than 0
 *   (zero, negative or NaN), since the loop could otherwise never advance.
 */
export function* chunks<T>(array: T[], size: number): Generator<T[]> {
  if (!(size > 0)) {
    throw new RangeError(`chunk size must be greater than 0, got ${size}`);
  }
  for (let start = 0; start < array.length; start += size) {
    yield array.slice(start, start + size);
  }
}

/**
 * Format a duration for log output.
 *
 * @param seconds - Duration in seconds.
 * @returns `"<m> minutes"` when the duration is strictly longer than 60
 *   seconds, otherwise `"<s> seconds"`; both use one decimal place.
 */
export function formatDuration(seconds: number) {
  if (seconds > 60) {
    const minutes = seconds / 60;
    return `${minutes.toFixed(1)} minutes`;
  }
  return `${seconds.toFixed(1)} seconds`;
}