import _ from 'lodash';
import path from 'path';
import slugify from 'slugify';
import fse from 'fs-extra';
import { utils } from '@stackbit/cms-core';
import { Readable } from 'stream';
import axios from 'axios';
import moment from 'moment';
import { Document, DocumentField, Logger } from '@stackbit/types';
import { readDirRecursivelyWithExtensions } from '@stackbit/utils';
import { SUPPORTED_FILE_EXTENSIONS } from '@stackbit/sdk';

/**
 * Matches `moment_format("FORMAT")` or `moment_format(param, "FORMAT")` inside
 * a path token. `param` (optional) names the token, `format` is the moment
 * format string.
 */
const momentFormatRe = /moment_format\(\s*(?:(?<param>.+?)\s*,\s*)?['"](?<format>[^'"]+)['"]\s*\)/;

/**
 * Same as `momentFormatRe`, but for input whose parentheses were already
 * regex-escaped (i.e. `moment_format\(...\)`).
 */
const momentFormatEscapedRe = /moment_format\\\(\s*(?:(?<param>.+?)\s*,\s*)?['"](?<format>[^'"]+)['"]\s*\\\)/;

/** Default token name produced by a single-argument `moment_format()`. */
export const momentDateToken = 'date';

const MARKDOWN_FILE_EXTENSIONS = ['md', 'mdx', 'markdown'];

/**
 * Slugifies every '/'-separated segment of `slug`, keeping the separators.
 *
 * @param slug Slash separated path or slug.
 * @returns The path with each segment passed through `slugify`.
 */
export function sanitizeSlug(slug: string): string {
    return slug
        .split('/')
        .map((part) => slugify(part))
        .join('/');
}

/**
 * Extract token names from file path pattern.
 *
 * extractTokensFromString('pages/{slug}.md')
 * => ['slug']
 *
 * By default, the token produced by moment_format() is "date"
 * extractTokensFromString('pages/blog/{moment_format("YYYY-MM-DD")}-{slug}.md')
 * => ['date', 'slug']
 *
 * moment_format() can also receive two arguments, in this case, the first
 * argument specifies the name of the token. This argument should not be quoted.
 * extractTokensFromString('pages/blog/{moment_format(date, "YYYY-MM-DD")}-{slug}.md')
 * => ['date', 'slug']
 *
 * @param input File path pattern containing `{token}` placeholders.
 * @returns Token names in order of appearance.
 */
export function extractTokensFromString(input: string): string[] {
    const tokens: string[] = input.match(/(?<={)[^}]+(?=})/g) ?? [];
    return _.compact(
        tokens.map((token) => {
            const momentFormatMatch = token.match(momentFormatRe);
            if (momentFormatMatch) {
                return momentFormatMatch.groups?.param || momentDateToken;
            }
            return token;
        })
    );
}

/**
 * Interpolates url or file path pattern from data.
 * If a token does not exist in data, the original `{token}` text is
 * substituted back before the result is sanitized and normalized.
 *
 * @example
 * interpolatePath('posts/{slug}', { slug: 'hello' })
 * => 'posts/hello'
 * interpolatePath('_posts/{moment_format("YYYY-MM-DD")}-{slug}.md', { slug: 'hello' })
 * => '_posts/2020-11-16-hello.md'
 *
 * @param pathTemplate Pattern with `{token}` placeholders.
 * @param data Values looked up (via `_.get`) by token name.
 * @returns The interpolated path after `sanitizeSlug` and `path.normalize`.
 */
export function interpolatePath(pathTemplate: string, data: Record<string, any>): string {
    const interpolatedPath = pathTemplate.replace(/{(.*?)}/g, (_match, tokenName: string) => {
        const momentFormatMatch = tokenName.match(momentFormatRe);
        if (momentFormatMatch) {
            // use the date found in data, or "now" when it is missing/falsy
            const date = _.get(data, momentFormatMatch.groups?.param ?? momentDateToken) || new Date();
            return moment(date).format(momentFormatMatch.groups?.format);
        }
        return _.get(data, tokenName, `{${tokenName}}`);
    });
    return path.normalize(sanitizeSlug(interpolatedPath));
}

/**
 * Returns creation and modification timestamps of a file as ISO strings.
 * If the file cannot be stat'ed, the current time is used for both.
 *
 * @param filePath Path of the file to inspect.
 */
export async function getFileDates(filePath: string): Promise<{ createdAt: string; updatedAt: string }> {
    let fileStats: fse.Stats | null = null;
    try {
        fileStats = await fse.stat(filePath);
    } catch (err) {
        // best effort: fall back to the current time below
    }
    return {
        createdAt: (fileStats?.birthtime ?? fileStats?.mtime ?? new Date()).toISOString(),
        updatedAt: (fileStats?.mtime ?? new Date()).toISOString()
    };
}

/**
 * Parses a file with `utils.parseFile`. For markdown files whose parsed result
 * has both `frontmatter` and `markdown`, the result is flattened to the
 * frontmatter fields plus a `markdown_content` field.
 *
 * @param filePath Path of the file to read.
 */
export async function getFileData(filePath: string): Promise<any> {
    const extension = path.extname(filePath).substring(1);
    let data = await utils.parseFile(filePath);
    if (MARKDOWN_FILE_EXTENSIONS.includes(extension) && _.has(data, 'frontmatter') && _.has(data, 'markdown')) {
        data = {
            ...data.frontmatter,
            markdown_content: data.markdown
        };
    }
    return data;
}

/**
 * Writes `data` to `filePath` via `utils.outputDataIfNeeded`. For markdown
 * files, `markdown_content` becomes the markdown body and all other fields
 * become the frontmatter; other keys of an already existing parsed file are
 * preserved.
 *
 * NOTE(review): the `Promise<boolean>` return type assumes that is what
 * `utils.outputDataIfNeeded` resolves to - confirm against @stackbit/cms-core.
 *
 * @param filePath Destination file path.
 * @param data Data in the shape returned by `getFileData`.
 */
export async function saveFileData(filePath: string, data: any): Promise<boolean> {
    let dataToWrite = data;
    const extension = path.extname(filePath).substring(1);
    if (MARKDOWN_FILE_EXTENSIONS.includes(extension)) {
        const existingData = (await fse.pathExists(filePath)) ? await utils.parseFile(filePath) : {};
        dataToWrite = {
            ...existingData,
            markdown: data.markdown_content,
            frontmatter: _.omit(data, ['markdown_content'])
        };
    }
    return utils.outputDataIfNeeded(filePath, dataToWrite);
}

/**
 * Pipes `source` into a newly created file at `filePath`, creating the parent
 * directory first. Rejects on errors of either stream; `pipe()` does not
 * forward source errors, so they are handled explicitly.
 */
async function pipeStreamToFile(source: Readable, filePath: string): Promise<void> {
    await fse.ensureDir(path.dirname(filePath));
    return new Promise<void>((resolve, reject) => {
        const writeStream = fse.createWriteStream(filePath);
        source.on('error', (err: Error) => {
            // release the file handle, otherwise the write stream stays open forever
            writeStream.destroy();
            reject(err);
        });
        writeStream.on('error', reject).on('finish', resolve);
        source.pipe(writeStream);
    });
}

/**
 * Decodes base64 `data` and writes the resulting bytes to `filePath`.
 *
 * @param filePath Destination file path (parent directories are created).
 * @param data Base64 encoded content.
 */
export async function saveBase64Data(filePath: string, data: string): Promise<void> {
    const buffer = Buffer.from(data, 'base64');
    return pipeStreamToFile(Readable.from(buffer), filePath);
}

/**
 * Downloads `url` and streams the response body to `filePath`.
 *
 * @param filePath Destination file path (parent directories are created).
 * @param url URL to download.
 */
export async function saveFromUrl(filePath: string, url: string): Promise<void> {
    const response = await axios({
        responseType: 'stream',
        url
    });
    return pipeStreamToFile(response.data, filePath);
}

/**
 * Rewrites the path of every markdown image (`![alt](path "title")`) using
 * `handler`, leaving alt text, title and all surrounding text untouched.
 *
 * @param markdown Markdown source.
 * @param handler Maps an original image path to its replacement.
 * @returns Markdown with the image paths replaced.
 */
export function processMarkdownImagePaths(markdown: string, handler: (imagePath: string) => string): string {
    const re = /(!\[[^\]]*])\(([^)\s]+?)(\s+"[^"]*")?\)/g;
    return markdown.replace(re, (_match, altText: string, imagePath: string, title?: string) => {
        return `${altText}(${handler(imagePath)}${title || ''})`;
    });
}

/**
 * Extract tokens and their values from filePath based on the filePathPattern.
 *
 * @example
 * extractTokensAndValuesFromFilePath(
 *   '_posts/2020-09-20-hello.md',
 *   '_posts/{moment_format("YYYY-MM-DD")}-{slug}.md'
 * ) => { date: '2020-09-20', slug: 'hello' }
 *
 * @param filePath Actual file path.
 * @param filePathPattern Pattern with `{token}` placeholders.
 * @returns Object with extracted tokens and their values; an empty object when
 *   the path does not match the pattern.
 */
export function extractTokensAndValuesFromFilePath(filePath: string, filePathPattern: string): Record<string, string> {
    // filePath: '_posts/2020-11-04-hello.md'
    // filePathPattern: '_posts/{moment_format("YYYY-MM-DD")}-{slug}.md'
    const usedTokens = new Set<string>();
    const regExpStr = filePathPattern
        // escape characters that may conflict with regular expression
        .replace(/[\\.*+\-?^$|()[\]]/g, '\\$&')
        // replace tokens with named capturing group: {x} => (?<x>.+?)
        .replace(/{([^}]+)}(\/?)/g, (_match, tokenName: string, slashAfter: string, offset: number, escapedPattern: string) => {
            const momentFormatMatch = tokenName.match(momentFormatEscapedRe);
            if (momentFormatMatch) {
                // for now use naive approach, assume moment's basic format tokens
                // D, DD, M, MM, YY, YYYY, H, HH, m, mm (everything else will fail)
                const dateFormat = momentFormatMatch.groups?.format ?? '';
                const dateRe = dateFormat.replace(/[MYDHm]+/g, '\\d+');
                const dateToken = momentFormatMatch.groups?.param || momentDateToken;
                return `(?<${dateToken}>${dateRe})${slashAfter}`;
            }
            // if token was used, assume it has the same value
            // _posts/{slug}/{moment_format("YYYY-MM-DD")}-{slug}.md
            // _posts/welcome-to-jekyll/2020-08-29-welcome-to-jekyll.md
            if (usedTokens.has(tokenName)) {
                return `(?:.+?)${slashAfter}`;
            }
            usedTokens.add(tokenName);
            // if token is left and right bounded by slashes or the beginning
            // of the string: /pages/{slug}/index.md, then generate regular
            // expression that puts the whole token with the following slash
            // as an optional match: /pages/(?:(?<slug>.+?)/)?index\.md
            // Such that the file path '/pages/index.md' will match this
            // regular expression and produce a match with 'undefined' slug
            // named capturing group which will be converted to an empty string.
            //
            // `offset` is an index into the escaped pattern this callback runs
            // on, so the preceding character must be read from that string and
            // not from the unescaped filePathPattern.
            const tokenLeftBounded = offset === 0 || escapedPattern[offset - 1] === '/';
            const tokenRightBounded = slashAfter === '/';
            if (tokenLeftBounded && tokenRightBounded) {
                return `(?:(?<${tokenName}>.+?)/)?`;
            }
            return `(?<${tokenName}>.+?)${slashAfter}`;
        });
    // regExpStr = '_posts/(?<date>\d+\-\d+\-\d+)\-(?<slug>.+?)\.md'
    const regExp = new RegExp(regExpStr);
    const match = regExp.exec(filePath);
    // match.groups = { date: '2020-11-04', slug: 'hello' }
    return _.mapValues(match?.groups, (value) => (typeof value === 'undefined' ? '' : value));
}

/** Callback invoked for every leaf field together with its path inside the document. */
type ForEachFieldHandler = (field: DocumentField, fieldPath: (string | number)[]) => Promise<void>;

/**
 * Sequentially invokes `handler` for every leaf field of `document`.
 *
 * @param document Document whose `fields` are traversed.
 * @param handler Async callback receiving the field and its path.
 */
export async function forEachFieldInDocument(document: Document, handler: ForEachFieldHandler): Promise<void> {
    return forEachFieldInFields(document.fields, [], handler);
}

/**
 * Sequentially traverses every field in `documentFields`, prefixing the
 * reported field paths with `fieldPath`.
 *
 * @param documentFields Map of field name to field.
 * @param fieldPath Path of the object that holds `documentFields`.
 * @param handler Async callback receiving the field and its path.
 */
export async function forEachFieldInFields(
    documentFields: Record<string, DocumentField>,
    fieldPath: (string | number)[],
    handler: ForEachFieldHandler
): Promise<void> {
    for (const [fieldName, field] of Object.entries(documentFields)) {
        await forEachField(field, fieldPath.concat(fieldName), handler);
    }
}

/**
 * Visits a single field: leaf fields are passed to `handler`, while
 * `model`/`object` fields and `list` items are traversed recursively.
 * For localized fields every locale is visited and the locale code is
 * appended to the field path.
 */
async function forEachField(documentField: DocumentField, fieldPath: (string | number)[], handler: ForEachFieldHandler): Promise<void> {
    switch (documentField.type) {
        case 'string':
        case 'text':
        case 'html':
        case 'slug':
        case 'url':
        case 'color':
        case 'boolean':
        case 'number':
        case 'date':
        case 'datetime':
        case 'enum':
        case 'file':
        case 'json':
        case 'style':
        case 'markdown':
        case 'image':
        case 'reference':
        case 'cross-reference':
        case 'richText': {
            if (documentField.localized) {
                if (_.isEmpty(documentField.locales)) {
                    return;
                }
                for (const locale of Object.values(documentField.locales)) {
                    await handler(locale, fieldPath.concat(locale.locale));
                }
            } else {
                await handler(documentField, fieldPath);
            }
            break;
        }
        case 'model':
        case 'object': {
            if (documentField.localized) {
                if (_.isEmpty(documentField.locales)) {
                    return;
                }
                for (const locale of Object.values(documentField.locales)) {
                    await forEachFieldInFields(locale.fields, fieldPath.concat(locale.locale), handler);
                }
            } else {
                await forEachFieldInFields(documentField.fields, fieldPath, handler);
            }
            break;
        }
        case 'list': {
            if (documentField.localized) {
                if (_.isEmpty(documentField.locales)) {
                    return;
                }
                for (const locale of Object.values(documentField.locales)) {
                    for (const [index, item] of locale.items.entries()) {
                        await forEachField(item, fieldPath.concat(locale.locale, index), handler);
                    }
                }
            } else {
                for (const [index, item] of documentField.items.entries()) {
                    await forEachField(item, fieldPath.concat(index), handler);
                }
            }
            break;
        }
        default: {
            // compile-time exhaustiveness check over DocumentField['type']
            const _exhaustiveCheck: never = documentField;
            return _exhaustiveCheck;
        }
    }
}

/**
 * Recursively reads all files with supported extensions under `directoryPath`
 * and passes each one, parsed, to `handler`. Files are processed sequentially.
 * An error thrown while loading a file or by `handler` is logged as a warning
 * and does not stop the iteration.
 *
 * @param directoryPath Root directory to scan.
 * @param logger Logger used to report files that failed.
 * @param handler Async callback receiving the relative path, full path and parsed data.
 */
export async function readFilesFromDirectory(
    directoryPath: string,
    logger: Logger,
    handler: (relFilePath: string, fullFilePath: string, data: any) => Promise<void>
): Promise<void> {
    const filePaths = await readDirRecursivelyWithExtensions(directoryPath, SUPPORTED_FILE_EXTENSIONS);
    for (const filePath of filePaths) {
        const fullFilePath = path.join(directoryPath, filePath);
        try {
            const data = await getFileData(fullFilePath);
            await handler(filePath, fullFilePath, data);
        } catch (err) {
            logger.warn('Error loading file: ' + filePath, err);
        }
    }
}