import _ from 'lodash';
import path from 'path';
import slugify from 'slugify';
import fse from 'fs-extra';
import { utils } from '@stackbit/cms-core';
import { Readable } from 'stream';
import axios from 'axios';
import moment from 'moment';
import { Document, DocumentField, Logger } from '@stackbit/types';
import { readDirRecursivelyWithExtensions } from '@stackbit/utils';
import { SUPPORTED_FILE_EXTENSIONS } from '@stackbit/sdk';
/**
 * Matches a `moment_format(...)` token inside a path pattern, e.g.
 * `moment_format("YYYY-MM-DD")` or `moment_format(date, "YYYY-MM-DD")`.
 *
 * Named groups:
 *  - `param`:  optional, unquoted token name (first argument)
 *  - `format`: the quoted format string
 */
const momentFormatRe = /moment_format\(\s*(?:(?<param>.+?)\s*,\s*)?['"](?<format>[^'"]+)['"]\s*\)/;
/**
 * Same as `momentFormatRe`, but for input whose parentheses are preceded by a
 * literal backslash (`moment_format\(...\)`), i.e. a pattern that has already
 * been escaped for use inside a regular expression.
 */
const momentFormatEscapedRe = /moment_format\\\(\s*(?:(?<param>.+?)\s*,\s*)?['"](?<format>[^'"]+)['"]\s*\\\)/;
/** Token name used for `moment_format()` when no explicit name is given. */
export const momentDateToken = 'date';
/** File extensions (without the leading dot) that are treated as markdown files. */
const MARKDOWN_FILE_EXTENSIONS = ['md', 'mdx', 'markdown'];
/**
 * Slugifies every `/`-separated segment of a path individually, so the
 * separators themselves are preserved.
 *
 * @param slug Path-like string whose segments should be slugified.
 * @returns The segments, each passed through `slugify`, joined with `/`.
 */
export function sanitizeSlug(slug: string): string {
    const sanitizedSegments: string[] = [];
    for (const segment of slug.split('/')) {
        sanitizedSegments.push(slugify(segment));
    }
    return sanitizedSegments.join('/');
}
/**
 * Lists the token names that appear between curly braces in a path pattern.
 *
 * @example
 * extractTokensFromString('pages/{slug}.md')
 * // => ['slug']
 *
 * @example
 * // A moment_format() token with a single argument is reported as "date".
 * extractTokensFromString('pages/blog/{moment_format("YYYY-MM-DD")}-{slug}.md')
 * // => ['date', 'slug']
 *
 * @example
 * // With two arguments, the first (unquoted) argument is the token name.
 * extractTokensFromString('pages/blog/{moment_format(date, "YYYY-MM-DD")}-{slug}.md')
 * // => ['date', 'slug']
 *
 * @param input Path pattern to scan.
 * @returns Token names in order of appearance.
 */
export function extractTokensFromString(input: string): string[] {
    const rawTokens: string[] = input.match(/(?<={)[^}]+(?=})/g) ?? [];
    const tokenNames: string[] = [];
    for (const rawToken of rawTokens) {
        const momentMatch = rawToken.match(momentFormatRe);
        const tokenName = momentMatch ? momentMatch.groups!.param || momentDateToken : rawToken;
        // drop falsy entries
        if (tokenName) {
            tokenNames.push(tokenName);
        }
    }
    return tokenNames;
}
/**
 * Interpolates a url or file path pattern with values taken from `data`.
 *
 * Every `{token}` in the template is replaced with the value found in `data`
 * under that token name (looked up with lodash `get`). A token with no value
 * in `data` is substituted with its own `{token}` text. A
 * `{moment_format(...)}` token is replaced with a formatted date: the date is
 * read from `data` under the token's name ("date" by default) and falls back
 * to the current date when that value is missing or falsy.
 *
 * The interpolated string is then passed through `sanitizeSlug` and
 * `path.normalize`.
 *
 * @example
 * interpolatePath('posts/{slug}', { slug: 'hello' })
 * // => 'posts/hello'
 * interpolatePath('_posts/{moment_format("YYYY-MM-DD")}-{slug}.md', { slug: 'hello' })
 * // => e.g. '_posts/2020-11-16-hello.md' (uses the current date)
 *
 * @param pathTemplate Pattern containing `{token}` placeholders.
 * @param data Values used to resolve the placeholders.
 * @returns The interpolated, sanitized and normalized path.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- token values are arbitrary user data
export function interpolatePath(pathTemplate: string, data: Record<string, any>): string {
    const interpolatedPath = pathTemplate.replace(/{(.*?)}/g, (_match: string, tokenName: string) => {
        const momentFormatMatch = tokenName.match(momentFormatRe);
        if (momentFormatMatch?.groups) {
            const { param, format } = momentFormatMatch.groups;
            const date = _.get(data, param ?? momentDateToken) || new Date();
            return moment(date).format(format);
        }
        return _.get(data, tokenName, `{${tokenName}}`);
    });
    return path.normalize(sanitizeSlug(interpolatedPath));
}
/**
 * Reads the creation and modification timestamps of a file.
 *
 * If the file cannot be stat'ed, the error is ignored and both timestamps
 * fall back to the current time.
 *
 * @param filePath Path of the file to inspect.
 * @returns ISO-8601 strings: `createdAt` (birthtime, else mtime) and `updatedAt` (mtime).
 */
export async function getFileDates(filePath: string): Promise<{ createdAt: string; updatedAt: string }> {
    let stats: fse.Stats | null;
    try {
        stats = await fse.stat(filePath);
    } catch (err) {
        // best effort: fall through to the "now" defaults below
        stats = null;
    }
    const createdDate = stats?.birthtime ?? stats?.mtime ?? new Date();
    const updatedDate = stats?.mtime ?? new Date();
    return {
        createdAt: createdDate.toISOString(),
        updatedAt: updatedDate.toISOString()
    };
}
/**
 * Parses a file with `utils.parseFile` and returns its data.
 *
 * For markdown files (see `MARKDOWN_FILE_EXTENSIONS`) whose parsed result has
 * both `frontmatter` and `markdown` properties, the result is flattened: the
 * frontmatter properties are spread into the returned object and the markdown
 * body is stored under `markdown_content`.
 *
 * @param filePath Path of the file to parse.
 * @returns The parsed (and, for markdown, flattened) data.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape depends on the file contents
export async function getFileData(filePath: string): Promise<any> {
    const extension = path.extname(filePath).substring(1);
    const data = await utils.parseFile(filePath);
    const isMarkdownFile = MARKDOWN_FILE_EXTENSIONS.includes(extension);
    if (isMarkdownFile && _.has(data, 'frontmatter') && _.has(data, 'markdown')) {
        return {
            ...data.frontmatter,
            markdown_content: data.markdown
        };
    }
    return data;
}
/**
 * Writes `data` to `filePath` through `utils.outputDataIfNeeded`.
 *
 * For markdown files (see `MARKDOWN_FILE_EXTENSIONS`) the flat `data` object
 * is split first: `markdown_content` becomes `markdown`, every other property
 * goes into `frontmatter`. If the file already exists, its parsed content is
 * used as the base and `markdown`/`frontmatter` are overridden on top of it.
 *
 * @param filePath Destination file path.
 * @param data Data to write; for markdown files the body is expected under `markdown_content`.
 * @returns Whatever `utils.outputDataIfNeeded` resolves with.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- data shape is caller-defined
export async function saveFileData(filePath: string, data: any): Promise<boolean> {
    let dataToWrite = data;
    const extension = path.extname(filePath).substring(1);
    if (MARKDOWN_FILE_EXTENSIONS.includes(extension)) {
        const existingData = (await fse.pathExists(filePath)) ? await utils.parseFile(filePath) : {};
        dataToWrite = {
            ...existingData,
            markdown: data.markdown_content,
            frontmatter: _.omit(data, ['markdown_content'])
        };
    }
    return utils.outputDataIfNeeded(filePath, dataToWrite);
}
/**
 * Decodes a base64 string and writes the resulting bytes to `filePath`,
 * creating the parent directory if needed.
 *
 * @param filePath Destination file path.
 * @param data Base64-encoded content.
 * @returns A promise that resolves once the write stream has finished and
 *          rejects if either stream emits an error.
 */
export async function saveBase64Data(filePath: string, data: string): Promise<void> {
    const buffer = Buffer.from(data, 'base64');
    await fse.ensureDir(path.dirname(filePath));
    const readStream = Readable.from(buffer);
    const writeStream = fse.createWriteStream(filePath);
    return new Promise<void>((resolve, reject) => {
        // attach listeners before piping; pipe() does not forward source errors
        readStream.on('error', reject);
        writeStream.on('error', reject).on('finish', () => resolve());
        readStream.pipe(writeStream);
    });
}
/**
 * Downloads `url` as a stream (via axios) and writes it to `filePath`,
 * creating the parent directory if needed.
 *
 * @param filePath Destination file path.
 * @param url URL to download.
 * @returns A promise that resolves once the write stream has finished and
 *          rejects if the download stream or the write stream emits an error.
 */
export async function saveFromUrl(filePath: string, url: string): Promise<void> {
    const response = await axios({
        responseType: 'stream',
        url
    });
    await fse.ensureDir(path.dirname(filePath));
    const writeStream = fse.createWriteStream(filePath);
    return new Promise<void>((resolve, reject) => {
        // pipe() neither forwards source errors nor closes the destination,
        // so without this listener a failed download would leave the promise pending
        response.data.on('error', (err: Error) => {
            writeStream.destroy();
            reject(err);
        });
        writeStream.on('error', reject).on('finish', () => resolve());
        response.data.pipe(writeStream);
    });
}
/**
 * Rewrites the path of every markdown image (`![alt](path "title")`) in a
 * markdown string.
 *
 * @param markdown Markdown source text.
 * @param handler Receives each image path and returns its replacement.
 * @returns The markdown with image paths replaced; alt text and optional
 *          titles are kept as they were.
 */
export function processMarkdownImagePaths(markdown: string, handler: (imagePath: string) => string): string {
    const imageRe = /(!\[[^\]]*])\(([^)\s]+?)(\s+"[^"]*")?\)/g;
    return markdown.replace(imageRe, (_whole: string, altPart: string, imagePath: string, titlePart?: string) => {
        const rewrittenPath = handler(imagePath);
        return `${altPart}(${rewrittenPath}${titlePart || ''})`;
    });
}
/**
 * Extracts tokens and their values from `filePath` based on `filePathPattern`.
 *
 * The pattern is converted into a regular expression in which every `{token}`
 * becomes a named capturing group; the groups matched against `filePath` are
 * returned. Token names are inserted verbatim as group names, so they must be
 * valid RegExp group names. If `filePath` does not match, an empty object is
 * returned.
 *
 * @example
 * extractTokensAndValuesFromFilePath(
 *   '_posts/2020-09-20-hello.md',
 *   '_posts/{moment_format("YYYY-MM-DD")}-{slug}.md'
 * ) // => { date: '2020-09-20', slug: 'hello' }
 *
 * @param filePath Concrete file path to extract values from.
 * @param filePathPattern Pattern with `{token}` placeholders.
 * @returns Object mapping token names to extracted values; a token whose
 *          group did not participate in the match maps to an empty string.
 */
export function extractTokensAndValuesFromFilePath(filePath: string, filePathPattern: string): Record<string, string> {
    // filePath: '_posts/2020-11-04-hello.md'
    // filePathPattern: '_posts/{moment_format("YYYY-MM-DD")}-{slug}.md'
    const usedTokens = new Set<string>();
    const regExpStr = filePathPattern
        // escape characters that may conflict with regular expression
        .replace(/[\\.*+\-?^$|()[\]]/g, '\\$&')
        // replace tokens with named capturing group: (?<tokenName>...)
        .replace(
            /{([^}]+)}(\/?)/g,
            (_match: string, tokenName: string, slashAfter: string, offset: number, escapedPattern: string) => {
                const momentFormatMatch = tokenName.match(momentFormatEscapedRe);
                if (momentFormatMatch?.groups) {
                    // for now use naive approach, assume moment's basic format tokens
                    // D, DD, M, MM, YY, YYYY, H, HH, m, mm (everything else will fail)
                    const dateRe = (momentFormatMatch.groups.format ?? '').replace(/[MYDHm]+/g, '\\d+');
                    const dateToken = momentFormatMatch.groups.param || momentDateToken;
                    return `(?<${dateToken}>${dateRe})${slashAfter}`;
                }
                // if token was used, assume it has the same value
                // _posts/{slug}/{moment_format("YYYY-MM-DD")}-{slug}.md
                // _posts/welcome-to-jekyll/2020-08-29-welcome-to-jekyll.md
                if (usedTokens.has(tokenName)) {
                    return `(?:.+?)${slashAfter}`;
                }
                usedTokens.add(tokenName);
                // if token is left and right bounded by slashes or the beginning
                // of the string: /pages/{slug}/index.md, then generate regular
                // expression that puts the whole token with the following slash
                // as an optional match: /\/pages\/(?:(?<slug>.+?)\/)?index\.md/
                // Such that the following file path will match '/pages/index.md'
                // this regular expression and produce a match with 'undefined' slug
                // named capturing group which will be converted to an empty string.
                //
                // `offset` is an index into the escaped pattern (the string being
                // replaced), not into `filePathPattern`, so the preceding character
                // must be read from `escapedPattern`.
                const tokenLeftBounded = offset === 0 || escapedPattern[offset - 1] === '/';
                const tokenRightBounded = slashAfter === '/';
                if (tokenLeftBounded && tokenRightBounded) {
                    return `(?:(?<${tokenName}>.+?)/)?`;
                }
                return `(?<${tokenName}>.+?)${slashAfter}`;
            }
        );
    // regExpStr = '_posts/(?<date>\d+\-\d+\-\d+)\-(?<slug>.+?)\.md'
    const regExp = new RegExp(regExpStr);
    const match = regExp.exec(filePath);
    // match.groups = { date: '2020-11-04', slug: 'hello' }
    const tokenValues: Record<string, string> = {};
    for (const [tokenName, value] of Object.entries(match?.groups ?? {})) {
        tokenValues[tokenName] = value === undefined ? '' : value;
    }
    return tokenValues;
}
/**
 * Callback invoked for every visited document field.
 * `fieldPath` holds the keys leading to the field: field names, list indexes
 * and, for localized fields, locale codes.
 */
type ForEachFieldHandler = (field: DocumentField, fieldPath: (string | number)[]) => Promise<void>;
/**
 * Walks all fields of a document, starting from an empty field path.
 *
 * @param document Document whose `fields` are traversed.
 * @param handler Callback passed on to the field traversal.
 */
export async function forEachFieldInDocument(document: Document, handler: ForEachFieldHandler): Promise<void> {
    return forEachFieldInFields(document.fields, [], handler);
}
/**
 * Walks a map of fields sequentially, extending `fieldPath` with each field's
 * name before descending into it.
 *
 * @param documentFields Fields keyed by field name.
 * @param fieldPath Path leading to `documentFields`; it is not mutated.
 * @param handler Callback passed on to the field traversal.
 */
export async function forEachFieldInFields(
    documentFields: Record<string, DocumentField>,
    fieldPath: (string | number)[],
    handler: ForEachFieldHandler
): Promise<void> {
    for (const [fieldName, field] of Object.entries(documentFields)) {
        await forEachField(field, fieldPath.concat(fieldName), handler);
    }
}
/**
 * Visits a single document field, recursing into nested fields.
 *
 * - Non-container fields: `handler` is called with the field itself, or, for a
 *   localized field, once per locale with the locale code appended to the path.
 * - 'model' / 'object' fields: their nested `fields` are traversed (per locale
 *   when localized, with the locale code appended to the path).
 * - 'list' fields: every item is visited with its index appended to the path
 *   (preceded by the locale code when localized).
 *
 * A localized field with empty `locales` is skipped. Calls are awaited one
 * after another.
 */
async function forEachField(documentField: DocumentField, fieldPath: (string | number)[], handler: ForEachFieldHandler) {
    switch (documentField.type) {
        case 'string':
        case 'text':
        case 'html':
        case 'slug':
        case 'url':
        case 'color':
        case 'boolean':
        case 'number':
        case 'date':
        case 'datetime':
        case 'enum':
        case 'file':
        case 'json':
        case 'style':
        case 'markdown':
        case 'image':
        case 'reference':
        case 'cross-reference':
        case 'richText': {
            if (!documentField.localized) {
                await handler(documentField, fieldPath);
                return;
            }
            if (_.isEmpty(documentField.locales)) {
                return;
            }
            for (const localizedField of Object.values(documentField.locales)) {
                await handler(localizedField, [...fieldPath, localizedField.locale]);
            }
            return;
        }
        case 'model':
        case 'object': {
            if (!documentField.localized) {
                await forEachFieldInFields(documentField.fields, fieldPath, handler);
                return;
            }
            if (_.isEmpty(documentField.locales)) {
                return;
            }
            for (const localizedField of Object.values(documentField.locales)) {
                await forEachFieldInFields(localizedField.fields, [...fieldPath, localizedField.locale], handler);
            }
            return;
        }
        case 'list': {
            if (!documentField.localized) {
                for (const [itemIndex, listItem] of documentField.items.entries()) {
                    await forEachField(listItem, [...fieldPath, itemIndex], handler);
                }
                return;
            }
            if (_.isEmpty(documentField.locales)) {
                return;
            }
            for (const localizedField of Object.values(documentField.locales)) {
                for (const [itemIndex, listItem] of localizedField.items.entries()) {
                    await forEachField(listItem, [...fieldPath, localizedField.locale, itemIndex], handler);
                }
            }
            return;
        }
        default: {
            // compile-time exhaustiveness check: every field type must be handled above
            const _exhaustiveCheck: never = documentField;
            return _exhaustiveCheck;
        }
    }
}
/**
 * Recursively reads all files with a supported extension under
 * `directoryPath`, parses each one with `getFileData`, and passes the result
 * to `handler`. Files are processed one after another.
 *
 * An error thrown while loading a file or from within `handler` is logged as
 * a warning and the remaining files are still processed.
 *
 * @param directoryPath Directory to scan.
 * @param logger Logger that receives a warning for every failed file.
 * @param handler Called with the file path as returned by the directory scan,
 *                that path joined onto `directoryPath`, and the parsed data.
 */
export async function readFilesFromDirectory(
    directoryPath: string,
    logger: Logger,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- parsed data shape depends on the file
    handler: (relFilePath: string, fullFilePath: string, data: any) => Promise<void>
): Promise<void> {
    const filePaths = await readDirRecursivelyWithExtensions(directoryPath, SUPPORTED_FILE_EXTENSIONS);
    for (const filePath of filePaths) {
        const fullFilePath = path.join(directoryPath, filePath);
        try {
            const data = await getFileData(fullFilePath);
            await handler(filePath, fullFilePath, data);
        } catch (err) {
            logger.warn('Error loading file: ' + filePath, err);
        }
    }
}