/**
 * Build-time batch compilation for markdown/MDX files.
 * @module vite-plugin/batch-compiler
 */

import { existsSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { createRequire } from 'node:module';
import path from 'node:path';
import type { SourceMapInput } from 'rollup';
import type { Registry } from 'xmdx/registry';
import type { ResolvedConfig } from 'vite';
import {
  batchTransformJsx,
  runParallelJsxTransform,
  DiskCache,
  normalizeStarlightComponents,
  debugLog,
  debugTime,
  debugTimeEnd,
} from '@xmdx/vite';
import type {
  ShikiManager,
  ExpressiveCodeManager,
  EsbuildCacheEntry,
  PersistentCache,
  XmdxBinding,
  XmdxCompiler,
  XmdxPluginOptions,
} from '@xmdx/vite';
import type { Transform } from 'xmdx/pipeline';
import { wrapMdxModule } from './mdx-wrapper/index.js';
import type { PluginHooks, TransformContext, MdxImportHandlingOptions } from '../types.js';
import type { ExpressiveCodeConfig } from 'xmdx/utils/config';
import { asSourceMap, asBinding, parseJsonRecord } from '../ops/type-narrowing.js';
import { detectProblematicMdxPatterns } from 'xmdx/utils/mdx-detection';
import { VIRTUAL_MODULE_PREFIX, OUTPUT_EXTENSION, DEFAULT_IGNORE_PATTERNS } from '../constants.js';

// `glob` is loaded lazily through a CommonJS-style require so that a missing
// install surfaces as an actionable error from handleBuildStart.
const require = createRequire(import.meta.url);

/** Maximum number of source files read (and disk-cache probed) at the same time. */
const FILE_READ_CONCURRENCY = 64;

/** Maximum number of compile results pushed through the transform pipeline at once. */
const PIPELINE_CONCURRENCY = 50;

/** Minimum batch size at which the worker-based JSX transform is attempted. */
const PARALLEL_JSX_THRESHOLD = 100;

/**
 * Runs async tasks with bounded concurrency using a worker-pool pattern.
 * Unlike chunked Promise.all, this keeps all worker slots busy without
 * waiting at chunk boundaries.
 *
 * @param items - Inputs to process; results are returned in the same order.
 * @param concurrency - Desired number of simultaneous tasks. Values below 1
 *   (or NaN) are treated as 1 so that every item is still processed.
 * @param fn - Async mapper invoked once per item.
 * @returns The mapped results, index-aligned with `items`.
 */
async function mapConcurrent<T, R>(
  items: readonly T[],
  concurrency: number,
  fn: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = new Array<R>(items.length);
  if (items.length === 0) return results;

  // Guard against 0 / negative / NaN: without at least one worker the
  // function would resolve immediately with an array of empty slots.
  const requested = Math.floor(concurrency);
  const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);

  let nextIndex = 0;

  async function worker(): Promise<void> {
    // Index claiming is synchronous, so no two workers take the same slot.
    while (nextIndex < items.length) {
      const index = nextIndex++;
      results[index] = await fn(items[index]!);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

/** A single file queued for batch compilation. */
interface BatchInput {
  id: string;
  /** Source after all preprocess hooks have run. */
  source: string;
  /** Source exactly as read from disk. */
  originalSource: string;
  filepath: string;
  /** Hash of the preprocessed source, used as the disk-cache key. */
  contentHash: string;
}

/** Result of {@link batchReadAndDetectFallbacks}. */
interface ReadAndDetectResult {
  inputs: BatchInput[];
  sourceHashes: Map<string, string>;
  diskCacheHits: number;
}

interface BatchStats {
  succeeded: number;
  total: number;
  failed: number;
  processingTimeMs: number;
}

interface BatchCompileStatsResult {
  md: BatchStats;
  mdx: BatchStats;
}

interface BuildState {
  buildPassCount: number;
  diskCache: DiskCache | null;
}

/** Everything {@link handleBuildStart} needs from the surrounding plugin. */
export interface BuildStartDeps {
  resolvedConfig?: ResolvedConfig;
  state: BuildState;
  diskCacheEnabled: boolean;
  persistentCache: PersistentCache;
  esbuildCache: Map<string, EsbuildCacheEntry>;
  fallbackFiles: Set<string>;
  fallbackReasons: Map<string, string>;
  processedFiles: Set<string>;
  hooks: PluginHooks;
  mdxOptions?: MdxImportHandlingOptions;
  providedBinding: XmdxBinding | null;
  loadBinding: () => Promise<XmdxBinding>;
  compilerOptions: Record<string, unknown>;
  shikiManager: ShikiManager;
  ecManager: ExpressiveCodeManager;
  starlightComponents: XmdxPluginOptions['starlightComponents'];
  transformPipeline: Transform;
  expressiveCode: ExpressiveCodeConfig | null;
  registry: Registry;
  warn: (message: string) => void;
}

/**
 * Reads every file, applies the preprocess hooks, and sorts each file into
 * one of three buckets:
 *
 * - fallback: `detectProblematicMdxPatterns` flagged it; the file is recorded
 *   in `fallbackFiles` / `fallbackReasons` and excluded from the batch;
 * - disk-cache hit: a cached entry exists for the file's content hash; it is
 *   copied into `esbuildCache`, marked in `processedFiles`, and excluded;
 * - batch input: everything else, returned in `inputs`.
 *
 * Reads are bounded by {@link FILE_READ_CONCURRENCY} to avoid exhausting file
 * descriptors on large content trees.
 *
 * @returns The batch inputs (in `files` order), the content hash of every
 *   non-fallback file, and the number of disk-cache hits.
 */
export async function batchReadAndDetectFallbacks(
  files: string[],
  hooks: PluginHooks,
  mdxOptions: MdxImportHandlingOptions | undefined,
  diskCache: DiskCache | null,
  esbuildCache: Map<string, EsbuildCacheEntry>,
  fallbackFiles: Set<string>,
  fallbackReasons: Map<string, string>,
  processedFiles: Set<string>
): Promise<ReadAndDetectResult> {
  const fallbackStats = {
    disallowedImports: 0,
    // NOTE(review): incremented below but not currently included in the summary log.
    noAllowImports: 0,
  };
  const disallowedImportSources = new Map<string, number>();
  const sourceHashes = new Map<string, string>();
  let diskCacheHits = 0;

  const inputsOrNull = await mapConcurrent<string, BatchInput | null>(
    files,
    FILE_READ_CONCURRENCY,
    async (file) => {
      const rawSource = await readFile(file, 'utf8');

      let processedSource = rawSource;
      for (const preprocessHook of hooks.preprocess) {
        processedSource = preprocessHook(processedSource, file);
      }

      const detection = detectProblematicMdxPatterns(processedSource, mdxOptions, file);
      if (detection.hasProblematicPatterns) {
        fallbackFiles.add(file);
        fallbackReasons.set(file, detection.reason ?? 'Unknown pattern');

        if (detection.disallowedImports && detection.disallowedImports.length > 0) {
          fallbackStats.disallowedImports++;
          for (const src of detection.disallowedImports) {
            disallowedImportSources.set(src, (disallowedImportSources.get(src) ?? 0) + 1);
          }
        } else if (detection.allImports && detection.allImports.length > 0) {
          fallbackStats.noAllowImports++;
        }
        return null;
      }

      const contentHash = DiskCache.computeHash(processedSource);
      sourceHashes.set(file, contentHash);

      if (diskCache) {
        const cached = await diskCache.get(file, contentHash);
        if (cached) {
          esbuildCache.set(file, { code: cached.code, map: cached.map });
          processedFiles.add(file);
          diskCacheHits++;
          return null;
        }
      }

      return {
        id: file,
        source: processedSource,
        originalSource: rawSource,
        filepath: file,
        contentHash,
      };
    }
  );

  const inputs = inputsOrNull.filter((input): input is BatchInput => input !== null);

  if (fallbackFiles.size > 0) {
    const breakdown: string[] = [];
    if (fallbackStats.disallowedImports > 0) {
      breakdown.push(`${fallbackStats.disallowedImports} with disallowed imports`);
    }
    console.info(
      `[xmdx] Pre-detected ${fallbackFiles.size} files with patterns incompatible with markdown-rs (delegating to Astro MDX)` +
        (breakdown.length > 0 ? ` [${breakdown.join(', ')}]` : '')
    );

    // Only surface the "top sources" hint once the fallback count is large
    // enough for the advice to matter.
    if (disallowedImportSources.size > 0 && fallbackFiles.size >= 10) {
      const topSources = Array.from(disallowedImportSources.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 5)
        .map(([src, count]) => `${src} (${count})`);
      console.info(`[xmdx] Top disallowed import sources: ${topSources.join(', ')}`);
      console.info('[xmdx] Tip: Add these to your preset\'s allowImports to reduce fallback rate');
    }
  }

  return { inputs, sourceHashes, diskCacheHits };
}

/** One successfully compiled file, ready for the transform pipeline. */
interface CompileResultEntry {
  id: string;
  code: string;
  frontmatterJson?: string;
  headings: Array<{ depth: number; slug: string; text: string }>;
  originalSource: string;
  isMdx: boolean;
}

interface BatchCompileOutput {
  stats: BatchCompileStatsResult;
  results: CompileResultEntry[];
}

/**
 * Compiles the markdown and MDX inputs as two separate batches.
 *
 * Both batches run with `continueOnError: true`. Entries that come back with
 * an error are recorded in `fallbackFiles` / `fallbackReasons` instead of
 * aborting the batch; entries with a result are returned in `results`
 * (markdown first, then MDX).
 *
 * @returns Per-kind batch statistics and the successful compile results.
 */
export function batchCompileFiles(
  compiler: XmdxCompiler,
  mdInputs: BatchInput[],
  mdxInputs: BatchInput[],
  fallbackFiles: Set<string>,
  fallbackReasons: Map<string, string>,
): BatchCompileOutput {
  const results: CompileResultEntry[] = [];

  // Batch results are matched back to their original source by id.
  const originalSources = new Map<string, string>();
  for (const input of [...mdInputs, ...mdxInputs]) {
    originalSources.set(input.id, input.originalSource);
  }

  let mdStats: BatchStats = { succeeded: 0, total: 0, failed: 0, processingTimeMs: 0 };
  if (mdInputs.length > 0) {
    const mdBatchResult = compiler.compileBatchToModule(mdInputs, {
      continueOnError: true,
    });
    mdStats = mdBatchResult.stats;
    for (const result of mdBatchResult.results) {
      if (result.result) {
        results.push({
          id: result.id,
          code: result.result.code,
          frontmatterJson: result.result.frontmatterJson,
          headings: result.result.headings || [],
          originalSource: originalSources.get(result.id) ?? '',
          isMdx: false,
        });
      } else if (result.error) {
        fallbackFiles.add(result.id);
        fallbackReasons.set(result.id, result.error.message);
      }
    }
  }

  let mdxStats: BatchStats = { succeeded: 0, total: 0, failed: 0, processingTimeMs: 0 };
  if (mdxInputs.length > 0) {
    const mdxBatchResult = compiler.compileMdxBatch(mdxInputs, {
      continueOnError: true,
    });
    mdxStats = mdxBatchResult.stats;
    for (const result of mdxBatchResult.results) {
      if (result.result) {
        results.push({
          id: result.id,
          code: result.result.code,
          frontmatterJson: result.result.frontmatterJson,
          headings: result.result.headings || [],
          originalSource: originalSources.get(result.id) ?? '',
          isMdx: true,
        });
      } else if (result.error) {
        fallbackFiles.add(result.id);
        fallbackReasons.set(result.id, result.error.message);
      }
    }
  }

  return { stats: { md: mdStats, mdx: mdxStats }, results };
}

/**
 * Transforms the JSX of every input and stores the output in `jsxCache`.
 *
 * Batches of {@link PARALLEL_JSX_THRESHOLD} or more first try
 * `runParallelJsxTransform`; if that throws, or the batch is smaller, the
 * single-threaded `batchTransformJsx` is used.
 *
 * @returns `true` if the parallel path produced the results, `false` if the
 *   single-threaded path did (or there was nothing to transform), and `null`
 *   if the transform failed (after emitting a warning through `warn`).
 */
export async function batchJsxTransform(
  jsxInputs: Array<{ id: string; virtualId: string; jsx: string; contentHash?: string }>,
  jsxCache: Map<string, EsbuildCacheEntry>,
  warn: (message: string) => void
): Promise<boolean | null> {
  if (jsxInputs.length === 0) return false;

  const useParallel = jsxInputs.length >= PARALLEL_JSX_THRESHOLD;
  let usedParallel = false;

  try {
    if (useParallel) {
      try {
        debugLog(`Using parallel JSX transform workers for ${jsxInputs.length} files`);
        const parallelResults = await runParallelJsxTransform(
          jsxInputs.map((input) => ({ id: input.id, jsx: input.jsx }))
        );
        for (const [id, result] of parallelResults) {
          jsxCache.set(id, { code: result.code, map: asSourceMap(result.map) });
        }
        usedParallel = true;
      } catch (workerErr) {
        // Worker failure is not fatal: fall through to the single-threaded path.
        debugLog(`Worker JSX transform failed, falling back to single-threaded: ${String(workerErr)}`);
      }
    }

    if (!usedParallel) {
      const results = await batchTransformJsx(
        jsxInputs.map((input) => ({ id: input.id, jsx: input.jsx }))
      );
      for (const [id, result] of results) {
        jsxCache.set(id, { code: result.code, map: result.map });
      }
    }

    return usedParallel;
  } catch (transformErr) {
    warn(`[xmdx] Batch JSX transform failed, will use individual transforms: ${String(transformErr)}`);
    return null;
  }
}

/**
 * Copies the current pass's esbuild results and fallback bookkeeping into the
 * persistent cache so later build passes can reuse them.
 */
export function persistCaches(
  persistentCache: PersistentCache,
  esbuildCache: Map<string, EsbuildCacheEntry>,
  fallbackFiles: Set<string>,
  fallbackReasons: Map<string, string>
): void {
  for (const [k, v] of esbuildCache) {
    persistentCache.esbuild.set(k, v);
  }
  for (const file of fallbackFiles) {
    persistentCache.fallbackFiles.add(file);
  }
  for (const [k, v] of fallbackReasons) {
    persistentCache.fallbackReasons.set(k, v);
  }
}

/**
 * Inverse of {@link persistCaches}: copies persisted esbuild results and
 * fallback bookkeeping back into the per-pass collections.
 */
function restorePersistentCaches(
  persistentCache: PersistentCache,
  esbuildCache: Map<string, EsbuildCacheEntry>,
  fallbackFiles: Set<string>,
  fallbackReasons: Map<string, string>
): void {
  for (const [k, v] of persistentCache.esbuild) {
    esbuildCache.set(k, v);
  }
  for (const file of persistentCache.fallbackFiles) {
    fallbackFiles.add(file);
  }
  for (const [k, v] of persistentCache.fallbackReasons) {
    fallbackReasons.set(k, v);
  }
}

/**
 * Writes to the disk cache every esbuild result whose id matches a JSX input
 * that carries a content hash, then flushes the cache.
 */
async function writeDiskCacheEntries(
  diskCache: DiskCache,
  jsxInputs: Array<{ id: string; virtualId: string; jsx: string; contentHash?: string }>,
  esbuildCache: Map<string, EsbuildCacheEntry>
): Promise<void> {
  const entriesToCache: Array<{
    filename: string;
    sourceHash: string;
    code: string;
    map?: SourceMapInput;
  }> = [];

  const inputHashMap = new Map<string, string>();
  for (const input of jsxInputs) {
    if (input.contentHash) {
      inputHashMap.set(input.id, input.contentHash);
    }
  }

  // Only entries produced from this pass's inputs have a known hash; entries
  // restored from disk earlier are skipped because they are not in the map.
  for (const [id, cached] of esbuildCache) {
    const hash = inputHashMap.get(id);
    if (hash) {
      entriesToCache.push({
        filename: id,
        sourceHash: hash,
        code: cached.code,
        map: cached.map,
      });
    }
  }

  if (entriesToCache.length > 0) {
    await diskCache.setBatch(entriesToCache);
    await diskCache.flush();
    debugLog(`Wrote ${entriesToCache.length} entries to disk cache`);
  }
}

/**
 * `buildStart` handler: pre-compiles all markdown/MDX files in one batch.
 *
 * Does nothing unless the resolved Vite command is `'build'`. On the first
 * pass it optionally initialises the disk cache, discovers files with `glob`,
 * filters out fallbacks and disk-cache hits, batch-compiles the rest, runs
 * the transform pipeline, batch-transforms the JSX, and persists the results.
 * On later passes with a populated persistent cache it only restores that
 * cache and returns.
 *
 * @throws Error if the `glob` package cannot be loaded. Errors from reading
 *   files or preprocess hooks also propagate; errors from the compile /
 *   pipeline / JSX stage are reported through `deps.warn` instead.
 */
export async function handleBuildStart(deps: BuildStartDeps): Promise<void> {
  const config = deps.resolvedConfig;
  if (config?.command !== 'build') return;

  deps.state.buildPassCount++;
  const buildPassCount = deps.state.buildPassCount;
  debugLog(`Build pass ${buildPassCount}`);

  if (buildPassCount === 1 && deps.diskCacheEnabled && !deps.state.diskCache) {
    const diskCache = new DiskCache(config.root, true);
    deps.state.diskCache = diskCache;
    await diskCache.init();
    const preloaded = await diskCache.preloadEntries();
    const stats = diskCache.getStats();
    if (stats.entries > 0) {
      console.info(`[xmdx] Disk cache enabled (${stats.entries} cached entries, ${preloaded} preloaded)`);
    }
  }

  // Later passes reuse what the first pass produced instead of recompiling.
  if (buildPassCount > 1 && deps.persistentCache.esbuild.size > 0) {
    debugTime('buildStart:total');
    debugLog(`Reusing ${deps.persistentCache.esbuild.size} cached esbuild results from pass ${buildPassCount - 1}`);
    restorePersistentCaches(
      deps.persistentCache,
      deps.esbuildCache,
      deps.fallbackFiles,
      deps.fallbackReasons
    );
    console.info(
      `[xmdx] Build pass ${buildPassCount}: Reusing ${deps.persistentCache.esbuild.size} cached results`
    );
    debugTimeEnd('buildStart:total');
    return;
  }

  const root = config.root;
  const astroDir = path.join(root, '.astro');
  const distDir = path.join(root, 'dist');
  if (existsSync(astroDir) && !existsSync(distDir)) {
    console.warn(
      '[xmdx] Stale cache detected (.astro exists but dist does not). Consider running `rm -rf .astro` if you encounter module resolution errors.'
    );
  }

  debugTime('buildStart:total');
  debugTime('buildStart:glob');

  let globModule: typeof import('glob');
  try {
    globModule = asBinding<typeof import('glob')>(require('glob'));
  } catch {
    throw new Error(
      '[xmdx] glob is required for file discovery. Please install: npm install glob'
    );
  }

  const { glob } = asBinding<{
    glob: (
      pattern: string,
      options: { cwd: string; ignore: string[]; absolute: boolean }
    ) => Promise<string[]>;
  }>(globModule);

  const files = await glob('**/*.{md,mdx}', {
    cwd: root,
    ignore: [...DEFAULT_IGNORE_PATTERNS],
    absolute: true,
  });
  debugTimeEnd('buildStart:glob');
  debugLog(`Found ${files.length} markdown files`);

  if (files.length === 0) {
    debugTimeEnd('buildStart:total');
    return;
  }

  debugTime('buildStart:readFiles');
  const { inputs, sourceHashes, diskCacheHits } = await batchReadAndDetectFallbacks(
    files,
    deps.hooks,
    deps.mdxOptions,
    deps.state.diskCache,
    deps.esbuildCache,
    deps.fallbackFiles,
    deps.fallbackReasons,
    deps.processedFiles
  );
  debugTimeEnd('buildStart:readFiles');

  if (diskCacheHits > 0) {
    debugLog(`Disk cache hits: ${diskCacheHits}/${files.length} files`);
    console.info(`[xmdx] Disk cache: ${diskCacheHits} files loaded from cache`);
  }

  if (inputs.length === 0) {
    debugTimeEnd('buildStart:total');
    return;
  }

  try {
    debugTime('buildStart:batchCompile');
    debugTime('buildStart:shikiInit');

    // Start highlighter initialisation now so it overlaps with compilation.
    const shikiPromise = deps.shikiManager.init();
    const ecPromise = deps.ecManager.init();

    const mdInputs = inputs.filter((i) => !i.filepath.endsWith('.mdx'));
    const mdxInputs = inputs.filter((i) => i.filepath.endsWith('.mdx'));
    debugLog(`Separated: ${mdInputs.length} MD files, ${mdxInputs.length} MDX files`);

    const binding = deps.providedBinding ?? (await deps.loadBinding());
    const createCompiler = binding.createCompiler
      ? binding.createCompiler.bind(binding)
      : (cfg: Record<string, unknown>) => new binding.XmdxCompiler!(cfg);
    const compiler = createCompiler(deps.compilerOptions);

    const { stats, results: compileResults } = batchCompileFiles(
      compiler,
      mdInputs,
      mdxInputs,
      deps.fallbackFiles,
      deps.fallbackReasons,
    );
    debugTimeEnd('buildStart:batchCompile');

    const totalFiles = stats.md.total + stats.mdx.total;
    const totalSucceeded = stats.md.succeeded + stats.mdx.succeeded;
    const totalTime = stats.md.processingTimeMs + stats.mdx.processingTimeMs;
    console.info(
      `[xmdx] Batch compiled ${totalSucceeded}/${totalFiles} files in ${totalTime.toFixed(0)}ms` +
        (mdxInputs.length > 0 ? ` (${stats.mdx.succeeded} MDX via mdxjs-rs)` : '')
    );

    // NOTE(review): this timer starts before the pipeline stage, so the
    // "Batch JSX transformed … in Nms" figure below includes pipeline time.
    const esbuildStartTime = performance.now();

    const [resolvedShiki] = await Promise.all([shikiPromise, ecPromise]);
    const expressiveCodeCanRewrite = deps.expressiveCode
      ? await deps.ecManager.canRewrite(deps.expressiveCode.moduleId, root)
      : false;

    // Fallback: enable Shiki only when ExpressiveCode cannot safely rewrite/pre-render.
    let finalResolvedShiki = resolvedShiki;
    if (deps.expressiveCode && !expressiveCodeCanRewrite) {
      deps.shikiManager.enable();
      finalResolvedShiki = await deps.shikiManager.init();
    }
    debugTimeEnd('buildStart:shikiInit');

    debugTime('buildStart:pipelineProcessing');
    const normalizedStarlightComponents = normalizeStarlightComponents(deps.starlightComponents ?? false);

    const jsxInputs = await mapConcurrent(compileResults, PIPELINE_CONCURRENCY, async (entry) => {
      let frontmatter: Record<string, unknown> = {};
      if (entry.frontmatterJson) {
        try {
          frontmatter = parseJsonRecord(entry.frontmatterJson);
        } catch {
          // invalid frontmatter JSON, use empty object
        }
      }
      const headings = entry.headings;

      const jsxCode = entry.isMdx
        ? wrapMdxModule(entry.code, { frontmatter, headings, registry: deps.registry }, entry.id)
        : entry.code;

      const ctx: TransformContext = {
        code: jsxCode,
        source: entry.originalSource,
        filename: entry.id,
        frontmatter,
        headings,
        registry: deps.registry,
        config: {
          expressiveCode: deps.expressiveCode,
          expressiveCodeCanRewrite,
          starlightComponents: normalizedStarlightComponents,
          shiki: deps.shikiManager.forCode(jsxCode, finalResolvedShiki),
        },
      };

      const transformed = await deps.transformPipeline(ctx);
      return {
        id: entry.id,
        virtualId: `${VIRTUAL_MODULE_PREFIX}${entry.id}${OUTPUT_EXTENSION}`,
        jsx: transformed.code,
        contentHash: sourceHashes.get(entry.id),
      };
    });

    debugLog(
      `Pipeline processed ${jsxInputs.length} files for esbuild batch (${compileResults.filter((e) => !e.isMdx).length} MD modules, ${compileResults.filter((e) => e.isMdx).length} MDX)`
    );
    debugTimeEnd('buildStart:pipelineProcessing');

    if (jsxInputs.length > 0) {
      debugTime('buildStart:jsxTransform');
      const usedParallel = await batchJsxTransform(jsxInputs, deps.esbuildCache, deps.warn);

      if (usedParallel !== null) {
        const jsxEndTime = performance.now();
        console.info(
          `[xmdx] Batch JSX transformed ${deps.esbuildCache.size} files in ${(jsxEndTime - esbuildStartTime).toFixed(0)}ms` +
            (usedParallel ? ' (parallel workers)' : '')
        );
        debugTimeEnd('buildStart:jsxTransform');

        if (deps.state.diskCache) {
          debugTime('buildStart:diskCacheWrite');
          await writeDiskCacheEntries(deps.state.diskCache, jsxInputs, deps.esbuildCache);
          debugTimeEnd('buildStart:diskCacheWrite');
        }
      } else {
        debugTimeEnd('buildStart:jsxTransform');
      }
    }

    persistCaches(
      deps.persistentCache,
      deps.esbuildCache,
      deps.fallbackFiles,
      deps.fallbackReasons
    );
    debugLog(`Persisted ${deps.persistentCache.esbuild.size} esbuild results for subsequent passes`);

    debugTimeEnd('buildStart:total');
  } catch (err) {
    debugTimeEnd('buildStart:total');
    // NOTE(review): this handler catches every failure in the block above
    // (compile, pipeline, JSX transform, disk-cache write), not only binding
    // load errors; the message text is kept as-is for compatibility.
    deps.warn(`[xmdx] Batch compile skipped due to binding load failure: ${String(err)}`);
  }
}