/** * Build system - combines sections → paper.md → PDF/DOCX/TEX * * Features: * - Reads rev.yaml config * - Combines section files into paper.md (persisted) * - Strips annotations appropriately per output format * - Runs pandoc with crossref filter */ import * as fs from 'fs'; import * as path from 'path'; import { fileURLToPath } from 'url'; import { execSync, spawn, ChildProcess } from 'child_process'; import YAML from 'yaml'; import { stripAnnotations } from './annotations.js'; import { buildRegistry, labelToDisplay, detectDynamicRefs, resolveForwardRefs, resolveSupplementaryRefs } from './crossref.js'; import { processVariables, hasVariables } from './variables.js'; import { processSlideMarkdown, hasSlideSyntax } from './slides.js'; import { generatePptxTemplate, templateNeedsRegeneration, injectMediaIntoPptx, injectSlideNumbers, applyThemeFonts, applyCentering, applyBuildupColors } from './pptx-template.js'; import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js'; import { runPostprocess } from './postprocess.js'; import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js'; import { buildImageRegistry, writeImageRegistry } from './image-registry.js'; import type { Author, JournalFormatting } from './types.js'; import { getJournalProfile } from './journals.js'; import { resolveCSL } from './csl.js'; import { type MacroDef, mergeMacros, generateLatexPreamble, writeMacrosSidecar, getMacroFilterPath, } from './macros.js'; // ============================================================================= // Constants // ============================================================================= /** Supported output formats */ const SUPPORTED_FORMATS = ['pdf', 'docx', 'tex', 'beamer', 'pptx'] as const; /** * Maximum length for slugified-title output filenames. Only used when no * explicit `output:` filename is configured. 
/**
 * Maximum length for slugified-title output filenames. Only used when no
 * explicit `output:` filename is configured. Long titles are truncated at the
 * last `-` boundary at-or-before this length so words stay intact (the old
 * blind `.slice(0, 50)` cut mid-word).
 */
const MAX_TITLE_FILENAME_LENGTH = 80;

// =============================================================================
// Interfaces
// =============================================================================

/** Cross-reference labelling options (`crossref:` section of rev.yaml). */
export interface CrossrefConfig {
  figureTitle?: string;
  tableTitle?: string;
  figPrefix?: string | string[];
  tblPrefix?: string | string[];
  secPrefix?: string | string[];
  linkReferences?: boolean;
}

/** PDF output options (`pdf:` section of rev.yaml). */
export interface PdfConfig {
  template?: string | null;
  headerIncludes?: string | null;
  documentclass?: string;
  fontsize?: string;
  geometry?: string;
  linestretch?: number;
  numbersections?: boolean;
  toc?: boolean;
  /**
   * LaTeX engine: pdflatex (default), xelatex, lualatex, tectonic, etc.
   * xelatex/lualatex are required for native UTF-8 rendering of Latin-Extended
   * diacritics (Czech/Polish/Croatian/Spanish author names, species epithets).
   */
  engine?: string;
  /** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
  mainfont?: string;
  /** Sans-serif font (xelatex/lualatex only). */
  sansfont?: string;
  /** Monospace font (xelatex/lualatex only). */
  monofont?: string;
  /** Extra pandoc args appended for this format (after top-level pandocArgs). */
  pandocArgs?: string[];
}

/** DOCX output options (`docx:` section of rev.yaml). */
export interface DocxConfig {
  reference?: string | null;
  keepComments?: boolean;
  affiliationNewline?: boolean;
  toc?: boolean;
  pandocArgs?: string[];
  /**
   * Auto-translate the common-shape raw `\begin{figure}...\end{figure}` block
   * to portable `![caption](path){#fig:label width=N%}` markdown so figures
   * survive the docx build (pandoc otherwise drops raw LaTeX silently).
   * Default true. Set false to opt out — blocks then warn and are left alone.
   */
  translateRawFigures?: boolean;
}

/** TeX output options (`tex:` section of rev.yaml). */
export interface TexConfig {
  standalone?: boolean;
  pandocArgs?: string[];
}

/** Beamer slide options (`beamer:` section of rev.yaml). */
export interface BeamerConfig {
  theme?: string;
  colortheme?: string | null;
  fonttheme?: string | null;
  aspectratio?: string | null;
  navigation?: string | null;
  section?: boolean;
  notes?: string | false;
  fit_images?: boolean;
  pandocArgs?: string[];
}

/** PowerPoint slide options (`pptx:` section of rev.yaml). */
export interface PptxConfig {
  theme?: string;
  reference?: string | null;
  media?: string | null;
  colors?: {
    default?: string;
    title?: string;
  };
  buildup?: {
    grey?: string;
    accent?: string;
    enabled?: boolean;
  };
  pandocArgs?: string[];
}

/** Table post-processing options (`tables:` section of rev.yaml). */
export interface TablesConfig {
  /** Header substrings (matched case-insensitively) selecting nowrap columns. */
  nowrap?: string[];
}

/** Postprocess script per output format; `all` runs after any format. */
export interface PostprocessConfig {
  pdf?: string | null;
  docx?: string | null;
  tex?: string | null;
  pptx?: string | null;
  beamer?: string | null;
  all?: string | null;
  [key: string]: string | null | undefined;
}

/** Fully merged build configuration (defaults + journal profile + rev.yaml). */
export interface BuildConfig {
  title: string;
  authors: (string | Author)[];
  /**
   * Affiliation key → display text. Keys are referenced from
   * `Author.affiliations`; their declaration order defines the superscript
   * numbers used in author blocks.
   */
  affiliations: Record<string, string>;
  sections: string[];
  bibliography: string | null;
  csl: string | null;
  crossref: CrossrefConfig;
  pdf: PdfConfig;
  docx: DocxConfig;
  tex: TexConfig;
  beamer: BeamerConfig;
  pptx: PptxConfig;
  tables: TablesConfig;
  postprocess: PostprocessConfig;
  /**
   * User-declared placeholder macros. Merged with the built-in macros
   * (currently \tofill). Each entry overrides a built-in by name.
   *
   * See lib/macros.ts for the per-format rendering rules.
   */
  macros?: MacroDef[];
  /**
   * Directory (relative to the project) where final outputs land. Created on
   * demand. Set to null/empty to keep outputs alongside paper.md (legacy
   * behavior).
   */
  outputDir?: string | null;
  /**
   * Per-format output filenames. Keys are format names (pdf/docx/tex/beamer/
   * pptx); values are paths. Relative paths resolve under outputDir; absolute
   * paths are honored as-is. Extension is added if missing. CLI `-o` wins
   * over this map.
   */
  output?: Record<string, string>;
  /**
   * Extra pandoc args applied to every format. Format-specific args
   * (e.g. docx.pandocArgs) are appended *after* these, and CLI --pandoc-arg
   * values are appended last.
   */
  pandocArgs?: string[];
  /** Path of the rev.yaml this config was loaded from; null when none exists. */
  _configPath?: string | null;
}

/** Outcome of building a single output format. */
export interface BuildResult {
  format: string;
  success: boolean;
  outputPath?: string;
  error?: string;
}

interface BuildOptions {
  verbose?: boolean;
  config?: BuildConfig;
  /**
   * Internal: forces the exact output path. Used by dual-mode/temp builds that
   * route to specific temp files. Bypasses the `output:` resolver.
   */
  outputPath?: string;
  /**
   * CLI override (`-o, --output <file>`). Beats `config.output[format]` but
   * loses to `options.outputPath`. Relative paths resolve under outputDir;
   * absolute paths bypass outputDir.
   */
  output?: string;
  crossref?: boolean;
  /** Extra pandoc args from CLI (--pandoc-arg). Appended after config args. */
  pandocArgs?: string[];
  /** Set by combineSections when it injected a `# References` block. */
  _refsAutoInjected?: boolean;
  /** Set by combineSections to the number of forward references it resolved. */
  _forwardRefsResolved?: number;
}

interface CombineOptions extends BuildOptions {
  _refsAutoInjected?: boolean;
}

interface VariablesContext {
  sectionContents: string[];
}

interface PandocResult {
  outputPath: string;
  success: boolean;
  error?: string;
}

interface FullBuildResult {
  results: BuildResult[];
  paperPath: string;
  warnings: string[];
  forwardRefsResolved: number;
  refsAutoInjected?: boolean;
}

interface DynamicRef {
  type: string;
  label: string;
  match: string;
  position: number;
}

/**
 * Local view of the crossref registry produced by `buildRegistry`.
 *
 * NOTE(review): the type arguments of these maps were missing in the copy
 * under review. `unknown` is a deliberately loose placeholder that accepts
 * any registry shape; restore the precise entry types from ./crossref.js.
 */
interface Registry {
  figures: Map<string, unknown>;
  tables: Map<string, unknown>;
  equations: Map<string, unknown>;
  byNumber: {
    fig?: Map<unknown, unknown>;
    figS?: Map<unknown, unknown>;
    tbl?: Map<unknown, unknown>;
    tblS?: Map<unknown, unknown>;
    eq?: Map<unknown, unknown>;
  };
}

/**
 * Default rev.yaml configuration
 */
export const DEFAULT_CONFIG: BuildConfig = {
  title: 'Untitled Document',
  authors: [],
  affiliations: {},
  sections: [],
  bibliography: null,
  csl: null,
  crossref: {
    figureTitle: 'Figure',
    tableTitle: 'Table',
    figPrefix: ['Fig.', 'Figs.'],
    tblPrefix: ['Table', 'Tables'],
    secPrefix: ['Section', 'Sections'],
    linkReferences: true,
  },
  pdf: {
    template: null,
    documentclass: 'article',
    fontsize: '12pt',
    geometry: 'margin=1in',
    linestretch: 1.5,
    numbersections: false,
    toc: false,
  },
  docx: {
    reference: null,
    keepComments: false,
    affiliationNewline: true,
    toc: false,
    translateRawFigures: true,
  },
  tex: {
    standalone: true,
  },
  // Slide formats
  beamer: {
    theme: 'default',
    colortheme: null,
    fonttheme: null,
    aspectratio: null, // '169' for 16:9, '43' for 4:3
    navigation: null, // 'horizontal', 'vertical', 'frame', 'empty'
    section: true, // section divider slides
    notes: 'show', // 'show' (presenter view), 'only' (notes only), 'hide', or false
    fit_images: true, // scale images to fit within slide bounds
  },
  pptx: {
    theme: 'default', // Built-in theme: default, dark, academic, minimal, corporate
    reference: null, // Custom reference-doc (overrides theme)
    media: null, // directory with logo images (e.g., logo-left.png, logo-right.png)
  },
  // Table formatting
  tables: {
    nowrap: [], // Column headers to apply nowrap formatting (converts Normal() → $\mathcal{N}()$ etc.)
  },
  // Postprocess scripts
  postprocess: {
    pdf: null,
    docx: null,
    tex: null,
    pptx: null,
    beamer: null,
    all: null, // Runs after any format
  },
  // Placeholder/highlight macros. Defaults are the built-ins from
  // lib/macros.ts; users append their own here.
  macros: [],
  // Final outputs land here (created on demand). Set to null or '' to keep
  // outputs in the project root.
  outputDir: 'output',
};

// =============================================================================
// Public API
// =============================================================================
/**
 * Copy journal-supplied values onto `target` for every key whose user value
 * is indistinguishable from the built-in default (i.e. the user did not set
 * it explicitly). Values are compared by their JSON serialisation so array
 * defaults such as `['Fig.', 'Figs.']` compare by content.
 */
function applyJournalDefaults(
  target: object,
  user: object,
  defaults: object,
  journal: object
): void {
  const out = target as Record<string, unknown>;
  const userValues = user as Record<string, unknown>;
  const defaultValues = defaults as Record<string, unknown>;

  for (const [key, value] of Object.entries(journal)) {
    if (value === undefined) continue;
    if (JSON.stringify(userValues[key]) === JSON.stringify(defaultValues[key])) {
      out[key] = value;
    }
  }
}

/**
 * Merge journal formatting defaults into a config.
 * Priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings
 *
 * @param config - Config already merged with DEFAULT_CONFIG (not mutated)
 * @param formatting - Formatting block of the selected journal profile
 * @param directory - Project directory, used to resolve the journal's CSL file
 * @returns A new config; only the `csl`, `pdf`, `docx` and `crossref` entries
 *   can differ from `config`
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  const merged: BuildConfig = { ...config };

  // CSL: only apply if the user hasn't set one. If the style can't be
  // resolved locally, store the name — pandoc --citeproc can sometimes
  // resolve it, and the user can fetch with rev profiles --fetch-csl.
  if (formatting.csl && !config.csl) {
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  // PDF / DOCX / crossref: fill in only the fields the user left at defaults.
  if (formatting.pdf) {
    merged.pdf = { ...config.pdf };
    applyJournalDefaults(merged.pdf, config.pdf || {}, DEFAULT_CONFIG.pdf, formatting.pdf);
  }

  if (formatting.docx) {
    merged.docx = { ...config.docx };
    applyJournalDefaults(merged.docx, config.docx || {}, DEFAULT_CONFIG.docx, formatting.docx);
  }

  if (formatting.crossref) {
    merged.crossref = { ...config.crossref };
    applyJournalDefaults(merged.crossref, config.crossref || {}, DEFAULT_CONFIG.crossref, formatting.crossref);
  }

  return merged;
}
/**
 * In-place: copy `pandoc-args` → `pandocArgs` on an object (if not already set).
 * Idempotent. Coerces a single string into a one-element array.
 *
 * An empty `pandoc-args:` entry (which YAML parses as null) is discarded
 * instead of being turned into `[null]`. The hyphenated key is always
 * removed once it has been seen with a defined value.
 *
 * @param obj - Parsed config object (top level or one format section);
 *   non-objects are ignored
 */
function normalizePandocArgsKey(obj: Record<string, unknown>): void {
  if (!obj || typeof obj !== 'object') return;

  const hyphenated = obj['pandoc-args'];
  if (hyphenated === undefined) return;

  // camelCase wins when both spellings are present.
  if (hyphenated !== null && obj.pandocArgs === undefined) {
    obj.pandocArgs = Array.isArray(hyphenated) ? hyphenated : [hyphenated];
  }
  delete obj['pandoc-args'];
}
/**
 * Load rev.yaml config from directory
 *
 * When no rev.yaml exists the defaults are returned with `_configPath: null`.
 * Otherwise the user settings are merged over DEFAULT_CONFIG one level deep
 * for the nested sections, and journal formatting (if `journal:` is set) is
 * slotted between the defaults and the user's explicit settings.
 *
 * @param directory - Project directory path
 * @returns Merged config with defaults
 * @throws {TypeError} If directory is not a string
 * @throws {Error} If rev.yaml exists but cannot be read or parsed, or does
 *   not contain a mapping at the top level
 */
export function loadConfig(directory: string): BuildConfig {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  const configPath = path.join(directory, 'rev.yaml');
  if (!fs.existsSync(configPath)) {
    return { ...DEFAULT_CONFIG, _configPath: null };
  }

  try {
    const content = fs.readFileSync(configPath, 'utf-8');
    const parsed: unknown = YAML.parse(content) || {};

    // A scalar or list at the top level would otherwise be spread into the
    // config as index keys and silently ignored.
    if (typeof parsed !== 'object' || Array.isArray(parsed)) {
      const found = Array.isArray(parsed) ? 'a list' : typeof parsed;
      throw new Error(`expected a mapping of settings at the top level, got ${found}`);
    }
    // User-authored YAML: shape is not validated beyond "is a mapping".
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const userConfig = parsed as Record<string, any>;

    // Accept hyphenated `pandoc-args` (the form pandoc itself uses) in addition
    // to camelCase `pandocArgs`. Hyphenated is what we document; camelCase is
    // accepted for users who already prefer that convention.
    normalizePandocArgsKey(userConfig);
    for (const fmt of SUPPORTED_FORMATS) {
      const section = userConfig[fmt];
      if (section && typeof section === 'object') {
        normalizePandocArgsKey(section);
      }
    }

    // Deep merge with defaults
    let config: BuildConfig = {
      ...DEFAULT_CONFIG,
      ...userConfig,
      crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
      pdf: { ...DEFAULT_CONFIG.pdf, ...userConfig.pdf },
      docx: { ...DEFAULT_CONFIG.docx, ...userConfig.docx },
      tex: { ...DEFAULT_CONFIG.tex, ...userConfig.tex },
      beamer: { ...DEFAULT_CONFIG.beamer, ...userConfig.beamer },
      pptx: { ...DEFAULT_CONFIG.pptx, ...userConfig.pptx },
      tables: { ...DEFAULT_CONFIG.tables, ...userConfig.tables },
      postprocess: { ...DEFAULT_CONFIG.postprocess, ...userConfig.postprocess },
      _configPath: configPath,
    };

    // Apply journal formatting defaults (between DEFAULT_CONFIG and user settings)
    if (userConfig.journal) {
      const profile = getJournalProfile(userConfig.journal);
      if (profile?.formatting) {
        config = mergeJournalFormatting(config, profile.formatting, directory);
      }
    }

    return config;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse rev.yaml: ${reason}`);
  }
}
/**
 * Find section files in directory
 *
 * Resolution order:
 *  1. `configSections` (rev.yaml) — kept in the given order; missing files are
 *     dropped with a console warning.
 *  2. `sections.yaml` — entries of its `sections` mapping sorted by their
 *     `order` field (missing order sorts as 999); missing files are dropped.
 *     A parse failure is reported only when the DEBUG env var is set, and
 *     discovery falls through to step 3.
 *  3. Every `*.md` file in the directory, sorted alphabetically, excluding
 *     paper.md / readme.md / claude.md (case-insensitive) and hidden files.
 *     Hidden files are skipped because the per-format temp files
 *     (`.paper-<format>.md`) are written into this same directory and must
 *     never be treated as sections.
 *
 * @param directory - Project directory path
 * @param configSections - Sections from rev.yaml (optional; null is treated
 *   like an empty list)
 * @returns Ordered list of section file names
 * @throws {TypeError} If directory is not a string
 */
export function findSections(directory: string, configSections: string[] = []): string[] {
  if (typeof directory !== 'string') {
    throw new TypeError(`directory must be a string, got ${typeof directory}`);
  }

  // `sections:` left empty in YAML arrives here as null, not undefined.
  const explicit = configSections ?? [];
  if (explicit.length > 0) {
    return explicit.filter((section) => {
      if (fs.existsSync(path.join(directory, section))) return true;
      console.warn(`Warning: Section file not found: ${section}`);
      return false;
    });
  }

  // Try sections.yaml
  const sectionsYamlPath = path.join(directory, 'sections.yaml');
  if (fs.existsSync(sectionsYamlPath)) {
    const orderOf = (meta: unknown): number => {
      const order = Number((meta as { order?: unknown } | null)?.order ?? 999);
      return Number.isNaN(order) ? 999 : order;
    };
    try {
      const sectionsConfig: unknown = YAML.parse(fs.readFileSync(sectionsYamlPath, 'utf-8'));
      const declared = (sectionsConfig as { sections?: unknown } | null)?.sections;
      if (declared && typeof declared === 'object') {
        return Object.entries(declared)
          .sort(([, a], [, b]) => orderOf(a) - orderOf(b))
          .map(([file]) => file)
          .filter((file) => fs.existsSync(path.join(directory, file)));
      }
    } catch (e) {
      if (process.env.DEBUG) {
        const reason = e instanceof Error ? e.message : String(e);
        console.warn('build: YAML parse error in sections.yaml:', reason);
      }
    }
  }

  // Default: find all .md files except special ones
  const exclude = ['paper.md', 'readme.md', 'claude.md'];
  return fs
    .readdirSync(directory)
    .filter((f) => f.endsWith('.md') && !f.startsWith('.') && !exclude.includes(f.toLowerCase()))
    .sort();
}
/**
 * Combine section files into paper.md
 *
 * Steps: write the YAML frontmatter built from `config`, append every section
 * (own frontmatter stripped, trimmed, separated by blank lines), expand
 * template variables, and — when pandoc-crossref is available — resolve
 * forward and supplementary references. The result is written to
 * `<directory>/paper.md`.
 *
 * A `# References` block with a `#refs` div is injected in front of the first
 * supplementary/appendix section when a bibliography file exists and the
 * project has neither a references section file nor an explicit `#refs` div
 * in ANY section. All sections are scanned before assembly so a div placed
 * after the supplementary material is honoured too.
 *
 * @param directory - Project directory
 * @param config - Build config
 * @param options - Mutated as an out-channel: `_refsAutoInjected` and
 *   `_forwardRefsResolved` are set when the respective step did something
 * @returns Path of the written paper.md
 * @throws {Error} If no section files are found
 */
export function combineSections(directory: string, config: BuildConfig, options: CombineOptions = {}): string {
  const sections = findSections(directory, config.sections);

  if (sections.length === 0) {
    throw new Error('No section files found. Create .md files or specify sections in rev.yaml');
  }

  const parts: string[] = [];

  // Add YAML frontmatter
  const frontmatter = buildFrontmatter(config);
  parts.push('---');
  parts.push(YAML.stringify(frontmatter).trim());
  parts.push('---');
  parts.push('');

  // Pandoc places refs at the end by default, which breaks when supplementary
  // material follows — decide up front whether we need to inject them.
  const hasRefsSection = sections.some((s) => {
    const name = s.toLowerCase();
    return name.includes('reference') || name.includes('refs');
  });
  const suppIndex = sections.findIndex((s) => {
    const name = s.toLowerCase();
    return name.includes('supp') || name.includes('appendix');
  });
  const hasBibliography = Boolean(config.bibliography && fs.existsSync(path.join(directory, config.bibliography)));

  // Read every section first (frontmatter removed) so the refs-div scan sees
  // the whole document, not just the sections before the supplementary one.
  const loaded: Array<{ index: number; content: string }> = [];
  sections.forEach((section, index) => {
    if (!section) return;
    const raw = fs.readFileSync(path.join(directory, section), 'utf-8');
    loaded.push({ index, content: stripFrontmatter(raw) });
  });
  const sectionContents = loaded.map((s) => s.content);

  // Fenced div opening with id `refs`, e.g. `::: {#refs}` or `:::{#refs .x}`.
  const refsDiv = /^\s*:{3,}\s*\{\s*#refs(?=[\s}])/m;
  const hasExplicitRefsDiv = sectionContents.some((content) => refsDiv.test(content));

  const injectRefs = suppIndex !== -1 && hasBibliography && !hasRefsSection && !hasExplicitRefsDiv;

  // Combine sections
  for (const { index, content } of loaded) {
    if (injectRefs && index === suppIndex) {
      parts.push('# References\n');
      parts.push('::: {#refs}');
      parts.push(':::');
      parts.push('');
      parts.push('');
      options._refsAutoInjected = true;
    }

    parts.push(content.trim());
    parts.push('');
    parts.push(''); // Double newline between sections
  }

  let paperContent = parts.join('\n');

  // Process template variables if any exist
  if (hasVariables(paperContent)) {
    paperContent = processVariables(paperContent, config as any, { sectionContents });
  }

  // Resolve forward references (refs that appear before their anchor definition)
  // This fixes pandoc-crossref limitation with multi-file documents
  if (hasPandocCrossref()) {
    const registry = buildRegistry(directory, sections);
    const { text, resolved } = resolveForwardRefs(paperContent, registry);
    if (resolved.length > 0) {
      paperContent = text;
      // Store resolved count for optional reporting
      options._forwardRefsResolved = resolved.length;
    }

    // Resolve supplementary references and strip their anchors.
    // pandoc-crossref cannot produce "Figure S1" numbering — it numbers all
    // figures sequentially. We resolve supplementary refs to plain text and
    // remove the {#fig:...} attributes so crossref ignores them.
    const supp = resolveSupplementaryRefs(paperContent, registry);
    if (supp.resolved.length > 0) {
      paperContent = supp.text;
    }
  }

  const paperPath = path.join(directory, 'paper.md');
  fs.writeFileSync(paperPath, paperContent, 'utf-8');

  return paperPath;
}
/**
 * Build YAML frontmatter from config
 *
 * Emits `title`, `bibliography` and `csl` when set. `author` is emitted only
 * when numbered affiliations are NOT in use — in that mode the author block
 * is injected separately per format.
 */
function buildFrontmatter(config: BuildConfig): Record<string, unknown> {
  const fm: Record<string, unknown> = {};

  if (config.title) fm.title = config.title;

  if (config.authors && config.authors.length > 0 && !hasNumberedAffiliations(config)) {
    fm.author = config.authors;
  }

  if (config.bibliography) fm.bibliography = config.bibliography;
  if (config.csl) fm.csl = config.csl;

  return fm;
}

/**
 * Strip YAML frontmatter from content
 *
 * The block must start on the very first line with `---`. It ends at the
 * first following line that is exactly `---` or `...` (both are valid pandoc
 * metadata terminators), whether or not that line is followed by a newline.
 * An empty block (`---` directly followed by the closing fence) is stripped
 * as well. Content without a frontmatter block is returned unchanged.
 */
function stripFrontmatter(content: string): string {
  const match = content.match(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?(?:---|\.\.\.)[ \t]*(?:\r?\n|$)/);
  return match ? content.slice(match[0].length) : content;
}

/**
 * Check if config uses numbered affiliation mode
 * (authors have `affiliations` arrays and an affiliations map is defined)
 */
function hasNumberedAffiliations(config: BuildConfig): boolean {
  if (!config.affiliations || Object.keys(config.affiliations).length === 0) return false;
  return (config.authors ?? []).some(
    (a) => typeof a !== 'string' && a.affiliations && a.affiliations.length > 0
  );
}
/**
 * Escape the characters `& % # _` for LaTeX text mode, unless they are
 * already preceded by a backslash. Backslashes, braces and `$` are left
 * alone so deliberate LaTeX in the config (accents, math) keeps working.
 */
function escapeLatexText(value: unknown): string {
  return String(value).replace(/(?<!\\)[&%#_]/g, '\\$&');
}

/**
 * Generate LaTeX author block using authblk package for numbered superscript affiliations.
 * Returns LaTeX code to be injected via header-includes.
 *
 * Affiliations are numbered 1..n in the declaration order of
 * `config.affiliations`. Each author gets the numbers of the affiliation keys
 * it lists (unknown keys are ignored); corresponding authors get a `\thanks`
 * footnote, with the e-mail address when one is configured. Names, e-mails
 * and affiliation texts are escaped with `escapeLatexText`, so values such as
 * `Ecology & Evolution` or `first_last@uni.edu` no longer break the LaTeX run.
 */
function generateLatexAuthorBlock(config: BuildConfig): string {
  const lines: string[] = [
    '\\usepackage{authblk}',
    '\\renewcommand\\Authfont{\\normalsize}',
    '\\renewcommand\\Affilfont{\\small}',
    '',
  ];

  // Map affiliation keys to numbers
  const affiliations = Object.entries(config.affiliations ?? {});
  const keyToNum = new Map<string, number>();
  affiliations.forEach(([key], i) => keyToNum.set(key, i + 1));

  // Authors
  for (const author of config.authors ?? []) {
    if (typeof author === 'string') {
      lines.push(`\\author{${escapeLatexText(author)}}`);
      continue;
    }

    const marks = (author.affiliations ?? [])
      .map((k) => keyToNum.get(k))
      .filter((n): n is number => n !== undefined);
    const markStr = marks.length > 0 ? `[${marks.join(',')}]` : '';

    let nameStr = escapeLatexText(author.name);
    if (author.corresponding) {
      nameStr += author.email
        ? `\\thanks{Corresponding author: ${escapeLatexText(author.email)}}`
        : '\\thanks{Corresponding author}';
    }

    lines.push(`\\author${markStr}{${nameStr}}`);
  }

  // Affiliations
  affiliations.forEach(([, text], i) => {
    lines.push(`\\affil[${i + 1}]{${escapeLatexText(text)}}`);
  });

  return lines.join('\n');
}
/**
 * Generate markdown author block for DOCX output with superscript affiliations.
 * Returns markdown text to insert after the YAML frontmatter.
 *
 * Layout:
 *   Name^1,2^, Name^3,\*^, ...
 *   (blank)
 *   ^1^ Affiliation text      (one per line; a trailing backslash forces a
 *   ^2^ Affiliation text       hard break unless docx.affiliationNewline is false)
 *   (blank)
 *   ^\*^ Corresponding author: a@x.org
 *
 * Every corresponding author is marked with `\*`. The footnote lists the
 * e-mail addresses of all corresponding authors that have one; when none has
 * an address it still reads "Corresponding author", matching what the LaTeX
 * author block does, so the `\*` marker is never left unexplained.
 */
function generateMarkdownAuthorBlock(config: BuildConfig): string {
  const lines: string[] = [];

  // Map affiliation keys to numbers
  const affiliationEntries = Object.entries(config.affiliations ?? {});
  const keyToNum = new Map<string, number>();
  affiliationEntries.forEach(([key], i) => keyToNum.set(key, i + 1));

  // Author line: Name^1,2^, Name^3^, ...
  const authors = config.authors ?? [];
  const authorLine = authors
    .map((author) => {
      if (typeof author === 'string') return author;

      const superParts = (author.affiliations ?? [])
        .map((k) => keyToNum.get(k))
        .filter((n): n is number => n !== undefined)
        .map(String);
      if (author.corresponding) superParts.push('\\*');

      return superParts.length > 0 ? `${author.name}^${superParts.join(',')}^` : author.name;
    })
    .join(', ');
  lines.push(authorLine);
  lines.push('');

  // Affiliation lines: ^1^ Department of ...
  const useLineBreaks = config.docx?.affiliationNewline !== false;
  affiliationEntries.forEach(([, text], idx) => {
    const isLast = idx === affiliationEntries.length - 1;
    const suffix = useLineBreaks && !isLast ? '\\' : '';
    lines.push(`^${idx + 1}^ ${text}${suffix}`);
  });

  // Corresponding author footnote
  const corresponding = authors.filter(
    (a): a is Author => typeof a !== 'string' && Boolean(a.corresponding)
  );
  if (corresponding.length > 0) {
    const emails = corresponding
      .map((a) => a.email)
      .filter((email): email is string => Boolean(email));
    lines.push('');
    lines.push(
      emails.length > 0
        ? `^\\*^ Corresponding author: ${emails.join(', ')}`
        : '^\\*^ Corresponding author'
    );
  }

  lines.push('');
  return lines.join('\n');
}
/**
 * Distribution notation → LaTeX math rewrites, applied in order.
 * The `(?<![\w-])` guard keeps a rule from firing on the tail of a longer
 * name (e.g. `Inverse-Gamma(`, `LogNormal(`), which would otherwise produce
 * half-converted output.
 */
const DISTRIBUTION_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Half-Normal(x) → $\text{Half-Normal}(x)$
  [/(?<![\w-])Half-Normal\(([^)]+)\)/g, '$\\text{Half-Normal}($1)$'],
  // Normal(x, y) → $\mathcal{N}(x, y)$
  [/(?<![\w-])Normal\(([^)]+)\)/g, '$\\mathcal{N}($1)$'],
  // Student-t(df, loc, scale) → $t_{df}(loc, scale)$
  [/(?<![\w-])Student-t\((\d+),\s*([^)]+)\)/g, '$t_{$1}($2)$'],
  // Gamma(a, b) → $\text{Gamma}(a, b)$
  [/(?<![\w-])Gamma\(([^)]+)\)/g, '$\\text{Gamma}($1)$'],
  // Exponential(x) → $\text{Exp}(x)$
  [/(?<![\w-])Exponential\(([^)]+)\)/g, '$\\text{Exp}($1)$'],
];

/** Apply every distribution rewrite to one (already trimmed) table cell. */
function convertDistributionNotation(cell: string): string {
  return DISTRIBUTION_RULES.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    cell
  );
}

/**
 * Process markdown tables to apply nowrap formatting to specified columns.
 * Converts distribution notation (Normal, Student-t, Gamma) to LaTeX math.
 *
 * Only pipe tables whose rows start and end with `|` are considered. A column
 * is selected when its lower-cased header contains one of the configured
 * `nowrap` strings. Cells that are empty or already start with `$` or `\`
 * are left alone; other cells in selected columns are re-padded to a single
 * space on each side. The table's line endings (LF or CRLF) are preserved,
 * and a trailing newline is emitted only if the matched table had one.
 *
 * @param content - Markdown content
 * @param tablesConfig - tables config from rev.yaml
 * @param format - output format (pdf, docx, etc.); only pdf and tex are processed
 * @returns processed content
 */
export function processTablesForFormat(content: string, tablesConfig: TablesConfig, format: string): string {
  // Only process for PDF/TeX output
  if (format !== 'pdf' && format !== 'tex') return content;

  // Check if we have nowrap columns configured
  if (!tablesConfig?.nowrap?.length) return content;

  const nowrapPatterns = tablesConfig.nowrap.map((p) => p.toLowerCase());

  // Match pipe tables: header row, separator row, body rows
  //   Header:    | Col1 | Col2 | Col3 |
  //   Separator: |:-----|:-----|:-----|
  //   Body:      | val1 | val2 | val3 |
  const tableRegex = /^(\|[^\n]+\|\r?\n\|[-:| ]+\|\r?\n)((?:\|[^\n]+\|\r?\n?)+)/gm;

  return content.replace(tableRegex, (match: string, headerAndSep: string, body: string) => {
    // Parse header cells to find nowrap column indices
    const headerLine = headerAndSep.split(/\r?\n/)[0] ?? '';
    const nowrapCols: number[] = [];
    headerLine
      .split('|')
      .slice(1, -1)
      .forEach((cell, i) => {
        const name = cell.trim().toLowerCase();
        if (nowrapPatterns.some((p) => name.includes(p))) nowrapCols.push(i);
      });

    // If no nowrap columns found in this table, return unchanged
    if (nowrapCols.length === 0) return match;

    const eol = headerAndSep.includes('\r\n') ? '\r\n' : '\n';

    const processedRows = body
      .split(/\r?\n/)
      .filter((line) => line.trim())
      .map((row) => {
        // cells[0] is the empty string before the first pipe, hence the +1.
        const cells = row.split('|');
        for (const colIdx of nowrapCols) {
          const cellIdx = colIdx + 1;
          const raw = cells[cellIdx];
          if (raw === undefined) continue;

          const cellContent = raw.trim();
          // Skip if empty, already math, or already has LaTeX commands
          if (!cellContent || cellContent.startsWith('$') || cellContent.startsWith('\\')) continue;

          cells[cellIdx] = ` ${convertDistributionNotation(cellContent)} `;
        }
        return cells.join('|');
      });

    return headerAndSep + processedRows.join(eol) + (body.endsWith('\n') ? eol : '');
  });
}
/**
 * Apply format-specific transforms (table normalization, author blocks,
 * crossref display conversion, slide syntax). Caller is responsible for
 * stripping annotations beforehand — the dual-output paths keep comments
 * in the markdown stream and need to apply these transforms separately
 * from annotation handling.
 *
 * Per format:
 *  - pdf / tex: table nowrap processing; with numbered affiliations the
 *    authblk LaTeX block is added to the frontmatter as `header-includes`.
 *  - docx: `@fig:` style references become display text, raw LaTeX figures
 *    are translated to markdown images (unless docx.translateRawFigures is
 *    false), and with numbered affiliations a markdown author block is
 *    inserted right after the frontmatter.
 *  - beamer / pptx: slide syntax is expanded when present.
 *
 * @param content - Markdown content (annotations already stripped as needed)
 * @param format - Output format
 * @param config - Build config
 * @param registry - Crossref registry for the project
 * @returns Transformed markdown
 */
export function applyFormatTransforms(
  content: string,
  format: string,
  config: BuildConfig,
  registry: Registry
): string {
  // Leading frontmatter: group 1 = opening fence + YAML lines, group 2 =
  // closing fence. The closing `---` must start a line, so a YAML value that
  // merely ends in `---` cannot be mistaken for it.
  const frontmatterRe = /^(---\r?\n(?:[\s\S]*?\r?\n)?)(---\r?\n)/;

  if (format === 'pdf' || format === 'tex') {
    content = processTablesForFormat(content, config.tables, format);

    if (hasNumberedAffiliations(config)) {
      const indentedBlock = generateLatexAuthorBlock(config)
        .split('\n')
        .map((l) => ' ' + l)
        .join('\n');
      content = content.replace(
        frontmatterRe,
        (_match: string, yamlContent: string, closing: string) =>
          `${yamlContent}header-includes: |\n${indentedBlock}\n${closing}`
      );
    }
  } else if (format === 'docx') {
    content = convertDynamicRefsToDisplay(content, registry);

    // Pandoc strips raw LaTeX in docx output. Translate the common
    // `\begin{figure}...\end{figure}` shape to portable markdown so figures
    // actually appear; exotic blocks are left alone (warned about in build()).
    if (config.docx?.translateRawFigures !== false) {
      content = translateRawLatexFigures(content).translated;
    }

    if (hasNumberedAffiliations(config)) {
      const mdBlock = generateMarkdownAuthorBlock(config);
      // Replacer function, not a replacement string: author and affiliation
      // text is user input and may contain `$` sequences that a string
      // replacement would interpret.
      content = content.replace(frontmatterRe, (frontmatter: string) => `${frontmatter}\n${mdBlock}\n`);
    }
  } else if (format === 'beamer' || format === 'pptx') {
    if (hasSlideSyntax(content)) {
      content = processSlideMarkdown(content, format);
    }
  }

  return content;
}
/**
 * Prepare paper.md for specific output format
 *
 * Reads `paperPath`, strips annotations (docx honours
 * `config.docx.keepComments`; every other format strips with default
 * options), applies the shared format transforms and writes the result next
 * to paper.md as `.paper-<format>.md`.
 *
 * @param paperPath - Path of the combined paper.md
 * @param format - Output format
 * @param config - Build config
 * @param _options - Currently unused
 * @returns Path of the prepared temporary markdown file
 */
export function prepareForFormat(
  paperPath: string,
  format: string,
  config: BuildConfig,
  _options: BuildOptions = {}
): string {
  const directory = path.dirname(paperPath);
  const source = fs.readFileSync(paperPath, 'utf-8');

  // Registry is built from the configured section order so references are
  // numbered in document order.
  const registry = buildRegistry(directory, config.sections);

  const stripped =
    format === 'docx'
      ? stripAnnotations(source, { keepComments: config.docx.keepComments })
      : stripAnnotations(source);

  const prepared = applyFormatTransforms(stripped, format, config, registry);

  const preparedPath = path.join(directory, `.paper-${format}.md`);
  fs.writeFileSync(preparedPath, prepared, 'utf-8');
  return preparedPath;
}

/**
 * Convert @fig:label references to display format (Figure 1)
 *
 * Replacements are applied from the last detected reference to the first so
 * the recorded positions of earlier references stay valid. References for
 * which no display text is available are left as they are.
 */
function convertDynamicRefsToDisplay(text: string, registry: Registry): string {
  return detectDynamicRefs(text).reduceRight((result, ref) => {
    if (!ref) return result;

    const display = labelToDisplay(ref.type, ref.label, registry as any);
    if (!display) return result;

    const head = result.slice(0, ref.position);
    const tail = result.slice(ref.position + ref.match.length);
    return head + display + tail;
  }, text);
}
// =============================================================================
// Raw LaTeX figure detection / translation (docx)
// =============================================================================

/**
 * A raw LaTeX `\begin{figure}...\end{figure}` block found in source markdown.
 * `exotic` blocks contain features we don't auto-translate (multiple
 * `\includegraphics`, `\subfloat`, `\rotatebox`, unrecognised width units);
 * pandoc strips raw LaTeX silently in docx output, so users get warned about
 * anything that won't be translated.
 */
export interface RawLatexFigure {
  file?: string;
  line: number;
  block: string;
  exotic: boolean;
}

/** `\includegraphics[opts]{path}` — group 1 = options (optional), group 2 = path. */
const INCLUDEGRAPHICS_RE = /\\includegraphics\s*(?:\[([^\]]*)\])?\s*\{([^}]+)\}/;

/** `width=...` entry inside an `\includegraphics` option list. */
const WIDTH_OPTION_RE = /(?:^|,)\s*width\s*=\s*([^,]+)/;

/** A plain decimal number: `8`, `0.8`, `.5`, `1.` — but not `1.2.3` or `.`. */
const LATEX_NUMBER = String.raw`(?:\d+(?:\.\d*)?|\.\d+)`;
const RELATIVE_WIDTH_RE = new RegExp(String.raw`^(${LATEX_NUMBER})\s*\\(?:textwidth|linewidth|columnwidth)$`);
const ABSOLUTE_WIDTH_RE = new RegExp(String.raw`^(${LATEX_NUMBER})\s*(cm|mm|in|pt|px|em|ex)$`);

/**
 * Match `\begin{figure}` / `\begin{figure*}` … `\end{figure}` blocks.
 * Returns a fresh global regex on every call so callers never share
 * `lastIndex` state. The pattern has no capture groups; the replace callback
 * in `translateRawLatexFigures` relies on that.
 */
function makeRawFigureRegex(): RegExp {
  return /\\begin\{figure\*?\}(?:\[[^\]]*\])?[\s\S]*?\\end\{figure\*?\}/g;
}

/**
 * Convert a LaTeX width spec to a markdown image attribute value.
 *  - `0.8\textwidth` → `80%`
 *  - `\linewidth` → `100%`
 *  - `8cm`, `2in`, `12pt` → kept (inner whitespace removed)
 * Returns null for anything we don't translate (block stays "exotic"),
 * including malformed numbers and relative widths that round to 0%.
 */
function convertLatexWidth(raw: string): string | null {
  const trimmed = raw.trim();

  // Coefficient × relative length
  const rel = RELATIVE_WIDTH_RE.exec(trimmed);
  if (rel) {
    const pct = Math.round(parseFloat(rel[1] ?? '') * 100);
    return Number.isFinite(pct) && pct > 0 ? `${pct}%` : null;
  }

  // Bare relative length
  if (/^\\(?:textwidth|linewidth|columnwidth)$/.test(trimmed)) return '100%';

  // Absolute units
  const abs = ABSOLUTE_WIDTH_RE.exec(trimmed);
  return abs ? `${abs[1]}${abs[2]}` : null;
}

/** Index of the `]` closing the `[` at `open` (brace-aware), or -1. */
function findClosingBracket(text: string, open: number): number {
  let depth = 0;
  for (let j = open + 1; j < text.length; j++) {
    const ch = text[j];
    if (ch === '\\') {
      j++; // skip the escaped character
      continue;
    }
    if (ch === '{') depth++;
    else if (ch === '}') depth--;
    else if (ch === ']' && depth <= 0) return j;
  }
  return -1;
}

/**
 * Extract a balanced `{...}` argument that follows `command` in `text`.
 *
 * Occurrences that are only the prefix of a longer command name
 * (`\captionsetup` when looking for `\caption`) are skipped. A `*` suffix and
 * one optional `[...]` argument between the command and the brace are
 * skipped, so `\caption[short]{long}` yields `long`. Backslash-escaped braces
 * inside the argument do not count towards nesting.
 *
 * @returns The argument text without the outer braces, or null when the
 *   command is absent, never followed by `{`, or its braces are unbalanced
 */
function extractBracedArg(text: string, command: string): string | null {
  if (command.length === 0) return null;

  const isControlWord = /[A-Za-z]$/.test(command);
  const skipSpace = (pos: number): number => {
    while (pos < text.length && /\s/.test(text[pos] ?? '')) pos++;
    return pos;
  };

  let searchFrom = 0;
  for (;;) {
    const idx = text.indexOf(command, searchFrom);
    if (idx === -1) return null;

    let i = idx + command.length;
    searchFrom = i;

    // `\caption` must not match inside `\captionsetup`, `\captionof`, ...
    if (isControlWord && /[A-Za-z@]/.test(text[i] ?? '')) continue;

    if (text[i] === '*') i++;
    i = skipSpace(i);

    if (text[i] === '[') {
      const close = findClosingBracket(text, i);
      if (close === -1) continue;
      i = skipSpace(close + 1);
    }
    if (text[i] !== '{') continue;

    i++;
    const start = i;
    let depth = 1;
    while (i < text.length) {
      const ch = text[i];
      if (ch === '\\' && i + 1 < text.length) {
        i += 2;
        continue;
      }
      if (ch === '{') depth++;
      else if (ch === '}' && --depth === 0) return text.slice(start, i);
      i++;
    }
    return null; // unbalanced: nothing usable after this point
  }
}

/** True if a `\begin{figure}` block contains features we don't auto-translate. */
function isExoticFigureBlock(block: string): boolean {
  if (/\\subfloat\b/.test(block) || /\\rotatebox\b/.test(block)) return true;

  const includeCount = (block.match(/\\includegraphics\b/g) || []).length;
  if (includeCount !== 1) return true;

  const inc = block.match(INCLUDEGRAPHICS_RE);
  if (!inc) return true;

  const widthMatch = (inc[1] || '').match(WIDTH_OPTION_RE);
  return Boolean(widthMatch && !convertLatexWidth(widthMatch[1] ?? ''));
}

/**
 * Find raw LaTeX figure blocks containing `\includegraphics` in markdown.
 * `file`, if given, is attached to each result. `line` is 1-based within the
 * supplied content (the line where `\begin{figure}` sits).
 */
export function detectRawLatexFigures(content: string, file?: string): RawLatexFigure[] {
  const figures: RawLatexFigure[] = [];
  for (const m of content.matchAll(makeRawFigureRegex())) {
    const block = m[0];
    if (!block.includes('\\includegraphics')) continue;
    const line = content.slice(0, m.index ?? 0).split(/\r?\n/).length;
    figures.push({ file, line, block, exotic: isExoticFigureBlock(block) });
  }
  return figures;
}

/**
 * Newlines to put in front of a translated image so it starts its own
 * paragraph, given the text that precedes it.
 */
function paragraphLead(before: string): string {
  if (/^\s*$/.test(before)) return '';
  if (/\n[ \t]*\r?\n[ \t]*$/.test(before)) return '';
  return /\n[ \t]*$/.test(before) ? '\n' : '\n\n';
}

/**
 * Newlines to put after a translated image so it ends its own paragraph,
 * given the text that follows it.
 */
function paragraphTrail(after: string): string {
  if (/^\s*$/.test(after)) return '';
  if (/^[ \t]*\r?\n[ \t]*\r?\n/.test(after)) return '';
  return /^[ \t]*\r?\n/.test(after) ? '\n' : '\n\n';
}

/**
 * Translate the 80% case: single `\includegraphics` figure with optional
 * `\caption{...}` and `\label{...}`, wrapped in `\begin{figure}...\end{figure}`,
 * to portable `![caption](path){#fig:label width=N%}` markdown. Exotic blocks
 * (see `isExoticFigureBlock`) are left untouched.
 *
 * Details of the emitted markdown:
 *  - The attribute block follows the image with NO space in between; pandoc
 *    only attaches attributes that directly follow the closing parenthesis.
 *  - The image is separated from surrounding text by blank lines (added only
 *    where missing), because pandoc renders an image as a captioned figure
 *    only when it is alone in its paragraph.
 *  - The caption has any nested `\label{...}` removed and runs of whitespace
 *    (including line breaks) collapsed to single spaces.
 *  - Labels without a `prefix:` get `fig:` prepended.
 *
 * @returns The rewritten content and the number of blocks translated
 */
export function translateRawLatexFigures(content: string): { translated: string; translatedCount: number } {
  let translatedCount = 0;

  const translated = content.replace(
    makeRawFigureRegex(),
    (block: string, offset: number, whole: string) => {
      if (!block.includes('\\includegraphics')) return block;
      if (isExoticFigureBlock(block)) return block;

      const inc = block.match(INCLUDEGRAPHICS_RE);
      if (!inc) return block;

      const imgPath = (inc[2] ?? '').trim();

      let width: string | undefined;
      const widthMatch = (inc[1] || '').match(WIDTH_OPTION_RE);
      if (widthMatch) {
        const converted = convertLatexWidth(widthMatch[1] ?? '');
        if (!converted) return block; // already filtered by isExoticFigureBlock, defensive
        width = converted;
      }

      const caption = (extractBracedArg(block, '\\caption') ?? '')
        .replace(/\\label\s*\{[^}]*\}/g, '')
        .replace(/\s+/g, ' ')
        .trim();

      const attrs: string[] = [];
      const labelRaw = extractBracedArg(block, '\\label');
      if (labelRaw) {
        const label = labelRaw.trim();
        attrs.push(`#${/^[a-z]+:/i.test(label) ? label : `fig:${label}`}`);
      }
      if (width) attrs.push(`width=${width}`);

      translatedCount++;

      const attrStr = attrs.length > 0 ? `{${attrs.join(' ')}}` : '';
      const image = `![${caption}](${imgPath})${attrStr}`;

      const lead = paragraphLead(whole.slice(0, offset));
      const trail = paragraphTrail(whole.slice(offset + block.length));
      return `${lead}${image}${trail}`;
    }
  );

  return { translated, translatedCount };
}
` {${attrs.join(' ')}}` : ''; return `![${caption}](${imgPath})${attrStr}`; }); return { translated, translatedCount }; } /** * Format the warning surfaced for raw LaTeX figure blocks that won't render * in docx. `translateEnabled` reflects whether auto-translate ran (true = the * listed blocks are exotic leftovers; false = no translation was attempted). */ function formatRawLatexFigureWarning(figs: RawLatexFigure[], translateEnabled: boolean): string { const reason = translateEnabled ? 'too complex to auto-translate' : 'translateRawFigures: false'; const lines: string[] = [ `${figs.length} raw LaTeX figure block(s) won't render in docx (${reason}).`, ]; for (const f of figs) { const loc = f.file ? `${f.file}:${f.line}` : `line ${f.line}`; const pathMatch = f.block.match(/\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}/); const pathInfo = pathMatch ? ` ${pathMatch[1]!.trim()}` : ''; lines.push(` ${loc}${pathInfo}`); } lines.push(' Hint: use ![caption](path){#fig:label width=80%} for format-portable figures,'); lines.push(' or pass --pandoc-arg=--lua-filter= to translate them yourself.'); return lines.join('\n'); } /** * Walk section files and gather a warning for any raw LaTeX figure blocks that * won't survive the docx build. Returns null when there's nothing to warn about. */ export function collectRawLatexFigureWarning(directory: string, config: BuildConfig): string | null { const translateEnabled = config.docx?.translateRawFigures !== false; const all: RawLatexFigure[] = []; for (const section of findSections(directory, config.sections)) { const sectionPath = path.join(directory, section); if (!fs.existsSync(sectionPath)) continue; try { const content = fs.readFileSync(sectionPath, 'utf-8'); const figs = detectRawLatexFigures(content, section); for (const f of figs) { // When auto-translate is on, non-exotic blocks get rewritten cleanly — // only the exotic leftovers need warning. When opted out, everything // is at risk and we warn about every block. 
if (translateEnabled && !f.exotic) continue; all.push(f); } } catch { // ignore unreadable sections } } if (all.length === 0) return null; return formatRawLatexFigureWarning(all, translateEnabled); } /** * Build pandoc arguments for format. * * Returns only the built-in args derived from config. Passthrough args * (config.pandocArgs, config[format].pandocArgs, CLI --pandoc-arg) are * appended later in runPandoc so they win against pptx/crossref defaults * added there. */ export function buildPandocArgs(format: string, config: BuildConfig, outputPath: string): string[] { const args: string[] = []; // Output format if (format === 'tex') { args.push('-t', 'latex'); if (config.tex.standalone) { args.push('-s'); } } else if (format === 'pdf') { args.push('-t', 'pdf'); } else if (format === 'docx') { args.push('-t', 'docx'); } else if (format === 'beamer') { args.push('-t', 'beamer'); } else if (format === 'pptx') { args.push('-t', 'pptx'); } // Output file. runPandoc sets cwd to the project directory and passes a // path relative to that cwd; passing it through here unchanged lets pandoc // write to subdirectories like output/.. 
args.push('-o', outputPath); // Crossref filter (if available) - skip for slides if (hasPandocCrossref() && format !== 'beamer' && format !== 'pptx') { args.push('--filter', 'pandoc-crossref'); } // Bibliography if (config.bibliography) { args.push('--citeproc'); } // Format-specific options if (format === 'pdf') { if (config.pdf.template) { args.push('--template', config.pdf.template); } if (config.pdf.engine) { args.push(`--pdf-engine=${config.pdf.engine}`); } if (config.pdf.mainfont) { args.push('-V', `mainfont=${config.pdf.mainfont}`); } if (config.pdf.sansfont) { args.push('-V', `sansfont=${config.pdf.sansfont}`); } if (config.pdf.monofont) { args.push('-V', `monofont=${config.pdf.monofont}`); } args.push('-V', `documentclass=${config.pdf.documentclass}`); args.push('-V', `fontsize=${config.pdf.fontsize}`); args.push('-V', `geometry:${config.pdf.geometry}`); if (config.pdf.headerIncludes) { args.push('-H', config.pdf.headerIncludes); } if (config.pdf.linestretch !== 1) { args.push('-V', `linestretch=${config.pdf.linestretch}`); } if (config.pdf.numbersections) { args.push('--number-sections'); } if (config.pdf.toc) { args.push('--toc'); } } else if (format === 'docx') { if (config.docx.reference) { args.push('--reference-doc', config.docx.reference); } if (config.docx.toc) { args.push('--toc'); } } else if (format === 'beamer') { // Beamer slide options const beamer = config.beamer || {}; if (beamer.theme) { args.push('-V', `theme=${beamer.theme}`); } if (beamer.colortheme) { args.push('-V', `colortheme=${beamer.colortheme}`); } if (beamer.fonttheme) { args.push('-V', `fonttheme=${beamer.fonttheme}`); } if (beamer.aspectratio) { args.push('-V', `aspectratio=${beamer.aspectratio}`); } if (beamer.navigation) { args.push('-V', `navigation=${beamer.navigation}`); } // Speaker notes - default to 'show' which creates presenter view PDF // Options: 'show' (dual screen), 'only' (notes only), 'hide' (no notes), false (disabled) const notesMode = beamer.notes !== 
undefined ? beamer.notes : 'show'; if (notesMode && notesMode !== 'hide') { args.push('-V', `classoption=notes=${notesMode}`); } // Fit images within slide bounds (default: true) if (beamer.fit_images !== false) { const fitImagesHeader = `\\makeatletter \\def\\maxwidth{\\ifdim\\Gin@nat@width>\\linewidth\\linewidth\\else\\Gin@nat@width\\fi} \\def\\maxheight{\\ifdim\\Gin@nat@height>0.75\\textheight 0.75\\textheight\\else\\Gin@nat@height\\fi} \\makeatother \\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}`; args.push('-V', `header-includes=${fitImagesHeader}`); } // Slides need standalone args.push('-s'); } else if (format === 'pptx') { // PowerPoint options - handled separately in preparePptxTemplate // Reference doc is set by caller after template generation } return args; } /** * Collect passthrough pandoc args for a format in the canonical order: * top-level config → format-specific config → CLI extras. Later wins for * repeated flags. */ export function collectPandocPassthroughArgs( format: string, config: BuildConfig, extraArgs: string[] = [] ): string[] { const out: string[] = []; if (config.pandocArgs && config.pandocArgs.length > 0) { out.push(...config.pandocArgs); } const formatConfig = (config as unknown as Record)[format]; if (formatConfig?.pandocArgs && formatConfig.pandocArgs.length > 0) { out.push(...formatConfig.pandocArgs); } if (extraArgs.length > 0) { out.push(...extraArgs); } return out; } /** * Write crossref.yaml if needed */ function ensureCrossrefConfig(directory: string, config: BuildConfig): void { const crossrefPath = path.join(directory, 'crossref.yaml'); if (!fs.existsSync(crossrefPath) && hasPandocCrossref()) { fs.writeFileSync(crossrefPath, YAML.stringify(config.crossref), 'utf-8'); } } /** * Get install instructions for missing dependency */ function getInstallInstructions(tool: string): string { const instructions: Record = { pandoc: 'https://pandoc.org/installing.html', latex: 'https://www.latex-project.org/get/', 
}; return instructions[tool] || 'Check documentation'; } /** * Resolve the absolute directory where final outputs should land. * Honors config.outputDir; falls back to the project directory when null/empty. */ export function resolveOutputDir(directory: string, config: BuildConfig): string { const out = config.outputDir; if (!out) return directory; return path.isAbsolute(out) ? out : path.join(directory, out); } /** File extension (with leading dot) for each supported pandoc format. */ const FORMAT_EXTENSIONS: Record = { tex: '.tex', pdf: '.pdf', docx: '.docx', beamer: '.pdf', pptx: '.pptx', }; /** Get file extension for a format, defaulting to `.pdf`. */ export function getFormatExtension(format: string): string { return FORMAT_EXTENSIONS[format] ?? '.pdf'; } /** * Slugify a title for use as a default output filename. Lowercases, replaces * non-alphanumeric runs with `-`, and truncates at the last `-` boundary * at-or-before MAX_TITLE_FILENAME_LENGTH so words stay whole (the old blind * `.slice` cut mid-word). */ export function slugifyTitle(title: string): string { if (!title) return 'paper'; const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, ''); if (!slug) return 'paper'; if (slug.length <= MAX_TITLE_FILENAME_LENGTH) return slug; const cut = slug.slice(0, MAX_TITLE_FILENAME_LENGTH); const lastDash = cut.lastIndexOf('-'); // Only truncate at a hyphen if it leaves a reasonable amount of content. // Otherwise hard-cut (handles degenerate titles with no spaces at all). if (lastDash >= MAX_TITLE_FILENAME_LENGTH / 2) { return slug.slice(0, lastDash); } return cut; } /** * Ensure `name` ends with `ext` (case-insensitive). If the user already supplied * the correct extension, return unchanged; if they supplied none or a different * one, append the format's canonical extension. * * Different-extension case (e.g. `output.docx` when building tex): we append * rather than replace, since stripping looks like an unsafe guess. 
The result * `output.docx.tex` is loud enough to flag the misconfiguration. */ function ensureExtension(name: string, ext: string): string { if (name.toLowerCase().endsWith(ext.toLowerCase())) return name; return name + ext; } /** * Resolve the final output path for a build. * * Priority: `options.outputPath` (internal force) > `cliOverride` (-o flag) > * `config.output[format]` > slugified title fallback. * * Relative paths from `cliOverride`/`config.output` resolve under outputDir; * absolute paths bypass outputDir. The fallback path always lives under * outputDir. * * @param suffix - Appended before the extension (e.g. "-changes", "-slides"). * Suppressed when user supplied an explicit name via CLI or * config — they pick their own suffix. */ export function resolveOutputPath( directory: string, config: BuildConfig, format: string, options: { cliOverride?: string; suffix?: string } = {} ): string { const { cliOverride, suffix = '' } = options; const ext = getFormatExtension(format); const explicit = cliOverride ?? config.output?.[format]; if (explicit) { const baseDir = path.isAbsolute(explicit) ? path.dirname(explicit) : resolveOutputDir(directory, config); const baseName = path.basename(explicit); const stem = baseName.replace(/\.[^./\\]+$/, ''); return path.join(baseDir, ensureExtension(`${stem}${suffix}`, ext)); } const slug = slugifyTitle(config.title); return path.join(resolveOutputDir(directory, config), `${slug}${suffix}${ext}`); } /** * Run pandoc build */ export async function runPandoc( inputPath: string, format: string, config: BuildConfig, options: BuildOptions = {} ): Promise { const directory = path.dirname(inputPath); // outputPath (internal force) wins over the resolver. For beamer, we keep // the `-slides` suffix on the slug fallback to distinguish from a regular // PDF build; when the user supplies an explicit name, they pick their own. const suffix = format === 'beamer' ? '-slides' : ''; const outputPath = options.outputPath ?? 
resolveOutputPath(directory, config, format, { cliOverride: options.output, suffix, }); if (!options.outputPath) { const outDir = path.dirname(outputPath); if (!fs.existsSync(outDir)) { fs.mkdirSync(outDir, { recursive: true }); } } // Ensure crossref.yaml exists ensureCrossrefConfig(directory, config); // Pandoc runs with cwd = directory, so pass the output path relative to it. const args = buildPandocArgs(format, config, path.relative(directory, outputPath) || path.basename(outputPath)); // Handle PPTX reference template and themes let pptxMediaDir: string | null = null; if (format === 'pptx') { const pptx = config.pptx || {}; // Determine media directory (default: pptx/media or slides/media) let mediaDir = pptx.media; if (!mediaDir) { if (fs.existsSync(path.join(directory, 'pptx', 'media'))) { mediaDir = path.join(directory, 'pptx', 'media'); } else if (fs.existsSync(path.join(directory, 'slides', 'media'))) { mediaDir = path.join(directory, 'slides', 'media'); } } else if (!path.isAbsolute(mediaDir)) { mediaDir = path.join(directory, mediaDir); } pptxMediaDir = mediaDir || null; // Determine reference doc: custom reference overrides theme let referenceDoc: string | null = null; if (pptx.reference && fs.existsSync(path.join(directory, pptx.reference))) { // Custom reference doc takes precedence referenceDoc = path.join(directory, pptx.reference); } else { // Use built-in theme (default: 'default') const themeName = pptx.theme || 'default'; const themePath = getThemePath(themeName); if (themePath && fs.existsSync(themePath)) { referenceDoc = themePath; } } if (referenceDoc) { args.push('--reference-doc', referenceDoc); } // Add color filter for PPTX (handles [text]{color=#RRGGBB} syntax). // fileURLToPath handles Windows paths with spaces — the old // `new URL(...).pathname` returned URL-encoded `%20` and fs.existsSync // silently failed. 
const colorFilterPath = path.join( path.dirname(fileURLToPath(import.meta.url)), 'pptx-color-filter.lua' ); if (fs.existsSync(colorFilterPath)) { args.push('--lua-filter', colorFilterPath); } } // Wire placeholder macros (built-in \tofill plus user-declared entries). // - docx/html: lua filter expands \name{X} to format-specific raw runs. // - pdf/tex/beamer: inject a \providecommand preamble so LaTeX renders it // directly. `\providecommand` is non-clobbering, so a user who already // has `\providecommand{\tofill}{...}` in their own header keeps theirs. // // Sidecar path is passed to the lua filter via DOCREV_MACROS_FILE in the // child env (not pandoc metadata) because pandoc walks RawInline/RawBlock // BEFORE Meta — by the time a Meta handler could read the path, the inline // expansion has already happened. const macroTempFiles: string[] = []; let macroEnvFile: string | null = null; const macros = mergeMacros((config as { macros?: unknown }).macros); if (macros.length > 0) { if (format === 'docx' || format === 'html' || format === 'html5' || format === 'html4') { const sidecarPath = writeMacrosSidecar(directory, macros); macroTempFiles.push(sidecarPath); macroEnvFile = sidecarPath; const filterPath = getMacroFilterPath(); if (fs.existsSync(filterPath)) { args.push('--lua-filter', filterPath); } } else if (format === 'pdf' || format === 'tex' || format === 'beamer') { const preamble = generateLatexPreamble(macros); const preamblePath = path.join(directory, '.macros.tex'); fs.writeFileSync(preamblePath, preamble, 'utf-8'); macroTempFiles.push(preamblePath); args.push('-H', path.basename(preamblePath)); } } // Add crossref metadata file if exists (skip for slides - they don't use crossref) if (format !== 'beamer' && format !== 'pptx') { const crossrefPath = path.join(directory, 'crossref.yaml'); if (fs.existsSync(crossrefPath) && hasPandocCrossref()) { // Use basename since we set cwd to directory args.push('--metadata-file', 'crossref.yaml'); } } // Passthrough 
args go last so they win against built-in defaults. args.push(...collectPandocPassthroughArgs(format, config, options.pandocArgs)); // Input file (use basename since we set cwd to directory) args.push(path.basename(inputPath)); if (options.verbose) { const quoted = args.map(a => /[\s"'$`]/.test(a) ? `"${a.replace(/"/g, '\\"')}"` : a).join(' '); console.error(`[pandoc ${format}] (cwd: ${directory})`); console.error(` pandoc ${quoted}`); } return new Promise((resolve) => { const pandocEnv: NodeJS.ProcessEnv = { ...process.env }; if (macroEnvFile) { pandocEnv.DOCREV_MACROS_FILE = macroEnvFile; } const pandoc: ChildProcess = spawn('pandoc', args, { cwd: directory, stdio: ['ignore', 'pipe', 'pipe'], env: pandocEnv, }); let stderr = ''; pandoc.stderr?.on('data', (data) => { stderr += data.toString(); }); const cleanupMacroTempFiles = (): void => { for (const tmp of macroTempFiles) { try { fs.unlinkSync(tmp); } catch { // ignore — best-effort cleanup } } }; pandoc.on('close', async (code) => { cleanupMacroTempFiles(); if (code === 0) { // For PPTX, post-process to add slide numbers, buildup colors, and logos if (format === 'pptx') { try { // Inject slide numbers into content slides only await injectSlideNumbers(outputPath); } catch (e) { // Slide number injection failed but output was created } try { // Apply colors (default text color, title color, buildup greying) const pptxConfig = config.pptx || {}; const colorsConfig = pptxConfig.colors || {}; const buildupConfig = pptxConfig.buildup || {}; // Merge colors and buildup config for applyBuildupColors const colorConfig = { default: colorsConfig.default, title: colorsConfig.title, grey: buildupConfig.grey, accent: buildupConfig.accent, enabled: buildupConfig.enabled }; await applyBuildupColors(outputPath, colorConfig); } catch (e) { // Color application failed but output was created } // Inject logos into cover slide (if media dir configured) if (pptxMediaDir) { try { await injectMediaIntoPptx(outputPath, pptxMediaDir); } 
catch (e) { // Logo injection failed but output was created } } } // Run user postprocess scripts const postResult = await runPostprocess(outputPath, format, config as unknown as Parameters[2], options); if (!postResult.success && options.verbose) { console.error(`Postprocess warning: ${postResult.error}`); } resolve({ outputPath, success: true }); } else { resolve({ outputPath, success: false, error: stderr || `Exit code ${code}` }); } }); pandoc.on('error', (err) => { cleanupMacroTempFiles(); resolve({ outputPath, success: false, error: err.message }); }); }); } /** * Full build pipeline */ export async function build( directory: string, formats: string[] = ['pdf', 'docx'], options: BuildOptions = {} ): Promise { const warnings: string[] = []; let forwardRefsResolved = 0; // Check pandoc if (!hasPandoc()) { const instruction = getInstallInstructions('pandoc'); throw new Error(`Pandoc not found. Install with: ${instruction}\nOr run: rev doctor`); } // Check LaTeX if PDF is requested if ((formats.includes('pdf') || formats.includes('all')) && !hasLatex()) { warnings.push(`LaTeX not found - PDF generation may fail. 
Install with: ${getInstallInstructions('latex')}`); } // Check pandoc-crossref if (!hasPandocCrossref()) { warnings.push('pandoc-crossref not found - figure/table numbering will not work'); } // Load config (use passed config if provided, otherwise load from file) const config = options.config || loadConfig(directory); // Combine sections → paper.md const buildOptions: CombineOptions = { ...options }; const paperPath = combineSections(directory, config, buildOptions); forwardRefsResolved = buildOptions._forwardRefsResolved || 0; const refsAutoInjected = buildOptions._refsAutoInjected || false; // Expand 'all' to all formats if (formats.includes('all')) { formats = ['pdf', 'docx', 'tex']; } // Build and save image registry when DOCX is being built // This allows import to restore proper image syntax from Word documents if (formats.includes('docx')) { const paperContent = fs.readFileSync(paperPath, 'utf-8'); const crossrefReg = buildRegistry(directory, config.sections); const imageReg = buildImageRegistry(paperContent, crossrefReg as any); if ((imageReg as any).figures?.length > 0) { writeImageRegistry(directory, imageReg); } // Warn about raw LaTeX figure blocks that won't render in docx (pandoc // drops them silently). With auto-translate on (default), this surfaces // only the exotic leftovers; with it off, every block. 
const rawFigWarning = collectRawLatexFigureWarning(directory, config); if (rawFigWarning) warnings.push(rawFigWarning); } const results: BuildResult[] = []; for (const format of formats) { // Prepare format-specific version const preparedPath = prepareForFormat(paperPath, format, config, options); // Run pandoc const result = await runPandoc(preparedPath, format, config, options); results.push({ format, ...result }); // Clean up temp file try { fs.unlinkSync(preparedPath); } catch { // Ignore cleanup errors } } return { results, paperPath, warnings, forwardRefsResolved, refsAutoInjected }; } /** * Get build status summary */ export function formatBuildResults(results: BuildResult[]): string { const lines: string[] = []; for (const r of results) { if (r.success) { lines.push(` ${r.format.toUpperCase()}: ${path.basename(r.outputPath!)}`); } else { lines.push(` ${r.format.toUpperCase()}: FAILED - ${r.error}`); } } return lines.join('\n'); }