#!/usr/bin/env python3
"""
Batch Markdown to PDF Converter with Mermaid Support

Processes all .md files in the current directory that don't have
corresponding PDFs. Each file is first rendered to an intermediate
``<name>.html`` next to it, then printed to ``<name>.pdf`` with headless
Chromium (Playwright).
"""

import asyncio
import glob
import html
import importlib
import os
import subprocess
import sys
from pathlib import Path

# NOTE(review): the markup of the original HTML template was not recoverable
# from this file; the Mermaid loader below is a minimal reconstruction.
# Confirm the Mermaid version/CDN (or vendor the script locally) before
# relying on it, since rendering diagrams requires network access.
MERMAID_SCRIPT_URL = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"

# Packages that must be importable before any conversion can run.
REQUIRED_PACKAGES = ("playwright", "markdown")


def install_dependencies():
    """Install required dependencies.

    Probes for the ``playwright`` and ``markdown`` packages and, if either
    import fails, installs both with pip and downloads Playwright's Chromium
    build.

    Returns:
        True if the packages were already importable or were installed
        successfully, False if an installation command failed.
    """
    try:
        # Imported only to probe availability; the names are not used here.
        import playwright  # noqa: F401
        import markdown  # noqa: F401
        return True
    except ImportError:
        print("Installing required packages...")

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", *REQUIRED_PACKAGES]
        )
        subprocess.check_call(
            [sys.executable, "-m", "playwright", "install", "chromium"]
        )
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Failed to install dependencies: {e}")
        return False

    # Packages installed while the interpreter is running may be invisible to
    # the import system until its finder caches are invalidated.
    importlib.invalidate_caches()
    return True


def build_html_document(title, body_html):
    """Wrap an HTML fragment in a complete, UTF-8 declared HTML5 document.

    Args:
        title: Plain-text document title; it is HTML-escaped before use.
        body_html: Already-rendered HTML placed verbatim inside ``<body>``.

    Returns:
        The full document as a string, including a script tag that loads
        Mermaid from ``MERMAID_SCRIPT_URL`` and lets it render on page load.
    """
    safe_title = html.escape(title)
    # Literal braces in the inline script are doubled because this is an
    # f-string.
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title}</title>
<script src="{MERMAID_SCRIPT_URL}"></script>
<script>
if (window.mermaid) {{
    mermaid.initialize({{ startOnLoad: true }});
}}
</script>
</head>
<body>
{body_html}
</body>
</html>
"""


def convert_markdown_to_html(md_file, html_file):
    """Convert Markdown to HTML with Mermaid support.

    Args:
        md_file: Path of the UTF-8 Markdown file to read.
        html_file: Path of the HTML file to write (overwritten if present).

    Returns:
        True on success, False if reading, converting or writing failed
        (the error is printed, not raised).
    """
    try:
        import markdown

        # Read markdown content
        with open(md_file, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Basic markdown conversion
        md = markdown.Markdown(extensions=['extra', 'codehilite', 'toc'])
        html_content = md.convert(md_content)

        # Create full HTML document; the file stem doubles as the title.
        full_html = build_html_document(Path(md_file).stem, html_content)

        # Write HTML file
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(full_html)

        print(f" ✅ Converted {md_file} to HTML")
        return True
    except Exception as e:
        # Per-file boundary: report and let the batch continue.
        print(f" ❌ Error converting {md_file}: {e}")
        return False


async def _wait_for_mermaid(page):
    """Give Mermaid diagrams on *page* time to render; never raises."""
    try:
        mermaid_count = await page.evaluate(
            "document.querySelectorAll('.mermaid').length"
        )
        if mermaid_count > 0:
            print(f" 🔍 Found {mermaid_count} Mermaid diagram(s), waiting for rendering...")
            # Wait for SVG elements to appear (Mermaid renders as SVG)
            await page.wait_for_selector(
                'svg[id^="mermaid"], .mermaid svg', timeout=15000
            )
            # The selector resolves on the first SVG; pause so the remaining
            # diagrams can finish as well.
            await page.wait_for_timeout(2000)
            print(" ✅ All Mermaid diagrams rendered successfully")
        else:
            print(" ℹ️ No Mermaid diagrams detected")
    except Exception as e:
        # A diagram problem should not prevent the PDF from being produced.
        print(f" ⚠️ Mermaid rendering issue: {e}")


async def convert_html_to_pdf(html_file, pdf_file):
    """Convert HTML to PDF using Playwright.

    Args:
        html_file: Path of the HTML file to load in headless Chromium.
        pdf_file: Path of the PDF to write.

    Returns:
        True if the PDF was generated, False otherwise (the error is
        printed, not raised).
    """
    try:
        from playwright.async_api import async_playwright

        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.new_page()

                # as_uri() percent-encodes the path and produces a correct
                # file URL on every platform, unlike "file://" + path.
                await page.goto(Path(html_file).resolve().as_uri())

                # Wait for initial content load
                await page.wait_for_timeout(2000)

                await _wait_for_mermaid(page)

                # Generate PDF with page break support
                await page.pdf(
                    path=pdf_file,
                    format='A4',
                    margin={
                        'top': '0.75in',
                        'right': '0.75in',
                        'bottom': '0.75in',
                        'left': '0.75in',
                    },
                    print_background=True,
                    prefer_css_page_size=True,
                    display_header_footer=False,
                )
            finally:
                # Close the browser even when navigation or printing fails.
                await browser.close()

        print(f" ✅ Generated PDF: {pdf_file}")
        return True
    except Exception as e:
        print(f" ❌ Error generating PDF: {e}")
        return False


def find_files_to_convert():
    """Find all .md files that don't have corresponding PDFs.

    Looks only at the current working directory.

    Returns:
        A sorted list of base names (file names without the ``.md``
        extension) for which ``<base>.pdf`` does not exist.
    """
    # glob returns files in arbitrary order; sort for a reproducible batch.
    stems = sorted(Path(md_file).stem for md_file in glob.glob("*.md"))
    return [stem for stem in stems if not os.path.exists(f"{stem}.pdf")]


def main():
    """Main batch conversion process.

    Returns:
        True if there was nothing to do or at least one file was converted,
        False if dependencies could not be installed or every conversion
        failed.
    """
    if not install_dependencies():
        print("❌ Could not install required dependencies")
        return False

    files_to_convert = find_files_to_convert()

    if not files_to_convert:
        print("✅ All Markdown files already have corresponding PDFs!")
        return True

    print(f"🔄 Batch converting {len(files_to_convert)} Markdown files to PDF with Mermaid diagrams...")
    print("=" * 70)
    print(f"Files to convert: {', '.join(files_to_convert)}")
    print("=" * 70)

    success_count = 0

    for file_base in files_to_convert:
        md_file = f"{file_base}.md"
        html_file = f"{file_base}.html"
        pdf_file = f"{file_base}.pdf"

        print(f"\n📄 Processing {md_file}...")

        # Convert MD to HTML, then HTML to PDF; the intermediate HTML file
        # is left on disk.
        if not convert_markdown_to_html(md_file, html_file):
            continue
        if not asyncio.run(convert_html_to_pdf(html_file, pdf_file)):
            continue

        success_count += 1
        size_kb = os.path.getsize(pdf_file) / 1024
        print(f" 📊 PDF size: {size_kb:.1f} KB")

    print("\n" + "=" * 70)
    print("🎯 Batch conversion complete!")
    print(f"✅ Successfully converted {success_count}/{len(files_to_convert)} files")
    # Only claim full coverage when every file actually converted.
    if success_count == len(files_to_convert):
        print("📋 All Markdown files now have PDF versions with:")
        print(" • Properly rendered Mermaid diagrams")
        print(" • Professional A4 formatting (0.75\" margins)")
        print(" • Print-optimized styling")
        print(" • Page break optimization")

    return success_count > 0


if __name__ == "__main__":
    # Propagate failure to the shell instead of always exiting with 0.
    sys.exit(0 if main() else 1)