#!/usr/bin/env python3
"""
Custom PDF conversion script that handles manual page break markers.

Pipeline: Markdown -> HTML (pandoc) -> page-break/styling pass -> PDF
(wkhtmltopdf). Both external programs must be available on PATH.
"""

import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Optional, Union

# A block element (<p> or <div>, with any attributes) whose only content is
# the text "PAGE BREAK", together with the whitespace around it.
# NOTE(review): the exact marker markup of the original pattern was not
# recoverable from the copy under review; confirm this matches the markers
# actually used in your Markdown sources.
_PAGE_BREAK_RE = re.compile(
    r'\s*<(p|div)\b[^>]*>\s*PAGE\s+BREAK\s*</\1>\s*',
    re.IGNORECASE,
)

# The inline style makes the break work even if the stylesheet below is not
# injected; the class lets the stylesheet refine it.
_PAGE_BREAK_HTML = (
    '\n<div class="page-break" style="page-break-after: always;"></div>\n'
)

# NOTE(review): the original stylesheet was not recoverable; this is a
# minimal print stylesheet covering page breaks only. Extend as needed.
_PDF_STYLES = """<style type="text/css">
.page-break { page-break-after: always; break-after: page; height: 0; margin: 0; padding: 0; border: 0; }
h1, h2, h3, h4, h5, h6 { page-break-after: avoid; }
pre, blockquote, table, figure, img { page-break-inside: avoid; }
</style>
"""

# Opening tags may carry attributes (pandoc emits <html xmlns=... lang=...>),
# so match them with a pattern rather than a literal string. The \b keeps
# <head> from matching <header>.
_HEAD_OPEN_RE = re.compile(r'<head\b[^>]*>', re.IGNORECASE)
_HTML_OPEN_RE = re.compile(r'<html\b[^>]*>', re.IGNORECASE)


def process_manual_breaks(html_content: str) -> str:
    """Convert manual page break markers to proper CSS page breaks.

    Args:
        html_content: HTML text, typically pandoc output.

    Returns:
        The HTML with every marker element (and the whitespace around it)
        replaced by an empty ``<div class="page-break">``. Text that merely
        mentions "page break" inside a longer paragraph is left untouched.
    """
    return _PAGE_BREAK_RE.sub(_PAGE_BREAK_HTML, html_content)


def add_pdf_styles(html_content: str) -> str:
    """Inject the PDF-specific stylesheet into the document head.

    The stylesheet is inserted right after the first opening ``<head>`` tag.
    If there is no head, one is created after the opening ``<html>`` tag; if
    there is no ``<html>`` tag either, a head is prepended to the content.

    Args:
        html_content: HTML text to augment.

    Returns:
        The HTML with exactly one copy of the stylesheet added.
    """
    head_tag = _HEAD_OPEN_RE.search(html_content)
    if head_tag:
        cut = head_tag.end()
        return html_content[:cut] + _PDF_STYLES + html_content[cut:]

    html_tag = _HTML_OPEN_RE.search(html_content)
    if html_tag:
        cut = html_tag.end()
        return (
            f'{html_content[:cut]}<head>{_PDF_STYLES}</head>'
            f'{html_content[cut:]}'
        )

    return f'<head>{_PDF_STYLES}</head>{html_content}'


def convert_md_to_pdf(
    input_file: Union[str, Path],
    output_file: Optional[Union[str, Path]] = None,
) -> bool:
    """Convert markdown to PDF with manual page break processing.

    Args:
        input_file: Path to the Markdown source.
        output_file: Destination PDF path. Defaults to the input path with
            its suffix replaced by ``.pdf``.

    Returns:
        True on success, False on any failure. Failures are reported on
        stdout rather than raised.
    """
    input_path = Path(input_file)
    if not input_path.exists():
        print(f"Error: Input file {input_file} not found")
        return False

    if output_file is None:
        output_file = input_path.with_suffix('.pdf')
    output_path = Path(output_file)

    print(f"Converting {input_file} to {output_file}")

    temp_html: Optional[Path] = None
    try:
        # Step 1: Convert markdown to HTML using pandoc
        print("Step 1: Converting markdown to HTML...")
        html_result = subprocess.run(
            [
                'pandoc', str(input_path),
                '-f', 'markdown',
                '-t', 'html',
                '--standalone',
                '--mathjax',
            ],
            capture_output=True, text=True, encoding='utf-8', check=True,
        )
        html_content = html_result.stdout
        if not html_content:
            print("Error: No HTML content generated from pandoc")
            return False

        # Step 2: Process manual page breaks
        print("Step 2: Processing manual page breaks...")
        html_content = process_manual_breaks(html_content)

        # Step 3: Add PDF-specific styles
        print("Step 3: Adding PDF styles...")
        html_content = add_pdf_styles(html_content)

        # Save the processed HTML next to the input so relative resource
        # paths (images, etc.) still resolve. A unique name avoids clobbering
        # an existing "<name>.temp.html" the user may have.
        fd, temp_name = tempfile.mkstemp(
            suffix='.html',
            prefix=f'{input_path.stem}.',
            dir=input_path.parent,
        )
        temp_html = Path(temp_name)
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(html_content)

        # Step 4: Convert HTML to PDF using wkhtmltopdf
        print("Step 4: Converting HTML to PDF...")
        subprocess.run(
            [
                'wkhtmltopdf',
                '--page-size', 'A4',
                '--margin-top', '0.75in',
                '--margin-right', '0.75in',
                '--margin-bottom', '0.75in',
                '--margin-left', '0.75in',
                '--enable-local-file-access',
                '--print-media-type',
                '--encoding', 'utf-8',
                str(temp_html),
                str(output_path),
            ],
            # Capture output so e.stderr is populated for the error report.
            capture_output=True, text=True, encoding='utf-8', check=True,
        )

        print(f"Successfully converted to {output_path}")
        return True

    except FileNotFoundError as e:
        # Raised by subprocess.run when pandoc/wkhtmltopdf is not on PATH.
        print(f"Error: file or program not found: {e.filename}")
        return False
    except subprocess.CalledProcessError as e:
        print(f"Error during conversion: {e}")
        if e.stderr:
            print(f"Error details: {e.stderr}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
    finally:
        # Clean up the temporary file on success and on failure alike.
        if temp_html is not None:
            try:
                temp_html.unlink()
            except OSError:
                pass


def main() -> None:
    """Command-line entry point: ``<input.md> [output.pdf]``."""
    if len(sys.argv) < 2:
        print(
            "Usage: python convert_with_manual_breaks.py "
            "<input.md> [output.pdf]"
        )
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    success = convert_md_to_pdf(input_file, output_file)
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()