Add HTML export - browser-based PDF, no LaTeX deps
This commit is contained in:
@@ -174,3 +174,16 @@ def __getattr__(name):
|
||||
from opus_orchestrator.latex_compile import compile_pdf
|
||||
return compile_pdf
|
||||
raise AttributeError(f"module has no attribute {name!r}")
|
||||
|
||||
# HTML Export
|
||||
# A module can only have one ``__getattr__`` hook: defining it again rebinds
# the name and silently disables the earlier one (and with it every lazy
# import the earlier hook served). Capture whatever hook is already bound so
# the HTML-export hook below can fall back to it.
_previous_getattr = globals().get("__getattr__")

# Names that are resolved lazily from ``opus_orchestrator.html_export``.
_HTML_EXPORT_NAMES = ("export_to_html", "export_to_pdf", "HTMLExporter")


def __getattr__(name):
    """Lazily resolve HTML-export names, then defer to the earlier hook.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The object called ``name`` from ``opus_orchestrator.html_export`` when
        it is one of the HTML-export names; otherwise whatever the previously
        defined module ``__getattr__`` returns.

    Raises:
        AttributeError: If neither this hook nor the earlier one knows
            ``name``.
    """
    if name in _HTML_EXPORT_NAMES:
        # Imported on first access so importing the package stays cheap.
        from opus_orchestrator import html_export

        return getattr(html_export, name)

    if _previous_getattr is not None:
        return _previous_getattr(name)

    raise AttributeError(f"module has no attribute {name!r}")
|
||||
|
||||
@@ -0,0 +1,418 @@
|
||||
"""HTML Export and Browser PDF for Opus Orchestrator.
|
||||
|
||||
Uses browser for PDF generation - no LaTeX required!
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
class HTMLOptions:
    """Settings that control how a manuscript is rendered to HTML.

    ``date`` is filled in with the current four-digit year when it is left
    empty.
    """

    # Template key: "memoir", "academic" or "minimal".
    template: str = "memoir"
    # Colour theme: "light", "dark" or "sepia".
    theme: str = "light"
    # Font family class: "serif" or "sans".
    font: str = "serif"
    # Whether a table of contents is emitted.
    include_toc: bool = True
    # Author name shown on the title page (omitted when empty).
    author: str = ""
    # Dedication text (omitted when empty).
    dedication: str = ""
    # Date shown on the title page.
    date: str = ""

    def __post_init__(self):
        # An empty date falls back to the current year.
        self.date = self.date or f"{datetime.now():%Y}"
|
||||
|
||||
|
||||
# HTML Templates
|
||||
# Built-in templates, keyed by the value of ``HTMLOptions.template``.
# Each row is: key, display name, description, primary font, secondary font,
# page background colour, body text colour.
# NOTE(review): "System UI" may not be a hosted web-font family; confirm it
# resolves wherever these font names are turned into a stylesheet URL.
TEMPLATES = {
    key: {
        "name": label,
        "description": blurb,
        "fonts": [primary_font, secondary_font],
        "background": background,
        "text": foreground,
    }
    for key, label, blurb, primary_font, secondary_font, background, foreground in (
        (
            "memoir",
            "Memoir",
            "Novel, memoir, personal narrative",
            "Merriweather",
            "Lora",
            "#fdfbf7",
            "#2c2c2c",
        ),
        (
            "academic",
            "Academic",
            "Technical, textbook, educational",
            "Roboto",
            "Open Sans",
            "#ffffff",
            "#1a1a1a",
        ),
        (
            "minimal",
            "Minimal",
            "Clean, simple design",
            "Inter",
            "System UI",
            "#ffffff",
            "#000000",
        ),
    )
}
|
||||
|
||||
|
||||
class HTMLExporter:
    """Render a manuscript as a single, self-contained HTML document.

    The generated page carries its own stylesheet (including print rules), so
    it can be opened in a browser and printed to PDF.

    Notes:
        * ``template_dir`` is stored on the instance but is not read by any
          method of this class.
        * Only ``template``, ``author``, ``date``, ``dedication`` and
          ``include_toc`` of the options object are consulted here; ``theme``
          and ``font`` are not used by the rendering code.
    """

    def __init__(self, template_dir: Optional[str] = None):
        """Remember where HTML templates live.

        Args:
            template_dir: Directory holding templates. When omitted, the
                ``templates/html`` directory next to this module is used.
        """
        if template_dir:
            self.template_dir = Path(template_dir)
        else:
            self.template_dir = Path(__file__).parent / "templates" / "html"

    def export(
        self,
        manuscript,
        book_title: str,
        options: Optional["HTMLOptions"] = None,
    ) -> str:
        """Export manuscript to HTML.

        Args:
            manuscript: Object exposing ``chapters``; each chapter exposes
                ``title`` and ``content``.
            book_title: Title for the book.
            options: Rendering options; defaults to ``HTMLOptions()``.

        Returns:
            The complete HTML document as a string.
        """
        opts = options or HTMLOptions()

        # Unknown template names fall back to the "memoir" template.
        template_info = TEMPLATES.get(opts.template, TEMPLATES["memoir"])

        html_parts = [
            self._build_head(book_title, template_info, opts),
            self._build_body(manuscript, book_title, opts),
        ]
        return "\n".join(html_parts)

    def export_to_file(
        self,
        manuscript,
        book_title: str,
        output_path: str,
        options: Optional["HTMLOptions"] = None,
    ) -> dict:
        """Export to an HTML file, creating parent directories as needed.

        Args:
            manuscript: Manuscript to render (see ``export``).
            book_title: Title for the book.
            output_path: Destination ``.html`` path.
            options: Rendering options; defaults to ``HTMLOptions()``.

        Returns:
            A dict with ``output_file`` (path written), ``template`` (the
            requested template name) and ``size`` (document length in
            characters).
        """
        document = self.export(manuscript, book_title, options)

        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        # The document declares <meta charset="UTF-8">, so the bytes on disk
        # must be UTF-8 regardless of the platform's default encoding.
        output_file.write_text(document, encoding="utf-8")

        return {
            "output_file": str(output_file),
            "template": options.template if options else "memoir",
            "size": len(document),
        }

    def _build_head(
        self,
        book_title: str,
        template_info: dict,
        options: "HTMLOptions",
    ) -> str:
        """Build the doctype, ``<head>`` and embedded stylesheet.

        Args:
            book_title: Used for the ``<title>`` element (HTML-escaped).
            template_info: Template entry providing ``fonts`` (at least two
                names), ``background`` and ``text``.
            options: Rendering options (not consulted by this method).

        Returns:
            Markup from ``<!DOCTYPE html>`` through ``</head>``.
        """
        from html import escape

        font_import = self._get_font_import(template_info["fonts"])
        safe_title = escape(str(book_title))
        background = template_info["background"]
        foreground = template_info["text"]
        main_font = template_info["fonts"][0]
        alt_font = template_info["fonts"][1]

        return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
{font_import}
<style>
:root {{
    --bg: {background};
    --text: {foreground};
    --accent: #8b4513;
    --font-main: '{main_font}', serif;
    --font-alt: '{alt_font}', sans-serif;
}}

* {{ box-sizing: border-box; }}

body {{
    font-family: var(--font-main);
    background: var(--bg);
    color: var(--text);
    line-height: 1.8;
    max-width: 6in;
    margin: 0 auto;
    padding: 1in;
}}

h1 {{
    font-size: 2.5em;
    text-align: center;
    margin-bottom: 0.5em;
    font-weight: 300;
}}

h2 {{
    font-size: 1.5em;
    margin-top: 2em;
    border-bottom: 1px solid #ddd;
    padding-bottom: 0.3em;
}}

h3 {{
    font-size: 1.2em;
    margin-top: 1.5em;
}}

p {{
    text-align: justify;
    margin-bottom: 1em;
    text-indent: 1.5em;
}}

p:first-of-type {{ text-indent: 0; }}

.title-page {{
    height: 90vh;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    text-align: center;
}}

.title-page h1 {{
    font-size: 3em;
    margin-bottom: 0.2em;
}}

.title-page .author {{
    font-size: 1.5em;
    color: #666;
    margin-top: 1em;
}}

.title-page .date {{
    font-size: 1em;
    color: #999;
    margin-top: 2em;
}}

.dedication {{
    margin-top: 3em;
    font-style: italic;
    text-align: center;
}}

.toc {{
    margin: 2em 0;
    padding: 1em;
    background: rgba(0,0,0,0.02);
}}

.toc h2 {{
    border: none;
    margin-top: 0;
}}

.toc ul {{
    list-style: none;
    padding: 0;
}}

.toc li {{
    padding: 0.3em 0;
    border-bottom: 1px dotted #ccc;
}}

.toc a {{
    text-decoration: none;
    color: inherit;
}}

.chapter {{
    margin: 2em 0;
}}

.chapter-number {{
    font-size: 0.8em;
    color: #999;
    text-transform: uppercase;
    letter-spacing: 0.2em;
}}

@media print {{
    body {{ padding: 0; }}
    .page-break {{ page-break-before: always; }}
}}
</style>
</head>"""

    def _build_body(
        self,
        manuscript,
        book_title: str,
        options: "HTMLOptions",
    ) -> str:
        """Build the ``<body>``: title page, dedication, contents, chapters.

        Title, author, date, dedication and chapter titles are plain text and
        are HTML-escaped. Chapter content goes through ``_markdown_to_html``.

        Args:
            manuscript: Object exposing ``chapters``; each chapter exposes
                ``title`` and ``content``.
            book_title: Title shown on the title page.
            options: Supplies ``author``, ``date``, ``dedication`` and
                ``include_toc``.

        Returns:
            Markup from ``<body>`` through ``</body></html>``.
        """
        from html import escape

        parts = ["<body>"]

        # Title page
        parts.append('<div class="title-page">')
        parts.append(f"<h1>{escape(str(book_title))}</h1>")
        if options.author:
            parts.append(f'<div class="author">by {escape(str(options.author))}</div>')
        if options.date:
            parts.append(f'<div class="date">{escape(str(options.date))}</div>')
        parts.append("</div>")

        # Dedication
        if options.dedication:
            parts.append(
                f'<div class="dedication">{escape(str(options.dedication))}</div>'
            )

        # Escape each chapter title once; it is used in both the table of
        # contents and the chapter heading.
        titles = [escape(str(chapter.title)) for chapter in manuscript.chapters]

        # Table of contents
        if options.include_toc:
            parts.append('<div class="toc"><h2>Contents</h2><ul>')
            for number, title in enumerate(titles, 1):
                parts.append(f'<li><a href="#chapter-{number}">{title}</a></li>')
            parts.append("</ul></div>")

        # Chapters
        for number, (chapter, title) in enumerate(zip(manuscript.chapters, titles), 1):
            parts.append(f'<div class="chapter" id="chapter-{number}">')
            parts.append(f'<span class="chapter-number">Chapter {number}</span>')
            parts.append(f"<h2>{title}</h2>")
            parts.append(self._markdown_to_html(chapter.content or ""))
            parts.append("</div>")

        parts.append("</body></html>")

        return "\n".join(parts)

    def _markdown_to_html(self, text: str) -> str:
        """Convert a small Markdown subset to HTML.

        Supported: ``#``–``####`` headings, ``***``/``**``/``*`` emphasis,
        fenced and inline code, ``- `` bullet lists, ``1. `` numbered lists
        and blank-line separated paragraphs. Raw HTML in the text is passed
        through unchanged (a chunk that starts with a tag is not wrapped in
        ``<p>``).

        Args:
            text: Markdown source.

        Returns:
            HTML fragment; an empty string for empty input.
        """
        import re
        from html import escape

        # Code is lifted out first and put back at the very end so that the
        # heading/emphasis/list/paragraph rules never touch its contents.
        protected = []

        def _protect(fragment: str, block: bool) -> str:
            protected.append(fragment)
            marker = "B" if block else "I"
            token = f"\x00{marker}{len(protected) - 1}\x00"
            # Block-level code becomes its own paragraph chunk.
            return f"\n\n{token}\n\n" if block else token

        def _fenced(match):
            code = escape(match.group(1), quote=False)
            return _protect(f"<pre><code>{code}</code></pre>", block=True)

        def _inline(match):
            code = escape(match.group(1), quote=False)
            return _protect(f"<code>{code}</code>", block=False)

        text = re.sub(r'```(.+?)```', _fenced, text, flags=re.DOTALL)
        text = re.sub(r'`(.+?)`', _inline, text)

        # Headers (levels 1-4); surrounded by blank lines so adjacent text
        # forms its own paragraph.
        def _heading(match):
            level = len(match.group(1))
            return f"\n\n<h{level}>{match.group(2)}</h{level}>\n\n"

        text = re.sub(r'^(#{1,4}) (.+)$', _heading, text, flags=re.MULTILINE)

        # Bold/italic (longest marker first)
        text = re.sub(r'\*\*\*(.+?)\*\*\*', r'<strong><em>\1</em></strong>', text)
        text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
        text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)

        # Lists: consecutive bullet lines become <ul>, numbered lines <ol>.
        lines = []
        open_list = None  # "ul", "ol" or None
        for line in text.split('\n'):
            bullet = re.match(r'- (.+)$', line)
            numbered = re.match(r'\d+\. (.+)$', line)
            kind = "ul" if bullet else "ol" if numbered else None
            if kind != open_list:
                if open_list:
                    lines.extend([f"</{open_list}>", ""])
                if kind:
                    lines.extend(["", f"<{kind}>"])
                open_list = kind
            if kind:
                lines.append(f"<li>{(bullet or numbered).group(1)}</li>")
            else:
                lines.append(line)
        if open_list:
            lines.extend([f"</{open_list}>", ""])
        text = '\n'.join(lines)

        # Paragraphs
        chunks = []
        for chunk in re.split(r'\n{2,}', text):
            chunk = chunk.strip()
            if not chunk:
                continue
            # A chunk that merely *starts* with generated emphasis is still
            # ordinary prose and needs a <p>; anything else starting with a
            # tag is treated as block-level/raw HTML and left alone.
            starts_with_markup = chunk.startswith('<') and not chunk.startswith(
                ('<strong>', '<em>')
            )
            if not (starts_with_markup or chunk.startswith('\x00B')):
                chunk = f'<p>{chunk}</p>'
            chunks.append(chunk)
        text = '\n'.join(chunks)

        # Put the protected code fragments back.
        return re.sub(
            r'\x00[BI](\d+)\x00',
            lambda match: protected[int(match.group(1))],
            text,
        )

    def _get_font_import(self, fonts: list) -> str:
        """Build the Google Fonts ``<link>`` tag for the given families.

        The ``css2`` endpoint takes one ``family=`` parameter per font; the
        ``|``-separated form belongs to the older ``css`` endpoint.

        Args:
            fonts: Font family names; spaces are encoded as ``+``.

        Returns:
            A ``<link rel="stylesheet">`` element as a string.
        """
        font_query = "&family=".join(font.replace(" ", "+") for font in fonts)
        return f'<link href="https://fonts.googleapis.com/css2?family={font_query}&display=swap" rel="stylesheet">'
|
||||
|
||||
|
||||
def export_to_html(
    manuscript,
    book_title: str,
    output_path: str = "",
    template: str = "memoir",
    **options,
) -> dict:
    """Render a manuscript to HTML in one call.

    Args:
        manuscript: The manuscript to render.
        book_title: Book title.
        output_path: Destination ``.html`` path. When empty, nothing is
            written and the markup is returned instead.
        template: Template name.
        **options: Extra keyword arguments forwarded to ``HTMLOptions``.

    Returns:
        With ``output_path``: the dict produced by
        ``HTMLExporter.export_to_file``. Without it: a dict holding the
        markup under ``"html"`` and the template name under ``"template"``.
    """
    export_options = HTMLOptions(template=template, **options)
    html_exporter = HTMLExporter()

    # In-memory export when no destination was given.
    if not output_path:
        markup = html_exporter.export(manuscript, book_title, export_options)
        return {"html": markup, "template": template}

    return html_exporter.export_to_file(
        manuscript, book_title, output_path, export_options
    )
|
||||
|
||||
|
||||
def export_to_pdf(
    manuscript,
    book_title: str,
    output_path: str,
    template: str = "memoir",
    **options,
) -> dict:
    """Prepare a browser-printable HTML file next to the requested PDF path.

    This function renders the manuscript to HTML and writes that HTML beside
    ``output_path`` (same name, ``.html`` suffix). It does not itself create
    the PDF file: ``pdf_file`` in the result is the intended destination for
    a later browser "print to PDF" step.

    Args:
        manuscript: The manuscript to render.
        book_title: Book title.
        output_path: Intended output ``.pdf`` path.
        template: Template name.
        **options: Extra keyword arguments forwarded to ``HTMLOptions``.

    Returns:
        A dict with ``html_file`` (path of the HTML written), ``pdf_file``
        (``output_path`` as given), ``template`` and ``html`` (the markup).
    """
    opts = HTMLOptions(template=template, **options)
    exporter = HTMLExporter()

    html = exporter.export(manuscript, book_title, opts)

    # Swap only the final suffix. A plain text replace would also rewrite
    # ".pdf" inside directory names, and would leave the path untouched (so
    # HTML would be written to the PDF path) when the suffix is missing or
    # spelled differently, e.g. ".PDF".
    html_file = Path(output_path).with_suffix(".html")
    html_file.parent.mkdir(parents=True, exist_ok=True)
    # The document declares <meta charset="UTF-8">; write matching bytes.
    html_file.write_text(html, encoding="utf-8")

    return {
        "html_file": str(html_file),
        "pdf_file": output_path,
        "template": template,
        "html": html,
    }
|
||||
Reference in New Issue
Block a user