Files
the-quantum-blueprint/generate_original_markdown.py
T

143 lines
4.7 KiB
Python

from html.parser import HTMLParser
import json
class StandardMarkdownParser(HTMLParser):
    """Convert a subset of HTML into standard Markdown fragments.

    Feed HTML with ``feed()`` (and finish with ``close()``); the converted
    pieces accumulate in ``self.markdown`` and are meant to be combined with
    ``"".join(parser.markdown)``.

    Handled tags: ``h3``, ``h4``, ``p``, ``em``, ``strong``, ``blockquote``,
    ``ul``, ``ol``, ``li``, ``figure``, ``img`` and ``figcaption``.  Text that
    is not inside a heading, paragraph, list item, blockquote or figcaption
    is dropped.
    """

    def __init__(self, image_map):
        """Create a parser.

        Args:
            image_map: Remote-to-local image lookup, given either as an
                iterable of ``(url_fragment, local_name)`` pairs or as a dict
                mapping ``url_fragment`` to ``local_name``.  An ``<img>``
                whose ``src`` contains ``url_fragment`` is rewritten to
                ``docs/original_assets/<local_name>``.
        """
        super().__init__()
        self.markdown = []
        self.in_h1 = False
        self.in_h3 = False
        self.in_h4 = False
        self.in_p = False
        self.in_em = False
        self.in_strong = False
        self.in_blockquote = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_figure = False
        self.in_figcaption = False
        self.current_img_src = ""
        self.image_map = image_map
        # Caption text is buffered separately so that a caption split into
        # several chunks (e.g. by inline <em>) is reassembled in full.
        self._caption_parts = []
        # True once the current figure's image has been written out.
        self._image_emitted = False

    def _emit(self, text):
        """Append *text* to the open figcaption if any, else to the output."""
        if self.in_figcaption:
            self._caption_parts.append(text)
        else:
            self.markdown.append(text)

    def _resolve_image_src(self, src):
        """Return the local asset path for *src*, or *src* if nothing matches."""
        if isinstance(self.image_map, dict):
            pairs = self.image_map.items()
        else:
            pairs = self.image_map
        for url, local in pairs:
            if url in src:
                return f"docs/original_assets/{local}"
        return src

    def _append_image(self, alt_text):
        """Write the current image as ``![alt](path)`` on its own paragraph."""
        self.markdown.append(f'\n\n![{alt_text}]({self.current_img_src})\n\n')
        self._image_emitted = True

    def handle_starttag(self, tag, attrs):
        """Emit the Markdown prefix for an opening tag and update state."""
        if tag == 'h3':
            self.in_h3 = True
            self.markdown.append('\n### ')
        elif tag == 'h4':
            self.in_h4 = True
            self.markdown.append('\n#### ')
        elif tag == 'p':
            self.in_p = True
            # Inside a blockquote the "> " prefix already starts the line.
            if not self.in_blockquote:
                self.markdown.append('\n\n')
        elif tag == 'em':
            self.in_em = True
            self._emit('*')
        elif tag == 'strong':
            self.in_strong = True
            self._emit('**')
        elif tag == 'blockquote':
            self.in_blockquote = True
            self.markdown.append('\n\n> ')
        elif tag == 'ul':
            self.in_ul = True
            self.markdown.append('\n')
        elif tag == 'ol':
            self.in_ol = True
            self.markdown.append('\n')
        elif tag == 'li':
            self.in_li = True
            if self.in_ul:
                self.markdown.append('\n- ')
            elif self.in_ol:
                # Every ordered item is written as "1."; Markdown renderers
                # number ordered lists themselves.
                self.markdown.append('\n1. ')
        elif tag == 'figure':
            self.in_figure = True
            self.current_img_src = ""
            self._image_emitted = False
        elif tag == 'img':
            for name, value in attrs:
                if name == 'src':
                    # A valueless attribute is reported as None.
                    self.current_img_src = self._resolve_image_src(value or "")
        elif tag == 'figcaption':
            self.in_figcaption = True
            self._caption_parts = []

    def handle_endtag(self, tag):
        """Emit the Markdown suffix for a closing tag and update state."""
        if tag == 'h3':
            self.in_h3 = False
        elif tag == 'h4':
            self.in_h4 = False
        elif tag == 'p':
            self.in_p = False
            if self.in_blockquote:
                self.markdown.append('\n> ')
        elif tag == 'em':
            self.in_em = False
            self._emit('*')
        elif tag == 'strong':
            self.in_strong = False
            self._emit('**')
        elif tag == 'blockquote':
            self.in_blockquote = False
        elif tag == 'ul':
            self.in_ul = False
        elif tag == 'ol':
            self.in_ol = False
        elif tag == 'li':
            self.in_li = False
        elif tag == 'figure':
            self.in_figure = False
            # A figure without a figcaption would otherwise lose its image.
            if self.current_img_src and not self._image_emitted:
                self._append_image("")
        elif tag == 'figcaption':
            self.in_figcaption = False
            # The whole buffered caption becomes the image's alt text.
            alt_text = "".join(self._caption_parts)
            self._caption_parts = []
            self._append_image(alt_text)

    def handle_data(self, data):
        """Record text that sits inside a tag this parser converts."""
        text = data.replace('\n', ' ')
        if self.in_figcaption:
            self._caption_parts.append(text)
        elif (self.in_h3 or self.in_h4 or self.in_p or self.in_li
              or self.in_blockquote):
            self.markdown.append(text)
def main(
    html_file="/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html",
    image_map_file="/home/antigravity/the-quantum-blueprint/docs/image_map.json",
    output_md_file="/home/antigravity/the-quantum-blueprint/original_export.md",
):
    """Convert the exported HTML post to Markdown and write it to disk.

    Args:
        html_file: Path of the HTML export to convert.
        image_map_file: Path of the JSON file that is passed to
            ``StandardMarkdownParser`` as its image map.
        output_md_file: Path the generated Markdown is written to.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the image map is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding (assumes the export and the image map are UTF-8 -- confirm).
    with open(html_file, "r", encoding="utf-8") as f:
        html_content = f.read()
    with open(image_map_file, "r", encoding="utf-8") as f:
        image_map = json.load(f)

    parser = StandardMarkdownParser(image_map)
    parser.feed(html_content)
    # close() flushes any text the parser is still buffering at end of input.
    parser.close()
    md_content = "".join(parser.markdown).strip()

    header = (
        "# The Quantum Blueprint: How Information Shapes Reality and Consciousness (Original Export)\n"
        "*This is a preserved rendering of the original Medium export.*\n"
        "* [View the Transmuted Sovereign Canon Version](quantum_blueprint.md)\n"
        "* [View the GitHub Pages HTML preservation](https://mrhavens.github.io/the-quantum-blueprint/)\n"
        "---\n"
    )

    with open(output_md_file, "w", encoding="utf-8") as f:
        f.write(header + md_content)
    print(f"Generated {output_md_file}")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()