# Listing metadata: 143 lines, 4.7 KiB, Python
from html.parser import HTMLParser
|
|
import json
|
|
|
|
class StandardMarkdownParser(HTMLParser):
    """Translate a subset of HTML tags into Markdown fragments.

    Fragments are accumulated in ``self.markdown``; callers obtain the final
    document with ``"".join(parser.markdown)`` after feeding the HTML.

    Handled tags: ``h3``, ``h4``, ``p``, ``em``, ``strong``, ``blockquote``,
    ``ul``, ``ol``, ``li``, ``figure``, ``img`` and ``figcaption``. Text that
    is not inside a heading, paragraph, list item, blockquote or caption is
    dropped.
    """

    def __init__(self, image_map):
        """Create a parser.

        Args:
            image_map: Either an iterable of ``(url_fragment, local_name)``
                pairs or a dict mapping ``url_fragment`` to ``local_name``.
                The first entry whose fragment occurs in an ``<img>`` ``src``
                rewrites that source to ``docs/original_assets/<local_name>``.
        """
        super().__init__()
        self.markdown = []
        self.in_h1 = False
        self.in_h3 = False
        self.in_h4 = False
        self.in_p = False
        self.in_em = False
        self.in_strong = False
        self.in_blockquote = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_figure = False
        self.in_figcaption = False

        self.current_img_src = ""
        self.image_map = image_map

        # Caption text is buffered on its own so that assembling the image
        # never has to pop (and possibly lose or mis-take) fragments from
        # self.markdown.
        self._caption_parts = []
        # Tracks whether the current figure's image has already been written,
        # so a figure without a caption can still emit its image on </figure>.
        self._figure_image_emitted = False

    def _image_map_pairs(self):
        """Return the image map as an iterable of (url_fragment, local) pairs."""
        mapping = self.image_map
        if isinstance(mapping, dict):
            return mapping.items()
        return mapping or ()

    def _resolve_image_src(self, src):
        """Return the local asset path for *src*, or *src* itself if unmapped."""
        for url, local in self._image_map_pairs():
            if url in src:
                return f"docs/original_assets/{local}"
        return src

    def _emit_image(self, alt_text):
        """Append the Markdown image for the current source, if there is one."""
        if self.current_img_src:
            self.markdown.append(
                f'\n\n![{alt_text}]({self.current_img_src})\n\n'
            )
        elif alt_text:
            # A caption with no image: keep the text rather than emit a
            # broken empty-target image.
            self.markdown.append(f'\n\n{alt_text}\n\n')
        self._figure_image_emitted = True

    def handle_starttag(self, tag, attrs):
        """Update state flags and emit the opening Markdown for *tag*."""
        if tag == 'h3':
            self.in_h3 = True
            self.markdown.append('\n### ')
        elif tag == 'h4':
            self.in_h4 = True
            self.markdown.append('\n#### ')
        elif tag == 'p':
            self.in_p = True
            # Inside a blockquote the "> " prefix already starts the line.
            if not self.in_blockquote:
                self.markdown.append('\n\n')
        elif tag == 'em':
            self.in_em = True
            # Alt text is plain, so emphasis markers are omitted in captions.
            if not self.in_figcaption:
                self.markdown.append('*')
        elif tag == 'strong':
            self.in_strong = True
            if not self.in_figcaption:
                self.markdown.append('**')
        elif tag == 'blockquote':
            self.in_blockquote = True
            self.markdown.append('\n\n> ')
        elif tag == 'ul':
            self.in_ul = True
            self.markdown.append('\n')
        elif tag == 'ol':
            self.in_ol = True
            self.markdown.append('\n')
        elif tag == 'li':
            self.in_li = True
            if self.in_ul:
                self.markdown.append('\n- ')
            elif self.in_ol:
                self.markdown.append('\n1. ')
        elif tag == 'figure':
            self.in_figure = True
            self.current_img_src = ""
            self._figure_image_emitted = False
        elif tag == 'img':
            for name, value in attrs:
                if name == 'src':
                    # A valueless attribute is reported as None.
                    self.current_img_src = self._resolve_image_src(value or "")
        elif tag == 'figcaption':
            self.in_figcaption = True
            self._caption_parts = []

    def handle_endtag(self, tag):
        """Update state flags and emit the closing Markdown for *tag*."""
        if tag == 'h3':
            self.in_h3 = False
        elif tag == 'h4':
            self.in_h4 = False
        elif tag == 'p':
            self.in_p = False
            if self.in_blockquote:
                self.markdown.append('\n> ')
        elif tag == 'em':
            self.in_em = False
            if not self.in_figcaption:
                self.markdown.append('*')
        elif tag == 'strong':
            self.in_strong = False
            if not self.in_figcaption:
                self.markdown.append('**')
        elif tag == 'blockquote':
            self.in_blockquote = False
        elif tag == 'ul':
            self.in_ul = False
        elif tag == 'ol':
            self.in_ol = False
        elif tag == 'li':
            self.in_li = False
        elif tag == 'figure':
            # Figures without a <figcaption> would otherwise lose their image.
            if self.current_img_src and not self._figure_image_emitted:
                self._emit_image("")
            self.in_figure = False
        elif tag == 'figcaption':
            self.in_figcaption = False
            # The caption becomes the image's alt text.
            alt_text = "".join(self._caption_parts).strip()
            self._caption_parts = []
            self._emit_image(alt_text)

    def handle_data(self, data):
        """Collect text that appears inside a handled container."""
        text = data.replace('\n', ' ')
        if self.in_figcaption:
            self._caption_parts.append(text)
        elif (self.in_h3 or self.in_h4 or self.in_p or self.in_li
                or self.in_blockquote):
            self.markdown.append(text)
|
|
|
|
def main(
    html_file="/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html",
    image_map_file="/home/antigravity/the-quantum-blueprint/docs/image_map.json",
    output_md_file="/home/antigravity/the-quantum-blueprint/original_export.md",
):
    """Convert the exported HTML article to Markdown and write it to disk.

    Args:
        html_file: Path of the HTML export to read.
        image_map_file: Path of the JSON file passed to
            ``StandardMarkdownParser`` as its image map.
        output_md_file: Path the generated Markdown is written to.
    """
    # Encoding is explicit so the result does not depend on the locale.
    with open(html_file, "r", encoding="utf-8") as f:
        html_content = f.read()

    with open(image_map_file, "r", encoding="utf-8") as f:
        image_map = json.load(f)

    parser = StandardMarkdownParser(image_map)
    parser.feed(html_content)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

    md_content = "".join(parser.markdown).strip()

    header = """# The Quantum Blueprint: How Information Shapes Reality and Consciousness (Original Export)

*This is a preserved rendering of the original Medium export.*
* [View the Transmuted Sovereign Canon Version](quantum_blueprint.md)
* [View the GitHub Pages HTML preservation](https://mrhavens.github.io/the-quantum-blueprint/)

---

"""

    with open(output_md_file, "w", encoding="utf-8") as f:
        f.write(header + md_content)

    print(f"Generated {output_md_file}")
|
|
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|