"""Convert a Medium HTML export into a standard Markdown document.

The script parses one exported HTML post, rewrites remote image URLs to
local asset paths using a JSON image map, and writes the result (preceded
by a fixed header) to a Markdown file.
"""

from html.parser import HTMLParser
import json

DEFAULT_HTML_FILE = "/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html"
DEFAULT_IMAGE_MAP_FILE = "/home/antigravity/the-quantum-blueprint/docs/image_map.json"
DEFAULT_OUTPUT_MD_FILE = "/home/antigravity/the-quantum-blueprint/original_export.md"

# Fixed preamble written before the converted body.
HEADER = """# The Quantum Blueprint: How Information Shapes Reality and Consciousness (Original Export)

*This is a preserved rendering of the original Medium export.*

* [View the Transmuted Sovereign Canon Version](quantum_blueprint.md)
* [View the GitHub Pages HTML preservation](https://mrhavens.github.io/the-quantum-blueprint/)

---

"""


class StandardMarkdownParser(HTMLParser):
    """Streaming HTML-to-Markdown converter for a small set of tags.

    Handled tags: ``h3``, ``h4``, ``p``, ``em``, ``strong``, ``blockquote``,
    ``ul``, ``ol``, ``li``, ``figure``, ``img`` and ``figcaption``. Text
    outside a heading, paragraph, list item, blockquote or figcaption is
    ignored. The output accumulates as string fragments in ``self.markdown``;
    join them with ``"".join(parser.markdown)``.

    Args:
        image_map: Either an iterable of ``(url_fragment, local_name)`` pairs
            or a dict mapping ``url_fragment`` to ``local_name``. The first
            entry whose ``url_fragment`` is a substring of an image ``src``
            rewrites that ``src`` to ``docs/original_assets/<local_name>``.
    """

    def __init__(self, image_map):
        super().__init__()
        self.markdown = []
        # NOTE: in_h1 is never set by any handler; it is kept only so the
        # attribute remains available to existing callers.
        self.in_h1 = False
        self.in_h3 = False
        self.in_h4 = False
        self.in_p = False
        self.in_em = False
        self.in_strong = False
        self.in_blockquote = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_figure = False
        self.in_figcaption = False
        self.current_img_src = ""
        self.image_map = image_map
        # Caption text is buffered separately so that a caption split into
        # several data chunks (e.g. around an <a>) is assembled completely
        # and never mixed into, or popped from, the body stream.
        self._caption_parts = []
        # True once the current figure's image has been written.
        self._image_emitted = False

    def _image_pairs(self):
        """Return the image map as an iterable of (url_fragment, local) pairs."""
        mapping = self.image_map
        if isinstance(mapping, dict):
            return mapping.items()
        return mapping or ()

    def _resolve_image_src(self, src):
        """Return the local asset path for *src*, or *src* itself if unmapped."""
        for url, local in self._image_pairs():
            if url in src:
                return f"docs/original_assets/{local}"
        return src

    def _emit_image(self, alt_text):
        """Append a standard Markdown image for the current image source."""
        self.markdown.append(f'\n\n![{alt_text}]({self.current_img_src})\n\n')
        self._image_emitted = True

    def handle_starttag(self, tag, attrs):
        """Update parser state and emit the Markdown prefix for *tag*."""
        if tag == 'h3':
            self.in_h3 = True
            self.markdown.append('\n### ')
        elif tag == 'h4':
            self.in_h4 = True
            self.markdown.append('\n#### ')
        elif tag == 'p':
            self.in_p = True
            if not self.in_blockquote:
                self.markdown.append('\n\n')
        elif tag == 'em':
            self.in_em = True
            # Emphasis markers are omitted inside captions: the caption
            # becomes image alt text, which is plain text.
            if not self.in_figcaption:
                self.markdown.append('*')
        elif tag == 'strong':
            self.in_strong = True
            if not self.in_figcaption:
                self.markdown.append('**')
        elif tag == 'blockquote':
            self.in_blockquote = True
            self.markdown.append('\n\n> ')
        elif tag == 'ul':
            self.in_ul = True
            self.markdown.append('\n')
        elif tag == 'ol':
            self.in_ol = True
            self.markdown.append('\n')
        elif tag == 'li':
            self.in_li = True
            if self.in_ul:
                self.markdown.append('\n- ')
            elif self.in_ol:
                self.markdown.append('\n1. ')
        elif tag == 'figure':
            self.in_figure = True
            self.current_img_src = ""
            self._image_emitted = False
        elif tag == 'img':
            attr_map = dict(attrs)
            if 'src' in attr_map:
                # A valueless attribute (<img src>) is reported as None.
                self.current_img_src = self._resolve_image_src(
                    attr_map['src'] or ""
                )
                # Inside a figure the image is written when the caption (or
                # the figure) closes; a standalone image is written now.
                if not self.in_figure and self.current_img_src:
                    self._emit_image(attr_map.get('alt') or "")
        elif tag == 'figcaption':
            self.in_figcaption = True
            self._caption_parts = []

    def handle_endtag(self, tag):
        """Update parser state and emit the Markdown suffix for *tag*."""
        if tag == 'h3':
            self.in_h3 = False
        elif tag == 'h4':
            self.in_h4 = False
        elif tag == 'p':
            self.in_p = False
            if self.in_blockquote:
                self.markdown.append('\n> ')
        elif tag == 'em':
            self.in_em = False
            if not self.in_figcaption:
                self.markdown.append('*')
        elif tag == 'strong':
            self.in_strong = False
            if not self.in_figcaption:
                self.markdown.append('**')
        elif tag == 'blockquote':
            self.in_blockquote = False
        elif tag == 'ul':
            self.in_ul = False
        elif tag == 'ol':
            self.in_ol = False
        elif tag == 'li':
            self.in_li = False
        elif tag == 'figure':
            self.in_figure = False
            # A figure without a figcaption still carries an image.
            if not self._image_emitted and self.current_img_src:
                self._emit_image("")
        elif tag == 'figcaption':
            self.in_figcaption = False
            # The buffered caption becomes the alt text: ![alt](path)
            alt_text = "".join(self._caption_parts)
            self._caption_parts = []
            self._emit_image(alt_text)

    def handle_data(self, data):
        """Collect text that appears inside a handled container element."""
        text = data.replace('\n', ' ')
        if self.in_figcaption:
            self._caption_parts.append(text)
        elif (self.in_h3 or self.in_h4 or self.in_p or self.in_li
              or self.in_blockquote):
            self.markdown.append(text)


def main(html_file=DEFAULT_HTML_FILE,
         image_map_file=DEFAULT_IMAGE_MAP_FILE,
         output_md_file=DEFAULT_OUTPUT_MD_FILE):
    """Convert *html_file* to Markdown and write it to *output_md_file*.

    Args:
        html_file: Path of the exported HTML post to read.
        image_map_file: Path of the JSON file holding the image map passed
            to ``StandardMarkdownParser``.
        output_md_file: Path of the Markdown file to write.
    """
    # Explicit UTF-8 so the result does not depend on the process locale.
    with open(html_file, "r", encoding="utf-8") as f:
        html_content = f.read()

    with open(image_map_file, "r", encoding="utf-8") as f:
        image_map = json.load(f)

    parser = StandardMarkdownParser(image_map)
    parser.feed(html_content)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

    md_content = "".join(parser.markdown).strip()

    with open(output_md_file, "w", encoding="utf-8") as f:
        f.write(HEADER + md_content)

    print(f"Generated {output_md_file}")


if __name__ == "__main__":
    main()