from html.parser import HTMLParser
import json

class StandardMarkdownParser(HTMLParser):
    """Convert a small subset of HTML into Markdown fragments.

    Feed HTML with ``feed()``; the generated Markdown pieces accumulate in
    ``self.markdown`` and are meant to be joined with ``"".join(...)``.

    Handled tags: ``h3``, ``h4``, ``p``, ``em``, ``strong``, ``blockquote``,
    ``ul``/``ol``/``li`` (including nesting), and ``figure``/``img``/
    ``figcaption``. Text outside those containers is dropped. ``in_h1`` is
    never set by any handler, so ``<h1>`` text is not emitted.

    Args:
        image_map: Either an iterable of ``(url_fragment, local_name)`` pairs
            or a dict mapping ``url_fragment -> local_name``. An ``<img>``
            whose ``src`` contains ``url_fragment`` is rewritten to
            ``docs/original_assets/<local_name>``. ``None`` or an empty
            container disables rewriting.
    """

    def __init__(self, image_map):
        super().__init__()
        self.markdown = []
        self.in_h1 = False
        self.in_h3 = False
        self.in_h4 = False
        self.in_p = False
        self.in_em = False
        self.in_strong = False
        self.in_blockquote = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_figure = False
        self.in_figcaption = False

        self.current_img_src = ""
        self.image_map = image_map

        # Caption text is collected separately so inline markup inside a
        # <figcaption> cannot leak into (or steal from) the main output.
        self._caption_parts = []
        # Caption seen before its <img>; flushed when the figure closes.
        self._pending_caption = ""
        # True once the current figure's image line has been written.
        self._image_emitted = False
        # Open list tags, innermost last, so nested lists keep their markers.
        self._list_stack = []
        # Number of currently open <li> elements.
        self._li_depth = 0

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _emit(self, text):
        """Append *text* to the caption buffer inside a figcaption, else to the output."""
        target = self._caption_parts if self.in_figcaption else self.markdown
        target.append(text)

    def _localize_src(self, src):
        """Return the local asset path for *src*, or *src* itself if unmapped."""
        mapping = self.image_map
        if not mapping:
            return src
        pairs = mapping.items() if isinstance(mapping, dict) else mapping
        for url, local in pairs:
            # An empty fragment would match every source, so skip it.
            if url and url in src:
                return f"docs/original_assets/{local}"
        return src

    def _emit_image(self, alt_text):
        """Write the Markdown image line for the current image source."""
        self.markdown.append(f'\n\n![{alt_text}]({self.current_img_src})\n\n')
        self._image_emitted = True

    def _close_list(self, tag):
        """Close the innermost open list of type *tag*; ignore stray end tags."""
        stack = self._list_stack
        closed = False
        for index in range(len(stack) - 1, -1, -1):
            if stack[index] == tag:
                # Also drops any unclosed lists nested inside it.
                del stack[index:]
                closed = True
                break
        self.in_ul = 'ul' in stack
        self.in_ol = 'ol' in stack
        if closed and not stack:
            # Leaving the outermost list: forget <li> tags that were never closed.
            self._li_depth = 0
            self.in_li = False

    # ------------------------------------------------------------------
    # HTMLParser callbacks
    # ------------------------------------------------------------------
    def handle_starttag(self, tag, attrs):
        """Update state and emit the opening Markdown for *tag*."""
        if tag == 'h3':
            self.in_h3 = True
            self.markdown.append('\n### ')
        elif tag == 'h4':
            self.in_h4 = True
            self.markdown.append('\n#### ')
        elif tag == 'p':
            self.in_p = True
            # Inside a blockquote the '> ' prefix already starts the line.
            if not self.in_blockquote:
                self.markdown.append('\n\n')
        elif tag == 'em':
            self.in_em = True
            self._emit('*')
        elif tag == 'strong':
            self.in_strong = True
            self._emit('**')
        elif tag == 'blockquote':
            self.in_blockquote = True
            self.markdown.append('\n\n> ')
        elif tag in ('ul', 'ol'):
            self._list_stack.append(tag)
            self.in_ul = 'ul' in self._list_stack
            self.in_ol = 'ol' in self._list_stack
            # Only a top-level list needs the separating newline; a nested
            # list continues directly under its parent item.
            if len(self._list_stack) == 1:
                self.markdown.append('\n')
        elif tag == 'li':
            self._li_depth += 1
            self.in_li = True
            if self._list_stack:
                indent = '    ' * (len(self._list_stack) - 1)
                # Every ordered item uses "1."; Markdown renderers renumber.
                marker = '- ' if self._list_stack[-1] == 'ul' else '1. '
                self.markdown.append(f'\n{indent}{marker}')
        elif tag == 'figure':
            self.in_figure = True
            self.current_img_src = ""
            self._pending_caption = ""
            self._image_emitted = False
        elif tag == 'img':
            for name, value in attrs:
                if name == 'src':
                    # A valueless src attribute is reported as None.
                    self.current_img_src = self._localize_src(value or "")
        elif tag == 'figcaption':
            self.in_figcaption = True
            self._caption_parts = []

    def handle_endtag(self, tag):
        """Update state and emit the closing Markdown for *tag*."""
        if tag == 'h3':
            self.in_h3 = False
        elif tag == 'h4':
            self.in_h4 = False
        elif tag == 'p':
            self.in_p = False
            if self.in_blockquote:
                self.markdown.append('\n> ')
        elif tag == 'em':
            self.in_em = False
            self._emit('*')
        elif tag == 'strong':
            self.in_strong = False
            self._emit('**')
        elif tag == 'blockquote':
            self.in_blockquote = False
        elif tag in ('ul', 'ol'):
            self._close_list(tag)
        elif tag == 'li':
            self._li_depth = max(0, self._li_depth - 1)
            self.in_li = self._li_depth > 0
        elif tag == 'figure':
            was_open = self.in_figure
            self.in_figure = False
            # Emit figures that had no caption, or whose caption came before
            # the image, so they are not silently dropped.
            if (was_open and not self._image_emitted
                    and (self.current_img_src or self._pending_caption)):
                self._emit_image(self._pending_caption)
            self._pending_caption = ""
            self._image_emitted = False
        elif tag == 'figcaption':
            self.in_figcaption = False
            caption = "".join(self._caption_parts).strip()
            self._caption_parts = []
            if self.in_figure and not self.current_img_src:
                # The image may still follow; decide when the figure closes.
                self._pending_caption = caption
            else:
                # Construct standard image: ![alt](path)
                self._emit_image(caption)

    def handle_data(self, data):
        """Collect text that appears inside a supported container."""
        text = data.replace('\n', ' ')
        if self.in_figcaption:
            self._caption_parts.append(text)
        elif self.in_h3 or self.in_h4 or self.in_p or self.in_li or self.in_blockquote:
            self.markdown.append(text)

def main(
    html_file="/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html",
    image_map_file="/home/antigravity/the-quantum-blueprint/docs/image_map.json",
    output_md_file="/home/antigravity/the-quantum-blueprint/original_export.md",
):
    """Convert an HTML export to Markdown and write it with a fixed header.

    Args:
        html_file: Path of the HTML file to convert.
        image_map_file: Path of the JSON file passed to
            ``StandardMarkdownParser`` as its image map.
        output_md_file: Path of the Markdown file to create or overwrite.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If the image map is not valid JSON.
    """
    # Explicit UTF-8 so non-ASCII text does not depend on the platform locale.
    with open(html_file, "r", encoding="utf-8") as f:
        html_content = f.read()

    with open(image_map_file, "r", encoding="utf-8") as f:
        image_map = json.load(f)

    parser = StandardMarkdownParser(image_map)
    parser.feed(html_content)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

    md_content = "".join(parser.markdown).strip()

    header = """# The Quantum Blueprint: How Information Shapes Reality and Consciousness (Original Export)

*This is a preserved rendering of the original Medium export.*
* [View the Transmuted Sovereign Canon Version](quantum_blueprint.md)
* [View the GitHub Pages HTML preservation](https://mrhavens.github.io/the-quantum-blueprint/)

---

"""

    with open(output_md_file, "w", encoding="utf-8") as f:
        # End the file with a newline, as text tools expect.
        f.write(header + md_content + "\n")

    print(f"Generated {output_md_file}")

# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()