"""Download the remote images referenced by an HTML file.

The script parses an HTML document, downloads every ``<img>`` whose ``src``
is an absolute http(s) URL into a local directory, and writes a JSON list of
``[url, local_filename]`` pairs so the references can be rewritten later.
"""

import json
import os
import urllib.request
from html.parser import HTMLParser

# Default locations used when ``main()`` is called without arguments.
DEFAULT_HTML_FILE = (
    "/home/antigravity/medium-export/posts/"
    "2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-"
    "and-Consciousness-41cd62a88d61.html"
)
DEFAULT_OUTPUT_DIR = "/home/antigravity/the-quantum-blueprint/docs/original_assets"
DEFAULT_IMAGE_MAP_FILE = "/home/antigravity/the-quantum-blueprint/docs/image_map.json"


class ImageDownloader(HTMLParser):
    """HTML parser that downloads each absolute-URL ``<img>`` it encounters.

    Attributes:
        output_dir: Directory the image files are written into.
        timeout: Per-request timeout in seconds (``None`` disables it).
        image_count: Number of download *attempts* so far. It is also the
            sequence number used in the generated file names, so it counts
            failed attempts too.
        downloaded_images: ``(url, filename)`` pairs for the downloads that
            succeeded, in document order.
    """

    def __init__(self, output_dir, timeout=30.0):
        """Create a downloader that stores images in *output_dir*.

        Args:
            output_dir: Target directory; created on first successful
                download if it does not exist.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        super().__init__()
        self.output_dir = output_dir
        self.timeout = timeout
        self.image_count = 0
        self.downloaded_images = []

    def handle_starttag(self, tag, attrs):
        """Download the image behind every http(s) ``src`` of an ``<img>`` tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None`` for
                an attribute written without a value (``<img src>``).
        """
        if tag != "img":
            return
        for name, value in attrs:
            if name != "src":
                continue
            # Skip valueless/empty attributes and anything that is not an
            # absolute http(s) URL (relative paths, data: URIs, ...).
            if not value or not value.startswith(("http://", "https://")):
                continue
            self.image_count += 1
            filename = f"original_image_{self.image_count}.jpg"
            self._download(value, filename)

    def _download(self, url, filename):
        """Fetch *url* into ``output_dir/filename``; return True on success.

        Failures are reported on stdout and otherwise ignored so that one
        broken image does not abort the rest of the document.
        """
        filepath = os.path.join(self.output_dir, filename)
        print(f"Downloading {url} to {filepath}...")
        try:
            # Add a user-agent to avoid 403 Forbidden
            request = urllib.request.Request(
                url, headers={"User-Agent": "Mozilla/5.0"}
            )
            # Read the whole body before touching the disk so a failed
            # request never leaves an empty or truncated file behind.
            with urllib.request.urlopen(request, timeout=self.timeout) as response:
                data = response.read()
            os.makedirs(self.output_dir, exist_ok=True)
            with open(filepath, "wb") as out_file:
                out_file.write(data)
        except Exception as e:  # deliberate best-effort, per-image boundary
            print(f"Failed to download {url}: {e}")
            return False
        self.downloaded_images.append((url, filename))
        return True


def main(
    html_file=DEFAULT_HTML_FILE,
    output_dir=DEFAULT_OUTPUT_DIR,
    image_map_file=DEFAULT_IMAGE_MAP_FILE,
):
    """Download all images referenced by *html_file* and save the URL map.

    Args:
        html_file: Path of the HTML document to scan (read as UTF-8).
        output_dir: Directory that receives the downloaded images.
        image_map_file: Path of the JSON file that receives the list of
            ``[url, local_filename]`` pairs for the successful downloads.
    """
    with open(html_file, "r", encoding="utf-8") as f:
        content = f.read()

    parser = ImageDownloader(output_dir)
    parser.feed(content)
    parser.close()  # flush any markup still buffered by the parser

    # Report only the downloads that actually succeeded; image_count also
    # includes failed attempts.
    print(f"Successfully downloaded {len(parser.downloaded_images)} images.")

    # Save a mapping of URL -> Local Filename for later replacement
    map_dir = os.path.dirname(image_map_file)
    if map_dir:
        os.makedirs(map_dir, exist_ok=True)
    with open(image_map_file, "w", encoding="utf-8") as f:
        json.dump(parser.downloaded_images, f)


if __name__ == "__main__":
    main()