Files
the-quantum-blueprint/download_original_images.py
T

50 lines
2.0 KiB
Python

import os
import urllib.request
from html.parser import HTMLParser
class ImageDownloader(HTMLParser):
    """HTML parser that downloads every ``<img>`` with an absolute HTTP(S) URL.

    Feed HTML text to the parser with ``feed()``. Each ``src`` attribute on an
    ``<img>`` tag that starts with ``http://`` or ``https://`` is fetched and
    written to ``output_dir`` as ``original_image_<n>.jpg``, where ``<n>`` is
    the running attempt number. The ``.jpg`` suffix is used regardless of the
    actual image type.

    Attributes:
        output_dir: Directory the image files are written to. It is created
            on the first successful download if it does not exist.
        timeout: Socket timeout in seconds passed to ``urlopen``.
        image_count: Number of download *attempts* so far (successful or not).
            It also provides the number used in the local filename.
        downloaded_images: ``(url, filename)`` tuples, one per image that was
            fetched and written successfully, in document order.
    """

    def __init__(self, output_dir, timeout=30):
        """Create a downloader that writes into *output_dir*.

        Args:
            output_dir: Target directory for the downloaded files.
            timeout: Seconds to wait on blocking network operations before a
                download is treated as failed.
        """
        super().__init__()
        self.output_dir = output_dir
        self.timeout = timeout
        self.image_count = 0
        self.downloaded_images = []

    def handle_starttag(self, tag, attrs):
        """Download the image referenced by an ``<img>`` start tag.

        Args:
            tag: Tag name as reported by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None`` for
                an attribute written without a value (``<img src>``).
        """
        if tag != "img":
            return
        for name, url in attrs:
            # Skip valueless or empty ``src`` attributes instead of crashing
            # on ``None.startswith``.
            if name != "src" or not url:
                continue
            # Only absolute HTTP(S) URLs can be fetched; relative paths,
            # ``data:`` URIs and look-alike schemes are ignored.
            if not url.startswith(("http://", "https://")):
                continue
            self.image_count += 1
            filename = f"original_image_{self.image_count}.jpg"
            self._download(url, filename)

    def _download(self, url, filename):
        """Fetch *url* into ``output_dir/filename``; return True on success.

        Failures are reported on stdout and swallowed, so one bad image does
        not abort the rest of the document.
        """
        filepath = os.path.join(self.output_dir, filename)
        print(f"Downloading {url} to {filepath}...")
        try:
            # Add a user-agent to avoid 403 Forbidden
            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                data = response.read()
            # Touch the filesystem only after the body has been read in full,
            # so a failed transfer never leaves a truncated file behind.
            os.makedirs(self.output_dir, exist_ok=True)
            with open(filepath, 'wb') as out_file:
                out_file.write(data)
        except Exception as e:  # deliberate best-effort boundary
            print(f"Failed to download {url}: {e}")
            return False
        self.downloaded_images.append((url, filename))
        return True
def main(
    html_file="/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html",
    output_dir="/home/antigravity/the-quantum-blueprint/docs/original_assets",
    map_file="/home/antigravity/the-quantum-blueprint/docs/image_map.json",
):
    """Download all remote images referenced by an HTML file and record them.

    Args:
        html_file: Path of the HTML document to scan for ``<img>`` tags.
        output_dir: Directory the downloaded images are written to.
        map_file: Path of the JSON file that receives the list of
            ``[url, local_filename]`` pairs for later replacement.
    """
    import json

    # Explicit encoding so the result does not depend on the platform locale.
    # NOTE(review): assumes the export is UTF-8 - confirm.
    with open(html_file, "r", encoding="utf-8") as f:
        content = f.read()

    parser = ImageDownloader(output_dir)
    parser.feed(content)

    # ``image_count`` counts attempts; only entries in ``downloaded_images``
    # were actually written, so report that number as the success count.
    print(f"Successfully downloaded {len(parser.downloaded_images)} images.")

    # Save a mapping of URL -> Local Filename for later replacement
    # (serialized as a JSON list of two-element lists).
    map_dir = os.path.dirname(map_file)
    if map_dir:
        os.makedirs(map_dir, exist_ok=True)
    with open(map_file, "w") as f:
        json.dump(parser.downloaded_images, f)
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()