50 lines
2.0 KiB
Python
50 lines
2.0 KiB
Python
import os
|
|
import urllib.request
|
|
from html.parser import HTMLParser
|
|
|
|
class ImageDownloader(HTMLParser):
    """HTML parser that downloads each absolute-URL ``<img>`` it encounters.

    Feed HTML text through ``feed()``. Every ``<img>`` tag whose ``src``
    starts with ``"http"`` is fetched and written into ``output_dir`` as
    ``original_image_<n>.jpg``. Download failures are reported on stdout and
    do not interrupt parsing.

    Attributes:
        output_dir: Directory that receives the downloaded files.
        timeout: Seconds to wait on each network operation before giving up.
        image_count: Number of download attempts so far, successful or not.
            It also supplies the numeric suffix of each filename.
        downloaded_images: ``(url, filename)`` tuples for the downloads that
            succeeded, in the order they were encountered.
    """

    def __init__(self, output_dir, timeout=30):
        """Create a parser that saves images into *output_dir*.

        Args:
            output_dir: Destination directory for downloaded images.
            timeout: Per-request timeout in seconds, passed to ``urlopen``.
        """
        super().__init__()
        self.output_dir = output_dir
        self.timeout = timeout
        self.image_count = 0
        self.downloaded_images = []

    def handle_starttag(self, tag, attrs):
        """Download the image referenced by an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != "img":
            return
        for name, url in attrs:
            # HTMLParser reports a valueless attribute (``<img src>``) with a
            # value of None, so check truthiness before calling str methods.
            if name != "src" or not url or not url.startswith("http"):
                continue
            self.image_count += 1
            filename = f"original_image_{self.image_count}.jpg"
            self._download(url, filename)

    def _download(self, url, filename):
        """Fetch *url* into ``output_dir/filename`` and record it on success."""
        filepath = os.path.join(self.output_dir, filename)
        print(f"Downloading {url} to {filepath}...")
        try:
            if self.output_dir:
                os.makedirs(self.output_dir, exist_ok=True)
            # Add a user-agent to avoid 403 Forbidden
            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
            # Read the whole body before opening the destination so that a
            # failed transfer does not leave an empty or partial file behind.
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                data = response.read()
            with open(filepath, 'wb') as out_file:
                out_file.write(data)
        except Exception as e:
            # Deliberately best-effort: one bad image must not stop the rest.
            print(f"Failed to download {url}: {e}")
        else:
            self.downloaded_images.append((url, filename))
|
|
|
|
def main(html_file=None, output_dir=None, map_file=None):
    """Download the images referenced by an HTML file and write a URL map.

    Reads *html_file*, feeds it to ``ImageDownloader`` so that each
    absolute-URL image is saved into *output_dir*, prints how many downloads
    succeeded, and dumps the ``(url, filename)`` pairs to *map_file* as JSON.

    Args:
        html_file: Path of the HTML document to scan. Defaults to the
            exported Medium post this script was written for.
        output_dir: Directory that receives the images. Defaults to the
            project's ``docs/original_assets`` directory.
        map_file: Path of the JSON file to write. Defaults to the project's
            ``docs/image_map.json``.
    """
    # Imported up front so a missing module cannot fail after the map file
    # has already been opened (and truncated) for writing.
    import json

    if html_file is None:
        html_file = "/home/antigravity/medium-export/posts/2025-01-12_The-Quantum-Blueprint--How-Information-Shapes-Reality-and-Consciousness-41cd62a88d61.html"
    if output_dir is None:
        output_dir = "/home/antigravity/the-quantum-blueprint/docs/original_assets"
    if map_file is None:
        map_file = "/home/antigravity/the-quantum-blueprint/docs/image_map.json"

    # Explicit encoding so the result does not depend on the machine's locale.
    # NOTE(review): assumes the exported HTML is UTF-8 - confirm against the file.
    with open(html_file, "r", encoding="utf-8") as f:
        content = f.read()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    parser = ImageDownloader(output_dir)
    parser.feed(content)
    # Flush any input the parser is still buffering.
    parser.close()

    # image_count includes failed attempts; report only the real successes.
    print(f"Successfully downloaded {len(parser.downloaded_images)} images.")

    # Save a mapping of URL -> Local Filename for later replacement
    map_dir = os.path.dirname(map_file)
    if map_dir:
        os.makedirs(map_dir, exist_ok=True)
    with open(map_file, "w") as f:
        json.dump(parser.downloaded_images, f)
|
|
|
|
# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|