-
Notifications
You must be signed in to change notification settings - Fork 28
Expand file tree
/
Copy pathsync-image.py
More file actions
141 lines (108 loc) · 4.25 KB
/
Copy pathsync-image.py
File metadata and controls
141 lines (108 loc) · 4.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
import os
import re
import json
import hashlib
import requests
import argparse
from urllib.parse import urlparse
from pathlib import Path
from mimetypes import guess_extension
# Directory containing the JSON files that are scanned for image URLs.
DATA_DIR = "content/maintainers/"
# JSON file in which the `cache` mapping is persisted between runs.
CACHE_FILE = ".image-cache.json"
# Directory that downloaded images are written into.
IMAGES_DIR = "public/images/"
# Size threshold, in KB, above which warn_size() prints a warning.
MAX_SIZE_KB = 500
# HTTP headers sent with every request made by download().
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; forklore-image-sync/1.0)"}
# Make sure the image output directory exists before anything is written to it.
Path(IMAGES_DIR).mkdir(parents=True, exist_ok=True)
# Command-line interface: an optional single JSON file name plus a --debug flag.
parser = argparse.ArgumentParser(description="Sync remote images to local storage")
parser.add_argument("json_file", nargs="?", help="Specific JSON file in content/maintainers/")
parser.add_argument("--debug", action="store_true")
# NOTE: arguments are parsed at module level, so this runs as soon as the file is executed.
args = parser.parse_args()
# Load the cache left by a previous run if the file exists; otherwise start empty.
# Keys are the original image URLs; each value stores the saved file's "hash" and "filename".
cache = json.loads(Path(CACHE_FILE).read_text()) if Path(CACHE_FILE).exists() else {}
def log(msg):
    """Print *msg*, but only when the script was started with ``--debug``."""
    if not args.debug:
        return
    print(msg)
def sha256(data):
    """Return the SHA-256 digest of the bytes *data* as a hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def get_ext(url, content_type=""):
    """Choose a file extension (including the leading dot) for a downloaded image.

    Args:
        url: The URL the image was fetched from.
        content_type: Value of the response's Content-Type header, if any.

    Returns:
        The extension of the URL's path when it has one; otherwise the
        extension ``mimetypes`` associates with the media type in
        *content_type*; otherwise the placeholder ``".img"``.
    """
    # urlparse() already separates the query string from the path, so the
    # path's suffix can be taken directly.
    ext = os.path.splitext(urlparse(url).path)[1]
    if ext:
        return ext
    if not content_type:
        return ".img"
    # Drop parameters such as "; charset=..." and any whitespace around the
    # media type; without the strip, "image/png ; ..." would miss the lookup.
    media_type = content_type.split(";", 1)[0].strip()
    return guess_extension(media_type) or ".img"
def resolve_url(url):
    """Map a GitHub ``/blob/`` page URL to its raw.githubusercontent.com form.

    URLs that do not match the GitHub blob pattern are returned unchanged.
    """
    match = re.match(r"https://github\.com/([^/]+/[^/]+)/blob/(.+)", url)
    if match is None:
        return url
    # Group 1 is "owner/repo"; group 2 is everything after "/blob/".
    repo, remainder = match.groups()
    return f"https://raw.githubusercontent.com/{repo}/{remainder}"
def download(url):
    """Fetch *url* and return ``(body_bytes, content_type)``.

    The request is sent with HEADERS and a 15 second timeout. Any failure
    (including an HTTP error status) is reported on stdout and results in
    ``(None, None)`` instead of an exception.
    """
    outcome = (None, None)
    try:
        response = requests.get(url, headers=HEADERS, timeout=15)
        response.raise_for_status()
        outcome = (response.content, response.headers.get("Content-Type", ""))
    except Exception as err:
        print(f"[ERROR] {url}: {err}")
    return outcome
def warn_size(filepath):
    """Print a warning when the file at *filepath* is larger than MAX_SIZE_KB.

    For .jpg, .jpeg and .png files a ``magick`` command line that resizes
    the file in place is printed as a suggested fix.
    """
    kilobytes = os.path.getsize(filepath) / 1024
    if kilobytes <= MAX_SIZE_KB:
        return
    print(f"[WARN] {os.path.basename(filepath)} is {kilobytes:.0f} KB (limit {MAX_SIZE_KB} KB)")
    suffix = os.path.splitext(filepath)[1].lower()
    if suffix in (".jpg", ".jpeg", ".png"):
        print(f' Fix: magick {filepath} -resize "800x800>" -strip -quality 75 {filepath}')
def process_image(url, filename_base):
    """Make sure the image behind *url* exists locally and return its site path.

    Args:
        url: Remote image URL, or a value that needs no processing.
        filename_base: File name (without extension) to save the image under.

    Returns:
        * ``""`` when *url* is falsy;
        * *url* unchanged when it is blank or already starts with "/images/";
        * ``"/images/<filename>"`` when an intact cached copy exists or the
          download succeeds;
        * ``""`` when the download fails or returns no content. Callers that
          compare the result with the original value will therefore replace
          the stored URL with an empty string in that case.
    """
    if not url:
        return ""
    if not url.strip() or url.startswith("/images/"):
        return url

    # Reuse the previously saved file if it is still on disk with the same hash.
    entry = cache.get(url)
    if entry:
        local_path = os.path.join(IMAGES_DIR, entry["filename"])
        intact = (
            os.path.exists(local_path)
            and sha256(Path(local_path).read_bytes()) == entry["hash"]
        )
        if intact:
            log(f"[SKIP] {entry['filename']}")
            warn_size(local_path)
            return f"/images/{entry['filename']}"

    source = resolve_url(url)
    if source != url:
        log(f"[RESOLVED] {url} → {source}")

    body, mime = download(source)
    if not body:
        return ""

    filename = f"{filename_base}{get_ext(source, mime)}"
    target = os.path.join(IMAGES_DIR, filename)
    Path(target).write_bytes(body)
    # The cache is keyed by the original URL, not the resolved one.
    cache[url] = {"hash": sha256(body), "filename": filename}
    print(f"[SAVED] {filename}")
    warn_size(target)
    return f"/images/{filename}"
def _logo_basename(project, index):
    """Return a lower-case file-name fragment for the logo of *project*."""
    name = project.get("name")
    if not isinstance(name, str) or not name:
        # Missing, null, empty or non-string names fall back to a positional name.
        name = f"project_{index}"
    # Path separators would place the image in a sub-directory of, or
    # outside, IMAGES_DIR, so they are replaced along with spaces.
    return re.sub(r"[\\/]", "_", name.replace(" ", "_").lower())


def process_json(file):
    """Localise the image URLs inside one JSON file from DATA_DIR.

    The top-level "photo" value and the "logo" of every entry in "projects"
    are passed through process_image(); whenever the returned value differs
    from the stored one the field is updated. The file is rewritten only if
    at least one field changed.

    Args:
        file: Name of the JSON file, relative to DATA_DIR.

    Returns:
        None. Files that cannot be read or parsed, or whose top-level value
        is not a JSON object, are reported with a "[SKIP]" line and left
        untouched.
    """
    path = os.path.join(DATA_DIR, file)
    try:
        data = json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both JSON syntax errors and invalid UTF-8.
        print(f"[SKIP] {file}: {e}")
        return
    if not isinstance(data, dict):
        print(f"[SKIP] {file}: expected a JSON object at the top level")
        return

    base = os.path.splitext(file)[0]
    changed = False

    photo = data.get("photo")
    if isinstance(photo, str):
        result = process_image(photo, f"{base}_photo")
        if result != photo:
            data["photo"] = result
            changed = True

    projects = data.get("projects")
    if not isinstance(projects, list):
        # A missing, null or otherwise malformed "projects" value is treated as empty.
        projects = []
    for i, project in enumerate(projects):
        if not isinstance(project, dict):
            continue
        url = project.get("logo", "")
        if not isinstance(url, str):
            continue
        result = process_image(url, f"{base}_{_logo_basename(project, i)}")
        if result != url:
            project["logo"] = result
            changed = True

    if changed:
        Path(path).write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
        # NOTE(review): the original indentation was lost; this message is
        # assumed to belong to the "file was rewritten" branch - confirm.
        print(f"[DONE] {file}")
# Work on the single file named on the command line, or else on every
# .json file found in DATA_DIR.
if args.json_file:
    files = [args.json_file]
else:
    files = [entry for entry in os.listdir(DATA_DIR) if entry.endswith(".json")]

for f in files:
    if f.endswith(".json"):
        process_json(f)
        continue
    print("[ERROR] Need .json file")

# Persist the URL -> {hash, filename} cache for the next run.
Path(CACHE_FILE).write_text(json.dumps(cache, indent=2))
print("[OK] Done.")