Skip to content

Commit a896478

Browse files
committed
feat(fetchspec): adding a converter utility for fetchspecs
This was largely ChatGPT's work. I'm way too lazy to put type declarations on my return values. :-)
1 parent 5acc615 commit a896478

1 file changed

Lines changed: 265 additions & 0 deletions

File tree

util/fetchspec_conv.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
#!/usr/bin/env python3
2+
"""Convert an abiftool fetchspec JSON file into bifelsrc or awt catalog YAML."""
3+
import argparse
4+
import json
5+
import re
6+
from pathlib import Path
7+
import sys
8+
from typing import Any, Dict, Iterable, List, Optional
9+
10+
try:
11+
import yaml # type: ignore
12+
except ImportError as exc: # pragma: no cover
13+
sys.stderr.write("PyYAML is required to run fetchspec_conv.py\n")
14+
raise
15+
16+
17+
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the fetchspec converter.

    Args:
        argv: Argument strings to parse. The default ``None`` makes argparse
            read the process command line, which is what the script entry
            point relies on; passing a list allows programmatic use.

    Returns:
        Namespace with ``fetchspec`` (path string), ``fmt`` (``"bifelsrc"``
        or ``"awtyaml"``), ``extra_tags`` (list of strings, or ``None`` when
        no ``-t/--tag`` option was given) and ``title_prefix`` (string,
        ``""`` by default).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("fetchspec", help="Path to fetchspec JSON file")
    parser.add_argument(
        "--fmt",
        choices=("bifelsrc", "awtyaml"),
        default="bifelsrc",
        help="Output format: bifhub elsrc (default) or awt abif_list entries",
    )
    parser.add_argument(
        "-t",
        "--tag",
        action="append",
        dest="extra_tags",
        help="Tag to apply to every election (may be given multiple times)",
    )
    parser.add_argument(
        "--title-prefix",
        default="",
        help="Prefix to add to every title that appears in the output",
    )
    return parser.parse_args(argv)
39+
40+
41+
def infer_provenance(path: Path) -> str:
    """Derive the provenance label from a fetchspec file name.

    ``Path.stem`` drops the final extension; if what remains still ends in
    ``.fetchspec`` that marker is removed as well, so
    ``foo.fetchspec.json`` yields ``foo``.
    """
    marker = ".fetchspec"
    name = path.stem
    return name[: -len(marker)] if name.endswith(marker) else name
46+
47+
48+
def load_fetchspec(path: Path) -> Dict[str, Any]:
    """Read *path* as UTF-8 text and return the decoded JSON document."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
51+
52+
53+
def normalize_tags(existing: Optional[Any], extras: Optional[Iterable[str]]) -> Optional[List[str]]:
    """Merge an item's own tags with extra tags into a de-duplicated list.

    Args:
        existing: Tags from the item: a comma-separated string, an iterable
            of values (each converted with ``str``), or a falsy value.
        extras: Additional tag strings, or a falsy value for none.

    Returns:
        Whitespace-stripped, non-empty tags in first-seen order (item tags
        before extras), or ``None`` when no tag remains.
    """
    if not existing:
        from_item: List[str] = []
    elif isinstance(existing, str):
        from_item = [piece.strip() for piece in existing.split(",")]
    else:
        from_item = [str(tag).strip() for tag in existing]

    from_extras = [tag.strip() for tag in extras or () if tag]

    # dict keys keep insertion order, giving an order-preserving de-duplication.
    ordered = dict.fromkeys(tag for tag in (*from_item, *from_extras) if tag)
    return list(ordered) or None
71+
72+
73+
def apply_title_prefix(entry: Dict[str, Any], prefix: str) -> None:
    """Prepend *prefix* to ``entry["title"]`` in place.

    The entry is left untouched when the prefix is empty or when the entry
    has no truthy ``"title"`` value.
    """
    if not prefix:
        return
    title = entry.get("title")
    if title:
        entry["title"] = f"{prefix}{title}"
76+
77+
78+
def web_entries(fetchspec: Dict[str, Any], extras: Optional[Iterable[str]], prefix: str) -> List[Dict[str, Any]]:
    """Build one election entry per item of ``fetchspec["web_urls"]``.

    Args:
        fetchspec: Decoded fetchspec document; a missing or null
            ``"web_urls"`` key yields an empty list.
        extras: Extra tags merged into every entry's tags, or ``None``.
        prefix: Text prepended to each entry's title (no-op when empty).

    Returns:
        List of dicts. ``"url"``/``"urls"`` are renamed to
        ``"source_url"``/``"source_urls"``; the remaining known keys are
        copied as-is when present, and ``"tags"`` is the normalised merge of
        the item's tags and *extras* (omitted when empty).
    """
    copied = ("abifloc", "desc", "metaurls", "contest_string", "title", "tags", "id")
    # Materialise once: extras is reused for every item, and a one-shot
    # iterator would otherwise only contribute tags to the first entry.
    extra_tags = list(extras) if extras is not None else None
    entries: List[Dict[str, Any]] = []
    for item in fetchspec.get("web_urls") or []:
        entry: Dict[str, Any] = {}
        if "url" in item:
            entry["source_url"] = item["url"]
        if "urls" in item:
            entry["source_urls"] = item["urls"]
        for key in copied:
            if key == "tags":
                # Always evaluated, so extras are applied even when the item
                # has no tags of its own; no separate fallback is needed.
                tags = normalize_tags(item.get("tags"), extra_tags)
                if tags:
                    entry["tags"] = tags
            elif key in item:
                entry[key] = item[key]
        apply_title_prefix(entry, prefix)
        entries.append(entry)
    return entries
102+
103+
104+
def ext_entries(fetchspec: Dict[str, Any], extras: Optional[Iterable[str]], prefix: str) -> List[Dict[str, Any]]:
    """Build one election entry per item of ``fetchspec["extfiles"]``.

    Args:
        fetchspec: Decoded fetchspec document; a missing or null
            ``"extfiles"`` key yields an empty list.
        extras: Extra tags merged into every entry's tags, or ``None``.
        prefix: Text prepended to each entry's title (no-op when empty).

    Returns:
        List of dicts. ``"localcopy"``/``"localcopies"`` are renamed to
        ``"repo_path"``/``"repo_paths"``; the remaining known keys are copied
        as-is when present, and ``"tags"`` is the normalised merge of the
        item's tags and *extras* (omitted when empty).
    """
    copied = ("abifloc", "desc", "metaurls", "srcfmt", "tags", "title", "id")
    # Materialise once: extras is reused for every item, and a one-shot
    # iterator would otherwise only contribute tags to the first entry.
    extra_tags = list(extras) if extras is not None else None
    entries: List[Dict[str, Any]] = []
    for item in fetchspec.get("extfiles") or []:
        entry: Dict[str, Any] = {}
        if "localcopy" in item:
            entry["repo_path"] = item["localcopy"]
        if "localcopies" in item:
            entry["repo_paths"] = item["localcopies"]
        for key in copied:
            if key == "tags":
                # Always evaluated, so extras are applied even when the item
                # has no tags of its own; no separate fallback is needed.
                tags = normalize_tags(item.get("tags"), extra_tags)
                if tags:
                    entry["tags"] = tags
            elif key in item:
                entry[key] = item[key]
        apply_title_prefix(entry, prefix)
        entries.append(entry)
    return entries
128+
129+
130+
def archive_entries(fetchspec: Dict[str, Any], extras: Optional[Iterable[str]], prefix: str) -> List[Dict[str, Any]]:
    """Build one election entry per item of ``fetchspec["archive_subfiles"]``.

    Args:
        fetchspec: Decoded fetchspec document; a missing or null
            ``"archive_subfiles"`` key yields an empty list.
        extras: Extra tags merged into every entry's tags, or ``None``.
        prefix: Text prepended to each entry's title (no-op when empty).

    Returns:
        List of dicts holding ``"archive_subfile"`` and the other known keys
        copied as-is when present, with ``"tags"`` being the normalised merge
        of the item's tags and *extras* (omitted when empty).
    """
    copied = ("abifloc", "desc", "tags", "title", "id")
    # Materialise once: extras is reused for every item, and a one-shot
    # iterator would otherwise only contribute tags to the first entry.
    extra_tags = list(extras) if extras is not None else None
    entries: List[Dict[str, Any]] = []
    for item in fetchspec.get("archive_subfiles") or []:
        entry: Dict[str, Any] = {}
        if "archive_subfile" in item:
            entry["archive_subfile"] = item["archive_subfile"]
        for key in copied:
            if key == "tags":
                # Always evaluated, so extras are applied even when the item
                # has no tags of its own; no separate fallback is needed.
                tags = normalize_tags(item.get("tags"), extra_tags)
                if tags:
                    entry["tags"] = tags
            elif key in item:
                entry[key] = item[key]
        apply_title_prefix(entry, prefix)
        entries.append(entry)
    return entries
152+
153+
154+
def build_bifelsrc(
    fetchspec_path: Path,
    fetchspec: Dict[str, Any],
    extras: Optional[Iterable[str]],
    prefix: str,
) -> Dict[str, Any]:
    """Assemble the bifelsrc document for a fetchspec.

    The document carries a fixed schema/batch header, a provenance label
    inferred from *fetchspec_path*, the optional repository and directory
    settings that are set in *fetchspec*, and the elections gathered from
    the web, external-file and archive sections, in that order.
    """
    document: Dict[str, Any] = {
        "schema": "elsrc-0.33",
        "provenance": infer_provenance(fetchspec_path),
        "batch": "auto-generated",
        "elections": [],
    }

    # (fetchspec key, document key) pairs copied only when the value is truthy.
    optional_fields = (
        ("gitrepo_url", "source_repo"),
        ("download_subdir", "download_subdir"),
        ("abifloc_subdir", "abifloc_subdir"),
    )
    for source_key, target_key in optional_fields:
        value = fetchspec.get(source_key)
        if value:
            document[target_key] = value

    elections = document["elections"]
    for collect in (web_entries, ext_entries, archive_entries):
        elections += collect(fetchspec, extras, prefix)
    return document
178+
179+
180+
def slug_from_text(text: str) -> str:
    """Turn *text* into an identifier-safe slug.

    Every run of characters other than ASCII letters, digits, ``_`` and
    ``-`` becomes a single ``-``; leading and trailing dashes are dropped.
    Returns ``"unnamed"`` when nothing is left.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_-]+", "-", text.strip())
    trimmed = collapsed.strip("-")
    if trimmed:
        return trimmed
    return "unnamed"
183+
184+
185+
def resolve_abif_path(base_dir: str, abifloc: str) -> str:
    """Return *abifloc* located under *base_dir*.

    *abifloc* is returned unchanged when *base_dir* is empty or when it
    already starts with ``base_dir/``; otherwise the two are joined with a
    single ``/`` (trailing slashes of *base_dir* and leading slashes of
    *abifloc* are dropped first).
    """
    if base_dir:
        root = base_dir.rstrip("/")
        already_rooted = abifloc.startswith(f"{root}/")
        if not already_rooted:
            return f"{root}/{abifloc.lstrip('/')}"
    return abifloc
192+
193+
194+
def make_awt_entry(
    fetchspec: Dict[str, Any],
    item: Dict[str, Any],
    extras: Optional[Iterable[str]],
    prefix: str,
) -> Dict[str, Any]:
    """Build one awt catalog entry from a fetchspec-derived item.

    Args:
        fetchspec: Decoded fetchspec document; only ``"abifloc_subdir"`` is
            read, as the base directory for the entry's filename.
        item: Source item. ``"abifloc"`` is required; ``"id"``, ``"title"``,
            ``"contest_string"``, ``"desc"`` and ``"tags"`` are used when
            present.
        extras: Extra tags merged with the item's own tags, or ``None``.
        prefix: Text prepended to the entry title (no-op when empty).

    Returns:
        Dict with ``"filename"``, ``"id"`` and ``"title"``, plus ``"desc"``
        and ``"tags"`` (a comma-separated string) when available.

    Raises:
        ValueError: If *item* has no truthy ``"abifloc"``.
    """
    abifloc = item.get("abifloc")
    if not abifloc:
        raise ValueError("abifloc is required to build awt catalog entries")
    abifloc_subdir = fetchspec.get("abifloc_subdir", "")
    filename = resolve_abif_path(abifloc_subdir, abifloc)

    entry: Dict[str, Any] = {"filename": filename}

    id_source = item.get("id") or Path(abifloc).stem
    # JSON ids may be numbers; slug_from_text calls str methods on its input.
    entry["id"] = slug_from_text(str(id_source))

    # Title falls back through progressively less specific fields.
    base_title = (
        item.get("title")
        or item.get("contest_string")
        or item.get("desc")
        or entry["id"]
    )
    entry["title"] = f"{prefix}{base_title}" if prefix else base_title

    if item.get("desc"):
        entry["desc"] = item["desc"]

    merged_tags = normalize_tags(item.get("tags"), extras)
    if merged_tags:
        entry["tags"] = ", ".join(merged_tags)

    return entry
227+
228+
229+
def build_awtyaml(
    fetchspec: Dict[str, Any],
    extras: Optional[Iterable[str]],
    prefix: str,
) -> List[Dict[str, Any]]:
    """Build the awt catalog entries for a fetchspec.

    Items are gathered from the web, external-file and archive sections (in
    that order); only items that carry an ``"abifloc"`` key become entries.

    Args:
        fetchspec: Decoded fetchspec document.
        extras: Extra tags applied to every entry, or ``None``.
        prefix: Text prepended once to every entry title.

    Returns:
        List of awt catalog entry dicts.

    Raises:
        ValueError: Propagated from ``make_awt_entry`` when an item has an
            ``"abifloc"`` key with an empty value.
    """
    entries: List[Dict[str, Any]] = []
    for collect in (web_entries, ext_entries, archive_entries):
        # Collect with an empty prefix: make_awt_entry applies the prefix
        # itself, and passing it to both stages would produce
        # "<prefix><prefix><title>" for items that have a title.
        for item in collect(fetchspec, extras, ""):
            if "abifloc" in item:
                entries.append(make_awt_entry(fetchspec, item, extras, prefix))
    return entries
245+
246+
247+
def main() -> None:
    """Convert the fetchspec named on the command line and print YAML.

    Exits with status 1 (after a message on stderr) when the fetchspec path
    is not an existing regular file. Otherwise the document for the selected
    ``--fmt`` is written to stdout, keeping key insertion order.
    """
    args = parse_args()
    fetchspec_path = Path(args.fetchspec)
    # is_file() rather than exists(): a directory passes exists() and would
    # then fail with a traceback when opened for reading.
    if not fetchspec_path.is_file():
        sys.stderr.write(f"Fetchspec not found: {fetchspec_path}\n")
        sys.exit(1)

    fetchspec = load_fetchspec(fetchspec_path)

    if args.fmt == "bifelsrc":
        document = build_bifelsrc(fetchspec_path, fetchspec, args.extra_tags, args.title_prefix)
    else:
        document = build_awtyaml(fetchspec, args.extra_tags, args.title_prefix)

    yaml.safe_dump(document, stream=sys.stdout, sort_keys=False)
262+
263+
264+
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)