Skip to content

Commit 8775ae0

Browse files
TMHSDigital and claude authored
fix(catalog): generate search index, footer version, and standards grid from source [skip version] (#81)
Three hand-maintained regions of docs/index.html had silently drifted and were not covered by sync-check:

- The inline search-index block was frozen at 9 of 14 tools, so the five newest tools (developer-tools-mcp, cfx-mcp, blender, local-ai, screencast) were unsearchable on the live site (D1).
- The footer version was hardcoded v1.9.5 against a real latest tag of v1.17.0 (D2).
- The standards grid was missing born-green-contract and its count read 18 for 19 standards (F2).
- docs/search-index.json was generated by aggregate_search.py but never fetched by the page; the page read only the inline block (F3).

sync_from_registry.py now owns all four: it reconciles docs/search-index.json from the registry (preserving the scanned skill/rule/MCP-tool name arrays), mirrors it into the inline fallback block, sets the footer from VERSION, and generates the standards grid and count from the standards/*.md listing. Each new region is best-effort so a minimal catalog without a VERSION file or standards directory is left untouched. All four are covered by --check.

setupSearch now fetches search-index.json at runtime with the inline block as the offline/file:// fallback, so the file and the embedded copy agree (P1n).

aggregate_search.py merges with the existing index so refreshing without a full fleet checkout no longer discards previously scanned names.

Signed-off-by: TMHSDigital <TMHospitalityStrategies@gmail.com>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent d5d1367 commit 8775ae0

5 files changed

Lines changed: 281 additions & 31 deletions

File tree

docs/index.html

Lines changed: 24 additions & 17 deletions
Large diffs are not rendered by default.

docs/search-index.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

scripts/sync_from_registry.py

Lines changed: 225 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
- README.md tool descriptions (between registry:descriptions:start / :end markers)
99
- README.md aggregate stats (between registry:stats:start / :end markers)
1010
- docs/index.html embedded JSON (inside <script id="registry-data">)
11+
- docs/index.html search index (inside <script id="search-index">)
12+
- docs/index.html footer version (element id="footerVersion", from VERSION)
13+
- docs/index.html standards grid (<div class="standards-grid">, from standards/*.md)
14+
- docs/index.html standards count (element id="standardsCount")
15+
- docs/search-index.json (registry-driven; skill/rule/MCP names are
16+
preserved and refreshed out-of-band by
17+
site-template/aggregate_search.py)
1118
- CLAUDE.md cataloged tools (between registry:tools:start / :end markers)
1219
- CLAUDE.md totals (between registry:stats:start / :end markers)
1320
@@ -32,12 +39,71 @@
3239
README_PATH = REPO_ROOT / "README.md"
3340
CLAUDE_PATH = REPO_ROOT / "CLAUDE.md"
3441
INDEX_PATH = REPO_ROOT / "docs" / "index.html"
42+
SEARCH_INDEX_PATH = REPO_ROOT / "docs" / "search-index.json"
43+
VERSION_PATH = REPO_ROOT / "VERSION"
44+
STANDARDS_DIR = REPO_ROOT / "standards"
3545

3646
TYPE_DISPLAY = {
3747
"cursor-plugin": "Plugin",
3848
"mcp-server": "MCP Server",
3949
}
4050

51+
STANDARDS_REPO_BLOB = (
52+
"https://github.com/TMHSDigital/Developer-Tools-Directory/blob/main/standards"
53+
)
54+
55+
# Curated titles and short descriptions for the standards grid, listed in
# display order. The grid is generated from the actual standards/*.md listing
# (README excluded), so a new standard file always gets a card and the count
# badge tracks the directory. A file missing from this map still renders, using
# its filename and a fallback.
STANDARDS_META = {
    "folder-structure": ("Folder Structure", "Canonical repository layout for plugins and MCP servers"),
    "plugin-manifest": ("Plugin Manifest", "plugin.json specification and required fields"),
    "ci-cd": ("CI/CD", "GitHub Actions workflows every repo must have"),
    "github-pages": ("GitHub Pages", "Documentation site setup and deployment"),
    "commit-conventions": ("Commit Conventions", "Conventional commits and version bumping rules"),
    "readme-template": ("README Template", "Standard README structure and required sections"),
    "agents-template": ("AGENTS.md Template", "AI agent guidance file structure"),
    "versioning": ("Versioning", "Semver management and automated release flow"),
    "release-doc-sync": ("Release Doc Sync", "Composite action contract for keeping CHANGELOG, CLAUDE, and ROADMAP in sync after a release"),
    "testing": ("Testing", "Test frameworks, minimum coverage bar, and CI wiring"),
    "skills": ("Skills", "SKILL.md structure and frontmatter conventions"),
    "rules": ("Rules", ".mdc structure, globs, and the secrets rule pattern"),
    "mcp-server": ("MCP Server", "Tool naming, runtime, transport, and destructive operation handling"),
    "security": ("Security", "Vulnerability disclosure, secrets handling, and workflow supply chain"),
    "licensing": ("Licensing", "DCO + inbound license grant model"),
    "scope": ("Scope", "What belongs in the directory and what does not"),
    "born-green-contract": ("Born-Green Contract", "Acceptance criterion that any generator must produce a release-ready repo"),
    "lifecycle": ("Lifecycle", "Tool status transitions from experimental to archived"),
    "writing-style": ("Writing Style", "Prose conventions across all repos"),
}

# Display order is the insertion order of STANDARDS_META. Deriving it (instead
# of repeating the slugs in a second hand-maintained list) means the order and
# the descriptions cannot drift apart when a standard is added or renamed.
STANDARDS_ORDER = list(STANDARDS_META)
101+
102+
# Search-index entry fields that come from the registry. Skill/rule/MCP-tool
103+
# name arrays are preserved from the existing index (refreshed out-of-band by
104+
# site-template/aggregate_search.py against local repo checkouts).
105+
SEARCH_ARRAY_FIELDS = ("skills", "rules", "mcpTools")
106+
41107

42108
def load_registry(registry_path: Path = REGISTRY_PATH) -> list[dict[str, Any]]:
43109
with registry_path.open("r", encoding="utf-8") as fh:
@@ -144,6 +210,99 @@ def render_embedded_registry(entries: list[dict[str, Any]]) -> str:
144210
return json.dumps(entries, separators=(",", ":"), ensure_ascii=False)
145211

146212

213+
def list_standards(standards_dir: Path = STANDARDS_DIR) -> list[str]:
    """Return the sorted slugs of the standards found in *standards_dir*.

    A slug is the stem of a ``*.md`` file directly inside the directory. Any
    file whose stem is ``readme`` (compared case-insensitively) is left out.
    """
    slugs = []
    for md_file in standards_dir.glob("*.md"):
        if md_file.stem.lower() == "readme":
            continue
        slugs.append(md_file.stem)
    slugs.sort()
    return slugs
218+
219+
220+
def ordered_standards(present: list[str]) -> list[str]:
    """Arrange *present* slugs for display.

    Slugs listed in ``STANDARDS_ORDER`` come first, in that curated order;
    every remaining slug follows, sorted alphabetically.
    """
    curated = []
    for slug in STANDARDS_ORDER:
        if slug in present:
            curated.append(slug)

    unmapped = [slug for slug in present if slug not in STANDARDS_ORDER]
    unmapped.sort()
    return [*curated, *unmapped]
225+
226+
227+
def standard_title_desc(slug: str, standards_dir: Path = STANDARDS_DIR) -> tuple[str, str]:
    """Return the ``(title, description)`` pair shown on a standard's card.

    Slugs present in ``STANDARDS_META`` use the curated pair verbatim. Any
    other slug falls back to parsing ``<standards_dir>/<slug>.md``: the title
    is the text of the first ``# `` heading and the description is the first
    non-blank, non-heading line after it, cut to 120 characters. When the file
    is missing or has no ``# `` heading, the title is the slug with hyphens
    turned into spaces and title-cased, and the description is empty.

    Args:
        slug: Standard file stem, e.g. ``"ci-cd"``.
        standards_dir: Directory searched by the fallback. It defaults to the
            module-level ``STANDARDS_DIR``; pass another directory when the
            catalog being rendered lives under a different root.
    """
    if slug in STANDARDS_META:
        return STANDARDS_META[slug]

    # Fallback for an unmapped standard: parse H1 + first paragraph line.
    title = slug.replace("-", " ").title()
    desc = ""
    md = standards_dir / f"{slug}.md"
    if not md.is_file():
        return title, desc

    lines = md.read_text(encoding="utf-8").splitlines()
    for i, line in enumerate(lines):
        if not line.startswith("# "):
            continue
        title = line[2:].strip()
        following = (nxt.strip() for nxt in lines[i + 1:])
        desc = next((s[:120] for s in following if s and not s.startswith("#")), "")
        break  # only the first H1 is considered
    return title, desc
246+
247+
248+
def render_standards_grid(present: list[str]) -> str:
    """Render the inner HTML of the standards grid for the *present* slugs.

    One ``<a class="standard-card">`` is emitted per slug, in the order given
    by ``ordered_standards``, linking to ``{STANDARDS_REPO_BLOB}/{slug}.md``.
    The cards are joined with newlines and wrapped in a leading newline and a
    trailing newline plus one space.

    The link target, title and description are HTML-escaped before being
    interpolated. For an unmapped standard the title and description are read
    from the standard's markdown file, so they may contain ``<``, ``&`` or
    quotes that would otherwise corrupt the page markup.
    """
    import html  # function-scope import keeps the new dependency local

    cards = []
    for slug in ordered_standards(present):
        title, desc = standard_title_desc(slug)
        href = html.escape(f"{STANDARDS_REPO_BLOB}/{slug}.md")
        cards.append(
            f' <a href="{href}" '
            f'class="standard-card" target="_blank" rel="noopener">'
            f"<h3>{html.escape(title)}</h3><p>{html.escape(desc)}</p></a>"
        )
    return "\n" + "\n".join(cards) + "\n "
258+
259+
260+
def read_version(version_path: Path = VERSION_PATH) -> str:
    """Return the contents of *version_path*, stripped and prefixed with ``v``."""
    raw = version_path.read_text(encoding="utf-8")
    return f"v{raw.strip()}"
262+
263+
264+
def build_search_index(
    entries: list[dict[str, Any]], existing: dict[str, dict[str, Any]]
) -> list[dict[str, Any]]:
    """Build the search index, one record per registry entry.

    The basic fields (name, slug, description, type, topics, npm, url,
    homepage) are taken from the registry entry. The arrays named in
    ``SEARCH_ARRAY_FIELDS`` are copied from the record in *existing* that has
    the same slug, or left empty when there is none, so a sync does not
    clobber names gathered by the out-of-band aggregate_search refresh.
    """

    def _record(tool: dict[str, Any]) -> dict[str, Any]:
        slug = tool.get("slug", "")
        carried = existing.get(slug, {})
        record = {
            "name": tool["name"],
            "slug": slug,
            "description": tool.get("description", ""),
            "type": tool.get("type", ""),
            "topics": tool.get("topics", []),
            "npm": tool.get("npm", "") or "",
            "url": f"https://github.com/{tool['repo']}",
            "homepage": tool.get("homepage", ""),
        }
        # Copy each carried list so the new index never aliases the old one.
        record.update(
            (field, list(carried.get(field, []))) for field in SEARCH_ARRAY_FIELDS
        )
        return record

    return [_record(tool) for tool in entries]
288+
289+
290+
def render_search_index(index: list[dict[str, Any]]) -> str:
    """Serialize *index* as compact JSON, keeping non-ASCII characters as-is."""
    compact = (",", ":")  # no whitespace after item or key separators
    return json.dumps(index, ensure_ascii=False, separators=compact)
292+
293+
294+
def load_existing_search_index(
    path: Path = SEARCH_INDEX_PATH,
) -> dict[str, dict[str, Any]]:
    """Load a previously written search index, keyed by slug.

    This is a best-effort read: an empty mapping is returned when *path*
    cannot be read, does not contain valid JSON, or holds JSON whose top level
    is not a list. Within a list, only dict items with a non-empty string
    ``slug`` are kept; when a slug repeats, the last item wins.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # malformed JSON and undecodable bytes.
        return {}

    # Valid JSON need not be a list (``null``, a number, an object). Iterating
    # those would raise or yield nonsense, so treat them as "no prior index".
    if not isinstance(data, list):
        return {}

    index: dict[str, dict[str, Any]] = {}
    for item in data:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        # A non-string slug (e.g. a list) could not serve as a dict key.
        if isinstance(slug, str) and slug:
            index[slug] = item
    return index
304+
305+
147306
def replace_between(
148307
text: str, start_marker: str, end_marker: str, new_body: str, path: Path
149308
) -> str:
@@ -159,17 +318,39 @@ def replace_between(
159318
return pattern.sub(replacement, text, count=1)
160319

161320

162-
def replace_script_block(text: str, new_body: str, block_id: str = "registry-data") -> str:
    """Swap the body of the first ``<script id=block_id type="application/json">``.

    The new body is placed on its own lines between the unchanged opening and
    closing tags. *new_body* is inserted literally, so backslashes in it are
    not interpreted as regex replacement escapes.

    Raises:
        SystemExit: if no matching script block exists in *text*.
    """
    opening = r'(<script id="' + re.escape(block_id) + r'" type="application/json">)'
    block_re = re.compile(opening + r"(.*?)(</script>)", re.DOTALL)

    found = block_re.search(text)
    if found is None:
        raise SystemExit(
            f'Could not find <script id="{block_id}" ...> in docs/index.html'
        )

    head, tail = found.group(1), found.group(3)
    rebuilt = head + "\n" + new_body + "\n" + tail
    return text[: found.start()] + rebuilt + text[found.end():]
333+
334+
335+
def replace_element_text_by_id(text: str, elem_id: str, new_text: str, path: Path) -> str:
    """Replace the inner content of the first element carrying ``id="elem_id"``.

    The closing tag must have the same name as the opening tag, so an element
    that wraps child markup (``<span id="x"><b>1</b></span>``) has its whole
    content replaced rather than being cut at the child's closing tag. The
    ``id`` attribute must be a standalone attribute, so ``data-id="..."`` does
    not count as a match. A nested element with the same tag name still ends
    the match at its own closing tag, because this is a regex and not an HTML
    parser.

    Args:
        text: HTML document to edit.
        elem_id: Value of the ``id`` attribute to look for.
        new_text: Replacement content, inserted literally.
        path: File the HTML came from; used only in the error message.

    Raises:
        SystemExit: if no element with that id is found.
    """
    pattern = re.compile(
        # group 1: whole opening tag, group 2: tag name
        r'(<([a-zA-Z][\w-]*)\b[^>]*?(?<![\w-])id="' + re.escape(elem_id) + r'"[^>]*>)'
        # group 3: old content, group 4: closing tag of the same element
        r'(.*?)(</(?i:\2)\s*>)',
        re.DOTALL,
    )
    # A callable replacement keeps backslashes in new_text literal.
    updated, count = pattern.subn(
        lambda m: m.group(1) + new_text + m.group(4), text, count=1
    )
    if not count:
        raise SystemExit(f'Element id="{elem_id}" not found in {path}')
    return updated
344+
345+
346+
def replace_standards_grid(text: str, new_body: str, path: Path) -> str:
    """Swap the content of the first ``<div class="standards-grid">`` in *text*.

    Everything between the opening tag and the first ``</div>`` after it is
    replaced with *new_body*, which is inserted literally.

    Raises:
        SystemExit: if *text* has no such div; the message names *path*.
    """
    grid_re = re.compile(r'(<div class="standards-grid">)(.*?)(</div>)', re.DOTALL)

    found = grid_re.search(text)
    if found is None:
        raise SystemExit(f'<div class="standards-grid"> not found in {path}')

    rebuilt = found.group(1) + new_body + found.group(3)
    return text[: found.start()] + rebuilt + text[found.end():]
173354

174355

175356
def sync_readme(entries: list[dict[str, Any]], check: bool, root: Path = REPO_ROOT) -> bool:
@@ -222,10 +403,48 @@ def sync_claude(entries: list[dict[str, Any]], check: bool, root: Path = REPO_RO
222403

223404

224405
def sync_index(entries: list[dict[str, Any]], check: bool, root: Path = REPO_ROOT) -> bool:
    """Bring docs/index.html and docs/search-index.json in line with *entries*.

    The embedded registry block is mandatory and is always rewritten. The
    remaining regions are optional and are handled only when their anchor
    exists in the page:

    * the inline search index, which is also written to docs/search-index.json;
    * the footer version, which additionally needs a VERSION file under *root*;
    * the standards grid, which additionally needs at least one standard
      under ``root / "standards"``, and the standards count inside that branch.

    A catalog lacking any of these is left untouched in that respect.

    Returns:
        The result of ``write_if_changed`` for the HTML file, or, when that is
        falsy, the result for the search index file.
    """
    docs_dir = root / "docs"
    index_path = docs_dir / "index.html"
    search_path = docs_dir / "search-index.json"

    current = index_path.read_text(encoding="utf-8")
    new = replace_script_block(current, render_embedded_registry(entries))

    # Search index: reconcile the companion file and mirror it into the inline
    # fallback block. Skipped entirely when the page has no inline block.
    search_drift = False
    if '<script id="search-index"' in new:
        prior_index = load_existing_search_index(search_path)
        rendered = render_search_index(build_search_index(entries, prior_index))
        new = replace_script_block(new, rendered, "search-index")
        if search_path.is_file():
            file_current = search_path.read_text(encoding="utf-8")
        else:
            file_current = ""
        search_drift = write_if_changed(
            search_path, file_current, rendered + "\n", check, root
        )

    # Footer version, sourced from the VERSION file.
    version_path = root / "VERSION"
    if 'id="footerVersion"' in new and version_path.is_file():
        footer_version = read_version(version_path)
        new = replace_element_text_by_id(new, "footerVersion", footer_version, index_path)

    # Standards grid and count, sourced from the standards/*.md listing.
    present = list_standards(root / "standards")
    if present and 'class="standards-grid"' in new:
        new = replace_standards_grid(new, render_standards_grid(present), index_path)
        if 'id="standardsCount"' in new:
            new = replace_element_text_by_id(
                new, "standardsCount", str(len(present)), index_path
            )

    html_drift = write_if_changed(index_path, current, new, check, root)
    return html_drift or search_drift
229448

230449

231450
def write_if_changed(

site-template/aggregate_search.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,26 @@
1414
from build_site import load_json, load_mcp_tools, parse_rules, parse_skills
1515

1616

17-
def build_index(registry_path: Path, repo_dirs: dict[str, Path]) -> list[dict]:
18-
"""Build search index entries from registry + local repo data."""
17+
def build_index(
18+
registry_path: Path,
19+
repo_dirs: dict[str, Path],
20+
existing: dict[str, dict] | None = None,
21+
) -> list[dict]:
22+
"""Build search index entries from registry + local repo data.
23+
24+
Basic fields (name, description, type, topics, npm, url, homepage) always
25+
come from the registry. Skill/rule/MCP-tool names come from a local repo
26+
checkout when one is available; otherwise they are preserved from
27+
*existing* (a prior index keyed by slug) so refreshing without a full
28+
fleet checkout does not discard data that was scanned earlier.
29+
"""
1930
registry = load_json(registry_path)
31+
existing = existing or {}
2032
index = []
2133

2234
for tool in registry:
2335
slug = tool.get("slug", "")
36+
prior = existing.get(slug, {})
2437
entry = {
2538
"name": tool["name"],
2639
"slug": slug,
@@ -30,9 +43,9 @@ def build_index(registry_path: Path, repo_dirs: dict[str, Path]) -> list[dict]:
3043
"npm": tool.get("npm", ""),
3144
"url": f"https://github.com/{tool['repo']}",
3245
"homepage": tool.get("homepage", ""),
33-
"skills": [],
34-
"rules": [],
35-
"mcpTools": [],
46+
"skills": list(prior.get("skills", [])),
47+
"rules": list(prior.get("rules", [])),
48+
"mcpTools": list(prior.get("mcpTools", [])),
3649
}
3750

3851
repo_root = repo_dirs.get(slug)
@@ -100,7 +113,16 @@ def main():
100113
if slug and slug not in repo_dirs:
101114
repo_dirs[slug] = child
102115

103-
index = build_index(args.registry, repo_dirs)
116+
existing: dict[str, dict] = {}
117+
if args.out.is_file():
118+
try:
119+
for e in load_json(args.out):
120+
if isinstance(e, dict) and e.get("slug"):
121+
existing[e["slug"]] = e
122+
except (ValueError, OSError):
123+
existing = {}
124+
125+
index = build_index(args.registry, repo_dirs, existing)
104126

105127
args.out.parent.mkdir(parents=True, exist_ok=True)
106128
with open(args.out, "w", encoding="utf-8") as f:

tests/test_scaffold_born_green.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,11 @@ def _make_temp_registry_root(tmp_path: Path) -> Path:
311311
registration can be exercised without mutating the live registry."""
312312
root = tmp_path / "catalog"
313313
(root / "docs").mkdir(parents=True)
314-
for rel in ("registry.json", "README.md", "CLAUDE.md"):
314+
for rel in ("registry.json", "README.md", "CLAUDE.md", "VERSION"):
315315
shutil.copy2(REPO_ROOT / rel, root / rel)
316316
shutil.copy2(REPO_ROOT / "docs" / "index.html", root / "docs" / "index.html")
317+
shutil.copy2(REPO_ROOT / "docs" / "search-index.json", root / "docs" / "search-index.json")
318+
shutil.copytree(REPO_ROOT / "standards", root / "standards")
317319
return root
318320

319321

0 commit comments

Comments
 (0)