Skip to content

Commit 6dd18dd

Browse files
committed
CIP-0002: add inventory and freshness scripts
Add initial material review tooling (include inventory + snippet freshness) and mark the related backlog tasks as in progress. Ignore generated artifact outputs.
1 parent 0345083 commit 6dd18dd

5 files changed

Lines changed: 402 additions & 2 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,6 @@ lynguine.log
8282
execed.code-workspace
8383
execed.code-workspace~
8484
.DS_Store
85+
86+
# Material review artifacts (generated)
87+
artifacts/

backlog/documentation/2026-01-28_execed-material-review-inventory.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ owner: Neil D. Lawrence
77
priority: High
88
related_cips:
99
- "0002"
10-
status: Proposed
10+
status: In Progress
1111
tags:
1212
- execed
1313
- material-review
@@ -36,3 +36,9 @@ Create the repeatable “inventory layer” for CIP-0002:
3636
- Backlog items should link to CIPs (HOW), not requirements (WHAT).
3737
- This task is intentionally “inventory only” — recommendations and refresh proposals come in follow-on tasks.
3838

39+
## Progress Updates
40+
41+
### 2026-01-28
42+
43+
Started implementation. Initial inventory script added under `tools/material_review/` and can write outputs to `artifacts/material-review/`.
44+

backlog/documentation/2026-01-28_execed-material-review-snippet-freshness.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ owner: Neil D. Lawrence
77
priority: High
88
related_cips:
99
- "0002"
10-
status: Proposed
10+
status: In Progress
1111
tags:
1212
- execed
1313
- material-review
@@ -32,3 +32,9 @@ Using the inventories from CIP-0002, compute snippet “freshness”:
3232
- [ ] A “top candidates for refresh” list exists with rationale (high-impact + stale, or high-impact + unstable).
3333
- [ ] Results are reproducible (one command/script regenerates them).
3434

35+
## Progress Updates
36+
37+
### 2026-01-28
38+
39+
Started implementation. Added `tools/material_review/snippet_freshness.py` and generated an initial report under `artifacts/material-review/`.
40+

tools/material_review/inventory.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Material review inventory (CIP-0002).
4+
5+
Builds a simple inventory of \\include{...} usage across:
6+
- ExecEd lecture sources (execed/_lamd/*.md)
7+
- Talk sources (e.g. ~/lawrennd/talks/_atomic-human/, _business/, _policy/, _economics/)
8+
9+
Outputs:
10+
- JSON (full inventory + frequency tables)
11+
- CSV (include frequency table)
12+
13+
Design goals:
14+
- No third-party dependencies.
15+
- Works even if target directories live outside this repo.
16+
- Can be upstreamed/mirrored into lamd tooling later.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import csv
23+
import dataclasses
24+
import datetime as dt
25+
import json
26+
import os
27+
import re
28+
import subprocess
29+
from pathlib import Path
30+
from typing import Iterable, Optional
31+
32+
33+
# Matches \include{...} directives in lamd/LaTeX-flavoured markdown and
# captures the target path inside the braces.
INCLUDE_RE = re.compile(r"\\include\{([^}]+)\}")


# Directory names that never hold hand-written source material
# (VCS metadata, site build output, virtualenvs, caches).
SKIP_DIR_NAMES = {
    ".git",
    ".ipynb_checkpoints",
    ".jekyll-cache",
    ".sass-cache",
    ".venv",
    ".venv-vibesafe",
    "__pycache__",
    "_site",
    "vendor",
}


# Suffixes of generated outputs that would otherwise shadow the true sources.
SKIP_FILE_SUFFIXES = (
    ".posts.html",
    ".posts.html.markdown",
    ".slides.html",
    ".slides.html.markdown",
    ".notes.ipynb.markdown",
    ".ipynb",
)
57+
58+
59+
def run_git_last_change_date(repo_dir: Path, file_path: Path) -> Optional[dt.date]:
    """Return the date of the last git commit touching *file_path*.

    Runs ``git log -1 --format=%cs`` inside *repo_dir*.  Returns ``None``
    whenever the date cannot be determined: git missing, not a repository,
    file untracked, or unparseable output.
    """
    cmd = [
        "git",
        "-C",
        str(repo_dir),
        "log",
        "-1",
        "--format=%cs",
        "--",
        str(file_path),
    ]
    try:
        stamp = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True)
    except Exception:
        # git absent, not a repo, or any other failure: treat as "unknown".
        return None
    stamp = stamp.strip()
    if not stamp:
        # No output means the file has no git history (e.g. untracked).
        return None
    try:
        return dt.date.fromisoformat(stamp)
    except ValueError:
        return None
77+
78+
79+
def find_repo_root(start: Path) -> Optional[Path]:
    """Walk upward from *start* until a directory containing ``.git`` is found.

    *start* may be a file (its parent is used) or a directory; it is resolved
    first.  Returns the repository root, or ``None`` when *start* is not
    inside a git checkout.
    """
    here = start.resolve()
    if here.is_file():
        here = here.parent
    for candidate in [here, *here.parents]:
        if (candidate / ".git").exists():
            return candidate
    return None
87+
88+
89+
def iter_source_files(root: Path) -> Iterable[Path]:
    """Yield candidate source files (``.md`` / ``.gpp.markdown``) under *root*.

    Prunes directories listed in ``SKIP_DIR_NAMES`` and skips generated
    artifacts matching ``SKIP_FILE_SUFFIXES``.  Yields nothing when *root*
    does not exist.
    """
    base = root.resolve()
    if not base.exists():
        return
    for dirpath, dirnames, filenames in os.walk(base):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIR_NAMES]
        for filename in filenames:
            if filename.endswith(SKIP_FILE_SUFFIXES):
                continue
            if filename.endswith((".md", ".gpp.markdown")):
                yield Path(dirpath) / filename
105+
106+
107+
def parse_frontmatter_session(md_text: str) -> Optional[str]:
    """Best-effort extraction of the ``session:`` value from YAML frontmatter.

    Frontmatter is the block delimited by ``---`` lines at the very top of
    the document.  Returns the value with surrounding quotes stripped, or
    ``None`` when there is no frontmatter or no ``session:`` key.
    """
    lines = md_text.splitlines()
    if not lines or lines[0].strip() != "---":
        return None
    # Locate the closing --- delimiter; without one there is no frontmatter.
    closing = None
    for i in range(1, len(lines)):
        if lines[i].strip() == "---":
            closing = i
            break
    if closing is None:
        return None
    for raw in lines[1:closing]:
        stripped = raw.strip()
        if stripped.startswith("session:"):
            value = stripped.split(":", 1)[1]
            return value.strip().strip('"').strip("'")
    return None
123+
124+
125+
def extract_includes(md_text: str) -> list[str]:
    """Return the sorted, de-duplicated \\include{...} targets in *md_text*."""
    targets = {found.strip() for found in re.findall(r"\\include\{([^}]+)\}", md_text)}
    return sorted(targets)
127+
128+
129+
@dataclasses.dataclass(frozen=True)
class FileInventory:
    """Immutable record of one source file's include usage.

    Attributes:
        path: Filesystem path of the source file, as a string.
        includes: Sorted, de-duplicated ``\\include{...}`` targets found in it.
        session: Value of the frontmatter ``session:`` key, when present.
        last_changed: Last-change date as ``YYYY-MM-DD``, when known.
    """

    path: str
    includes: list[str]
    session: Optional[str] = None
    last_changed: Optional[str] = None
135+
136+
137+
def build_inventory(
    roots: list[Path],
    since: Optional[dt.date],
) -> list[FileInventory]:
    """Scan *roots* and build FileInventory records for files using includes.

    For each candidate file the record captures its \\include targets, any
    frontmatter ``session:`` value, and a last-changed date (git history when
    available, filesystem mtime otherwise).  When *since* is given, files
    whose known last-change date is older are dropped; files with an unknown
    date are always kept.
    """
    records: list[FileInventory] = []
    for root in roots:
        repo_root = find_repo_root(root)
        for source in iter_source_files(root):
            try:
                text = source.read_text(errors="ignore")
            except Exception:
                # Unreadable file (permissions, odd encoding, ...): skip.
                continue
            includes = extract_includes(text)
            if not includes:
                continue

            # Prefer git's commit date; fall back to the filesystem mtime.
            changed: Optional[dt.date] = None
            if repo_root is not None:
                changed = run_git_last_change_date(repo_root, source)
            if changed is None:
                try:
                    changed = dt.date.fromtimestamp(source.stat().st_mtime)
                except Exception:
                    changed = None

            if since is not None and changed is not None and changed < since:
                continue

            records.append(
                FileInventory(
                    path=str(source),
                    includes=includes,
                    session=parse_frontmatter_session(text),
                    last_changed=changed.isoformat() if changed else None,
                )
            )
    return records
176+
177+
178+
def include_frequency(items: list[FileInventory]) -> dict[str, int]:
    """Count how many inventory entries reference each include target.

    Returns a dict ordered by descending count, ties broken alphabetically
    by include path.
    """
    counts: dict[str, int] = {}
    all_targets = (target for record in items for target in record.includes)
    for target in all_targets:
        counts[target] = counts.get(target, 0) + 1
    ordered = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
    return dict(ordered)
184+
185+
186+
def main() -> int:
    """CLI entry point: build the include inventory and emit JSON (+ CSV).

    With ``--out-dir`` the inventory JSON and a combined include-frequency
    CSV are written there; otherwise the JSON report is printed to stdout.
    Note the ``--since`` filter is applied to talks only — the ExecEd
    sources are always inventoried in full.  Always returns 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--execed-lamd", type=Path, default=Path("execed/_lamd"))
    parser.add_argument("--talks-dir", type=Path, action="append", default=[])
    parser.add_argument(
        "--since",
        type=str,
        default=None,
        help="Only include files changed on/after YYYY-MM-DD",
    )
    parser.add_argument(
        "--out-dir",
        type=Path,
        default=None,
        help="Write outputs to this directory",
    )
    args = parser.parse_args()

    since = dt.date.fromisoformat(args.since) if args.since else None

    execed_items = build_inventory([args.execed_lamd], since=None)
    talks_items = build_inventory(args.talks_dir, since=since) if args.talks_dir else []

    report = {
        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
        "since": since.isoformat() if since else None,
        "execed": {
            "root": str(args.execed_lamd),
            "files": [dataclasses.asdict(item) for item in execed_items],
            "include_frequency": include_frequency(execed_items),
        },
        "talks": {
            "roots": [str(p) for p in args.talks_dir],
            "files": [dataclasses.asdict(item) for item in talks_items],
            "include_frequency": include_frequency(talks_items),
        },
    }

    if not args.out_dir:
        print(json.dumps(report, indent=2, sort_keys=True))
        return 0

    args.out_dir.mkdir(parents=True, exist_ok=True)
    (args.out_dir / "inventory.json").write_text(json.dumps(report, indent=2, sort_keys=True))

    # Combined CSV: per-include counts for execed and talks plus the total.
    execed_freq = report["execed"]["include_frequency"]
    talks_freq = report["talks"]["include_frequency"]
    with (args.out_dir / "include_frequency.csv").open("w", newline="") as handle:
        writer = csv.writer(handle)
        writer.writerow(["include", "execed_count", "talks_count", "total"])
        for target in sorted(set(execed_freq) | set(talks_freq)):
            execed_count = int(execed_freq.get(target, 0))
            talks_count = int(talks_freq.get(target, 0))
            writer.writerow([target, execed_count, talks_count, execed_count + talks_count])

    return 0
239+
240+
241+
if __name__ == "__main__":
    # Script entry: propagate main()'s return code as the process exit status.
    raise SystemExit(main())
243+

0 commit comments

Comments
 (0)