Skip to content

Commit fd05cde

Browse files
committed
tools:collect: add tool to collect modifications from deployed directory back to repos.
This is the reverse of deploy.py. Given a snapshot directory of deployed MicroPython packages, it maps each .py file back to its source in micropython-lib or a third-party repo, strips any __version__ trailer, compares with the repo source, and optionally copies modifications back. Supports --remap for non-standard deployment prefixes. Made-with: Cursor
1 parent 8dd519e commit fd05cde

1 file changed

Lines changed: 373 additions & 0 deletions

File tree

tools/collect.py

Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Collect modifications from a deployed directory back into source repos.
4+
5+
This is the reverse of deploy.py. Given a directory of deployed MicroPython
6+
packages (e.g. a snapshot produced by deploy.py, mip, or manual installation),
7+
it maps each .py file back to its source location in micropython-lib or a
8+
third-party repo, strips any __version__ trailer, compares with the repo
9+
source, and optionally copies modifications back.
10+
11+
This enables a development workflow where you can:
12+
13+
1. Deploy packages to a working directory (deploy.py --output lib ...)
14+
2. Edit and test in the working directory
15+
3. Collect changes back into the repos (collect.py --snapshot lib ...)
16+
4. Commit the changes in each repo
17+
18+
Usage examples:
19+
20+
# Show what has been modified (dry-run is the default):
21+
collect.py --snapshot /path/to/deployed
22+
23+
# Include unix-ffi packages and third-party repos:
24+
collect.py --snapshot /path/to/deployed --unix-ffi --repo /path/to/other
25+
26+
# Handle files installed under a different prefix (e.g. primitives/ was
27+
# installed into asyncio_extras/primitives/ in the snapshot):
28+
collect.py --snapshot /path/to/deployed \\
29+
--remap asyncio_extras/primitives:primitives \\
30+
--remap asyncio_extras/threadsafe:threadsafe
31+
32+
# Actually copy the modifications back to the repos:
33+
collect.py --snapshot /path/to/deployed --write
34+
35+
# Just list the file-to-source mapping without diffing:
36+
collect.py --snapshot /path/to/deployed --list
37+
"""
38+
39+
import os
40+
import re
41+
import sys
42+
43+
# ---------------------------------------------------------------------------
44+
# Import deploy.py from the same directory so we can reuse its package
45+
# discovery, manifest parsing, and file-collection infrastructure.
46+
# ---------------------------------------------------------------------------
47+
48+
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
49+
sys.path.insert(0, _SCRIPT_DIR)
50+
51+
import deploy # noqa: E402
52+
53+
54+
# ===========================================================================
55+
# __version__ stripping
56+
# ===========================================================================
57+
58+
59+
def strip_version(text):
    """
    Return *text* with injected ``__version__ = '...'`` lines removed.

    Two substitutions are applied, in this order:

    1. Trailer: two newlines, a single-quoted ``__version__`` assignment and
       a newline, anchored at the end of the text, are deleted outright.
    2. Body: a single-quoted ``__version__`` assignment that starts a line
       and is followed by an empty line is removed; the newline before it
       and the two after it collapse into a single newline.

    Only single-quoted values are recognised; any other text is returned
    unchanged.
    """
    trailer = re.compile(r"\n\n__version__ = '[^']*'\n$")
    in_body = re.compile(r"\n__version__ = '[^']*'\n\n")
    without_trailer = trailer.sub("", text)
    return in_body.sub("\n", without_trailer)
70+
71+
72+
# ===========================================================================
73+
# Mapping: deployed target path -> repo source path
74+
# ===========================================================================
75+
76+
77+
def build_target_to_source_map(all_packages):
    """
    Invert deploy's (source, target) pairs into a target -> source lookup.

    *all_packages* is a dict of package name -> package info; each item is
    handed to deploy.collect_files(), which yields (source_path,
    target_path) pairs. The result is {target_path: source_path}. When
    several pairs share a target path, the one produced last wins.
    """
    target_to_source = {}
    for name, info in all_packages.items():
        target_to_source.update(
            (deployed, origin)
            for origin, deployed in deploy.collect_files(name, info)
        )
    return target_to_source
90+
91+
92+
# ===========================================================================
93+
# Core logic
94+
# ===========================================================================
95+
96+
97+
def find_snapshot_files(snapshot_dir):
    """
    Walk *snapshot_dir* and yield (relative_path, absolute_path) per .py file.

    Directories named ``__pycache__`` are never entered. Sub-directories and
    file names are visited in sorted order so the output is deterministic.
    The relative path is computed with os.path.relpath against
    *snapshot_dir*.
    """
    # NOTE(review): relpath uses the platform separator, while apply_remaps
    # matches on "/"; confirm which separator deploy's target paths use
    # before relying on this under Windows.
    for current_dir, subdirs, filenames in os.walk(snapshot_dir):
        # Assign in place: os.walk only honours pruning/reordering done on
        # the very list object it handed out.
        subdirs[:] = sorted(name for name in subdirs if name != "__pycache__")
        for filename in sorted(filenames):
            if not filename.endswith(".py"):
                continue
            abs_path = os.path.join(current_dir, filename)
            yield os.path.relpath(abs_path, snapshot_dir), abs_path
106+
107+
108+
def apply_remaps(path, remaps):
    """
    Apply prefix remapping rules to a snapshot-relative path.

    Each remap is a (snapshot_prefix, target_prefix) tuple; prefixes are
    compared on whole "/"-separated components, so "vendor" matches
    "vendor/x.py" but not "vendorx/x.py". The first matching rule wins.

    - Non-empty snapshot_prefix, non-empty target_prefix: the prefix is
      replaced.
    - Non-empty snapshot_prefix, empty target_prefix: the prefix and the
      following "/" are stripped.
    - Empty snapshot_prefix, non-empty target_prefix: matches every path and
      prepends "target_prefix/". (Such a rule previously could never match
      and was silently ignored.)
    - Both empty: the rule is a no-op and is skipped.

    Returns the remapped path, or *path* unchanged when no rule matches.
    """
    for snap_pfx, target_pfx in remaps:
        if not snap_pfx:
            if not target_pfx:
                continue  # "" -> "" changes nothing; let later rules apply
            return f"{target_pfx}/{path}"
        if path == snap_pfx or path.startswith(snap_pfx + "/"):
            remainder = path[len(snap_pfx):]  # "" or "/rest/of/path"
            if target_pfx:
                return target_pfx + remainder
            return remainder[1:]  # drop the leading "/" as well
    return path
122+
123+
124+
def classify_files(snapshot_dir, target_map, remaps):
    """
    Classify every .py file in the snapshot.

    Each snapshot file is remapped with apply_remaps() and looked up in
    *target_map* ({target_path: source_path}). Mapped files are compared with
    their repo source after strip_version() has been applied to the snapshot
    side only.

    Returns three lists:
        modified:  [(snap_rel, snap_abs, source_path), ...]
        identical: [(snap_rel, source_path), ...]
        unmapped:  [snap_rel, ...]
    """
    modified = []
    identical = []
    unmapped = []

    # Decode as UTF-8 with surrogateescape instead of the locale encoding
    # with errors="replace": decoding stays independent of the platform
    # locale and is lossless, so two files that differ only in undecodable
    # bytes can no longer compare as identical.
    read_opts = {"encoding": "utf-8", "errors": "surrogateescape"}

    for snap_rel, snap_abs in find_snapshot_files(snapshot_dir):
        source_path = target_map.get(apply_remaps(snap_rel, remaps))
        if source_path is None:
            unmapped.append(snap_rel)
            continue

        with open(snap_abs, "r", **read_opts) as f:
            snap_content = strip_version(f.read())
        # NOTE(review): the repo side is compared unstripped, so a source
        # file that itself contains a line matched by strip_version() will
        # be reported as modified.
        with open(source_path, "r", **read_opts) as f:
            src_content = f.read()

        if snap_content == src_content:
            identical.append((snap_rel, source_path))
        else:
            modified.append((snap_rel, snap_abs, source_path))

    return modified, identical, unmapped
156+
157+
158+
def copy_to_source(snap_abs, source_path):
    """
    Copy a snapshot file to its repo source, stripping __version__.

    The file at *snap_abs* is read, passed through strip_version(), and the
    result overwrites *source_path*.

    Both files are opened as UTF-8 with the ``surrogateescape`` error
    handler rather than the locale default: the copy no longer depends on
    the platform encoding, and bytes that are not valid UTF-8 are written
    back unchanged instead of raising UnicodeDecodeError part-way through a
    collection run.
    """
    codec = {"encoding": "utf-8", "errors": "surrogateescape"}
    with open(snap_abs, "r", **codec) as f:
        content = strip_version(f.read())
    with open(source_path, "w", **codec) as f:
        f.write(content)
164+
165+
166+
# ===========================================================================
167+
# Output helpers
168+
# ===========================================================================
169+
170+
171+
def _c(text, code):
    """Shorthand for deploy.color(): return *text* wrapped with *code*."""
    colored = deploy.color(text, code)
    return colored
173+
174+
175+
def print_summary(modified, identical, unmapped, write_mode):
    """
    Print the totals line and, when there are modified files, list them.

    *modified*, *identical* and *unmapped* are the lists produced by
    classify_files(). *write_mode* only selects the wording ("Copied" vs
    "Would copy"); nothing is written here.
    """
    n_modified, n_identical, n_unmapped = len(modified), len(identical), len(unmapped)

    same_txt = _c(str(n_identical), deploy._COLOR_OK)
    # The modified count is shown as a warning only when it is non-zero.
    if modified:
        changed_color = deploy._COLOR_WARN
    else:
        changed_color = deploy._COLOR_OK
    changed_txt = _c(str(n_modified), changed_color)
    unmapped_txt = _c(str(n_unmapped), deploy._COLOR_DIM)

    print(
        f"\n{n_modified + n_identical + n_unmapped} file(s) scanned: "
        f"{same_txt} identical, "
        f"{changed_txt} modified, "
        f"{unmapped_txt} unmapped"
    )

    if modified:
        verb = "Copied" if write_mode else "Would copy"
        print(f"\n{verb} {_c(str(n_modified), deploy._COLOR_BOLD)} file(s):\n")
        for snap_rel, _snap_abs, source_path in modified:
            sep = _c("->", deploy._COLOR_DIM)
            print(f" {_c(snap_rel, deploy._COLOR_BOLD)} {sep} {source_path}")
    else:
        print(_c("Nothing to collect.", deploy._COLOR_OK))
192+
193+
194+
def print_file_listing(snapshot_dir, target_map, remaps):
    """
    Print one line per snapshot .py file showing the source it maps to.

    Files whose remapped path has no (truthy) entry in *target_map* are
    shown as "(unmapped)". A final line reports both counts.
    """
    n_mapped = n_unmapped = 0
    for snap_rel, _snap_abs in find_snapshot_files(snapshot_dir):
        source = target_map.get(apply_remaps(snap_rel, remaps))
        label = deploy.ljust(snap_rel, 40)
        if not source:
            print(f" {label} {_c('(unmapped)', deploy._COLOR_DIM)}")
            n_unmapped += 1
            continue
        print(f" {label} -> {source}")
        n_mapped += 1
    print(f"\n{n_mapped} mapped, {n_unmapped} unmapped")
208+
209+
210+
# ===========================================================================
211+
# Argument parsing
212+
# ===========================================================================
213+
214+
215+
_USAGE = """\
216+
usage: collect.py [-h] --snapshot DIR [--unix-ffi] [--repo DIR]
217+
[--remap SNAP_PFX:TARGET_PFX] [--lib-dir DIR]
218+
[--write] [-l]
219+
220+
Collect modifications from a deployed directory back into source repos.
221+
This is the reverse of deploy.py.
222+
223+
required:
224+
--snapshot DIR Directory containing deployed .py files.
225+
226+
options:
227+
-h, --help Show this help message and exit.
228+
--unix-ffi Include unix-ffi packages in the mapping.
229+
--repo DIR Third-party repo to scan (may be repeated).
230+
--remap A:B Map snapshot path prefix A to deploy target prefix B.
231+
Use when packages were installed under a non-standard
232+
prefix (e.g. --remap asyncio_extras/primitives:primitives).
233+
May be specified multiple times.
234+
--lib-dir DIR Path to micropython-lib root (default: auto-detected).
235+
--write Actually copy modified files back to repos.
236+
Without this flag, only a dry-run report is shown.
237+
-l, --list List the file-to-source mapping and exit.
238+
239+
examples:
240+
collect.py --snapshot lib/ --unix-ffi --repo /path/to/micropython-async
241+
collect.py --snapshot lib/ --remap asyncio_extras/primitives:primitives --write
242+
collect.py --snapshot lib/ --list
243+
"""
244+
245+
246+
def parse_args(argv=None):
    """
    Parse the command line into an ``Args`` object.

    *argv* defaults to ``sys.argv[1:]``. Options that take a value accept
    both ``--opt VALUE`` and ``--opt=VALUE``. ``--repo`` and ``--remap`` may
    be repeated; each ``--remap`` value must contain ":" and is stored as a
    (snapshot_prefix, target_prefix) tuple with trailing "/" removed from
    both halves.

    ``-h``/``--help`` prints the usage text and exits with status 0. Any
    usage error prints a message to stderr and exits with status 2.
    """
    if argv is None:
        argv = sys.argv[1:]

    class Args:
        snapshot = None
        unix_ffi = False
        repos = []
        remaps = []
        lib_dir = None
        write = False
        list_map = False

    args = Args()
    # Give the instance its own lists so the class-level ones are never
    # mutated.
    args.repos = []
    args.remaps = []

    def die(*lines):
        # Report a usage error on stderr and exit with status 2.
        for line in lines:
            print(line, file=sys.stderr)
        sys.exit(2)

    def as_remap(val):
        # Turn "A:B" into the (A, B) tuple stored in args.remaps.
        if ":" not in val:
            die(f"Error: --remap value must be A:B, got '{val}'")
        snap_pfx, target_pfx = val.split(":", 1)
        return (snap_pfx.rstrip("/"), target_pfx.rstrip("/"))

    takes_value = ("--snapshot", "--repo", "--remap", "--lib-dir")
    missing = object()
    pending = iter(argv)

    for a in pending:
        if a in ("-h", "--help"):
            print(_USAGE)
            sys.exit(0)

        # For "--opt=VALUE" name is "--opt"; for a bare token name == a.
        name, eq, value = a.partition("=")
        if name in takes_value:
            if not eq:
                value = next(pending, missing)
                if value is missing:
                    die(f"Error: {a} requires a value.")
            if name == "--snapshot":
                args.snapshot = value
            elif name == "--repo":
                args.repos.append(value)
            elif name == "--remap":
                args.remaps.append(as_remap(value))
            else:
                args.lib_dir = value
        elif a == "--unix-ffi":
            args.unix_ffi = True
        elif a == "--write":
            args.write = True
        elif a in ("-l", "--list"):
            args.list_map = True
        elif a.startswith("-"):
            die(f"Error: unknown option: {a}", "Use -h for help.")
        else:
            die(f"Error: unexpected argument: {a}")

    return args
319+
320+
321+
# ===========================================================================
322+
# Entry point
323+
# ===========================================================================
324+
325+
326+
def main():
    """
    Command-line entry point.

    Validates the arguments, builds the target -> source map from the
    packages deploy discovers (plus any --repo directories), then either
    prints the mapping (--list) or classifies the snapshot files, copies the
    modified ones back when --write is given, and prints the summary.

    Exits with status 2 when --snapshot is missing and status 1 when a
    required directory does not exist.
    """

    def abort(status, *lines):
        # Print each line to stderr, then exit with the given status.
        for line in lines:
            print(line, file=sys.stderr)
        sys.exit(status)

    args = parse_args()
    snapshot = args.snapshot

    if not snapshot:
        abort(2, "Error: --snapshot is required.", "Use -h for help.")
    if not os.path.isdir(snapshot):
        abort(1, f"Error: snapshot directory not found: '{snapshot}'")

    if args.lib_dir:
        deploy.LIB_DIR = os.path.abspath(args.lib_dir)
    if not os.path.isdir(deploy.LIB_DIR):
        abort(1, f"Error: micropython-lib not found at '{deploy.LIB_DIR}'.")

    all_packages = deploy.discover_packages(
        deploy.DEFAULT_LIB_DIRS, include_unix_ffi=args.unix_ffi
    )
    # Packages from --repo directories are merged in after the
    # micropython-lib ones, so they replace same-named entries.
    for repo_dir in args.repos:
        if not os.path.isdir(repo_dir):
            abort(1, f"Error: repo directory not found: '{repo_dir}'")
        all_packages.update(deploy.discover_repo_packages(repo_dir))

    target_map = build_target_to_source_map(all_packages)

    if args.list_map:
        print_file_listing(snapshot, target_map, args.remaps)
        return

    modified, identical, unmapped = classify_files(snapshot, target_map, args.remaps)

    if args.write:
        for _snap_rel, snap_abs, source_path in modified:
            copy_to_source(snap_abs, source_path)

    print_summary(modified, identical, unmapped, write_mode=args.write)
370+
371+
372+
if __name__ == "__main__":
373+
main()

0 commit comments

Comments
 (0)