Skip to content

Commit 6549e71

Browse files
committed
feat(audit): split tools_snapshot.json into upstream cache and local state
Split the monolithic tools_snapshot.json into two separate files:
- upstream_versions.json: Upstream version cache (committed as baseline)
- local_state.json: Machine-specific installation state (gitignored)

This enables faster local-only updates without network calls, separate lifecycle management for upstream vs. local data, and cleaner commits without machine-specific state pollution.

New features:
- make update-local: Fast local detection only (no network)
- make update-baseline: Update upstream versions (for commit)

New modules:
- cli_audit/upstream_cache.py: UpstreamVersion/UpstreamCache dataclasses
- cli_audit/local_state.py: LocalInstallation/LocalState dataclasses

The legacy tools_snapshot.json format remains supported for backward compatibility via build_legacy_snapshot().
1 parent 883d8c0 commit 6549e71

10 files changed

Lines changed: 1860 additions & 1 deletion

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ htmlcov/
3232
# Local overrides
3333
*.local
3434

35+
# Machine-specific audit state (Phase 2.1 split files)
36+
local_state.json
37+
3538
# Node.js
3639
node_modules/
3740

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ MAKEFLAGS += --no-print-directory
1111
export
1212

1313
.PHONY: user-help help audit audit-offline audit-% audit-offline-% update upgrade guide \
14+
update-local update-baseline \
1415
test test-unit test-integration test-coverage test-watch test-failed \
1516
lint lint-code lint-types lint-security format format-check \
1617
install install-dev install-core install-python install-node install-go \

Makefile.d/user.mk

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,18 @@ update: ## Collect fresh version data with network calls and update snapshot (~1
6363
@$(MAKE) check-python-managers 2>/dev/null || true
6464
@$(MAKE) check-node-managers 2>/dev/null || true
6565

66+
update-local: ## Update only local installation state (fast, no network)
67+
@echo "→ Detecting local tool installations..." >&2
68+
@bash -c 'set -o pipefail; $(PYTHON) audit.py --update-local' || true
69+
70+
update-baseline: ## Update upstream version baseline (for commit)
71+
@echo "→ Collecting upstream versions for baseline..." >&2
72+
@bash -c 'set -o pipefail; $(PYTHON) audit.py --update-baseline' || true
73+
@echo "" >&2
74+
@echo "To commit the baseline:" >&2
75+
@echo " git add upstream_versions.json" >&2
76+
@echo " git commit -m 'chore: update upstream version baseline'" >&2
77+
6678
update-debug: ## Collect with verbose debug output (shows network calls)
6779
@bash -c 'set -o pipefail; CLI_AUDIT_COLLECT=1 CLI_AUDIT_DEBUG=1 CLI_AUDIT_TIMINGS=1 $(PYTHON) audit.py --update --verbose' || true
6880

audit.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,17 @@
3131
from cli_audit.collectors import get_github_rate_limit # noqa: E402
3232
from cli_audit import collectors # noqa: E402
3333
from cli_audit.logging_config import setup_logging # noqa: E402
34+
# Split file support (Phase 2.1)
35+
from cli_audit.upstream_cache import ( # noqa: E402
36+
UpstreamVersion, UpstreamCache,
37+
load_upstream_cache, write_upstream_cache, get_upstream_cache_path,
38+
update_cached_upstream,
39+
)
40+
from cli_audit.local_state import ( # noqa: E402
41+
LocalInstallation, LocalState,
42+
load_local_state, write_local_state, get_local_state_path,
43+
update_local_installation, merge_for_display, build_legacy_snapshot,
44+
)
3445

3546
# Configuration from environment
3647
OFFLINE_MODE = os.environ.get("CLI_AUDIT_OFFLINE", "0") == "1"
@@ -40,6 +51,10 @@
4051
RENDER_MODE = os.environ.get("CLI_AUDIT_RENDER", "0") == "1"
4152
JSON_MODE = os.environ.get("CLI_AUDIT_JSON", "0") == "1"
4253
FILTER_STATUS = os.environ.get("CLI_AUDIT_FILTER_STATUS", "") # e.g., "NOT INSTALLED,OUTDATED"
54+
# Split file modes (Phase 2.1)
55+
UPDATE_LOCAL_ONLY = os.environ.get("CLI_AUDIT_UPDATE_LOCAL", "0") == "1"
56+
UPDATE_BASELINE_ONLY = os.environ.get("CLI_AUDIT_UPDATE_BASELINE", "0") == "1"
57+
USE_SPLIT_FILES = os.environ.get("CLI_AUDIT_SPLIT_FILES", "0") == "1"
4358

4459

4560
def normalize_version(version: str) -> str:
@@ -479,6 +494,170 @@ def cmd_update(args: argparse.Namespace) -> int:
479494
return 1
480495

481496

497+
def cmd_update_local(args: argparse.Namespace) -> int:
    """Update only local installation state (fast, no upstream collection).

    Runs local detection for every selected tool in a thread pool, derives a
    status for each tool by comparing the detected version against the
    previously cached upstream version, then writes the local state file and
    rebuilds the legacy snapshot from the cache plus the fresh local state.

    Args:
        args: Parsed CLI arguments. ``args.tools`` optionally restricts the
            run to a subset of tools; otherwise all tools are processed.

    Returns:
        0 once the state files have been written (per-tool detection failures
        are reported on stderr and do not abort the run); 1 when no tools are
        selected, in which case nothing is written.
    """
    print("=" * 80, file=sys.stderr)
    print("Update Local State", file=sys.stderr)
    print("=" * 80, file=sys.stderr)

    # Get tools to audit
    tools_list = filter_tools(args.tools) if args.tools else all_tools()
    total = len(tools_list)

    # Guard: ThreadPoolExecutor rejects max_workers=0 with ValueError, and an
    # empty run would otherwise overwrite the state files with empty data.
    if total == 0:
        print("# No tools selected; local state left unchanged", file=sys.stderr)
        return 1

    print(f"# Detecting local installations for {total} tools...", file=sys.stderr)

    # Load existing upstream cache for status determination
    upstream_cache = load_upstream_cache()

    # Collect local state only (no upstream collection is submitted)
    local_state = LocalState()
    completed = 0

    with ThreadPoolExecutor(max_workers=min(MAX_WORKERS, total)) as executor:
        future_to_tool = {
            executor.submit(_detect_local_only, tool): tool for tool in tools_list
        }

        for future in as_completed(future_to_tool):
            tool = future_to_tool[future]
            completed += 1
            try:
                installation = future.result()
                cached = upstream_cache.versions.get(tool.name)

                if not installation.installed_version:
                    installation.status = "NOT INSTALLED"
                elif cached and cached.latest_version:
                    norm_inst = normalize_version(installation.installed_version)
                    norm_latest = normalize_version(cached.latest_version)
                    installation.status = (
                        "UP-TO-DATE" if norm_inst == norm_latest else "OUTDATED"
                    )
                else:
                    # Installed, but there is no usable cached upstream
                    # version to compare against (missing entry or empty
                    # latest_version) -- do not claim the tool is outdated.
                    installation.status = "UNKNOWN"

                local_state.tools[tool.name] = installation
                print(f"# [{completed}/{total}] {tool.name}: {installation.installed_version or 'not installed'}", file=sys.stderr)
            except Exception as e:
                # Best-effort per tool: report and continue with the rest.
                print(f"# [{completed}/{total}] {tool.name}: failed ({e})", file=sys.stderr)

    # Write local state
    write_local_state(local_state, offline=OFFLINE_MODE)
    print("", file=sys.stderr)
    print(f"✓ Local state updated: {get_local_state_path()}", file=sys.stderr)
    print(f"✓ Detected {len(local_state.tools)} tools", file=sys.stderr)

    # Also update legacy snapshot for backward compatibility
    legacy_snapshot = build_legacy_snapshot(upstream_cache, local_state)
    write_snapshot(legacy_snapshot.get("tools", []), offline=OFFLINE_MODE)
    print(f"✓ Legacy snapshot updated: {get_snapshot_path()}", file=sys.stderr)

    return 0
560+
561+
562+
def cmd_update_baseline(args: argparse.Namespace) -> int:
    """Update only the upstream baseline cache (network required).

    Collects the latest upstream version of every selected tool in a thread
    pool, stores the results in a fresh ``UpstreamCache`` and writes it out.
    The GitHub rate limit is reported before and after collection.

    Args:
        args: Parsed CLI arguments. ``args.tools`` optionally restricts the
            run to a subset of tools; otherwise all tools are processed.

    Returns:
        0 once the cache has been written (per-tool collection failures are
        reported on stderr and do not abort the run); 1 when no tools are
        selected, in which case nothing is collected or written.
    """

    def _report_rate_limit() -> None:
        # Print the current GitHub rate limit, if one could be retrieved.
        rate_limit = get_github_rate_limit()
        if rate_limit:
            remaining = rate_limit.get("remaining", 0)
            limit = rate_limit.get("limit", 0)
            print(f"✓ GitHub rate limit: {remaining}/{limit} remaining", file=sys.stderr)

    print("=" * 80, file=sys.stderr)
    print("Update Upstream Baseline", file=sys.stderr)
    print("=" * 80, file=sys.stderr)

    # Get tools to audit
    tools_list = filter_tools(args.tools) if args.tools else all_tools()
    total = len(tools_list)

    # Guard: ThreadPoolExecutor rejects max_workers=0 with ValueError, and an
    # empty run would otherwise replace the baseline with an empty cache.
    if total == 0:
        print("# No tools selected; upstream baseline left unchanged", file=sys.stderr)
        return 1

    # Show GitHub rate limit
    _report_rate_limit()

    print(f"# Collecting upstream versions for {total} tools...", file=sys.stderr)

    # Collect upstream versions only
    upstream_cache = UpstreamCache()
    completed = 0

    with ThreadPoolExecutor(max_workers=min(MAX_WORKERS, total)) as executor:
        future_to_tool = {
            executor.submit(collect_latest_version, tool, None): tool
            for tool in tools_list
        }

        for future in as_completed(future_to_tool):
            tool = future_to_tool[future]
            completed += 1
            try:
                latest_tag, latest_num = future.result()
                upstream_cache.versions[tool.name] = UpstreamVersion(
                    latest_tag=latest_tag,
                    latest_version=latest_num,
                    latest_url=latest_target_url(tool, latest_tag, latest_num),
                    tool_url=tool_homepage_url(tool),
                    upstream_method=tool.source_kind,
                )

                display = latest_num or latest_tag or "n/a"
                print(f"# [{completed}/{total}] {tool.name}: {display}", file=sys.stderr)
            except Exception as e:
                # Best-effort per tool: report and continue with the rest.
                print(f"# [{completed}/{total}] {tool.name}: failed ({e})", file=sys.stderr)

    # Write upstream cache
    write_upstream_cache(upstream_cache)
    print("", file=sys.stderr)
    print(f"✓ Upstream baseline updated: {get_upstream_cache_path()}", file=sys.stderr)
    print(f"✓ Collected {len(upstream_cache.versions)} versions", file=sys.stderr)

    # Report rate limit after collection
    _report_rate_limit()

    return 0
625+
626+
627+
def _detect_local_only(tool: Tool) -> LocalInstallation:
    """Probe the local machine for *tool* without collecting upstream data.

    Looks up optional ``version_flag`` / ``version_command`` overrides in the
    tool catalog, runs ``audit_tool_installation`` and packages the outcome
    as a ``LocalInstallation``. The returned status is always ``"UNKNOWN"``;
    the caller is expected to set it using the cached upstream version.
    """
    from cli_audit.catalog import ToolCatalog

    registry = ToolCatalog()
    flag = None
    command = None
    if registry.has_tool(tool.name):
        raw_entry = registry.get_raw_data(tool.name)
        flag = raw_entry.get("version_flag")
        command = raw_entry.get("version_command")

    # These tools are audited with deep=True.
    needs_deep_scan = tool.name in {"node", "python", "semgrep", "pre-commit", "bandit", "black", "flake8", "isort"}
    version_num, version_line, path, install_method = audit_tool_installation(
        tool.name,
        tool.candidates,
        deep=needs_deep_scan,
        version_flag=flag,
        version_command=command,
    )

    # Prefer the parsed version number; fall back to the raw version line
    # unless it is the "X" placeholder.
    if version_num:
        installed = version_num
    elif version_line != "X":
        installed = version_line
    else:
        installed = ""

    reason = (
        f"Detected via path analysis: {install_method}"
        if install_method
        else "No installation detected"
    )

    return LocalInstallation(
        installed_version=installed,
        installed_path=path,
        installed_method=install_method,
        status="UNKNOWN",  # Will be determined by caller using cached upstream
        classification_reason=reason,
        category=tool.category,
        hint=tool.hint,
    )
659+
660+
482661
def cmd_install(args: argparse.Namespace) -> int:
483662
"""Install missing tools using bulk installation system."""
484663
print("=" * 80, file=sys.stderr)
@@ -511,6 +690,16 @@ def main() -> int:
511690
action="store_true",
512691
help="Collect latest versions from upstream (network required)",
513692
)
693+
parser.add_argument(
694+
"--update-local",
695+
action="store_true",
696+
help="Update only local state (no network, fast)",
697+
)
698+
parser.add_argument(
699+
"--update-baseline",
700+
action="store_true",
701+
help="Update only upstream baseline cache (network required)",
702+
)
514703
parser.add_argument(
515704
"--install",
516705
action="store_true",
@@ -541,6 +730,12 @@ def main() -> int:
541730
if args.update:
542731
# Explicit --update flag: full update of all tools
543732
return cmd_update(args)
733+
elif getattr(args, 'update_local', False) or UPDATE_LOCAL_ONLY:
734+
# Update only local state (fast, no network)
735+
return cmd_update_local(args)
736+
elif getattr(args, 'update_baseline', False) or UPDATE_BASELINE_ONLY:
737+
# Update only upstream baseline cache
738+
return cmd_update_baseline(args)
544739
elif args.install:
545740
return cmd_install(args)
546741
elif args.upgrade:

0 commit comments

Comments
 (0)