Skip to content

Commit 36612bb

Browse files
pirateclaude
andcommitted
Stop excluding 'layout/', 'rebuild/' etc. from line counts
The exclusion list matched raw substrings, which made several entries silently drop legit code:
  - "out/" matched layout/, checkout/, timeout/, ...
  - "build/" matched rebuild/, webuild/, prebuild/, ...
  - "target/" matched subtarget/, multitarget/, ...
  - "env/" matched tensor_env/, dotenv/, ...
  - "dist/" matched redist/, sdist/, ...
Switch to path-component (segment-anchored) matching: split the path on '/' and exclude only when a whole segment equals 'build', 'out', 'target', etc. Keeps every intended dependency / build-output dir excluded while no longer over-matching legit source directories. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 066417b commit 36612bb

1 file changed

Lines changed: 29 additions & 18 deletions

File tree

generate_stats.py

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -130,21 +130,27 @@ def rebind_cache_paths() -> None:
130130
"/bash-utils/",
131131
)
132132

133-
# File-path substrings excluded from line counts (vendored / generated /
134-
# locked / binary files). Matched as substring against the file path
135-
# reported by `git log --numstat`. Keep these as substrings so paths like
136-
# "frontend/node_modules/foo.js" or "src/dist/bundle.js" match.
137-
EXCLUDE_FILE_SUBSTRINGS = (
133+
# Path-component (directory name) exclusions. We split the file path on
134+
# '/' and exclude if ANY segment exactly matches one of these names —
135+
# anchored matching avoids false positives like 'layout/' triggering on
136+
# 'out/', or 'rebuild/' triggering on 'build/'.
137+
EXCLUDE_PATH_COMPONENTS = frozenset({
138138
# Vendored / dependency directories
139-
"node_modules/", "vendor/", "bower_components/", "third_party/",
140-
".venv/", "venv/", "env/", "site-packages/", "__pycache__/",
141-
# Build / dist / generated output
142-
"dist/", "build/", "target/", "out/",
143-
".next/", ".nuxt/", ".turbo/", ".parcel-cache/",
144-
"coverage/", "htmlcov/", ".pytest_cache/", ".mypy_cache/",
145-
".tox/", ".eggs/", ".egg-info/",
139+
"node_modules", "vendor", "bower_components", "third_party",
140+
".venv", "venv", "site-packages", "__pycache__",
141+
# Build / dist / generated output (anchored so 'rebuild/' is kept)
142+
"dist", "build", "target", "out",
143+
".next", ".nuxt", ".turbo", ".parcel-cache",
144+
"coverage", "htmlcov", ".pytest_cache", ".mypy_cache",
145+
".tox", ".eggs",
146146
# ArchiveBox snapshot dirs (huge data dumps)
147-
"/archive/", "/snapshots/",
147+
"archive", "snapshots",
148+
})
149+
150+
# Glob-like suffix exclusions for directory names (matches whole segments).
151+
# These can't be in the frozenset above because they're suffix patterns.
152+
EXCLUDE_PATH_COMPONENT_SUFFIXES = (
153+
".egg-info",
148154
)
149155

150156
# File-name (basename) exact matches — lock files etc.
@@ -271,12 +277,17 @@ def _is_excluded_file(path: str) -> bool:
271277
"""Return True if the file path matches an exclusion pattern."""
272278
if not path:
273279
return False
274-
# Substring (directory) match
275-
for sub in EXCLUDE_FILE_SUBSTRINGS:
276-
if sub in path:
280+
# Path-component match: any /-delimited segment in EXCLUDE_PATH_COMPONENTS
281+
# or matching one of the suffix patterns (e.g. *.egg-info).
282+
segments = path.split("/")
283+
for seg in segments:
284+
if seg in EXCLUDE_PATH_COMPONENTS:
277285
return True
278-
# Basename exact match
279-
base = path.rsplit("/", 1)[-1]
286+
for suf in EXCLUDE_PATH_COMPONENT_SUFFIXES:
287+
if seg.endswith(suf):
288+
return True
289+
# Basename exact match (lock files etc.)
290+
base = segments[-1]
280291
if base in EXCLUDE_FILE_BASENAMES:
281292
return True
282293
# Extension match (lowercase, support compound suffixes like .min.js)

0 commit comments

Comments
 (0)