Skip to content

Commit c8d1ecf

Browse files
pirate and claude committed
Exclude WeKruit/Hand-X + leonardojyanez/redux_time from pirate stats
Forks pirate never actually contributed to — GH search picked them up because of an old fork-relationship that no longer reflects real attribution.

Add an EXCLUDE_REPOS set checked after alias resolution in dedupe_commits, plus the same drop in aggregate() for PR/issue counts.

pirate.html delta:
  commits 27,610 -> 24,717 (-2,893)
  +lines 20.3M -> 18.4M (-1.92M)
  -lines 16.3M -> 14.3M (-2.04M)
  repos 461 -> 459

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 57b7653 commit c8d1ecf

2 files changed

Lines changed: 29 additions & 5 deletions

File tree

cloudflare/public/pirate.html

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

generate_stats.py

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -981,6 +981,14 @@ def _slug(name: str) -> str:
981981
"pirate/cmdty.ncm-ui": "Monadical-Inc/cmdty.ncm-ui",
982982
}
983983

984+
# Repos to fully exclude from this user's stats — forks where the user
985+
# never actually contributed (commits, PRs, issues, lines all dropped).
986+
# Compared against the canonical name AFTER alias resolution.
987+
EXCLUDE_REPOS = {
988+
"WeKruit/Hand-X",
989+
"leonardojyanez/redux_time",
990+
}
991+
984992

985993
_RENAME_CACHE_FILE = None # set in resolve_canonical_name
986994
def resolve_canonical_name(full_name: str) -> str:
@@ -1215,17 +1223,20 @@ def dedupe_commits(records: Iterable[dict]) -> list[dict]:
12151223
if cur_total > prev_total:
12161224
by_sha[sha] = r
12171225

1218-
# Rewrite repo attribution to canonical (alias map).
1226+
# Rewrite repo attribution to canonical (alias map), then drop any
1227+
# records that resolve to a manually-excluded repo.
1228+
kept: list[dict] = []
12191229
for r in by_sha.values():
12201230
canon_input = repo_canonical(r)
12211231
target = alias.get(canon_input, canon_input)
12221232
if target != canon_input:
12231233
r["repo"] = target
1224-
# Use a synthetic remote pointing to the canonical owner/repo
1225-
# if it looks like a github full_name.
12261234
if "/" in target and not target.startswith("http"):
12271235
r["repo_remote"] = f"https://github.com/{target}.git"
1228-
return list(by_sha.values())
1236+
if target in EXCLUDE_REPOS or canon_input in EXCLUDE_REPOS:
1237+
continue
1238+
kept.append(r)
1239+
return kept
12291240

12301241

12311242
def repo_canonical(record: dict) -> str:
@@ -2100,6 +2111,18 @@ def iso_day(s: str) -> str:
21002111
pr_merged_del_repo.pop(full, None)
21012112
rename_cache_file.write_text(json.dumps(rename_cache))
21022113

2114+
# Drop manually-excluded repos from PR/issue counts (forks the user
2115+
# never actually contributed to, so the GH search picked them up
2116+
# because of an old fork-relationship that no longer represents
2117+
# real attribution).
2118+
for excluded in EXCLUDE_REPOS:
2119+
pr_repo.pop(excluded, None)
2120+
pr_merged_repo.pop(excluded, None)
2121+
iss_repo.pop(excluded, None)
2122+
pr_merged_add_repo.pop(excluded, None)
2123+
pr_merged_del_repo.pop(excluded, None)
2124+
by_repo.pop(excluded, None)
2125+
21032126
# Now create virtual by_repo entries (PR/issue-only contributions).
21042127
for full in set(pr_repo) | set(iss_repo):
21052128
if "/" not in full:
@@ -2530,6 +2553,7 @@ def auto_derive_config_for_user(login: str) -> None:
25302553
)
25312554

25322555
MANUAL_CANONICAL.clear()
2556+
EXCLUDE_REPOS.clear() # pirate-specific fork exclusions
25332557
REPO_JOB_PATTERNS.clear()
25342558
SEARCH_DIRS[:] = [] # generic users — no local mining by default
25352559
print(f" auto-config: @{login} ({GH_NAME}) "

0 commit comments

Comments
 (0)