From e3b55475ba022c4a52c0a38770eecd7c284148a2 Mon Sep 17 00:00:00 2001 From: gazeatcode <206354045+gazeatcode@users.noreply.github.com> Date: Wed, 13 May 2026 01:47:15 +0200 Subject: [PATCH] feat(rewards): allocate OSS emissions by repository emission share --- README.md | 8 +- gittensor/classes.py | 5 +- gittensor/cli/miner_commands/score.py | 3 +- gittensor/constants.py | 12 +- gittensor/validator/forward.py | 162 +++++++++++++----- gittensor/validator/issue_discovery/scan.py | 8 +- .../oss_contributions/mirror/scored_pr.py | 3 +- .../oss_contributions/mirror/scoring.py | 7 +- .../validator/oss_contributions/scoring.py | 3 +- gittensor/validator/utils/load_weights.py | 88 ++++++++-- .../weights/master_repositories.json | 18 +- .../mirror/test_scored_pr.py | 6 +- .../oss_contributions/mirror/test_scoring.py | 7 +- tests/validator/test_emission_allocation.py | 134 +++++++++++++++ tests/validator/test_load_weights.py | 98 +++++++++++ 15 files changed, 471 insertions(+), 91 deletions(-) create mode 100644 tests/validator/test_emission_allocation.py diff --git a/README.md b/README.md index bfba94f3..ff348cc4 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Incentivize open source contributions. ## How it Works -Miners register with a fine-grained [GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) (PAT) and contribute to whitelisted open source repositories. When their pull requests get merged, validators authenticate account ownership via the PAT, verify the merged contributions, and score them based on code quality, repository weight, and programming language factors. Rewards are distributed proportionally to contribution scores. +Miners register with a fine-grained [GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) (PAT) and contribute to whitelisted open source repositories. When their pull requests get merged, validators authenticate account ownership via the PAT, verify the merged contributions, and score them based on code quality and programming language factors. Rewards are allocated by each repository's bounded `emission_share`, then distributed proportionally to contribution scores within that repository. ## Why Gittensor @@ -75,11 +75,11 @@ See full guide **[here](https://docs.gittensor.io/validator.html)** ### Important Structures -- Master Repositories & Weights +- Master Repositories & Emission Shares -A list of repositories pulled from GitHub that have been deemed valid for scoring. They each have an associated weight based on factors like: forks, commits, contributors, stars, etc. +A list of repositories pulled from GitHub that have been deemed valid for scoring. They each have an associated `emission_share` that caps how much of the scoring pool that repository can receive in a round. -_NOTE: this list will be dynamic. It will see various audits, additions, deletions, weight changes, and shuffles as the subnet matures._ +_NOTE: this list will be dynamic. It will see various audits, additions, deletions, emission share changes, and shuffles as the subnet matures._ _NOTE: don’t be afraid to provide recommendations for your favorite open source repositories and the team will review it as a possible addition. A repo is more likely to be included if: they provide contributing guidelines, are active/community driven, provide value/have users_ diff --git a/gittensor/classes.py b/gittensor/classes.py index c944e1d0..eaea8de0 100644 --- a/gittensor/classes.py +++ b/gittensor/classes.py @@ -234,7 +234,6 @@ def is_pioneer_eligible(self) -> bool: def calculate_final_earned_score(self) -> float: """Combine base score with all multipliers. Pioneer dividend is added separately after.""" multipliers = { - 'repo': self.repo_weight_multiplier, 'issue': self.issue_multiplier, 'label': self.label_multiplier, 'spam': self.open_pr_spam_multiplier, @@ -289,6 +288,7 @@ class MinerEvaluation: total_valid_solved_issues: int = 0 # solved issues where solving PR has token_score >= 5 total_closed_issues: int = 0 total_open_issues: int = 0 # mirror-tracked open issues in lookback window (set by issue_discovery.scan) + discovered_issues: List[Issue] = field(default_factory=list) @property def total_prs(self) -> int: @@ -505,6 +505,7 @@ class CachedEvaluation: 'total_valid_solved_issues', 'total_closed_issues', 'total_open_issues', + 'discovered_issues', ) @@ -627,6 +628,7 @@ def _build_cache_entry(evaluation: 'MinerEvaluation') -> 'MinerEvaluation': cached.merged_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.merged_prs] cached.open_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.open_prs] cached.closed_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.closed_prs] + cached.discovered_issues = list(evaluation.discovered_issues) return cached @staticmethod @@ -636,6 +638,7 @@ def _isolate_for_downstream(cached_eval: 'MinerEvaluation') -> 'MinerEvaluation' # adapters produce fresh Issue objects per call via get_all_issues(). copy_eval = copy.copy(cached_eval) copy_eval.unique_repos_contributed_to = set(cached_eval.unique_repos_contributed_to) + copy_eval.discovered_issues = list(cached_eval.discovered_issues) return copy_eval diff --git a/gittensor/cli/miner_commands/score.py b/gittensor/cli/miner_commands/score.py index bc5fdf5c..52ec64b3 100644 --- a/gittensor/cli/miner_commands/score.py +++ b/gittensor/cli/miner_commands/score.py @@ -66,6 +66,7 @@ def store_or_use_cached_evaluation(self, miner_evaluations: Dict) -> Set[int]: 'merged_prs', 'open_prs', 'closed_prs', + 'discovered_issues', 'unique_repos_contributed_to', } ) @@ -280,7 +281,7 @@ async def _run() -> Dict[str, Any]: issue_rewards = await issue_discovery( miner_evaluations, master_repositories, programming_languages, token_config, miner_uids ) - rewards = blend_emission_pools(oss_rewards, issue_rewards, miner_uids) + rewards = blend_emission_pools(miner_evaluations, master_repositories, miner_uids) return { 'success': True, diff --git a/gittensor/constants.py b/gittensor/constants.py index 6daa940f..8a8a5af9 100644 --- a/gittensor/constants.py +++ b/gittensor/constants.py @@ -74,7 +74,7 @@ # ============================================================================= # Repository & PR Scoring # ============================================================================= -DEFAULT_REPO_WEIGHT = 0.01 # fallback weight for repos not in master_repositories.json +DEFAULT_REPO_EMISSION_SHARE = 0.01 # fallback share for repos not in master_repositories.json PR_LOOKBACK_DAYS = 35 # rolling window for scoring MERGED_PR_BASE_SCORE = 25 MIN_TOKEN_SCORE_FOR_BASE_SCORE = 5 # PRs below this get 0 base score @@ -154,11 +154,9 @@ # ============================================================================= RECYCLE_UID = 0 -# Hardcoded emission splits per competition (replaces dynamic emissions) -OSS_EMISSION_SHARE = 0.30 # 30% to OSS contributions (PR scoring) -ISSUE_DISCOVERY_EMISSION_SHARE = 0.10 # 10% to issue discovery -RECYCLE_EMISSION_SHARE = 0.45 # 45% to recycle UID 0 -# ISSUES_TREASURY_EMISSION_SHARE = 0.15 defined below (15% to smart contract treasury) +# Scoring pool is allocated by per-repo emission_share, then split within each +# repo between PR scoring and issue discovery. +OSS_EMISSION_SHARE = 0.90 # ============================================================================= # Spam & Gaming Mitigation @@ -187,5 +185,5 @@ # ============================================================================= CONTRACT_ADDRESS = '5FWNdk8YNtNcHKrAx2krqenFrFAZG7vmsd2XN2isJSew3MrD' ISSUES_TREASURY_UID = 111 # UID of the smart contract neuron, if set to RECYCLE_UID then it's disabled -ISSUES_TREASURY_EMISSION_SHARE = 0.15 # % of emissions allocated to funding issues treasury +ISSUES_TREASURY_EMISSION_SHARE = 0.10 # % of emissions allocated to funding issues treasury MAX_ISSUE_ID = 1_000_000 # sanity-check upper bound for any real deployment diff --git a/gittensor/validator/forward.py b/gittensor/validator/forward.py index 33a01de7..181b2084 100644 --- a/gittensor/validator/forward.py +++ b/gittensor/validator/forward.py @@ -2,18 +2,17 @@ # Copyright © 2025 Entrius import asyncio -from typing import TYPE_CHECKING, Dict, Optional, Set, Tuple +from collections import defaultdict +from typing import TYPE_CHECKING, Dict, Iterable, Optional, Set, Tuple import bittensor as bt import numpy as np from gittensor.classes import MinerEvaluation, MinerEvaluationCache from gittensor.constants import ( - ISSUE_DISCOVERY_EMISSION_SHARE, ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, OSS_EMISSION_SHARE, - RECYCLE_EMISSION_SHARE, RECYCLE_UID, ) from gittensor.utils.uids import get_all_uids @@ -48,11 +47,10 @@ async def forward(self: 'Validator') -> None: 4. Store all evaluations to DB 5. Blend emission pools and update scores - Emission blending (hardcoded per-competition): - - OSS contributions: 30% - - Issue discovery: 30% - - Issue treasury: 15% (flat to UID 111) - - Recycle: 25% (flat to UID 0) + Emission blending: + - OSS scoring pool: 90%, allocated by repository emission_share + - Issue treasury: 10% (flat to UID 111) + - Recycle: registry slack and inactive repo slices """ if self.step % VALIDATOR_STEPS_INTERVAL == 0: @@ -62,12 +60,12 @@ async def forward(self: 'Validator') -> None: token_config = load_token_config() # 1. Score OSS contributions - oss_rewards, miner_evaluations, cached_uids, penalized_uids = await oss_contributions( + _oss_rewards, miner_evaluations, cached_uids, penalized_uids = await oss_contributions( self, miner_uids, master_repositories, programming_languages, token_config ) # 2. Score issue discovery - issue_rewards = await issue_discovery( + _issue_rewards = await issue_discovery( miner_evaluations, master_repositories, programming_languages, @@ -85,8 +83,8 @@ async def forward(self: 'Validator') -> None: # 4. Store all evaluations to DB (includes issue discovery fields) await self.bulk_store_evaluation(miner_evaluations, skip_uids=cached_uids) - # 5. Blend 4 emission pools into final rewards - rewards = blend_emission_pools(oss_rewards, issue_rewards, miner_uids) + # 5. Allocate the scoring pool by per-repo emission_share + rewards = blend_emission_pools(miner_evaluations, master_repositories, miner_uids) self.update_scores(rewards, miner_uids, blacklisted_uids=sorted(penalized_uids)) @@ -150,49 +148,129 @@ async def issue_discovery( def blend_emission_pools( - oss_rewards: np.ndarray, - issue_rewards: np.ndarray, + miner_evaluations: Dict[int, MinerEvaluation], + master_repositories: Dict[str, RepositoryConfig], miner_uids: set[int], ) -> np.ndarray: - """Blend 4 emission pools into a single rewards array. + """Allocate emissions by configured repo slices and route slack to recycle. - - OSS contributions: 30% - - Issue discovery: 30% - - Issue treasury: 15% (flat to UID 111) - - Recycle: 25% (flat to UID 0) + Each repository receives at most ``emission_share * OSS_EMISSION_SHARE``. + That repo slice is divided proportionally by raw PR and issue-discovery + scores inside the repo. Registry slack and repo slices with no enabled + nonzero scorers route to the recycle UID. """ sorted_uids = sorted(miner_uids) rewards = np.zeros(len(sorted_uids)) - recycle_extra = 0.0 - - # Pool 1: OSS contributions (30%) - oss_total = float(oss_rewards.sum()) - if oss_total > 0: - rewards += oss_rewards * OSS_EMISSION_SHARE - else: - recycle_extra += OSS_EMISSION_SHARE - - # Pool 2: Issue discovery (30%) - issue_total = float(issue_rewards.sum()) - if issue_total > 0: - rewards += issue_rewards * ISSUE_DISCOVERY_EMISSION_SHARE - else: - recycle_extra += ISSUE_DISCOVERY_EMISSION_SHARE - - # Pool 3: Issue treasury (15% flat to UID 111) + uid_index = {uid: idx for idx, uid in enumerate(sorted_uids)} + + recycle_amount = allocate_repo_scoring_pool(rewards, uid_index, miner_evaluations, master_repositories) + + # Issue treasury (10% flat to UID 111) if ISSUES_TREASURY_UID > 0 and ISSUES_TREASURY_UID in miner_uids: - treasury_idx = sorted_uids.index(ISSUES_TREASURY_UID) + treasury_idx = uid_index[ISSUES_TREASURY_UID] rewards[treasury_idx] += ISSUES_TREASURY_EMISSION_SHARE bt.logging.info( f'Treasury allocation: UID {ISSUES_TREASURY_UID} receives ' f'{ISSUES_TREASURY_EMISSION_SHARE * 100:.0f}% of emissions' ) - # Pool 4: Recycle (25% + unclaimed from empty pools) + # Recycle receives registry slack plus unclaimed repo slices. There is no + # fixed recycle baseline under the emission_share allocation model. if RECYCLE_UID in miner_uids: - recycle_idx = sorted_uids.index(RECYCLE_UID) - rewards[recycle_idx] += RECYCLE_EMISSION_SHARE + recycle_extra - if recycle_extra > 0: - bt.logging.info(f'Recycling {recycle_extra * 100:.0f}% unclaimed emissions from empty pools') + recycle_idx = uid_index[RECYCLE_UID] + rewards[recycle_idx] += recycle_amount + if recycle_amount > 0: + bt.logging.info(f'Recycling {recycle_amount * 100:.2f}% unclaimed scoring-pool emissions') return rewards + + +def allocate_repo_scoring_pool( + rewards: np.ndarray, + uid_index: Dict[int, int], + miner_evaluations: Dict[int, MinerEvaluation], + master_repositories: Dict[str, RepositoryConfig], +) -> float: + """Distribute the OSS scoring pool by repository emission shares. + + Returns the amount that should be paid to the recycle UID. + """ + pr_scores, issue_scores = _collect_repo_scores(miner_evaluations) + configured_share = sum(config.emission_share for config in master_repositories.values()) + recycle_amount = max(0.0, 1.0 - configured_share) * OSS_EMISSION_SHARE + + if recycle_amount > 0: + bt.logging.info(f'Registry emission_share slack: {recycle_amount * 100:.2f}% routed to recycle') + + for repo_name, config in master_repositories.items(): + repo_key = repo_name.lower() + repo_slice = config.emission_share * OSS_EMISSION_SHARE + if repo_slice <= 0: + continue + + pr_entries = pr_scores.get(repo_key, []) + issue_entries = issue_scores.get(repo_key, []) + pr_total = sum(score for _, score in pr_entries) + issue_total = sum(score for _, score in issue_entries) + + issue_share = config.issue_discovery_share + pr_share = 1.0 - issue_share + pr_active = pr_share > 0 and pr_total > 0 + issue_active = issue_share > 0 and issue_total > 0 + + if not pr_active and not issue_active: + recycle_amount += repo_slice + continue + + if pr_active and issue_active: + _distribute_entries(rewards, uid_index, pr_entries, repo_slice * pr_share, pr_total) + _distribute_entries(rewards, uid_index, issue_entries, repo_slice * issue_share, issue_total) + elif pr_active: + _distribute_entries(rewards, uid_index, pr_entries, repo_slice, pr_total) + else: + _distribute_entries(rewards, uid_index, issue_entries, repo_slice, issue_total) + + return recycle_amount + + +def _collect_repo_scores( + miner_evaluations: Dict[int, MinerEvaluation], +) -> Tuple[Dict[str, list[Tuple[int, float]]], Dict[str, list[Tuple[int, float]]]]: + pr_scores: Dict[str, list[Tuple[int, float]]] = defaultdict(list) + issue_scores: Dict[str, list[Tuple[int, float]]] = defaultdict(list) + + for uid, evaluation in miner_evaluations.items(): + for pr in _positive_pr_scores(evaluation): + pr_scores[pr.repository_full_name.lower()].append((uid, float(pr.earned_score))) + for issue in _positive_issue_scores(evaluation): + issue_scores[issue.repository_full_name.lower()].append((uid, float(issue.discovery_earned_score))) + + return pr_scores, issue_scores + + +def _positive_pr_scores(evaluation: MinerEvaluation) -> Iterable: + return (pr for pr in evaluation.merged_prs if getattr(pr, 'earned_score', 0.0) > 0) + + +def _positive_issue_scores(evaluation: MinerEvaluation) -> Iterable: + return ( + issue + for issue in getattr(evaluation, 'discovered_issues', []) + if getattr(issue, 'discovery_earned_score', 0.0) > 0 + ) + + +def _distribute_entries( + rewards: np.ndarray, + uid_index: Dict[int, int], + entries: list[Tuple[int, float]], + allocation: float, + total_score: float, +) -> None: + if allocation <= 0 or total_score <= 0: + return + for uid, score in entries: + idx = uid_index.get(uid) + if idx is None: + continue + rewards[idx] += allocation * score / total_score diff --git a/gittensor/validator/issue_discovery/scan.py b/gittensor/validator/issue_discovery/scan.py index ad771fe2..a991fc7a 100644 --- a/gittensor/validator/issue_discovery/scan.py +++ b/gittensor/validator/issue_discovery/scan.py @@ -60,7 +60,6 @@ LanguageConfig, RepositoryConfig, TokenConfig, - resolve_repo_weight, ) @@ -224,6 +223,7 @@ def _clear_issue_discovery_fields(evaluation: MinerEvaluation) -> None: evaluation.total_valid_solved_issues = 0 evaluation.total_closed_issues = 0 evaluation.total_open_issues = 0 + evaluation.discovered_issues = [] def _copy_issue_discovery_fields(target: MinerEvaluation, source: MinerEvaluation) -> None: @@ -235,6 +235,7 @@ def _copy_issue_discovery_fields(target: MinerEvaluation, source: MinerEvaluatio target.total_valid_solved_issues = source.total_valid_solved_issues target.total_closed_issues = source.total_closed_issues target.total_open_issues = source.total_open_issues + target.discovered_issues = list(source.discovered_issues) def _restore_issue_discovery_from_cache( @@ -435,6 +436,7 @@ async def _score_miner_issues( evaluation.total_closed_issues = closed_count evaluation.total_open_issues = open_issue_count evaluation.issue_token_score = round(issue_token_score, 2) + evaluation.discovered_issues = [] is_eligible, credibility, reason = check_issue_eligibility(solved_count, valid_solved_count, closed_count) evaluation.is_issue_eligible = is_eligible @@ -456,7 +458,6 @@ async def _score_miner_issues( issue.discovery_open_issue_spam_multiplier = spam_mult issue.discovery_earned_score = round( issue.discovery_base_score - * issue.discovery_repo_weight_multiplier * issue.discovery_time_decay_multiplier * issue.discovery_review_quality_multiplier * issue.discovery_credibility_multiplier @@ -465,6 +466,7 @@ async def _score_miner_issues( ) total_discovery_score += issue.discovery_earned_score + evaluation.discovered_issues = scored_issues evaluation.issue_discovery_score = round(total_discovery_score, 2) bt.logging.info( @@ -620,7 +622,7 @@ def _mirror_issue_for_scoring( ) adapted.discovery_base_score = base_score - adapted.discovery_repo_weight_multiplier = resolve_repo_weight(repo_config) + adapted.discovery_repo_weight_multiplier = 1.0 adapted.discovery_time_decay_multiplier = round(calculate_time_decay(solving_pr.merged_at), 2) adapted.discovery_review_quality_multiplier = round( calculate_issue_review_quality_multiplier(solving_pr.review_summary.maintainer_changes_requested_count), diff --git a/gittensor/validator/oss_contributions/mirror/scored_pr.py b/gittensor/validator/oss_contributions/mirror/scored_pr.py index 882377be..c2f17857 100644 --- a/gittensor/validator/oss_contributions/mirror/scored_pr.py +++ b/gittensor/validator/oss_contributions/mirror/scored_pr.py @@ -27,6 +27,8 @@ class ScoredPR: pr: MirrorPullRequest # Multipliers (default 1.0 — neutral if not yet computed) + # repo_weight_multiplier is retained as a neutral legacy field for storage + # compatibility; repo emission shares are applied only at round aggregation. repo_weight_multiplier: float = 1.0 issue_multiplier: float = 1.0 open_pr_spam_multiplier: float = 1.0 @@ -88,7 +90,6 @@ def is_pioneer_eligible(self) -> bool: def calculate_final_earned_score(self) -> float: """Combine base score with all multipliers. Pioneer dividend is added separately after.""" multipliers = { - 'repo': self.repo_weight_multiplier, 'issue': self.issue_multiplier, 'label': self.label_multiplier, 'spam': self.open_pr_spam_multiplier, diff --git a/gittensor/validator/oss_contributions/mirror/scoring.py b/gittensor/validator/oss_contributions/mirror/scoring.py index bda651e0..8e24365e 100644 --- a/gittensor/validator/oss_contributions/mirror/scoring.py +++ b/gittensor/validator/oss_contributions/mirror/scoring.py @@ -2,7 +2,7 @@ Scope: - Compute base_score for each PR via the existing token-scoring infra. -- Compute per-PR multipliers: repo_weight, time_decay, review_quality, label, issue. +- Compute per-PR multipliers: time_decay, review_quality, label, issue. - The merge-eligibility gate (``_should_skip_merged_mirror_pr``) is exported and applied at LOAD time by ``mirror.load._maybe_add_pr`` — rejected PRs never enter ``merged_prs``, so the merged_count used by ``check_eligibility`` @@ -54,7 +54,6 @@ LanguageConfig, RepositoryConfig, TokenConfig, - resolve_repo_weight, ) from gittensor.validator.utils.tree_sitter_scoring import calculate_token_score_from_file_changes @@ -338,7 +337,7 @@ def calculate_base_score_for_pr_files( def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig) -> None: - """Compute repo_weight, time_decay, review_quality, label, issue multipliers. + """Compute time_decay, review_quality, label, and issue multipliers. Spam and credibility multipliers are deferred to ``finalize_miner_scores`` — they depend on per-miner aggregate counts. @@ -346,7 +345,7 @@ def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig) - pr = scored.pr is_merged = pr.state == 'MERGED' - scored.repo_weight_multiplier = resolve_repo_weight(repo_config) + scored.repo_weight_multiplier = 1.0 chosen_label, label_multiplier = _resolve_trusted_scoring_label(pr, repo_config) scored.label = chosen_label diff --git a/gittensor/validator/oss_contributions/scoring.py b/gittensor/validator/oss_contributions/scoring.py index 4336d248..7398e274 100644 --- a/gittensor/validator/oss_contributions/scoring.py +++ b/gittensor/validator/oss_contributions/scoring.py @@ -283,14 +283,13 @@ def calculate_open_pr_collateral_score(pr: 'ScoredPR') -> float: Collateral = base_score * applicable_multipliers * OPEN_PR_COLLATERAL_PERCENT - Applicable multipliers: repo_weight, issue, label, review_collateral + Applicable multipliers: issue, label, review_collateral NOT applicable: time_decay (merge-based), credibility_multiplier (merge-based), open_pr_spam (not for collateral) """ from math import prod multipliers = { - 'repo_weight': pr.repo_weight_multiplier, 'issue': pr.issue_multiplier, 'label': pr.label_multiplier, 'review_collateral': calculate_review_collateral_multiplier(pr.changes_requested_count, pr.number), diff --git a/gittensor/validator/utils/load_weights.py b/gittensor/validator/utils/load_weights.py index b838529a..efa3c0fd 100644 --- a/gittensor/validator/utils/load_weights.py +++ b/gittensor/validator/utils/load_weights.py @@ -7,7 +7,9 @@ import bittensor as bt -from gittensor.constants import DEFAULT_REPO_WEIGHT, NON_CODE_EXTENSIONS +from gittensor.constants import DEFAULT_REPO_EMISSION_SHARE, NON_CODE_EXTENSIONS + +EMISSION_SHARE_TOLERANCE = 1e-9 @dataclass @@ -23,12 +25,13 @@ class LanguageConfig: language: Optional[str] = None -@dataclass +@dataclass(init=False) class RepositoryConfig: """Configuration for a repository in the master_repositories list. Attributes: - weight: Repository weight for scoring + emission_share: Bounded share of the OSS scoring pool allocated to this repository. + issue_discovery_share: Share of this repository allocation assigned to issue discovery. inactive_at: ISO timestamp when repository became inactive (None if active) additional_acceptable_branches: List of additional branch patterns to accept (None if only default branch) trusted_label_pipeline: When True, scoring labels count regardless of @@ -46,7 +49,8 @@ class RepositoryConfig: """ - weight: float + emission_share: float + issue_discovery_share: float = 0.5 inactive_at: Optional[str] = None additional_acceptable_branches: Optional[List[str]] = None trusted_label_pipeline: bool = False @@ -55,12 +59,51 @@ class RepositoryConfig: fixed_base_score: Optional[float] = None eligibility_mode: bool = True + def __init__( + self, + emission_share: Optional[float] = None, + *, + weight: Optional[float] = None, + issue_discovery_share: float = 0.5, + inactive_at: Optional[str] = None, + additional_acceptable_branches: Optional[List[str]] = None, + trusted_label_pipeline: bool = False, + label_multipliers: Optional[Dict[str, float]] = None, + default_label_multiplier: float = 1.0, + fixed_base_score: Optional[float] = None, + eligibility_mode: bool = True, + ) -> None: + if emission_share is None: + emission_share = weight if weight is not None else DEFAULT_REPO_EMISSION_SHARE + elif weight is not None and float(weight) != float(emission_share): + raise ValueError('RepositoryConfig received conflicting emission_share and weight values') + + self.emission_share = float(emission_share) + self.issue_discovery_share = float(issue_discovery_share) + self.inactive_at = inactive_at + self.additional_acceptable_branches = additional_acceptable_branches + self.trusted_label_pipeline = trusted_label_pipeline + self.label_multipliers = label_multipliers + self.default_label_multiplier = default_label_multiplier + self.fixed_base_score = fixed_base_score + self.eligibility_mode = eligibility_mode + + @property + def weight(self) -> float: + """Compatibility alias for callers that have not migrated constructor usage.""" + return self.emission_share + + +def resolve_repo_emission_share(repo_config: Optional[RepositoryConfig]) -> float: + """Return the repo emission share preserving full JSON precision, or the default for unknown repos.""" + if repo_config is None: + return DEFAULT_REPO_EMISSION_SHARE + return repo_config.emission_share + def resolve_repo_weight(repo_config: Optional[RepositoryConfig]) -> float: - """Return the repo weight preserving full JSON precision, or the default for unknown repos.""" - if repo_config is None: - return DEFAULT_REPO_WEIGHT - return repo_config.weight + """Deprecated compatibility alias for resolve_repo_emission_share().""" + return resolve_repo_emission_share(repo_config) @dataclass @@ -107,9 +150,25 @@ def _get_weights_dir() -> Path: return Path(__file__).parent.parent / 'weights' +def validate_repository_emission_shares(repositories: Dict[str, RepositoryConfig]) -> None: + """Validate per-repo emission share bounds and the registry-wide sum cap.""" + total = 0.0 + for repo_name, config in repositories.items(): + if not 0.0 <= config.emission_share <= 1.0: + raise ValueError(f'{repo_name} emission_share must be within [0, 1], got {config.emission_share}') + if not 0.0 <= config.issue_discovery_share <= 1.0: + raise ValueError( + f'{repo_name} issue_discovery_share must be within [0, 1], got {config.issue_discovery_share}' + ) + total += config.emission_share + + if total > 1.0 + EMISSION_SHARE_TOLERANCE: + raise ValueError(f'total repository emission_share must be <= 1.0, got {total}') + + def load_master_repo_weights() -> Dict[str, RepositoryConfig]: """ - Load repository weights from the local JSON file. + Load repository emission shares from the local JSON file. Normalizes repository names to lowercase for case-insensitive matching. Returns: @@ -131,7 +190,10 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: for repo_name, metadata in data.items(): try: config = RepositoryConfig( - weight=float(metadata.get('weight', 0.01)), + emission_share=float( + metadata.get('emission_share', metadata.get('weight', DEFAULT_REPO_EMISSION_SHARE)) + ), + issue_discovery_share=float(metadata.get('issue_discovery_share', 0.5)), inactive_at=metadata.get('inactive_at'), additional_acceptable_branches=metadata.get('additional_acceptable_branches'), trusted_label_pipeline=bool(metadata.get('trusted_label_pipeline', False)), @@ -146,9 +208,9 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: ) normalized_data[repo_name.lower()] = config except (ValueError, TypeError) as e: - bt.logging.warning(f'Could not parse config for {repo_name}: {e}, using defaults') - # Create config with defaults if parsing fails - normalized_data[repo_name.lower()] = RepositoryConfig(weight=float(metadata.get('weight', 0.01))) + raise ValueError(f'Could not parse config for {repo_name}: {e}') from e + + validate_repository_emission_shares(normalized_data) bt.logging.debug(f'Successfully loaded {len(normalized_data)} repository entries from {weights_file}') return normalized_data diff --git a/gittensor/validator/weights/master_repositories.json b/gittensor/validator/weights/master_repositories.json index 3d0254d7..dcd7de6c 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -1,6 +1,7 @@ { "entrius/allways": { - "weight": 0.05, + "emission_share": 0.05, + "issue_discovery_share": 0.5, "trusted_label_pipeline": true, "label_multipliers": { "bug": 1.25, @@ -9,7 +10,8 @@ } }, "entrius/allways-ui": { - "weight": 0.01, + "emission_share": 0.01, + "issue_discovery_share": 0.5, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.25, @@ -19,7 +21,8 @@ } }, "entrius/das-github-mirror": { - "weight": 0.02, + "emission_share": 0.02, + "issue_discovery_share": 0.5, "trusted_label_pipeline": true, "label_multipliers": { "bug": 1.25, @@ -28,7 +31,8 @@ } }, "entrius/gittensor": { - "weight": 0.1, + "emission_share": 0.1, + "issue_discovery_share": 0.5, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.5, @@ -38,7 +42,8 @@ } }, "entrius/gittensor-ui": { - "weight": 0.03, + "emission_share": 0.03, + "issue_discovery_share": 0.5, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.25, @@ -48,7 +53,8 @@ } }, "entrius/oc-1": { - "weight": 0.5, + "emission_share": 0.5, + "issue_discovery_share": 0.0, "trusted_label_pipeline": true, "fixed_base_score": 1.0, "eligibility_mode": false, diff --git a/tests/validator/oss_contributions/mirror/test_scored_pr.py b/tests/validator/oss_contributions/mirror/test_scored_pr.py index b3c138f0..8ed79613 100644 --- a/tests/validator/oss_contributions/mirror/test_scored_pr.py +++ b/tests/validator/oss_contributions/mirror/test_scored_pr.py @@ -3,7 +3,7 @@ Covers: - Composition: raw response data accessed via .pr.; scoring fields default neutrally - is_pioneer_eligible respects merged + token_score gate -- calculate_final_earned_score multiplies base by every multiplier +- calculate_final_earned_score multiplies base by active per-PR multipliers """ from __future__ import annotations @@ -115,8 +115,8 @@ def test_multipliers_compose(self): scored.base_score = 100.0 scored.repo_weight_multiplier = 0.5 scored.review_quality_multiplier = 0.5 - # 100 * 0.5 * 0.5 (others 1.0) = 25 - assert scored.calculate_final_earned_score() == 25.0 + # repo_weight_multiplier is a neutral legacy field; emission_share is applied at aggregation. + assert scored.calculate_final_earned_score() == 50.0 def test_zero_multiplier_zeros_score(self): scored = ScoredPR(pr=_make_pr()) diff --git a/tests/validator/oss_contributions/mirror/test_scoring.py b/tests/validator/oss_contributions/mirror/test_scoring.py index c14b792e..bb8cd99d 100644 --- a/tests/validator/oss_contributions/mirror/test_scoring.py +++ b/tests/validator/oss_contributions/mirror/test_scoring.py @@ -378,7 +378,7 @@ def test_fixed_base_score_scores_without_stored_files(self): client.get_pr_files.assert_not_called() assert scored.base_score == pytest.approx(7.5) - assert scored.repo_weight_multiplier == pytest.approx(0.5) + assert scored.repo_weight_multiplier == pytest.approx(1.0) class TestFixedBaseScore: @@ -439,7 +439,6 @@ def test_fixed_base_replaces_token_base_but_keeps_token_breakdown_and_multiplier assert scored.label_multiplier == pytest.approx(2.0) assert scored.calculate_final_earned_score() == pytest.approx( scored.base_score - * scored.repo_weight_multiplier * scored.issue_multiplier * scored.label_multiplier * scored.open_pr_spam_multiplier @@ -914,7 +913,7 @@ def test_merged_pr_populates_all_multipliers(self): _config(weight=0.7, additional_branches=['test'], label_multipliers={'feature': 1.5}), ) - assert scored.repo_weight_multiplier == 0.7 + assert scored.repo_weight_multiplier == 1.0 assert scored.label == 'feature' assert scored.label_multiplier == pytest.approx(1.5) assert 0.0 <= scored.time_decay_multiplier <= 1.0 @@ -926,7 +925,7 @@ def test_open_pr_only_neutral_multipliers(self): scored = ScoredPR(pr=_pr(state='OPEN')) _calculate_pr_multipliers(scored, _config(weight=0.5)) - assert scored.repo_weight_multiplier == 0.5 + assert scored.repo_weight_multiplier == 1.0 # Time decay / review quality / credibility are merge-only — kept neutral here. assert scored.time_decay_multiplier == 1.0 assert scored.credibility_multiplier == 1.0 diff --git a/tests/validator/test_emission_allocation.py b/tests/validator/test_emission_allocation.py new file mode 100644 index 00000000..35802898 --- /dev/null +++ b/tests/validator/test_emission_allocation.py @@ -0,0 +1,134 @@ +from types import SimpleNamespace + +import pytest + +from gittensor.classes import MinerEvaluation +from gittensor.validator.forward import blend_emission_pools +from gittensor.validator.utils.load_weights import RepositoryConfig + + +def _evaluation(uid: int, prs=None, issues=None) -> MinerEvaluation: + evaluation = MinerEvaluation(uid=uid, hotkey=f'hotkey-{uid}', github_id=str(uid)) + evaluation.merged_prs = list(prs or []) + evaluation.discovered_issues = list(issues or []) + return evaluation + + +def _pr(repo: str, score: float): + return SimpleNamespace(repository_full_name=repo, earned_score=score) + + +def _issue(repo: str, score: float): + return SimpleNamespace(repository_full_name=repo, discovery_earned_score=score) + + +def _rewards(miner_evaluations, repositories): + uids = set(miner_evaluations) | {0, 111} + rewards = blend_emission_pools(miner_evaluations, repositories, uids) + return dict(zip(sorted(uids), rewards)) + + +def test_single_pr_claims_full_repo_slice(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.05, issue_discovery_share=0.0)} + miner_evaluations = {1: _evaluation(1, prs=[_pr('foo/repo', 10.0)])} + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.045) + assert rewards[0] == pytest.approx(0.855) + assert rewards[111] == pytest.approx(0.1) + assert sum(rewards.values()) == pytest.approx(1.0) + + +def test_many_prs_split_same_repo_slice_proportionally(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.05, issue_discovery_share=0.0)} + miner_evaluations = { + 1: _evaluation(1, prs=[_pr('foo/repo', 1.0)]), + 2: _evaluation(2, prs=[_pr('foo/repo', 1.0) for _ in range(49)]), + } + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.045 / 50) + assert rewards[2] == pytest.approx(0.045 * 49 / 50) + assert rewards[0] == pytest.approx(0.855) + + +def test_issue_subslice_spills_to_pr_side_inside_same_repo(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.3)} + miner_evaluations = {1: _evaluation(1, prs=[_pr('foo/repo', 5.0)])} + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.18) + assert rewards[0] == pytest.approx(0.72) + + +def test_pr_subslice_spills_to_issue_side_inside_same_repo(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.3)} + miner_evaluations = {1: _evaluation(1, issues=[_issue('foo/repo', 5.0)])} + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.18) + assert rewards[0] == pytest.approx(0.72) + + +def test_disabled_issue_side_does_not_claim_repo_slice(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.0)} + miner_evaluations = {1: _evaluation(1, issues=[_issue('foo/repo', 5.0)])} + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.0) + assert rewards[0] == pytest.approx(0.9) + assert rewards[111] == pytest.approx(0.1) + + +def test_no_activity_recycles_entire_scoring_pool(): + repositories = {'foo/repo': RepositoryConfig(emission_share=0.1, issue_discovery_share=0.5)} + miner_evaluations = {1: _evaluation(1)} + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.0) + assert rewards[0] == pytest.approx(0.9) + assert rewards[111] == pytest.approx(0.1) + assert sum(rewards.values()) == pytest.approx(1.0) + + +def test_registry_sum_point_eight_routes_shortfall_to_recycle(): + repositories = { + 'foo/a': RepositoryConfig(emission_share=0.4, issue_discovery_share=0.0), + 'foo/b': RepositoryConfig(emission_share=0.4, issue_discovery_share=0.0), + } + miner_evaluations = { + 1: _evaluation(1, prs=[_pr('foo/a', 1.0)]), + 2: _evaluation(2, prs=[_pr('foo/b', 1.0)]), + } + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.36) + assert rewards[2] == pytest.approx(0.36) + assert rewards[0] == pytest.approx(0.18) + assert rewards[111] == pytest.approx(0.1) + + +def test_full_registry_sum_with_all_repos_active_has_no_recycle(): + repositories = { + 'foo/a': RepositoryConfig(emission_share=0.5, issue_discovery_share=0.0), + 'foo/b': RepositoryConfig(emission_share=0.5, issue_discovery_share=0.0), + } + miner_evaluations = { + 1: _evaluation(1, prs=[_pr('foo/a', 1.0)]), + 2: _evaluation(2, prs=[_pr('foo/b', 1.0)]), + } + + rewards = _rewards(miner_evaluations, repositories) + + assert rewards[1] == pytest.approx(0.45) + assert rewards[2] == pytest.approx(0.45) + assert rewards[0] == pytest.approx(0.0) + assert rewards[111] == pytest.approx(0.1) + assert sum(rewards.values()) == pytest.approx(1.0) diff --git a/tests/validator/test_load_weights.py b/tests/validator/test_load_weights.py index bb179784..5cbaee51 100644 --- a/tests/validator/test_load_weights.py +++ b/tests/validator/test_load_weights.py @@ -19,7 +19,9 @@ load_master_repo_weights, load_programming_language_weights, load_token_config, + resolve_repo_emission_share, resolve_repo_weight, + validate_repository_emission_shares, ) @@ -117,6 +119,11 @@ def test_repo_configs_are_repository_config_objects(self): for repo_name, config in repos.items(): assert isinstance(config, RepositoryConfig), f'{repo_name} should be RepositoryConfig' + def test_live_repositories_use_emission_share_not_weight(self): + for repo_name, metadata in _live_master_repo_metadata(): + assert 'emission_share' in metadata, f'{repo_name} must define emission_share' + assert 'weight' not in metadata, f'{repo_name} must not use legacy weight' + def test_repo_names_are_lowercase(self): """Repository names should be normalized to lowercase.""" repos = load_master_repo_weights() @@ -181,6 +188,95 @@ def test_loader_parses_trusted_label_pipeline_true(self, tmp_path, monkeypatch): assert repos['foo/explicit-off'].trusted_label_pipeline is False +class TestRepositoryEmissionShare: + def test_defaults(self): + config = RepositoryConfig(emission_share=0.5) + + assert config.emission_share == pytest.approx(0.5) + assert config.issue_discovery_share == pytest.approx(0.5) + + def test_weight_constructor_alias_maps_to_emission_share(self): + config = RepositoryConfig(weight=0.25) + + assert config.emission_share == pytest.approx(0.25) + assert config.weight == pytest.approx(0.25) + + def test_loader_parses_emission_and_issue_discovery_shares(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text( + json.dumps( + { + 'foo/pr-heavy': {'emission_share': 0.2, 'issue_discovery_share': 0.0}, + 'foo/issues': {'emission_share': 0.3, 'issue_discovery_share': 1.0}, + 'foo/default': {'emission_share': 0.1}, + } + ) + ) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + repos = lw.load_master_repo_weights() + + assert repos['foo/pr-heavy'].emission_share == pytest.approx(0.2) + assert repos['foo/pr-heavy'].issue_discovery_share == pytest.approx(0.0) + assert repos['foo/issues'].issue_discovery_share == pytest.approx(1.0) + assert repos['foo/default'].issue_discovery_share == pytest.approx(0.5) + + def test_validator_rejects_individual_share_outside_range(self): + repos = {'foo/bad': RepositoryConfig(emission_share=1.01)} + + with pytest.raises(ValueError, match='emission_share'): + validate_repository_emission_shares(repos) + + def test_validator_rejects_issue_discovery_share_outside_range(self): + repos = {'foo/bad': RepositoryConfig(emission_share=0.1, issue_discovery_share=-0.1)} + + with pytest.raises(ValueError, match='issue_discovery_share'): + validate_repository_emission_shares(repos) + + def test_validator_rejects_registry_sum_above_one(self): + repos = { + 'foo/a': RepositoryConfig(emission_share=0.6), + 'foo/b': RepositoryConfig(emission_share=0.5), + } + + with pytest.raises(ValueError, match='<= 1.0'): + validate_repository_emission_shares(repos) + + def test_loader_rejects_registry_sum_above_one(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text( + json.dumps( + { + 'foo/a': {'emission_share': 0.6}, + 'foo/b': {'emission_share': 0.5}, + } + ) + ) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + assert lw.load_master_repo_weights() == {} + + def test_validator_accepts_registry_sum_below_one(self): + repos = { + 'foo/a': RepositoryConfig(emission_share=0.2), + 'foo/b': RepositoryConfig(emission_share=0.3), + } + + validate_repository_emission_shares(repos) + + def test_live_registry_sum_is_within_bounds(self): + repos = load_master_repo_weights() + + assert 0.0 <= sum(config.emission_share for config in repos.values()) <= 1.0 + for repo_name, config in repos.items(): + assert 0.0 <= config.emission_share <= 1.0, f'{repo_name} emission_share out of bounds' + assert 0.0 <= config.issue_discovery_share <= 1.0, f'{repo_name} issue_discovery_share out of bounds' + + class TestRepositoryConfigLabelMultipliers: """Dataclass + JSON-parsing tests for per-repo label multiplier config.""" @@ -344,6 +440,7 @@ class TestResolveRepoWeight: """Tests for resolve_repo_weight — full-precision repo weight lookup.""" def test_none_returns_default(self): + assert resolve_repo_emission_share(None) == 0.01 assert resolve_repo_weight(None) == 0.01 @pytest.mark.parametrize( @@ -352,6 +449,7 @@ def test_none_returns_default(self): ) def test_preserves_full_precision(self, weight): config = RepositoryConfig(weight=weight) + assert resolve_repo_emission_share(config) == weight assert resolve_repo_weight(config) == weight def test_live_master_repo_precision(self):