diff --git a/gittensor/classes.py b/gittensor/classes.py index 326e2a18..4e1b53e9 100644 --- a/gittensor/classes.py +++ b/gittensor/classes.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone from enum import Enum from math import prod -from typing import TYPE_CHECKING, DefaultDict, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Optional, Set, Tuple import bittensor as bt @@ -234,7 +234,6 @@ def is_pioneer_eligible(self) -> bool: def calculate_final_earned_score(self) -> float: """Combine base score with all multipliers. Pioneer dividend is added separately after.""" multipliers = { - 'repo': self.repo_weight_multiplier, 'issue': self.issue_multiplier, 'label': self.label_multiplier, 'spam': self.open_pr_spam_multiplier, @@ -289,6 +288,7 @@ class MinerEvaluation: total_valid_solved_issues: int = 0 # solved issues where solving PR has token_score >= 5 total_closed_issues: int = 0 total_open_issues: int = 0 # current mirror-tracked open issues (set by issue_discovery.scan) + discovered_issues: List[Issue] = field(default_factory=list) @property def total_prs(self) -> int: @@ -505,6 +505,7 @@ class CachedEvaluation: 'total_valid_solved_issues', 'total_closed_issues', 'total_open_issues', + 'discovered_issues', ) @@ -547,7 +548,7 @@ def store(self, evaluation: 'MinerEvaluation') -> None: existing = self._cache.get(evaluation.uid) if existing is not None and existing.hotkey == evaluation.hotkey and existing.github_id == evaluation.github_id: for name in _ISSUE_DISCOVERY_FIELDS: - setattr(cached_eval, name, getattr(existing.evaluation, name)) + setattr(cached_eval, name, _copy_issue_discovery_field(name, getattr(existing.evaluation, name))) self._cache[evaluation.uid] = CachedEvaluation( hotkey=evaluation.hotkey, @@ -581,7 +582,7 @@ def update_issue_discovery(self, evaluation: 'MinerEvaluation') -> None: return for name in _ISSUE_DISCOVERY_FIELDS: - setattr(existing.evaluation, name, getattr(evaluation, name)) + setattr(existing.evaluation, name, _copy_issue_discovery_field(name, getattr(evaluation, name))) bt.logging.debug(f'Refreshed cached issue discovery for UID {evaluation.uid}') @@ -627,6 +628,7 @@ def _build_cache_entry(evaluation: 'MinerEvaluation') -> 'MinerEvaluation': cached.merged_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.merged_prs] cached.open_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.open_prs] cached.closed_prs = [_scored_mirror_pr_for_cache(pr) for pr in evaluation.closed_prs] + cached.discovered_issues = [copy.copy(issue) for issue in evaluation.discovered_issues] return cached @staticmethod @@ -636,9 +638,16 @@ def _isolate_for_downstream(cached_eval: 'MinerEvaluation') -> 'MinerEvaluation' # adapters produce fresh Issue objects per call via get_all_issues(). copy_eval = copy.copy(cached_eval) copy_eval.unique_repos_contributed_to = set(cached_eval.unique_repos_contributed_to) + copy_eval.discovered_issues = [copy.copy(issue) for issue in cached_eval.discovered_issues] return copy_eval +def _copy_issue_discovery_field(name: str, value: Any) -> Any: + if name == 'discovered_issues': + return [copy.copy(issue) for issue in value] + return value + + def _scored_mirror_pr_for_cache(scored: 'ScoredPR') -> 'ScoredPR': scored_copy = copy.copy(scored) scored_copy.files = None diff --git a/gittensor/cli/miner_commands/score.py b/gittensor/cli/miner_commands/score.py index 0bcd085c..478eee40 100644 --- a/gittensor/cli/miner_commands/score.py +++ b/gittensor/cli/miner_commands/score.py @@ -283,7 +283,7 @@ async def _run() -> Dict[str, Any]: issue_rewards = await issue_discovery( miner_evaluations, master_repositories, programming_languages, token_config, miner_uids ) - rewards = blend_emission_pools(oss_rewards, issue_rewards, miner_uids) + rewards = blend_emission_pools(miner_uids, miner_evaluations, master_repositories) return { 'success': True, diff --git a/gittensor/constants.py b/gittensor/constants.py index 6daa940f..65305ac0 100644 --- a/gittensor/constants.py +++ b/gittensor/constants.py @@ -74,7 +74,7 @@ # ============================================================================= # Repository & PR Scoring # ============================================================================= -DEFAULT_REPO_WEIGHT = 0.01 # fallback weight for repos not in master_repositories.json +DEFAULT_REPO_EMISSION_SHARE = 0.01 # fallback share for repos not in master_repositories.json PR_LOOKBACK_DAYS = 35 # rolling window for scoring MERGED_PR_BASE_SCORE = 25 MIN_TOKEN_SCORE_FOR_BASE_SCORE = 5 # PRs below this get 0 base score @@ -154,11 +154,8 @@ # ============================================================================= RECYCLE_UID = 0 -# Hardcoded emission splits per competition (replaces dynamic emissions) -OSS_EMISSION_SHARE = 0.30 # 30% to OSS contributions (PR scoring) -ISSUE_DISCOVERY_EMISSION_SHARE = 0.10 # 10% to issue discovery -RECYCLE_EMISSION_SHARE = 0.45 # 45% to recycle UID 0 -# ISSUES_TREASURY_EMISSION_SHARE = 0.15 defined below (15% to smart contract treasury) +# Hardcoded emission splits. +OSS_EMISSION_SHARE = 0.90 # 90% combined scoring pool allocated by repo emission_share # ============================================================================= # Spam & Gaming Mitigation @@ -187,5 +184,5 @@ # ============================================================================= CONTRACT_ADDRESS = '5FWNdk8YNtNcHKrAx2krqenFrFAZG7vmsd2XN2isJSew3MrD' ISSUES_TREASURY_UID = 111 # UID of the smart contract neuron, if set to RECYCLE_UID then it's disabled -ISSUES_TREASURY_EMISSION_SHARE = 0.15 # % of emissions allocated to funding issues treasury +ISSUES_TREASURY_EMISSION_SHARE = 0.10 # % of emissions allocated to funding issues treasury MAX_ISSUE_ID = 1_000_000 # sanity-check upper bound for any real deployment diff --git a/gittensor/validator/forward.py b/gittensor/validator/forward.py index 33a01de7..64cd692f 100644 --- a/gittensor/validator/forward.py +++ b/gittensor/validator/forward.py @@ -8,14 +8,7 @@ import numpy as np from gittensor.classes import MinerEvaluation, MinerEvaluationCache -from gittensor.constants import ( - ISSUE_DISCOVERY_EMISSION_SHARE, - ISSUES_TREASURY_EMISSION_SHARE, - ISSUES_TREASURY_UID, - OSS_EMISSION_SHARE, - RECYCLE_EMISSION_SHARE, - RECYCLE_UID, -) +from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, OSS_EMISSION_SHARE, RECYCLE_UID from gittensor.utils.uids import get_all_uids from gittensor.validator.issue_competitions.forward import issue_competitions from gittensor.validator.issue_discovery.normalize import ( @@ -46,13 +39,12 @@ async def forward(self: 'Validator') -> None: 2. Score issue discovery 3. Run issue bounties verification 4. Store all evaluations to DB - 5. Blend emission pools and update scores + 5. Allocate repo emission slices and update scores - Emission blending (hardcoded per-competition): - - OSS contributions: 30% - - Issue discovery: 30% - - Issue treasury: 15% (flat to UID 111) - - Recycle: 25% (flat to UID 0) + Emission allocation: + - Combined scoring pool: 90%, allocated by repo emission_share + - Issue treasury: 10% (flat to UID 111) + - Recycle: unclaimed repo slices and registry slack to UID 0 """ if self.step % VALIDATOR_STEPS_INTERVAL == 0: @@ -62,12 +54,12 @@ async def forward(self: 'Validator') -> None: token_config = load_token_config() # 1. Score OSS contributions - oss_rewards, miner_evaluations, cached_uids, penalized_uids = await oss_contributions( + _, miner_evaluations, cached_uids, penalized_uids = await oss_contributions( self, miner_uids, master_repositories, programming_languages, token_config ) # 2. Score issue discovery - issue_rewards = await issue_discovery( + await issue_discovery( miner_evaluations, master_repositories, programming_languages, @@ -85,8 +77,8 @@ async def forward(self: 'Validator') -> None: # 4. Store all evaluations to DB (includes issue discovery fields) await self.bulk_store_evaluation(miner_evaluations, skip_uids=cached_uids) - # 5. Blend 4 emission pools into final rewards - rewards = blend_emission_pools(oss_rewards, issue_rewards, miner_uids) + # 5. Allocate repo emission slices into final rewards + rewards = blend_emission_pools(miner_uids, miner_evaluations, master_repositories) self.update_scores(rewards, miner_uids, blacklisted_uids=sorted(penalized_uids)) @@ -150,36 +142,20 @@ async def issue_discovery( def blend_emission_pools( - oss_rewards: np.ndarray, - issue_rewards: np.ndarray, miner_uids: set[int], + miner_evaluations: Dict[int, MinerEvaluation], + master_repositories: Dict[str, RepositoryConfig], ) -> np.ndarray: - """Blend 4 emission pools into a single rewards array. + """Allocate the combined scoring pool by repo emission_share. - - OSS contributions: 30% - - Issue discovery: 30% - - Issue treasury: 15% (flat to UID 111) - - Recycle: 25% (flat to UID 0) + Allocation is repo-first: each active repo receives exactly its configured + slice of ``OSS_EMISSION_SHARE`` and distributes it within the repo by raw score. """ sorted_uids = sorted(miner_uids) rewards = np.zeros(len(sorted_uids)) - recycle_extra = 0.0 - - # Pool 1: OSS contributions (30%) - oss_total = float(oss_rewards.sum()) - if oss_total > 0: - rewards += oss_rewards * OSS_EMISSION_SHARE - else: - recycle_extra += OSS_EMISSION_SHARE - - # Pool 2: Issue discovery (30%) - issue_total = float(issue_rewards.sum()) - if issue_total > 0: - rewards += issue_rewards * ISSUE_DISCOVERY_EMISSION_SHARE - else: - recycle_extra += ISSUE_DISCOVERY_EMISSION_SHARE - - # Pool 3: Issue treasury (15% flat to UID 111) + + rewards += _allocate_repo_scoring_pool(sorted_uids, miner_evaluations, master_repositories) + if ISSUES_TREASURY_UID > 0 and ISSUES_TREASURY_UID in miner_uids: treasury_idx = sorted_uids.index(ISSUES_TREASURY_UID) rewards[treasury_idx] += ISSUES_TREASURY_EMISSION_SHARE @@ -188,11 +164,87 @@ def blend_emission_pools( f'{ISSUES_TREASURY_EMISSION_SHARE * 100:.0f}% of emissions' ) - # Pool 4: Recycle (25% + unclaimed from empty pools) - if RECYCLE_UID in miner_uids: - recycle_idx = sorted_uids.index(RECYCLE_UID) - rewards[recycle_idx] += RECYCLE_EMISSION_SHARE + recycle_extra - if recycle_extra > 0: - bt.logging.info(f'Recycling {recycle_extra * 100:.0f}% unclaimed emissions from empty pools') + return rewards + + +def _allocate_repo_scoring_pool( + sorted_uids: list[int], + miner_evaluations: Dict[int, MinerEvaluation], + master_repositories: Dict[str, RepositoryConfig], +) -> np.ndarray: + rewards = np.zeros(len(sorted_uids)) + uid_index = {uid: idx for idx, uid in enumerate(sorted_uids)} + recycle_idx = uid_index.get(RECYCLE_UID) + allocated_share = 0.0 + + for repo_name, repo_config in master_repositories.items(): + repo_key = repo_name.lower() + repo_share = repo_config.emission_share + allocated_share += repo_share + repo_slice = OSS_EMISSION_SHARE * repo_share + if repo_slice <= 0: + continue + + pr_scores: dict[int, float] = {} + issue_scores: dict[int, float] = {} + issue_share = repo_config.issue_discovery_share + pr_share = 1.0 - issue_share + for uid, evaluation in miner_evaluations.items(): + if uid not in uid_index: + continue + if pr_share > 0: + pr_score = _repo_pr_score(evaluation, repo_key) + if pr_score > 0: + pr_scores[uid] = pr_score + if issue_share > 0: + for issue in evaluation.discovered_issues: + if issue.repository_full_name.lower() == repo_key and issue.discovery_earned_score > 0: + issue_scores[uid] = issue_scores.get(uid, 0.0) + float(issue.discovery_earned_score) + + pr_total = sum(pr_scores.values()) + issue_total = sum(issue_scores.values()) + if pr_total <= 0 and issue_total <= 0: + if recycle_idx is not None: + rewards[recycle_idx] += repo_slice + continue + + pr_slice = repo_slice * pr_share + issue_slice = repo_slice * issue_share + if pr_total <= 0: + issue_slice += pr_slice + pr_slice = 0.0 + elif issue_total <= 0: + pr_slice += issue_slice + issue_slice = 0.0 + + _add_proportional_rewards(rewards, uid_index, pr_scores, pr_total, pr_slice) + _add_proportional_rewards(rewards, uid_index, issue_scores, issue_total, issue_slice) + + slack_share = max(0.0, 1.0 - allocated_share) + if slack_share > 0 and recycle_idx is not None: + rewards[recycle_idx] += OSS_EMISSION_SHARE * slack_share return rewards + + +def _repo_pr_score(evaluation: MinerEvaluation, repo_name: str) -> float: + return sum( + float(pr.earned_score) + for pr in evaluation.merged_prs + if pr.repository_full_name.lower() == repo_name and pr.earned_score > 0 + ) + + +def _add_proportional_rewards( + rewards: np.ndarray, + uid_index: dict[int, int], + scores: dict[int, float], + total: float, + amount: float, +) -> None: + if total <= 0 or amount <= 0: + return + for uid, score in scores.items(): + idx = uid_index.get(uid) + if idx is not None: + rewards[idx] += amount * score / total diff --git a/gittensor/validator/issue_discovery/scan.py b/gittensor/validator/issue_discovery/scan.py index e9241fec..2a68099c 100644 --- a/gittensor/validator/issue_discovery/scan.py +++ b/gittensor/validator/issue_discovery/scan.py @@ -60,7 +60,6 @@ LanguageConfig, RepositoryConfig, TokenConfig, - resolve_repo_weight, ) @@ -450,6 +449,8 @@ async def _score_miner_issues( evaluation.issue_credibility = credibility if not is_eligible: + evaluation.discovered_issues = [] + evaluation.issue_discovery_score = 0.0 bt.logging.info( f'├─ UID {evaluation.uid}: ineligible ({reason}) | ' f'{solved_count} solved ({valid_solved_count} valid) | {closed_count} closed | ' @@ -465,7 +466,6 @@ async def _score_miner_issues( issue.discovery_open_issue_spam_multiplier = spam_mult issue.discovery_earned_score = round( issue.discovery_base_score - * issue.discovery_repo_weight_multiplier * issue.discovery_time_decay_multiplier * issue.discovery_review_quality_multiplier * issue.discovery_credibility_multiplier @@ -474,6 +474,7 @@ async def _score_miner_issues( ) total_discovery_score += issue.discovery_earned_score + evaluation.discovered_issues = scored_issues evaluation.issue_discovery_score = round(total_discovery_score, 2) bt.logging.info( @@ -629,7 +630,7 @@ def _mirror_issue_for_scoring( ) adapted.discovery_base_score = base_score - adapted.discovery_repo_weight_multiplier = resolve_repo_weight(repo_config) + adapted.discovery_repo_weight_multiplier = 1.0 adapted.discovery_time_decay_multiplier = round(calculate_time_decay(solving_pr.merged_at), 2) adapted.discovery_review_quality_multiplier = round( calculate_issue_review_quality_multiplier(solving_pr.review_summary.maintainer_changes_requested_count), diff --git a/gittensor/validator/oss_contributions/mirror/scored_pr.py b/gittensor/validator/oss_contributions/mirror/scored_pr.py index 882377be..0f2a7f52 100644 --- a/gittensor/validator/oss_contributions/mirror/scored_pr.py +++ b/gittensor/validator/oss_contributions/mirror/scored_pr.py @@ -88,7 +88,6 @@ def is_pioneer_eligible(self) -> bool: def calculate_final_earned_score(self) -> float: """Combine base score with all multipliers. Pioneer dividend is added separately after.""" multipliers = { - 'repo': self.repo_weight_multiplier, 'issue': self.issue_multiplier, 'label': self.label_multiplier, 'spam': self.open_pr_spam_multiplier, diff --git a/gittensor/validator/oss_contributions/mirror/scoring.py b/gittensor/validator/oss_contributions/mirror/scoring.py index bda651e0..11b78ab5 100644 --- a/gittensor/validator/oss_contributions/mirror/scoring.py +++ b/gittensor/validator/oss_contributions/mirror/scoring.py @@ -2,7 +2,7 @@ Scope: - Compute base_score for each PR via the existing token-scoring infra. -- Compute per-PR multipliers: repo_weight, time_decay, review_quality, label, issue. +- Compute per-PR multipliers: time_decay, review_quality, label, issue. - The merge-eligibility gate (``_should_skip_merged_mirror_pr``) is exported and applied at LOAD time by ``mirror.load._maybe_add_pr`` — rejected PRs never enter ``merged_prs``, so the merged_count used by ``check_eligibility`` @@ -54,7 +54,6 @@ LanguageConfig, RepositoryConfig, TokenConfig, - resolve_repo_weight, ) from gittensor.validator.utils.tree_sitter_scoring import calculate_token_score_from_file_changes @@ -338,7 +337,7 @@ def calculate_base_score_for_pr_files( def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig) -> None: - """Compute repo_weight, time_decay, review_quality, label, issue multipliers. + """Compute time_decay, review_quality, label, issue multipliers. Spam and credibility multipliers are deferred to ``finalize_miner_scores`` — they depend on per-miner aggregate counts. @@ -346,7 +345,7 @@ def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig) - pr = scored.pr is_merged = pr.state == 'MERGED' - scored.repo_weight_multiplier = resolve_repo_weight(repo_config) + scored.repo_weight_multiplier = 1.0 chosen_label, label_multiplier = _resolve_trusted_scoring_label(pr, repo_config) scored.label = chosen_label diff --git a/gittensor/validator/oss_contributions/scoring.py b/gittensor/validator/oss_contributions/scoring.py index 4336d248..7398e274 100644 --- a/gittensor/validator/oss_contributions/scoring.py +++ b/gittensor/validator/oss_contributions/scoring.py @@ -283,14 +283,13 @@ def calculate_open_pr_collateral_score(pr: 'ScoredPR') -> float: Collateral = base_score * applicable_multipliers * OPEN_PR_COLLATERAL_PERCENT - Applicable multipliers: repo_weight, issue, label, review_collateral + Applicable multipliers: issue, label, review_collateral NOT applicable: time_decay (merge-based), credibility_multiplier (merge-based), open_pr_spam (not for collateral) """ from math import prod multipliers = { - 'repo_weight': pr.repo_weight_multiplier, 'issue': pr.issue_multiplier, 'label': pr.label_multiplier, 'review_collateral': calculate_review_collateral_multiplier(pr.changes_requested_count, pr.number), diff --git a/gittensor/validator/utils/load_weights.py b/gittensor/validator/utils/load_weights.py index b838529a..230c9e12 100644 --- a/gittensor/validator/utils/load_weights.py +++ b/gittensor/validator/utils/load_weights.py @@ -2,12 +2,20 @@ # Copyright © 2025 Entrius import json from dataclasses import dataclass, field +from math import isfinite from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import bittensor as bt -from gittensor.constants import DEFAULT_REPO_WEIGHT, NON_CODE_EXTENSIONS +from gittensor.constants import DEFAULT_REPO_EMISSION_SHARE, NON_CODE_EXTENSIONS + +_SHARE_TOLERANCE = 1e-9 +_DEFAULT_ISSUE_DISCOVERY_SHARE = 0.5 + + +class RepositoryRegistryError(ValueError): + """Raised when repository emission-share config violates the registry contract.""" @dataclass @@ -23,12 +31,13 @@ class LanguageConfig: language: Optional[str] = None -@dataclass +@dataclass(init=False) class RepositoryConfig: """Configuration for a repository in the master_repositories list. Attributes: - weight: Repository weight for scoring + emission_share: Repository share of the combined scoring emission pool + issue_discovery_share: Fraction of the repo slice reserved for issue discovery inactive_at: ISO timestamp when repository became inactive (None if active) additional_acceptable_branches: List of additional branch patterns to accept (None if only default branch) trusted_label_pipeline: When True, scoring labels count regardless of @@ -46,7 +55,8 @@ class RepositoryConfig: """ - weight: float + emission_share: float + issue_discovery_share: float inactive_at: Optional[str] = None additional_acceptable_branches: Optional[List[str]] = None trusted_label_pipeline: bool = False @@ -55,12 +65,49 @@ class RepositoryConfig: fixed_base_score: Optional[float] = None eligibility_mode: bool = True + def __init__( + self, + emission_share: Optional[float] = None, + *, + weight: Optional[float] = None, + issue_discovery_share: float = _DEFAULT_ISSUE_DISCOVERY_SHARE, + inactive_at: Optional[str] = None, + additional_acceptable_branches: Optional[List[str]] = None, + trusted_label_pipeline: bool = False, + label_multipliers: Optional[Dict[str, float]] = None, + default_label_multiplier: float = 1.0, + fixed_base_score: Optional[float] = None, + eligibility_mode: bool = True, + ) -> None: + if emission_share is None: + emission_share = DEFAULT_REPO_EMISSION_SHARE if weight is None else weight + + self.emission_share = float(emission_share) + self.issue_discovery_share = float(issue_discovery_share) + self.inactive_at = inactive_at + self.additional_acceptable_branches = additional_acceptable_branches + self.trusted_label_pipeline = trusted_label_pipeline + self.label_multipliers = label_multipliers + self.default_label_multiplier = default_label_multiplier + self.fixed_base_score = fixed_base_score + self.eligibility_mode = eligibility_mode + + @property + def weight(self) -> float: + """Backward-compatible alias for callers that still display old config names.""" + return self.emission_share + def resolve_repo_weight(repo_config: Optional[RepositoryConfig]) -> float: - """Return the repo weight preserving full JSON precision, or the default for unknown repos.""" + """Backward-compatible alias for the configured repo emission share.""" + return resolve_repo_emission_share(repo_config) + + +def resolve_repo_emission_share(repo_config: Optional[RepositoryConfig]) -> float: + """Return the configured repo emission share, or the default for unknown repos.""" if repo_config is None: - return DEFAULT_REPO_WEIGHT - return repo_config.weight + return DEFAULT_REPO_EMISSION_SHARE + return repo_config.emission_share @dataclass @@ -107,6 +154,15 @@ def _get_weights_dir() -> Path: return Path(__file__).parent.parent / 'weights' +def _coerce_share(repo_name: str, field_name: str, raw_value: Any) -> float: + if isinstance(raw_value, bool): + raise RepositoryRegistryError(f'{repo_name} {field_name} must be a finite float, got bool') + value = float(raw_value) + if not isfinite(value): + raise RepositoryRegistryError(f'{repo_name} {field_name} must be a finite float, got {raw_value!r}') + return value + + def load_master_repo_weights() -> Dict[str, RepositoryConfig]: """ Load repository weights from the local JSON file. @@ -114,7 +170,8 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: Returns: Dictionary mapping normalized (lowercase) fullName (str) to RepositoryConfig object. - Returns empty dict on error. + Returns empty dict when the file is missing or invalid JSON. Raises + ValueError when repository entries violate the emission-share contract. """ weights_file = _get_weights_dir() / 'master_repositories.json' @@ -123,15 +180,21 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: data = json.load(f) if not isinstance(data, dict): - bt.logging.error(f'Expected dict from {weights_file}, got {type(data)}') - return {} + raise RepositoryRegistryError(f'Expected dict from {weights_file}, got {type(data)}') # Parse JSON data into RepositoryConfig objects normalized_data: Dict[str, RepositoryConfig] = {} for repo_name, metadata in data.items(): try: + if not isinstance(metadata, dict): + raise TypeError(f'expected object metadata, got {type(metadata)}') config = RepositoryConfig( - weight=float(metadata.get('weight', 0.01)), + emission_share=_coerce_share(repo_name, 'emission_share', metadata['emission_share']), + issue_discovery_share=_coerce_share( + repo_name, + 'issue_discovery_share', + metadata.get('issue_discovery_share', _DEFAULT_ISSUE_DISCOVERY_SHARE), + ), inactive_at=metadata.get('inactive_at'), additional_acceptable_branches=metadata.get('additional_acceptable_branches'), trusted_label_pipeline=bool(metadata.get('trusted_label_pipeline', False)), @@ -145,10 +208,12 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: eligibility_mode=metadata.get('eligibility_mode', True), ) normalized_data[repo_name.lower()] = config - except (ValueError, TypeError) as e: - bt.logging.warning(f'Could not parse config for {repo_name}: {e}, using defaults') - # Create config with defaults if parsing fails - normalized_data[repo_name.lower()] = RepositoryConfig(weight=float(metadata.get('weight', 0.01))) + except RepositoryRegistryError: + raise + except (KeyError, ValueError, TypeError) as e: + raise ValueError(f'Could not parse config for {repo_name}: {e}') from e + + _validate_repository_emission_shares(normalized_data) bt.logging.debug(f'Successfully loaded {len(normalized_data)} repository entries from {weights_file}') return normalized_data @@ -159,11 +224,28 @@ def load_master_repo_weights() -> Dict[str, RepositoryConfig]: except json.JSONDecodeError as e: bt.logging.error(f'Failed to parse JSON from {weights_file}: {e}') return {} + except ValueError: + raise except Exception as e: bt.logging.error(f'Unexpected error loading repository weights: {e}') return {} +def _validate_repository_emission_shares(repos: Dict[str, RepositoryConfig]) -> None: + total_share = 0.0 + for repo_name, config in repos.items(): + if not 0.0 <= config.emission_share <= 1.0: + raise ValueError(f'{repo_name} emission_share must be within [0, 1], got {config.emission_share}') + if not 0.0 <= config.issue_discovery_share <= 1.0: + raise ValueError( + f'{repo_name} issue_discovery_share must be within [0, 1], got {config.issue_discovery_share}' + ) + total_share += config.emission_share + + if total_share > 1.0 + _SHARE_TOLERANCE: + raise ValueError(f'total repository emission_share must be <= 1.0, got {total_share}') + + def load_programming_language_weights() -> Dict[str, LanguageConfig]: """ Load programming language weights from the local JSON file. diff --git a/gittensor/validator/weights/master_repositories.json b/gittensor/validator/weights/master_repositories.json index 3d0254d7..380038ce 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -1,6 +1,6 @@ { "entrius/allways": { - "weight": 0.05, + "emission_share": 0.05, "trusted_label_pipeline": true, "label_multipliers": { "bug": 1.25, @@ -9,7 +9,7 @@ } }, "entrius/allways-ui": { - "weight": 0.01, + "emission_share": 0.01, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.25, @@ -19,7 +19,7 @@ } }, "entrius/das-github-mirror": { - "weight": 0.02, + "emission_share": 0.02, "trusted_label_pipeline": true, "label_multipliers": { "bug": 1.25, @@ -28,7 +28,7 @@ } }, "entrius/gittensor": { - "weight": 0.1, + "emission_share": 0.1, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.5, @@ -38,7 +38,7 @@ } }, "entrius/gittensor-ui": { - "weight": 0.03, + "emission_share": 0.03, "trusted_label_pipeline": true, "label_multipliers": { "feature": 1.25, @@ -48,7 +48,8 @@ } }, "entrius/oc-1": { - "weight": 0.5, + "emission_share": 0.5, + "issue_discovery_share": 0.0, "trusted_label_pipeline": true, "fixed_base_score": 1.0, "eligibility_mode": false, diff --git a/tests/validator/oss_contributions/mirror/test_scored_pr.py b/tests/validator/oss_contributions/mirror/test_scored_pr.py index b3c138f0..4361e62c 100644 --- a/tests/validator/oss_contributions/mirror/test_scored_pr.py +++ b/tests/validator/oss_contributions/mirror/test_scored_pr.py @@ -113,10 +113,9 @@ def test_neutral_multipliers_returns_base(self): def test_multipliers_compose(self): scored = ScoredPR(pr=_make_pr()) scored.base_score = 100.0 - scored.repo_weight_multiplier = 0.5 scored.review_quality_multiplier = 0.5 - # 100 * 0.5 * 0.5 (others 1.0) = 25 - assert scored.calculate_final_earned_score() == 25.0 + # Repo allocation happens after per-PR scoring, so only review applies here. + assert scored.calculate_final_earned_score() == 50.0 def test_zero_multiplier_zeros_score(self): scored = ScoredPR(pr=_make_pr()) diff --git a/tests/validator/oss_contributions/mirror/test_scoring.py b/tests/validator/oss_contributions/mirror/test_scoring.py index c14b792e..2f33f1fc 100644 --- a/tests/validator/oss_contributions/mirror/test_scoring.py +++ b/tests/validator/oss_contributions/mirror/test_scoring.py @@ -378,7 +378,7 @@ def test_fixed_base_score_scores_without_stored_files(self): client.get_pr_files.assert_not_called() assert scored.base_score == pytest.approx(7.5) - assert scored.repo_weight_multiplier == pytest.approx(0.5) + assert scored.repo_weight_multiplier == pytest.approx(1.0) class TestFixedBaseScore: @@ -439,7 +439,6 @@ def test_fixed_base_replaces_token_base_but_keeps_token_breakdown_and_multiplier assert scored.label_multiplier == pytest.approx(2.0) assert scored.calculate_final_earned_score() == pytest.approx( scored.base_score - * scored.repo_weight_multiplier * scored.issue_multiplier * scored.label_multiplier * scored.open_pr_spam_multiplier @@ -854,13 +853,12 @@ def test_collateral_computed_without_crash(self): scored = ScoredPR(pr=_pr(state='OPEN')) scored.base_score = 25.0 - scored.repo_weight_multiplier = 0.5 scored.issue_multiplier = 1.0 scored.label_multiplier = 1.0 # Must not raise AttributeError on .number result = calculate_open_pr_collateral_score(scored) - assert result >= 0.0 + assert result == pytest.approx(5.0) def test_number_property_proxies_to_pr_pr_number(self): scored = ScoredPR(pr=_pr()) @@ -914,7 +912,7 @@ def test_merged_pr_populates_all_multipliers(self): _config(weight=0.7, additional_branches=['test'], label_multipliers={'feature': 1.5}), ) - assert scored.repo_weight_multiplier == 0.7 + assert scored.repo_weight_multiplier == 1.0 assert scored.label == 'feature' assert scored.label_multiplier == pytest.approx(1.5) assert 0.0 <= scored.time_decay_multiplier <= 1.0 @@ -926,7 +924,7 @@ def test_open_pr_only_neutral_multipliers(self): scored = ScoredPR(pr=_pr(state='OPEN')) _calculate_pr_multipliers(scored, _config(weight=0.5)) - assert scored.repo_weight_multiplier == 0.5 + assert scored.repo_weight_multiplier == 1.0 # Time decay / review quality / credibility are merge-only — kept neutral here. assert scored.time_decay_multiplier == 1.0 assert scored.credibility_multiplier == 1.0 diff --git a/tests/validator/test_emission_share_allocation.py b/tests/validator/test_emission_share_allocation.py new file mode 100644 index 00000000..9ccc6faa --- /dev/null +++ b/tests/validator/test_emission_share_allocation.py @@ -0,0 +1,189 @@ +from types import SimpleNamespace +from typing import Any, cast + +import pytest + +from gittensor.classes import Issue, MinerEvaluation +from gittensor.validator.forward import blend_emission_pools +from gittensor.validator.utils.load_weights import RepositoryConfig + + +def _eval(uid: int, repo_scores=None, issue_scores=None) -> MinerEvaluation: + evaluation = MinerEvaluation(uid=uid, hotkey=f'hotkey-{uid}', github_id=f'github-{uid}') + evaluation.merged_prs = cast( + Any, + [SimpleNamespace(repository_full_name=repo, earned_score=score) for repo, score in (repo_scores or [])], + ) + evaluation.discovered_issues = [ + Issue( + number=idx + 1, pr_number=idx + 10, repository_full_name=repo, title='issue', discovery_earned_score=score + ) + for idx, (repo, score) in enumerate(issue_scores or []) + ] + return evaluation + + +def test_active_repo_receives_fixed_slice_regardless_of_pr_count(): + repos = { + 'repo/a': RepositoryConfig(emission_share=0.05, issue_discovery_share=0.0), + 'repo/b': RepositoryConfig(emission_share=0.05, issue_discovery_share=0.0), + } + many_prs = [('repo/b', 1.0) for _ in range(50)] + evaluations = { + 1: _eval(1, [('repo/a', 10.0)]), + 2: _eval(2, many_prs), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1, 2}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.05) + assert rewards[2] == pytest.approx(0.90 * 0.05) + assert rewards[0] == pytest.approx(0.90 * 0.90) + + +def test_issue_slice_spills_to_pr_side_within_same_repo(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.3)} + evaluations = { + 1: _eval(1, [('repo/a', 10.0)]), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.2) + assert rewards[0] == pytest.approx(0.90 * 0.8) + + +def test_pr_slice_spills_to_issue_side_within_same_repo(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.3)} + evaluations = { + 1: _eval(1, issue_scores=[('repo/a', 10.0)]), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.2) + assert rewards[0] == pytest.approx(0.90 * 0.8) + + +def test_issue_discovery_share_zero_disables_issue_rewards_and_recycles_empty_pr_side(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.0)} + evaluations = { + 1: _eval(1, issue_scores=[('repo/a', 10.0)]), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.0) + assert rewards[0] == pytest.approx(0.90) + + +def test_repo_slice_recycles_when_both_sides_are_empty(): + repos = {'repo/a': RepositoryConfig(emission_share=1.0, issue_discovery_share=0.5)} + evaluations = {0: _eval(0), 1: _eval(1)} + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[0] == pytest.approx(0.90) + assert rewards[1] == pytest.approx(0.0) + + +def test_pr_and_issue_sides_split_by_repo_config(): + repos = {'repo/a': RepositoryConfig(emission_share=1.0, issue_discovery_share=0.25)} + evaluations = { + 1: _eval(1, [('repo/a', 3.0)]), + 2: _eval(2, [('repo/a', 1.0)], [('repo/a', 4.0)]), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1, 2}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.75 * 0.75) + assert rewards[2] == pytest.approx((0.90 * 0.75 * 0.25) + (0.90 * 0.25)) + assert rewards[0] == pytest.approx(0.0) + + +def test_fully_active_full_registry_sends_nothing_to_recycle(): + repos = {'repo/a': RepositoryConfig(emission_share=1.0, issue_discovery_share=0.0)} + evaluations = { + 0: _eval(0), + 1: _eval(1, [('repo/a', 5.0)]), + 111: _eval(111), + } + + rewards = blend_emission_pools({0, 1, 111}, evaluations, repos) + + assert rewards[0] == pytest.approx(0.0) + assert rewards[1] == pytest.approx(0.90) + assert rewards[2] == pytest.approx(0.10) + assert rewards.sum() == pytest.approx(1.0) + + +def test_no_repo_activity_recycles_scoring_pool_and_preserves_treasury(): + repos = {'repo/a': RepositoryConfig(emission_share=1.0, issue_discovery_share=0.5)} + evaluations = {0: _eval(0), 1: _eval(1), 111: _eval(111)} + + rewards = blend_emission_pools({0, 1, 111}, evaluations, repos) + + assert rewards[0] == pytest.approx(0.90) + assert rewards[1] == pytest.approx(0.0) + assert rewards[2] == pytest.approx(0.10) + assert rewards.sum() == pytest.approx(1.0) + + +def test_registry_slack_recycles_without_redistributing_to_active_repos(): + repos = {'repo/a': RepositoryConfig(emission_share=0.8, issue_discovery_share=0.0)} + evaluations = { + 0: _eval(0), + 1: _eval(1, [('repo/a', 5.0)]), + 111: _eval(111), + } + + rewards = blend_emission_pools({0, 1, 111}, evaluations, repos) + + assert rewards[0] == pytest.approx(0.90 * 0.2) + assert rewards[1] == pytest.approx(0.90 * 0.8) + assert rewards[2] == pytest.approx(0.10) + assert rewards.sum() == pytest.approx(1.0) + + +def test_repo_matching_is_case_insensitive_for_pr_scores(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=0.0)} + evaluations = { + 0: _eval(0), + 1: _eval(1, [('Repo/A', 10.0)]), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.2) + assert rewards[0] == pytest.approx(0.90 * 0.8) + + +def test_repo_matching_is_case_insensitive_for_issue_scores(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=1.0)} + evaluations = { + 0: _eval(0), + 1: _eval(1, issue_scores=[('Repo/A', 10.0)]), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.90 * 0.2) + assert rewards[0] == pytest.approx(0.90 * 0.8) + + +def test_issue_discovery_share_one_disables_pr_rewards_and_recycles_empty_issue_side(): + repos = {'repo/a': RepositoryConfig(emission_share=0.2, issue_discovery_share=1.0)} + evaluations = { + 1: _eval(1, [('repo/a', 10.0)]), + 0: _eval(0), + } + + rewards = blend_emission_pools({0, 1}, evaluations, repos) + + assert rewards[1] == pytest.approx(0.0) + assert rewards[0] == pytest.approx(0.90) diff --git a/tests/validator/test_load_weights.py b/tests/validator/test_load_weights.py index bb179784..3adeb2d2 100644 --- a/tests/validator/test_load_weights.py +++ b/tests/validator/test_load_weights.py @@ -131,6 +131,77 @@ def test_trusted_label_pipeline_field_present_on_live_configs(self): f'{repo_name} trusted_label_pipeline should be bool, got {type(config.trusted_label_pipeline)}' ) + def test_live_emission_shares_are_bounded(self): + repos = load_master_repo_weights() + total = sum(config.emission_share for config in repos.values()) + + assert 0.0 <= total <= 1.0 + for repo_name, config in repos.items(): + assert 0.0 <= config.emission_share <= 1.0, f'{repo_name} emission_share must be within [0, 1]' + assert 0.0 <= config.issue_discovery_share <= 1.0, ( + f'{repo_name} issue_discovery_share must be within [0, 1]' + ) + + def test_loader_rejects_emission_share_sum_over_one(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text( + json.dumps( + { + 'foo/a': {'emission_share': 0.7}, + 'foo/b': {'emission_share': 0.4}, + } + ) + ) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + with pytest.raises(ValueError, match='total repository emission_share'): + lw.load_master_repo_weights() + + def test_loader_rejects_emission_share_out_of_range(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text(json.dumps({'foo/a': {'emission_share': 1.1}})) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + with pytest.raises(ValueError, match='emission_share must be within'): + lw.load_master_repo_weights() + + def test_loader_rejects_issue_discovery_share_out_of_range(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text( + json.dumps({'foo/a': {'emission_share': 0.5, 'issue_discovery_share': -0.1}}) + ) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + with pytest.raises(ValueError, match='issue_discovery_share must be within'): + lw.load_master_repo_weights() + + def test_loader_rejects_missing_emission_share(self, tmp_path, monkeypatch): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text(json.dumps({'foo/a': {'weight': 0.5}})) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + with pytest.raises(ValueError, match='Could not parse config'): + lw.load_master_repo_weights() + + @pytest.mark.parametrize('raw_share', [True, float('nan'), float('inf')]) + def test_loader_rejects_non_finite_or_bool_shares(self, tmp_path, monkeypatch, raw_share): + from gittensor.validator.utils import load_weights as lw + + fake_weights_dir = tmp_path + (fake_weights_dir / 'master_repositories.json').write_text(json.dumps({'foo/a': {'emission_share': raw_share}})) + monkeypatch.setattr(lw, '_get_weights_dir', lambda: fake_weights_dir) + + with pytest.raises(ValueError, match='emission_share must be a finite float'): + lw.load_master_repo_weights() + def test_entrius_repos_have_trusted_label_pipeline(self): """All entrius/* entries opt into trusted_label_pipeline (issue #911).""" repos = load_master_repo_weights() @@ -166,9 +237,9 @@ def test_loader_parses_trusted_label_pipeline_true(self, tmp_path, monkeypatch): (fake_weights_dir / 'master_repositories.json').write_text( json.dumps( { - 'foo/trusted': {'weight': 0.5, 'trusted_label_pipeline': True}, - 'foo/untrusted': {'weight': 0.3}, - 'foo/explicit-off': {'weight': 0.2, 'trusted_label_pipeline': False}, + 'foo/trusted': {'emission_share': 0.5, 'trusted_label_pipeline': True}, + 'foo/untrusted': {'emission_share': 0.3}, + 'foo/explicit-off': {'emission_share': 0.2, 'trusted_label_pipeline': False}, } ) ) @@ -198,11 +269,11 @@ def test_loader_parses_label_multiplier_config(self, tmp_path, monkeypatch): json.dumps( { 'foo/labeled': { - 'weight': 0.5, + 'emission_share': 0.5, 'label_multipliers': {'kind/*': 1.5, 'type:bug': 1.25}, 'default_label_multiplier': 0.8, }, - 'foo/defaults': {'weight': 0.3}, + 'foo/defaults': {'emission_share': 0.3}, } ) ) @@ -260,11 +331,11 @@ def test_loader_parses_mirror_scoring_fields(self, tmp_path, monkeypatch): json.dumps( { 'foo/fixed': { - 'weight': 0.5, + 'emission_share': 0.5, 'fixed_base_score': 12.5, 'eligibility_mode': False, }, - 'foo/defaults': {'weight': 0.3}, + 'foo/defaults': {'emission_share': 0.3}, } ) ) @@ -341,7 +412,7 @@ def test_no_active_banned_org_repos(self): class TestResolveRepoWeight: - """Tests for resolve_repo_weight — full-precision repo weight lookup.""" + """Tests for the legacy repo-weight lookup alias.""" def test_none_returns_default(self): assert resolve_repo_weight(None) == 0.01