diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index 794dc6194d..e424e8df68 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -35,6 +35,7 @@
from bugbug.tools.code_review.prompts import (
CODE_REVIEW_TODO_PROMPT,
CODE_REVIEW_TODO_TOOL_DESCRIPTION,
+ EXTERNAL_CONTEXT_TEMPLATE,
FIRST_MESSAGE_TEMPLATE,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
@@ -70,6 +71,7 @@ def __init__(
target_software: str = "Mozilla Firefox",
todo_enabled: bool = True,
skills: Optional[list[Skill]] = None,
+ rules_url: Optional[str] = None,
) -> None:
super().__init__()
@@ -94,6 +96,8 @@ def __init__(
self.patch_summarizer = patch_summarizer
self.suggestion_filterer = suggestion_filterer
+ self._rules_url = rules_url
+
tools = list(SEARCHFOX_TOOLS)
if skills:
@@ -160,6 +164,15 @@ def create(cls, **kwargs):
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
+ mcp_servers=[
+ {
+ "type": "url",
+ "url": os.getenv(
+ "BUGBUG_MOZ_MCP_URL", "https://mcp-dev.moz.tools/mcp"
+ ),
+ "name": "mozilla",
+ }
+ ],
)
if "patch_summarizer" not in kwargs:
@@ -177,30 +190,38 @@ def create(cls, **kwargs):
kwargs["skills"] = REVIEW_SKILLS
+ if "rules_url" not in kwargs:
+ kwargs["rules_url"] = os.getenv("BUGBUG_REVIEW_RULES_URL")
+
return cls(**kwargs)
def count_tokens(self, text):
return len(self._tokenizer.encode(text))
- def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
+ def generate_initial_prompt(
+ self, patch: Patch, patch_summary: str, external_context: str = ""
+ ) -> str:
created_before = patch.date_created if self.is_experiment_env else None
return FIRST_MESSAGE_TEMPLATE.format(
patch=format_patch_set(patch.patch_set),
patch_summarization=patch_summary,
+ external_context=external_context,
comment_examples=self._get_comment_examples(patch, created_before),
approved_examples=self._get_generated_examples(patch, created_before),
)
async def generate_review_comments(
- self, patch: Patch, patch_summary: str
+ self, patch: Patch, patch_summary: str, external_context: str = ""
) -> list[GeneratedReviewComment]:
try:
async for chunk in self.agent.astream(
{
"messages": [
HumanMessage(
- self.generate_initial_prompt(patch, patch_summary)
+ self.generate_initial_prompt(
+ patch, patch_summary, external_context
+ )
),
]
},
@@ -214,14 +235,40 @@ async def generate_review_comments(
return result["structured_response"].comments
- async def run(self, patch: Patch) -> CodeReviewToolResponse:
+ async def run(
+ self,
+ patch: Patch,
+ extra_rules_toml: Optional[str] = None,
+ content_overrides: Optional[dict[str, str]] = None,
+ ) -> CodeReviewToolResponse:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")
patch_summary = self.patch_summarizer.run(patch)
+ external_context = ""
+ if self._rules_url:
+ from bugbug.tools.code_review.skill_rules import (
+ load_external_content_for_diff,
+ )
+
+ bug_id = getattr(patch, "bug_id", None)
+ content_items = await load_external_content_for_diff(
+ patch.raw_diff,
+ self._rules_url,
+ bug_id=bug_id,
+ extra_rules_toml=extra_rules_toml,
+ content_overrides=content_overrides,
+ )
+ if content_items:
+ content = "\n\n".join(
+ f'\n{body.strip()}\n'
+ for name, body in content_items
+ )
+ external_context = EXTERNAL_CONTEXT_TEMPLATE.format(content=content)
+
unfiltered_suggestions = await self.generate_review_comments(
- patch, patch_summary
+ patch, patch_summary, external_context
)
if not unfiltered_suggestions:
logger.info("No suggestions were generated")
diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
index d7ecd6938d..3b2c48e470 100644
--- a/bugbug/tools/code_review/data_types.py
+++ b/bugbug/tools/code_review/data_types.py
@@ -1,10 +1,13 @@
import re
+from datetime import datetime
import httpx
+import tenacity
from pydantic import BaseModel, Field, PrivateAttr
from bugbug.tools.core.connection import get_http_client
from bugbug.tools.core.data_types import InlineComment
+from bugbug.tools.core.platforms.base import Patch
class GeneratedReviewComment(BaseModel):
@@ -82,3 +85,129 @@ async def load(self) -> str:
self._cached_body = _strip_frontmatter(response.text)
return self._cached_body
+
+
+_BUG_RE = re.compile(r"[Bb]ug\s+(\d+)")
+_DIFFERENTIAL_RE = re.compile(r"Differential Revision:\s*(https?://\S+)")
+_DATE_RE = re.compile(r"^Date:\s+(.+)$", re.MULTILINE)
+
+
+class LocalPatch(Patch):
+ """A patch from a local diff string, not backed by any platform."""
+
+ def __init__(
+ self,
+ diff: str,
+ commit_message: str = "",
+ ) -> None:
+ self._diff = diff
+ self._commit_message = commit_message
+ lines = commit_message.splitlines()
+ self._title = lines[0].strip() if lines else "Local patch"
+ self._description = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
+ m = _BUG_RE.search(commit_message)
+ self._bug_id: int | None = int(m.group(1)) if m else None
+ m2 = _DIFFERENTIAL_RE.search(commit_message)
+ self._url = m2.group(1) if m2 else ""
+ m3 = _DATE_RE.search(commit_message)
+ if m3:
+ from email.utils import parsedate_to_datetime
+ try:
+ self._date = parsedate_to_datetime(m3.group(1))
+ except Exception:
+ self._date = datetime.now()
+ else:
+ self._date = datetime.now()
+
+ @property
+ def patch_id(self) -> str:
+ return "local"
+
+ @property
+ def raw_diff(self) -> str:
+ return self._diff
+
+ @property
+ def date_created(self) -> datetime:
+ return self._date
+
+ @property
+ def has_bug(self) -> bool:
+ return self._bug_id is not None
+
+ @property
+ def bug_id(self) -> int | None:
+ return self._bug_id
+
+ @property
+ def bug_title(self) -> str:
+ return ""
+
+ @property
+ def patch_title(self) -> str:
+ return self._title
+
+ @property
+ def patch_description(self) -> str:
+ return self._description
+
+ @property
+ def patch_url(self) -> str:
+ return self._url
+
+ def is_accessible(self) -> bool:
+ return True
+
+ def is_public(self) -> bool:
+ return True
+
+ async def get_new_file(self, file_path: str) -> str:
+ raise FileNotFoundError(
+ f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+ )
+
+ async def get_old_file(self, file_path: str) -> str:
+ raise FileNotFoundError(
+ f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
+ )
+
+
+class ExternalContentLoadError(Exception):
+ """Raised when an ExternalContent body cannot be loaded."""
+
+
+@tenacity.retry(
+ stop=tenacity.stop_after_attempt(3),
+ wait=tenacity.wait_exponential(multiplier=1, min=1),
+ retry=tenacity.retry_if_exception_type(httpx.TransportError),
+ reraise=True,
+)
+async def _fetch_url(url: str) -> httpx.Response:
+ response = await get_http_client().get(url, timeout=30)
+ response.raise_for_status()
+ return response
+
+
+class ExternalContent(BaseModel):
+ """An external file fetched and injected as context for the review."""
+
+ name: str = Field(description="A unique identifier for this content item.")
+ url: str = Field(description="HTTPS URL of the file to fetch.")
+ description: str = Field(description="Short description of what this content provides.")
+
+ _cached_body: str | None = PrivateAttr(default=None)
+
+ async def load(self) -> str:
+ """Return the content body, fetching and caching it on first use."""
+ if self._cached_body is not None:
+ return self._cached_body
+
+ try:
+ response = await _fetch_url(self.url)
+ except httpx.HTTPError as e:
+ raise ExternalContentLoadError(
+ f"Could not load content '{self.name}' from {self.url}"
+ ) from e
+
+ self._cached_body = _strip_frontmatter(response.text)
+ return self._cached_body
diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
index bf4073d474..512e85dd7f 100644
--- a/bugbug/tools/code_review/prompts.py
+++ b/bugbug/tools/code_review/prompts.py
@@ -5,80 +5,43 @@
"""Prompt templates for code review agent."""
-from bugbug.tools.code_review.data_types import Skill
-
-TARGET_SOFTWARE: str | None = None
-
-
-SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
+from pathlib import Path
-## Instructions
-
-Follow this systematic approach to review the patch:
-
-**Step 1: Analyze the Changes**
-- Understand what the patch is trying to accomplish
-- Use the patch summary for context, but focus primarily on what you can see in the actual diff
-- Identify the intent and structure of the changes
-
-**Step 2: Identify Issues**
-- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
-- Focus ONLY on new or changed lines (lines that begin with `+`)
-- Never comment on unmodified code
-- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
-
-**Step 3: Verify and Assess Confidence**
-- Use available tools when you need to verify concerns or gather additional context
-- Only include comments where you are at least 80% confident the issue is valid
-- When uncertain about an issue, use available tools to verify before commenting
-- Do not suggest issues you cannot verify with available context
+from bugbug.tools.code_review.data_types import Skill
-**Step 4: Sort and Order Comments**
-- Sort comments by descending confidence and importance
-- Start with issues you are certain are valid and that are most critical
-- Assign each comment a numeric order starting at 1
+_PROMPTS_DIR = Path(__file__).parent / "prompts"
-**Step 5: Write Clear, Constructive Comments**
-- Use direct, declarative language - state the problem definitively, then suggest the fix
-- Keep comments short and specific
-- Use directive language: "Fix", "Remove", "Change", "Add"
-- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
-- Focus strictly on code-related concerns
+TARGET_SOFTWARE: str | None = None
-## What NOT to Include
+_SYSTEM_PROMPT_RAW = (_PROMPTS_DIR / "system.md").read_text()
-Do not write comments that:
-- Refer to unmodified code (lines without a `+` prefix)
-- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
-- Provide praise or restate obvious facts
-- Focus on testing concerns
-- Point out issues that are already handled in the visible code
-- Suggest problems based on assumptions without verifying the context
-- Flag style preferences without clear coding standard violations
+_CONTEXT_TOOLS_AGENT = """\
+ - `expand_context` to read surrounding code in the patched files
+ - `find_function_definition` to look up callers or definitions\
"""
+_CONTEXT_TOOLS_LOCAL = """\
+ - `Read` and `Grep` to inspect files in the local checkout
+ - `Bash` with `searchfox-cli` to query Searchfox for definitions, callers, and cross-references\
+"""
-FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
-
-
-{patch_summarization}
-
-
-
-Here are examples of good code review comments to guide your style and approach:
+SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+ target_software="{target_software}",
+ context_tools=_CONTEXT_TOOLS_AGENT,
+)
-
-{comment_examples}
-{approved_examples}
-
+LOCAL_SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
+ target_software="{target_software}",
+ context_tools=_CONTEXT_TOOLS_LOCAL,
+)
+EXTERNAL_CONTEXT_TEMPLATE = """
-Here is the patch you need to review:
+
+{content}
+"""
-
-{patch}
-
-"""
+FIRST_MESSAGE_TEMPLATE = (_PROMPTS_DIR / "first_message.md").read_text()
TEMPLATE_COMMENT_EXAMPLE = """Patch example {example_number}:
diff --git a/bugbug/tools/code_review/prompts/first_message.md b/bugbug/tools/code_review/prompts/first_message.md
new file mode 100644
index 0000000000..848897fb69
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/first_message.md
@@ -0,0 +1,21 @@
+Here is a summary of the patch:
+
+
+{patch_summarization}
+
+{external_context}
+
+
+Here are examples of good code review comments to guide your style and approach:
+
+
+{comment_examples}
+{approved_examples}
+
+
+
+Here is the patch you need to review:
+
+
+{patch}
+
diff --git a/bugbug/tools/code_review/prompts/system.md b/bugbug/tools/code_review/prompts/system.md
new file mode 100644
index 0000000000..674b4000a2
--- /dev/null
+++ b/bugbug/tools/code_review/prompts/system.md
@@ -0,0 +1,51 @@
+## Instructions
+
+Follow this systematic approach to review the patch:
+
+**Step 1: Analyze the Changes**
+
+- Read the commit message
+- Understand what the patch is trying to accomplish
+- Identify the intent and structure of the changes; if what the patch does doesn't match the commit message, this always warrants a review comment — mismatches make the history hard to understand and bisect
+
+**Step 2: Identify Issues**
+
+- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
+- Focus ONLY on new or changed lines (lines that begin with `+`)
+- Never comment on unmodified code
+- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
+
+**Step 3: Verify and Assess Confidence**
+
+- Use available tools when you need to verify concerns or gather additional context:
+ {context_tools}
+ - `mozilla__get_phabricator_revision` to fetch related Phabricator revisions — open and closed, including their review comments; particularly useful for other revisions in the same stack
+ - `mozilla__get_bugzilla_bug` to fetch the associated Bugzilla bug and its history
+ - `mozilla__read_fx_doc_section` to read sections from the Firefox Source Tree Documentation (firefox-source-docs.mozilla.org) — useful for architecture and API documentation relevant to the changed code
+- Significant tool usage is expected to perform a high-quality review, much like a human reviewer would
+- When uncertain about an issue, use available tools to verify before commenting
+- Do not suggest issues you cannot verify with available context
+
+**Step 4: Sort and Order Comments**
+
+- Sort comments by descending confidence and importance
+- Start with issues you are certain are valid and that are most critical
+- Assign each comment a numeric order starting at 1
+
+**Step 5: Write Clear, Constructive Comments**
+
+- Use direct, declarative language - state the problem definitively, then suggest the fix
+- Keep comments short and specific
+- Use directive language: "Fix", "Remove", "Change", "Add"
+- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
+- Focus strictly on code-related concerns
+
+## What NOT to Include
+
+Do not write comments that:
+
+- Refer to unmodified code (lines without a `+` prefix)
+- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
+- Provide praise or restate obvious facts
+- Suggest problems based on assumptions without verifying the context
+- Flag style preferences without clear coding standard violations
diff --git a/bugbug/tools/code_review/skill_rules.py b/bugbug/tools/code_review/skill_rules.py
new file mode 100644
index 0000000000..aeb43c50e8
--- /dev/null
+++ b/bugbug/tools/code_review/skill_rules.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Rule-based external content loading for code review.
+
+Fetches skill-rules.toml from the reviewed repository, matches changed files
+against the rules, and pre-loads the referenced content.
+"""
+
+import asyncio
+import fnmatch
+import re
+import time
+from logging import getLogger
+
+try:
+ import tomllib
+except ImportError:
+ import tomli as tomllib # type: ignore
+
+import httpx
+from unidiff import PatchSet
+
+from bugbug.tools.code_review.data_types import (
+ ExternalContent,
+ ExternalContentLoadError,
+ _fetch_url,
+)
+from bugbug.tools.core.connection import get_http_client
+
+logger = getLogger(__name__)
+
+_PHAB_RE = re.compile(r"D(\d+)$")
+
+_rules_cache: dict[str, tuple[float, dict]] = {}
+_RULES_CACHE_TTL = 300 # seconds
+
+
+def parse_diff_files(diff: str) -> set[str]:
+ """Return the set of file paths added or modified by the diff."""
+ try:
+ return {f.path for f in PatchSet.from_string(diff) if not f.is_removed_file}
+ except Exception:
+ files = set()
+ for line in diff.splitlines():
+ if line.startswith("+++ b/"):
+ files.add(line[6:].strip())
+ return files
+
+
+async def get_bug_component(bug_id: int) -> str | None:
+ """Return 'Product::Component' for the given Bugzilla bug, or None on failure."""
+ from libmozdata.bugzilla import Bugzilla
+
+ def _fetch() -> str | None:
+ bugs: list[dict] = []
+ Bugzilla(
+ bug_id,
+ include_fields=["product", "component"],
+ bughandler=lambda bug, data: data.append(bug),
+ bugdata=bugs,
+ ).get_data().wait()
+ if not bugs:
+ return None
+ return f"{bugs[0]['product']}::{bugs[0]['component']}"
+
+ try:
+ return await asyncio.get_event_loop().run_in_executor(None, _fetch)
+ except Exception:
+ logger.warning("Could not fetch component for bug %s", bug_id)
+ return None
+
+
+def rule_matches(
+ rule: dict, changed_files: set[str], bug_component: str | None = None
+) -> bool:
+ """Return True if ALL specified conditions are satisfied.
+
+ match_extensions and match_paths are ANDed per-file: at least one file
+ must satisfy both simultaneously.
+
+ match_bugzilla_components checks the patch's associated bug component via
+ the BMO REST API. Pass bug_component=None to fail closed (rule never fires).
+ """
+ match_exts = rule.get("match_extensions", [])
+ match_paths = rule.get("match_paths", [])
+ match_components = rule.get("match_bugzilla_components", [])
+
+ if not match_exts and not match_paths and not match_components:
+ return False
+
+ if match_components:
+ if bug_component is None or bug_component not in match_components:
+ return False
+
+ if match_exts or match_paths:
+ per_file_ok = any(
+ (not match_exts or any(f.endswith(ext) for ext in match_exts))
+ and (not match_paths or any(fnmatch.fnmatch(f, pat) for pat in match_paths))
+ for f in changed_files
+ )
+ if not per_file_ok:
+ return False
+
+ return True
+
+
+def _action_key(action: dict) -> tuple:
+ t = action["type"]
+ if t == "load_file":
+ return ("load_file", action.get("repo", ""), action["path"])
+ if t == "fetch_revision":
+ return (
+ "fetch_revision",
+ action.get("revision", ""),
+ action.get("repo", ""),
+ action.get("hash", ""),
+ )
+ return (t, str(action))
+
+
+def _action_to_content(action: dict, base_url: str) -> ExternalContent:
+ """Resolve a load_file action to an ExternalContent instance.
+
+ For cross-repo references (repo= present), constructs a
+ raw.githubusercontent.com URL using the given branch (default: main).
+ For same-repo references, derives the URL from base_url, which is the
+ directory portion of the rules file's URL.
+ """
+ if action["type"] != "load_file":
+ raise ValueError(f"Unknown action type: {action['type']!r}")
+ path = action["path"]
+ repo = action.get("repo")
+ if repo:
+ branch = action.get("branch", "main")
+ url = f"https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}"
+ else:
+ url = base_url.rstrip("/") + "/" + path.lstrip("/")
+ return ExternalContent(name=path, url=url, description=path)
+
+
+async def _fetch_revision(action: dict) -> tuple[str, str] | None:
+ """Fetch the diff of another revision, either from Phabricator or GitHub."""
+ revision_str = action.get("revision", "")
+ repo = action.get("repo", "")
+ commit_hash = action.get("hash", "")
+
+ if revision_str:
+ m = _PHAB_RE.match(revision_str.strip())
+ if not m:
+ logger.warning("fetch_revision: cannot parse revision %r", revision_str)
+ return None
+ rev_num = int(m.group(1))
+ try:
+ from bugbug.tools.core.platforms.phabricator import get_phabricator_client
+
+ def _load():
+ phab = get_phabricator_client()
+ revision = phab.load_revision(rev_id=rev_num)
+ diff_id = revision["fields"]["diffID"]
+ return phab.load_raw_diff(diff_id)
+
+ content = await asyncio.get_event_loop().run_in_executor(None, _load)
+ return f"Revision D{rev_num}", content
+ except Exception:
+ logger.warning("fetch_revision: failed to load Phabricator D%s", rev_num)
+ return None
+
+ if repo and commit_hash:
+ try:
+ response = await get_http_client().get(
+ f"https://api.github.com/repos/{repo}/commits/{commit_hash}",
+ headers={"Accept": "application/vnd.github.diff"},
+ timeout=30,
+ )
+ response.raise_for_status()
+ return f"{repo}@{commit_hash[:12]}", response.text
+ except httpx.HTTPError:
+ logger.warning("fetch_revision: failed to load %s@%s", repo, commit_hash)
+ return None
+
+ logger.warning(
+ "fetch_revision: action has neither revision nor repo+hash: %s", action
+ )
+ return None
+
+
+def _merge_rules(base: list[dict], extra_toml: str) -> list[dict]:
+ """Merge extra rules into the base list.
+
+ Rules in extra_toml whose name matches an existing rule replace that rule
+ in-place. Rules with new names are appended. Rules without a name are always
+ appended. Returns a new list; base is not mutated.
+ """
+ extra = tomllib.loads(extra_toml).get("rules", [])
+ if not extra:
+ return base
+ rules = list(base)
+ index = {r["name"]: i for i, r in enumerate(rules) if "name" in r}
+ for rule in extra:
+ name = rule.get("name")
+ if name and name in index:
+ rules[index[name]] = rule
+ else:
+ rules.append(rule)
+ return rules
+
+
+def collect_actions(
+ diff: str,
+ config: dict,
+ bug_component: str | None = None,
+ extra_rules_toml: str | None = None,
+) -> list[dict]:
+ """Return deduplicated actions matched from config against the diff.
+
+ Merges extra_rules_toml into config before matching. Evaluates each rule
+ against the changed files (and optional bug component). Actions are
+ deduplicated across rules so the same file is never fetched twice. Pure —
+ no I/O.
+ """
+ if extra_rules_toml:
+ config = {
+ **config,
+ "rules": _merge_rules(config.get("rules", []), extra_rules_toml),
+ }
+
+ changed_files = parse_diff_files(diff)
+ logger.debug("Matching rules against %d changed file(s): %s", len(changed_files), changed_files)
+
+ seen: set[tuple] = set()
+ actions: list[dict] = []
+
+ for rule in config.get("rules", []):
+ if not rule_matches(rule, changed_files, bug_component):
+ continue
+ rule_name = rule.get("name", "")
+ new_actions = []
+ for action in rule.get("actions", []):
+ key = _action_key(action)
+ if key in seen:
+ continue
+ seen.add(key)
+ actions.append(action)
+ new_actions.append(action)
+ if new_actions:
+ logger.debug("Rule %r matched: %d action(s) queued", rule_name, len(new_actions))
+
+ logger.debug("Total actions to execute: %d", len(actions))
+ return actions
+
+
+async def execute_actions(
+ actions: list[dict],
+ base_url: str,
+ content_overrides: dict[str, str] | None = None,
+) -> list[tuple[str, str]]:
+ """Execute a list of actions and return (name, content) pairs.
+
+ fetch_revision actions fetch the raw diff of a Phabricator revision or
+ GitHub commit. load_file actions resolve to an ExternalContent URL and
+ fetch the file content. content_overrides bypasses the network fetch for
+ matching names, allowing callers to inject test content.
+ """
+ results: list[tuple[str, str]] = []
+ for action in actions:
+ if action["type"] == "fetch_revision":
+ result = await _fetch_revision(action)
+ if result:
+ results.append(result)
+ continue
+ try:
+ item = _action_to_content(action, base_url)
+ if content_overrides and item.name in content_overrides:
+ results.append((item.name, content_overrides[item.name]))
+ else:
+ body = await item.load()
+ results.append((item.name, body))
+ except (ValueError, ExternalContentLoadError):
+ logger.error("Failed to load content for action %s", action)
+ return results
+
+
+async def _fetch_rules(rules_url: str) -> dict:
+ """Fetch and parse skill-rules.toml, with a short in-process TTL cache."""
+ now = time.time()
+ if rules_url in _rules_cache:
+ ts, config = _rules_cache[rules_url]
+ if now - ts < _RULES_CACHE_TTL:
+ logger.debug("Using cached rules from %s (age %.0fs)", rules_url, now - ts)
+ return config
+ response = await _fetch_url(rules_url)
+ config = tomllib.loads(response.text)
+ _rules_cache[rules_url] = (now, config)
+ return config
+
+
+async def load_external_content_for_diff(
+ diff: str,
+ rules_url: str,
+ bug_id: int | None = None,
+ extra_rules_toml: str | None = None,
+ content_overrides: dict[str, str] | None = None,
+) -> list[tuple[str, str]]:
+ """Fetch skill-rules.toml from rules_url, match against diff, return (name, content) pairs.
+
+ Retries the rules fetch on transient errors (via _fetch_url). The parsed
+ config is cached in-process with a short TTL to avoid redundant fetches
+ across back-to-back reviews.
+ """
+ try:
+ config = await _fetch_rules(rules_url)
+ except httpx.HTTPError:
+ logger.error("Could not fetch skill rules from %s", rules_url)
+ return []
+
+ # base_url is the directory portion of the rules file URL, used to
+ # resolve relative load_file paths in the same repository.
+ base_url = rules_url.rsplit("/", 1)[0]
+
+ bug_component: str | None = None
+ if bug_id is not None:
+ bug_component = await get_bug_component(bug_id)
+
+ actions = collect_actions(diff, config, bug_component, extra_rules_toml)
+ return await execute_actions(actions, base_url, content_overrides)
diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md
new file mode 100644
index 0000000000..8939ef035d
--- /dev/null
+++ b/docs/code-review-skills.md
@@ -0,0 +1,245 @@
+# Code Review Skills
+
+The code review agent can load external knowledge at review time, injecting it
+as context before the LLM sees the patch. This lets domain experts encode their
+knowledge once, in a form the agent uses automatically whenever it reviews
+relevant code.
+
+## How it works
+
+When the agent reviews a patch, it:
+
+1. Fetches `skill-rules.toml` from the root of the reviewed repository (any
+ GitHub repo; configured via `BUGBUG_REVIEW_RULES_URL`).
+2. Parses the changed files from the diff.
+3. Matches each rule against the changed files (and optionally the patch's
+ associated Bugzilla component, fetched from BMO).
+4. For each matched rule, fetches the referenced skill or documentation files.
+5. Injects the content as `` blocks inside a `` section in
+ the review prompt, before the patch.
+
+Set `BUGBUG_REVIEW_RULES_URL` to the raw URL of `skill-rules.toml` in the
+repository being reviewed, e.g.:
+
+```
+BUGBUG_REVIEW_RULES_URL=https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/skill-rules.toml
+```
+
+---
+
+## For bugbug developers
+
+### Key files
+
+| File | Purpose |
+| --------------------------------------------------- | ------------------------------------------------------------- |
+| `bugbug/tools/code_review/skill_rules.py` | Rule engine: fetches rules, matches files, loads skills |
+| `bugbug/tools/code_review/data_types.py` | `Skill` model with async HTTP fetch and frontmatter stripping |
+| `bugbug/tools/code_review/agent.py` | Calls `load_skills_for_diff` in `run()`, formats context |
+| `bugbug/tools/code_review/prompts/system.md` | System prompt template |
+| `bugbug/tools/code_review/prompts/first_message.md` | First user message template (contains `{skills}` slot) |
+
+### Prompt templates
+
+Both `system.md` and `first_message.md` are plain Markdown files loaded at
+import time. Edit them directly — no Python changes needed. They use Python
+`str.format()` placeholders (`{target_software}`, `{patch}`, `{skills}`, etc.).
+
+The `{skills}` slot in `first_message.md` is filled by `SKILLS_CONTEXT_TEMPLATE`
+from `prompts.py` when rules match. It is an empty string otherwise, so no
+blank section appears in the prompt.
+
+### Caching
+
+Skill content is cached in-process on the `Skill` instance (`_cached_body`).
+The shared `httpx.AsyncClient` from `get_http_client()` handles connection
+reuse. The rules file itself is re-fetched on every review (it is small and
+changes rarely; add process-level caching if it becomes a bottleneck).
+
+### `match_bugzilla_components`
+
+Implemented via `libmozdata.Bugzilla` with `include_fields=["product", "component"]`.
+The patch's associated bug ID is read from `patch.bug_id` (available on
+`PhabricatorPatch`; other patch types pass `None`, which fails closed). The
+component string compared against the rule is `"Product::Component"`.
+
+### `fetch_revision`
+
+Fetches the raw diff of another revision for use as context. Supports:
+
+- **Phabricator**: `{ type = "fetch_revision", revision = "D12345" }` — calls
+ the Phabricator API via `get_phabricator_client()`.
+- **GitHub**: `{ type = "fetch_revision", repo = "org/repo", hash = "abc123" }`
+ — uses the GitHub API with `Accept: application/vnd.github.diff`.
+
+---
+
+## For Mozilla developers: authoring skills and rules
+
+### Overview
+
+You add two things to your repository:
+
+- One or more **skill or documentation files** — Markdown or RST documents
+ with expert knowledge, guidelines, or relevant specs. These can be existing
+ docs already in the tree, or new files written specifically as review guides.
+- Entries in **`skill-rules.toml`** at the repo root — rules that say which
+ files to load when certain code paths are touched.
+
+The review agent picks these up automatically on the next review of a matching
+patch.
+
+### Skill file format
+
+Skill files are Markdown (`.md`); documentation files referenced as context can
+also be RST (`.rst`). All formats accept optional YAML frontmatter. The frontmatter is stripped before injection; only the body
+reaches the LLM. See an [existing example on Searchfox](https://searchfox.org/mozilla-central/source/.claude/skills/firefox-desktop-frontend/SKILL.md).
+
+```markdown
+---
+name: dom-audio
+description: Web Audio API review guidance
+---
+
+## Web Audio review checklist
+
+- `AudioContext` must not be created on a background thread.
+- `AudioNode` lifetimes are graph-managed; do not hold raw pointers.
+- Prefer `MediaStreamTrack` over raw PCM where the spec allows it.
+```
+
+Conventional location for dedicated skill files:
+
+```
+.claude/skills//SKILL.md
+```
+
+### `skill-rules.toml` format
+
+```toml
+[[rules]]
+name = "DOM: Web Audio C++"
+match_extensions = [".cpp", ".h"]
+match_paths = ["dom/media/webaudio/**"]
+actions = [
+ { type = "load_file", path = ".claude/skills/dom-audio/SKILL.md" },
+]
+```
+
+Multiple rules can fire for a single patch. Actions are deduplicated across
+rules, so the same file is never fetched twice.
+
+### Rule matching
+
+All conditions in a rule are ANDed. A rule with no conditions never fires.
+
+| Field | Type | Semantics |
+| --------------------------- | --------------------- | ------------------------------------------------------------------- |
+| `match_extensions` | list of strings | At least one changed file must end with one of these extensions |
+| `match_paths` | list of glob patterns | At least one changed file must match at least one pattern (fnmatch) |
+| `match_bugzilla_components` | list of strings | The patch's bug component (from BMO) must be in the list |
+
+When both `match_extensions` and `match_paths` are present, at least one
+changed file must satisfy **both** simultaneously (not one-file-per-condition).
+
+Example — load a skill when C++ or HTML files change under `dom/media/` (which
+catches both implementation files and their tests):
+
+```toml
+[[rules]]
+name = "DOM: Media C++ and tests"
+match_extensions = [".cpp", ".h", ".html"]
+match_paths = ["dom/media/**"]
+actions = [
+ { type = "load_file", path = ".claude/skills/dom-media/SKILL.md" },
+]
+```
+
+### Action types
+
+#### `load_file` — fetch a file from a GitHub repository
+
+```toml
+# File from the same repo as skill-rules.toml (default)
+{ type = "load_file", path = ".claude/skills/dom-audio/SKILL.md" }
+
+# File from another repo (e.g., cubeb's real-time programming guidelines)
+{ type = "load_file", path = ".claude/skills/real-time-programming/SKILL.md",
+ repo = "mozilla/cubeb" }
+
+# Explicit branch
+{ type = "load_file", path = ".claude/skills/dom-audio/SKILL.md",
+ repo = "mozilla-firefox/firefox", branch = "main" }
+```
+
+Constructs a raw GitHub URL: `https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}`.
+When `repo` is omitted, the repo is derived from the base URL of `skill-rules.toml`.
+`branch` defaults to `main`.
+
+Any Markdown or RST file in the repository can be referenced — skill files,
+architecture docs, READMEs, spec notes, etc.
+
+#### `fetch_revision` — diff of another revision as context
+
+Injects the raw diff of a related revision — useful for providing context about
+a stack parent or a related patch.
+
+```toml
+# Phabricator revision
+{ type = "fetch_revision", revision = "D12345" }
+
+# GitHub commit (use a git commit hash — Firefox moved from Mercurial to GitHub)
+{ type = "fetch_revision", repo = "padenot/cubeb", hash = "abc123def456" }
+```
+
+### Testing your rules and skills
+
+Pass your work-in-progress rules and skill files directly to `patch_review`
+via the MCP. Rules are merged by name into the fetched `skill-rules.toml`
+(replacing a rule with the same name, or appending if new). Skill content is
+injected directly, bypassing the network fetch for matching names.
+
+From Claude Code, trigger a review with your local files:
+
+```
+/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \
+ extra_rules_toml="$(cat skill-rules.toml)" \
+ skill_overrides='{".claude/skills/dom-audio/SKILL.md": "skill content here"}'
+```
+
+`skill_overrides` is a JSON object (as a string). For multi-line skill files,
+build the JSON separately:
+
+```bash
+python3 -c "import json,sys; print(json.dumps({'.claude/skills/dom-audio/SKILL.md': open('.claude/skills/dom-audio/SKILL.md').read()}))"
+```
+
+Then paste the output as the `skill_overrides` value.
+
+This simulates the state as if your changes had already landed — no push
+required. If a skill is missing, verify the rule conditions match the patch's
+changed files (`match_extensions` and `match_paths` against the actual
+`+++ b/` paths in the diff).
+
+### Gotchas
+
+**The injected content counts against the context window.** Keep skill files
+focused and composable — aim for under 500 lines. Multiple small skills loaded
+by separate rules compose better than one large catch-all file.
+
+**Rules are evaluated against the post-patch file list.** Paths in `match_paths`
+should match the `+++ b/` filenames in the diff.
+
+**Glob patterns use `fnmatch` semantics.** `dom/media/**` matches
+`dom/media/foo/bar.cpp`. Single `*` does not cross directory boundaries.
+
+**Frontmatter is stripped automatically.** You can put metadata (name,
+description, owner) in frontmatter without it polluting the injected context.
+
+**Rules fail silently if a URL 404s.** The review proceeds without that skill,
+and the failure is logged at ERROR level and captured by Sentry.
+Check the URL manually if a skill you expect is not appearing.
+
+**`match_bugzilla_components` requires the patch to have an associated bug.**
+On non-Phabricator patches or patches without a bug link, the component is
+unknown and the rule fails closed.
diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py
index a5179ee7ff..b0fe053eb8 100644
--- a/services/mcp/src/bugbug_mcp/server.py
+++ b/services/mcp/src/bugbug_mcp/server.py
@@ -1,6 +1,7 @@
"""MCP server for Firefox Development."""
import functools
+import logging
import os
from pathlib import Path
from typing import Annotated
@@ -12,7 +13,15 @@
from fastmcp.resources import FileResource
from pydantic import Field
-from bugbug.tools.code_review.prompts import SYSTEM_PROMPT_TEMPLATE
+import json
+
+from bugbug.tools.code_review.data_types import LocalPatch
+from bugbug.tools.code_review.prompts import (
+ EXTERNAL_CONTEXT_TEMPLATE,
+ LOCAL_SYSTEM_PROMPT_TEMPLATE,
+ SYSTEM_PROMPT_TEMPLATE,
+)
+from bugbug.tools.code_review.skill_rules import load_external_content_for_diff
from bugbug.tools.core.platforms.bugzilla import SanitizedBug
from bugbug.tools.core.platforms.phabricator import (
PhabricatorPatch,
@@ -20,6 +29,7 @@
)
mcp = FastMCP("Firefox Development MCP Server")
+logger = logging.getLogger(__name__)
@functools.cache
@@ -29,30 +39,123 @@ def get_code_review_tool():
return CodeReviewTool.create()
-@mcp.prompt()
-async def patch_review(
- patch_url: str = Field(description="URL to the Phabricator patch to review."),
+@functools.cache
+def get_local_code_review_tool():
+ """Minimal tool for local diff prompt assembly — no LLM calls on the server."""
+ from bugbug.tools.code_review.agent import CodeReviewTool
+
+ return CodeReviewTool.create(
+ review_comments_db=None,
+ suggestion_filterer=None,
+ )
+
+
+async def _patch_review_impl(
+ patch_url: str | None,
+ diff: str | None,
+ commit_message: str | None,
+ extra_rules_toml: str | None,
+ content_overrides: str | None,
) -> str:
- """Review a code patch from Phabricator."""
- parsed_url = urlparse(patch_url)
- if (
- parsed_url.netloc == "phabricator.services.mozilla.com"
- and parsed_url.path.startswith("/D")
- ):
- revision_id = int(parsed_url.path[2:])
+ if diff:
+ patch = LocalPatch(diff, commit_message=commit_message or "")
+ elif patch_url:
+ parsed_url = urlparse(patch_url)
+ if (
+ parsed_url.netloc == "phabricator.services.mozilla.com"
+ and parsed_url.path.startswith("/D")
+ ):
+ revision_id = int(parsed_url.path[2:])
+ else:
+ raise ValueError(f"Unsupported patch URL: {patch_url}")
+ patch = PhabricatorPatch(revision_id=revision_id)
else:
- raise ValueError(f"Unsupported patch URL: {patch_url}")
+ raise ValueError("Provide either patch_url or diff.")
+
+ tool = get_local_code_review_tool() if diff else get_code_review_tool()
+ prompt_template = LOCAL_SYSTEM_PROMPT_TEMPLATE if diff else SYSTEM_PROMPT_TEMPLATE
+ system_prompt = prompt_template.format(target_software=tool.target_software)
+ patch_summary = "" if diff else tool.patch_summarizer.run(patch)
+
+ parsed_overrides: dict[str, str] | None = None
+ if content_overrides:
+ try:
+ parsed_overrides = json.loads(content_overrides)
+ except json.JSONDecodeError:
+ pass
+
+ external_context = ""
+ if tool._rules_url:
+ bug_id = getattr(patch, "bug_id", None) if patch.has_bug else None
+ content_items = await load_external_content_for_diff(
+ patch.raw_diff,
+ tool._rules_url,
+ bug_id=bug_id,
+ extra_rules_toml=extra_rules_toml if extra_rules_toml != "None" else None,
+ content_overrides=parsed_overrides,
+ )
+ if content_items:
+ content = "\n\n".join(
+ f'\n{body.strip()}\n'
+ for name, body in content_items
+ )
+ external_context = EXTERNAL_CONTEXT_TEMPLATE.format(content=content)
+
+ initial_prompt = tool.generate_initial_prompt(patch, patch_summary, external_context)
+ return system_prompt + "\n\n" + initial_prompt
- patch = PhabricatorPatch(revision_id=revision_id)
- tool = get_code_review_tool()
- system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
- target_software=tool.target_software,
+@mcp.prompt()
+async def patch_review(
+ patch_url: str | None = Field(
+ default=None,
+ description="URL to the Phabricator patch to review.",
+ ),
+ diff: str | None = Field(
+ default=None,
+ description="Raw unified diff to review (for local patches not yet on Phabricator).",
+ ),
+ commit_message: str | None = Field(
+ default=None,
+ description="Commit message for the local diff (optional, used to extract bug ID, title, and Differential Revision URL).",
+ ),
+ extra_rules_toml: str | None = Field(
+ default=None,
+ description=(
+ "TOML content to merge into the fetched skill-rules.toml. "
+ "Rules are matched by name: same name replaces, new name appends. "
+ "Use this to test new or modified rules before landing."
+ ),
+ ),
+ content_overrides: str | None = Field(
+ default=None,
+ description=(
+ "JSON object mapping content name (file path) to body text. Overrides "
+ "the fetched content for matching items, or provides content for new "
+ "items referenced by extra_rules_toml. Use this to test changes before "
+ "landing. Example: "
+ '{".claude/skills/dom-audio.md": "My guidelines."}'
+ ),
+ ),
+) -> str:
+ """Review a code patch from Phabricator or a raw local diff."""
+ return await _patch_review_impl(
+ patch_url, diff, commit_message, extra_rules_toml, content_overrides
)
- patch_summary = tool.patch_summarizer.run(patch)
- initial_prompt = tool.generate_initial_prompt(patch, patch_summary)
- return system_prompt + "\n\n" + initial_prompt
+
+@mcp.tool()
+async def patch_review_tool(
+ patch_url: str | None = None,
+ diff: str | None = None,
+ commit_message: str | None = None,
+ extra_rules_toml: str | None = None,
+ content_overrides: str | None = None,
+) -> str:
+ """Build the patch review prompt. Use diff= for local diffs, patch_url= for Phabricator."""
+ return await _patch_review_impl(
+ patch_url, diff, commit_message, extra_rules_toml, content_overrides
+ )
@mcp.resource(
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 584824544d..ed112be030 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -8,9 +8,20 @@
import pytest
from unidiff import PatchSet
-from bugbug.tools.code_review import data_types, langchain_tools
-from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter
+from bugbug.tools.code_review import data_types, langchain_tools, skill_rules
+from bugbug.tools.code_review.data_types import (
+ ExternalContent,
+ Skill,
+ _strip_frontmatter,
+)
from bugbug.tools.code_review.langchain_tools import _fetch_file, create_load_skill_tool
+from bugbug.tools.code_review.skill_rules import (
+ _merge_rules,
+ get_bug_component,
+ load_external_content_for_diff,
+ parse_diff_files,
+ rule_matches,
+)
from bugbug.tools.code_review.utils import find_comment_scope
from bugbug.tools.core.platforms.patch_apply import (
apply_patched_file,
@@ -480,3 +491,381 @@ def test_fetch_file_skips_revision_when_none():
result = asyncio.run(_fetch_file("f.txt", None, client, patch))
assert result == "latest content"
client.get_file_at_revision.assert_not_called()
+
+
+
+@pytest.fixture(autouse=True)
+def clear_rules_cache():
+ skill_rules._rules_cache.clear()
+ yield
+ skill_rules._rules_cache.clear()
+
+
+def _mock_client_returning(text: str) -> MagicMock:
+ response = MagicMock()
+ response.text = text
+ response.raise_for_status = MagicMock()
+ client = MagicMock()
+ client.get = AsyncMock(return_value=response)
+ return client
+
+
+def test_strip_frontmatter_present():
+ text = "---\nname: mozilla\ndescription: foo\n---\nThe body.\n"
+ assert _strip_frontmatter(text) == "The body.\n"
+
+
+def test_strip_frontmatter_absent():
+ text = "Just a body, no frontmatter.\n"
+ assert _strip_frontmatter(text) == text
+
+
+def test_strip_frontmatter_unterminated():
+ text = "---\nname: mozilla\nstill no closing marker\n"
+ assert _strip_frontmatter(text) == text
+
+
+@pytest.mark.asyncio
+async def test_external_content_load_caches():
+ item = ExternalContent(
+ name="a",
+ url="https://example.com/a.md",
+ description="example",
+ )
+ client = _mock_client_returning("---\nname: a\n---\nbody\n")
+ with patch.object(data_types, "get_http_client", return_value=client):
+ first = await item.load()
+ second = await item.load()
+ assert first == "body\n"
+ assert second == "body\n"
+ assert client.get.await_count == 1
+
+
+_RULES_TOML = """
+[[rules]]
+name = "Audio/Video C++"
+match_extensions = [".cpp", ".h"]
+match_paths = ["dom/media/**"]
+actions = [
+ { type = "load_file", path = ".claude/skills/dom-media.md" },
+]
+
+[[rules]]
+name = "WebIDL"
+match_extensions = [".webidl"]
+actions = [
+ { type = "load_file", path = ".claude/skills/webidl.md", repo = "mozilla-firefox/firefox" },
+]
+
+[[rules]]
+name = "Any JS"
+match_extensions = [".js"]
+actions = [
+ { type = "load_file", path = ".claude/skills/js-style.md" },
+]
+
+[[rules]]
+name = "Bugzilla component only"
+match_bugzilla_components = ["Core::DOM: Web Audio"]
+actions = [
+ { type = "load_file", path = ".claude/skills/dom-audio.md" },
+]
+"""
+
+_DIFF_MEDIA = """\
+diff --git a/dom/media/Foo.cpp b/dom/media/Foo.cpp
+--- a/dom/media/Foo.cpp
++++ b/dom/media/Foo.cpp
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+_DIFF_WEBIDL = """\
+diff --git a/dom/webidl/Foo.webidl b/dom/webidl/Foo.webidl
+--- a/dom/webidl/Foo.webidl
++++ b/dom/webidl/Foo.webidl
+@@ -1,1 +1,1 @@
+-old
++new
+"""
+
+
+def test_parse_diff_files():
+ files = parse_diff_files(_DIFF_MEDIA)
+ assert files == {"dom/media/Foo.cpp"}
+
+
+def test_rule_matches_extension_and_path():
+ rule = {
+ "match_extensions": [".cpp"],
+ "match_paths": ["dom/media/**"],
+ }
+ assert rule_matches(rule, {"dom/media/Foo.cpp"})
+ assert not rule_matches(rule, {"dom/canvas/Foo.cpp"})
+ assert not rule_matches(rule, {"dom/media/Foo.js"})
+
+
+def test_rule_matches_extension_only():
+ rule = {"match_extensions": [".webidl"]}
+ assert rule_matches(rule, {"dom/webidl/Foo.webidl"})
+ assert not rule_matches(rule, {"dom/media/Foo.cpp"})
+
+
+def test_rule_no_conditions_never_matches():
+ assert not rule_matches({}, {"dom/media/Foo.cpp"})
+
+
+def test_rule_bugzilla_component_fails_closed_without_component():
+ rule = {"match_bugzilla_components": ["Core::DOM: Web Audio"]}
+ assert not rule_matches(rule, {"dom/media/Foo.cpp"}, bug_component=None)
+
+
+def test_rule_bugzilla_component_matches():
+ rule = {"match_bugzilla_components": ["Core::DOM: Web Audio"]}
+ assert rule_matches(
+ rule, {"dom/media/Foo.cpp"}, bug_component="Core::DOM: Web Audio"
+ )
+ assert not rule_matches(
+ rule, {"dom/media/Foo.cpp"}, bug_component="Core::Networking"
+ )
+
+
+@pytest.mark.asyncio
+async def test_get_bug_component():
+ def fake_bugzilla(bug_id, include_fields, bughandler, bugdata):
+ bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata)
+ mock = MagicMock()
+ mock.get_data.return_value.wait = MagicMock()
+ return mock
+
+ with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla):
+ component = await get_bug_component(12345)
+
+ assert component == "Core::DOM: Web Audio"
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_load_file():
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+ content_body = "---\nname: dom-media\n---\nAudio guidelines.\n"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ content_response = MagicMock()
+ content_response.text = content_body
+ content_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(_DIFF_MEDIA, rules_url)
+
+ assert len(results) == 1
+ name, content = results[0]
+ assert name == ".claude/skills/dom-media.md"
+ assert content == "Audio guidelines.\n"
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_no_match():
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ diff = "diff --git a/build/Makefile b/build/Makefile\n+++ b/build/Makefile\n+new\n"
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(diff, rules_url)
+
+ assert results == []
+ assert client.get.await_count == 1
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_for_diff_rules_fetch_failure():
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=httpx.ConnectError("boom"))
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, "https://example.com/skill-rules.toml"
+ )
+
+ assert results == []
+
+
+@pytest.mark.asyncio
+async def test_load_external_content_deduplicates_actions():
+ toml = """
+[[rules]]
+name = "Rule A"
+match_extensions = [".cpp"]
+match_paths = ["dom/media/**"]
+actions = [{ type = "load_file", path = "skills/guide.md" }]
+
+[[rules]]
+name = "Rule B"
+match_extensions = [".cpp"]
+actions = [{ type = "load_file", path = "skills/guide.md" }]
+"""
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+
+ rules_response = MagicMock()
+ rules_response.text = toml
+ rules_response.raise_for_status = MagicMock()
+
+ content_response = MagicMock()
+ content_response.text = "body"
+ content_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(side_effect=[rules_response, content_response])
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(_DIFF_MEDIA, rules_url)
+
+ assert len(results) == 1
+ assert client.get.await_count == 2
+
+
+# --- _merge_rules ---
+
+
+def test_merge_rules_appends_new():
+ base = [{"name": "A", "match_extensions": [".cpp"], "actions": []}]
+ extra = """
+[[rules]]
+name = "B"
+match_extensions = [".js"]
+actions = []
+"""
+ merged = _merge_rules(base, extra)
+ assert len(merged) == 2
+ assert merged[1]["name"] == "B"
+
+
+def test_merge_rules_replaces_by_name():
+ base = [{"name": "A", "match_extensions": [".cpp"], "actions": []}]
+ extra = """
+[[rules]]
+name = "A"
+match_extensions = [".cpp", ".h"]
+actions = []
+"""
+ merged = _merge_rules(base, extra)
+ assert len(merged) == 1
+ assert merged[0]["match_extensions"] == [".cpp", ".h"]
+
+
+def test_merge_rules_empty_extra():
+ base = [{"name": "A", "match_extensions": [".cpp"], "actions": []}]
+ assert _merge_rules(base, "") == base
+ assert _merge_rules(base, "# comment\n") == base
+
+
+# --- content_overrides ---
+
+
+@pytest.mark.asyncio
+async def test_content_override_used_instead_of_fetch():
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/dom-media.md": "Overridden content.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, rules_url, content_overrides=overrides
+ )
+
+ assert len(results) == 1
+ assert results[0] == (".claude/skills/dom-media.md", "Overridden content.\n")
+ assert client.get.await_count == 1 # only the rules fetch, not the content
+
+
+@pytest.mark.asyncio
+async def test_extra_rules_toml_appended():
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+ extra = """
+[[rules]]
+name = "Extra JS rule"
+match_extensions = [".js"]
+actions = [{ type = "load_file", path = ".claude/skills/js.md" }]
+"""
+ diff_js = "diff --git a/Foo.js b/Foo.js\n+++ b/Foo.js\n+new\n"
+
+ # Use a base TOML without any .js rules so only the extra rule fires.
+ base_toml = """
+[[rules]]
+name = "Audio/Video C++"
+match_extensions = [".cpp", ".h"]
+match_paths = ["dom/media/**"]
+actions = [{ type = "load_file", path = ".claude/skills/dom-media.md" }]
+"""
+ rules_response = MagicMock()
+ rules_response.text = base_toml
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/js.md": "JS guidelines.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ diff_js, rules_url, extra_rules_toml=extra, content_overrides=overrides
+ )
+
+ assert len(results) == 1
+ assert results[0] == (".claude/skills/js.md", "JS guidelines.\n")
+
+
+@pytest.mark.asyncio
+async def test_extra_rules_toml_replaces_by_name():
+ rules_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/skill-rules.toml"
+ # Replace the "Audio/Video C++" rule with a version that loads different content
+ extra = """
+[[rules]]
+name = "Audio/Video C++"
+match_extensions = [".cpp", ".h"]
+match_paths = ["dom/media/**"]
+actions = [{ type = "load_file", path = ".claude/skills/dom-media-v2.md" }]
+"""
+ rules_response = MagicMock()
+ rules_response.text = _RULES_TOML
+ rules_response.raise_for_status = MagicMock()
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=rules_response)
+
+ overrides = {".claude/skills/dom-media-v2.md": "Updated guidelines.\n"}
+
+ with patch.object(data_types, "get_http_client", return_value=client):
+ with patch.object(skill_rules, "get_http_client", return_value=client):
+ results = await load_external_content_for_diff(
+ _DIFF_MEDIA, rules_url, extra_rules_toml=extra, content_overrides=overrides
+ )
+
+ assert len(results) == 1
+ assert results[0] == (".claude/skills/dom-media-v2.md", "Updated guidelines.\n")