Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions bugbug/tools/code_review/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from bugbug.tools.code_review.prompts import (
CODE_REVIEW_TODO_PROMPT,
CODE_REVIEW_TODO_TOOL_DESCRIPTION,
EXTERNAL_CONTEXT_TEMPLATE,
FIRST_MESSAGE_TEMPLATE,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
Expand Down Expand Up @@ -70,6 +71,7 @@ def __init__(
target_software: str = "Mozilla Firefox",
todo_enabled: bool = True,
skills: Optional[list[Skill]] = None,
rules_url: Optional[str] = None,
) -> None:
super().__init__()

Expand All @@ -94,6 +96,8 @@ def __init__(
self.patch_summarizer = patch_summarizer
self.suggestion_filterer = suggestion_filterer

self._rules_url = rules_url

tools = list(SEARCHFOX_TOOLS)

if skills:
Expand Down Expand Up @@ -160,6 +164,15 @@ def create(cls, **kwargs):
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
mcp_servers=[
{
"type": "url",
"url": os.getenv(
"BUGBUG_MOZ_MCP_URL", "https://mcp-dev.moz.tools/mcp"
),
"name": "mozilla",
}
],
)

if "patch_summarizer" not in kwargs:
Expand All @@ -177,30 +190,38 @@ def create(cls, **kwargs):

kwargs["skills"] = REVIEW_SKILLS

if "rules_url" not in kwargs:
kwargs["rules_url"] = os.getenv("BUGBUG_REVIEW_RULES_URL")

return cls(**kwargs)

def count_tokens(self, text):
return len(self._tokenizer.encode(text))

def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
def generate_initial_prompt(
self, patch: Patch, patch_summary: str, external_context: str = ""
) -> str:
created_before = patch.date_created if self.is_experiment_env else None

return FIRST_MESSAGE_TEMPLATE.format(
patch=format_patch_set(patch.patch_set),
patch_summarization=patch_summary,
external_context=external_context,
comment_examples=self._get_comment_examples(patch, created_before),
approved_examples=self._get_generated_examples(patch, created_before),
)

async def generate_review_comments(
self, patch: Patch, patch_summary: str
self, patch: Patch, patch_summary: str, external_context: str = ""
) -> list[GeneratedReviewComment]:
try:
async for chunk in self.agent.astream(
{
"messages": [
HumanMessage(
self.generate_initial_prompt(patch, patch_summary)
self.generate_initial_prompt(
patch, patch_summary, external_context
)
),
]
},
Expand All @@ -214,14 +235,40 @@ async def generate_review_comments(

return result["structured_response"].comments

async def run(self, patch: Patch) -> CodeReviewToolResponse:
async def run(
self,
patch: Patch,
extra_rules_toml: Optional[str] = None,
content_overrides: Optional[dict[str, str]] = None,
) -> CodeReviewToolResponse:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")

patch_summary = self.patch_summarizer.run(patch)

external_context = ""
if self._rules_url:
from bugbug.tools.code_review.skill_rules import (
load_external_content_for_diff,
)

bug_id = getattr(patch, "bug_id", None)
content_items = await load_external_content_for_diff(
patch.raw_diff,
self._rules_url,
bug_id=bug_id,
extra_rules_toml=extra_rules_toml,
content_overrides=content_overrides,
)
if content_items:
content = "\n\n".join(
f'<context name="{name}">\n{body.strip()}\n</context>'
for name, body in content_items
)
external_context = EXTERNAL_CONTEXT_TEMPLATE.format(content=content)

unfiltered_suggestions = await self.generate_review_comments(
patch, patch_summary
patch, patch_summary, external_context
)
if not unfiltered_suggestions:
logger.info("No suggestions were generated")
Expand Down
129 changes: 129 additions & 0 deletions bugbug/tools/code_review/data_types.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import re
from datetime import datetime

import httpx
import tenacity
from pydantic import BaseModel, Field, PrivateAttr

from bugbug.tools.core.connection import get_http_client
from bugbug.tools.core.data_types import InlineComment
from bugbug.tools.core.platforms.base import Patch


class GeneratedReviewComment(BaseModel):
Expand Down Expand Up @@ -82,3 +85,129 @@ async def load(self) -> str:

self._cached_body = _strip_frontmatter(response.text)
return self._cached_body


_BUG_RE = re.compile(r"[Bb]ug\s+(\d+)")
_DIFFERENTIAL_RE = re.compile(r"Differential Revision:\s*(https?://\S+)")
_DATE_RE = re.compile(r"^Date:\s+(.+)$", re.MULTILINE)


class LocalPatch(Patch):
"""A patch from a local diff string, not backed by any platform."""

def __init__(
self,
diff: str,
commit_message: str = "",
) -> None:
self._diff = diff
self._commit_message = commit_message
lines = commit_message.splitlines()
self._title = lines[0].strip() if lines else "Local patch"
self._description = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
m = _BUG_RE.search(commit_message)
self._bug_id: int | None = int(m.group(1)) if m else None
m2 = _DIFFERENTIAL_RE.search(commit_message)
self._url = m2.group(1) if m2 else ""
m3 = _DATE_RE.search(commit_message)
if m3:
from email.utils import parsedate_to_datetime
try:
self._date = parsedate_to_datetime(m3.group(1))
except Exception:
self._date = datetime.now()
else:
self._date = datetime.now()

@property
def patch_id(self) -> str:
return "local"

@property
def raw_diff(self) -> str:
return self._diff

@property
def date_created(self) -> datetime:
return self._date

@property
def has_bug(self) -> bool:
return self._bug_id is not None

@property
def bug_id(self) -> int | None:
return self._bug_id

@property
def bug_title(self) -> str:
return ""

@property
def patch_title(self) -> str:
return self._title

@property
def patch_description(self) -> str:
return self._description

@property
def patch_url(self) -> str:
return self._url

def is_accessible(self) -> bool:
return True

def is_public(self) -> bool:
return True

async def get_new_file(self, file_path: str) -> str:
raise FileNotFoundError(
f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
)

async def get_old_file(self, file_path: str) -> str:
raise FileNotFoundError(
f"No repository checkout available for '{file_path}' — use local tools to inspect it directly."
)


class ExternalContentLoadError(Exception):
"""Raised when an ExternalContent body cannot be loaded."""


@tenacity.retry(
stop=tenacity.stop_after_attempt(3),
wait=tenacity.wait_exponential(multiplier=1, min=1),
retry=tenacity.retry_if_exception_type(httpx.TransportError),
reraise=True,
)
async def _fetch_url(url: str) -> httpx.Response:
response = await get_http_client().get(url, timeout=30)
response.raise_for_status()
return response


class ExternalContent(BaseModel):
"""An external file fetched and injected as context for the review."""

name: str = Field(description="A unique identifier for this content item.")
url: str = Field(description="HTTPS URL of the file to fetch.")
description: str = Field(description="Short description of what this content provides.")

_cached_body: str | None = PrivateAttr(default=None)

async def load(self) -> str:
"""Return the content body, fetching and caching it on first use."""
if self._cached_body is not None:
return self._cached_body

try:
response = await _fetch_url(self.url)
except httpx.HTTPError as e:
raise ExternalContentLoadError(
f"Could not load content '{self.name}' from {self.url}"
) from e

self._cached_body = _strip_frontmatter(response.text)
return self._cached_body
87 changes: 25 additions & 62 deletions bugbug/tools/code_review/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,80 +5,43 @@

"""Prompt templates for code review agent."""

from bugbug.tools.code_review.data_types import Skill

TARGET_SOFTWARE: str | None = None


SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
from pathlib import Path

## Instructions

Follow this systematic approach to review the patch:

**Step 1: Analyze the Changes**
- Understand what the patch is trying to accomplish
- Use the patch summary for context, but focus primarily on what you can see in the actual diff
- Identify the intent and structure of the changes

**Step 2: Identify Issues**
- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
- Focus ONLY on new or changed lines (lines that begin with `+`)
- Never comment on unmodified code
- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns

**Step 3: Verify and Assess Confidence**
- Use available tools when you need to verify concerns or gather additional context
- Only include comments where you are at least 80% confident the issue is valid
- When uncertain about an issue, use available tools to verify before commenting
- Do not suggest issues you cannot verify with available context
from bugbug.tools.code_review.data_types import Skill

**Step 4: Sort and Order Comments**
- Sort comments by descending confidence and importance
- Start with issues you are certain are valid and that are most critical
- Assign each comment a numeric order starting at 1
_PROMPTS_DIR = Path(__file__).parent / "prompts"

**Step 5: Write Clear, Constructive Comments**
- Use direct, declarative language - state the problem definitively, then suggest the fix
- Keep comments short and specific
- Use directive language: "Fix", "Remove", "Change", "Add"
- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
- Focus strictly on code-related concerns
TARGET_SOFTWARE: str | None = None

## What NOT to Include
_SYSTEM_PROMPT_RAW = (_PROMPTS_DIR / "system.md").read_text()

Do not write comments that:
- Refer to unmodified code (lines without a `+` prefix)
- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
- Provide praise or restate obvious facts
- Focus on testing concerns
- Point out issues that are already handled in the visible code
- Suggest problems based on assumptions without verifying the context
- Flag style preferences without clear coding standard violations
_CONTEXT_TOOLS_AGENT = """\
- `expand_context` to read surrounding code in the patched files
- `find_function_definition` to look up callers or definitions\
"""

_CONTEXT_TOOLS_LOCAL = """\
- `Read` and `Grep` to inspect files in the local checkout
- `Bash` with `searchfox-cli` to query Searchfox for definitions, callers, and cross-references\
"""

FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:

<patch_summary>
{patch_summarization}
</patch_summary>


Here are examples of good code review comments to guide your style and approach:
SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
target_software="{target_software}",
context_tools=_CONTEXT_TOOLS_AGENT,
)

<examples>
{comment_examples}
{approved_examples}
</examples>
LOCAL_SYSTEM_PROMPT_TEMPLATE = _SYSTEM_PROMPT_RAW.format(
target_software="{target_software}",
context_tools=_CONTEXT_TOOLS_LOCAL,
)

EXTERNAL_CONTEXT_TEMPLATE = """

Here is the patch you need to review:
<external_context>
{content}
</external_context>"""

<patch>
{patch}
</patch>
"""
FIRST_MESSAGE_TEMPLATE = (_PROMPTS_DIR / "first_message.md").read_text()


TEMPLATE_COMMENT_EXAMPLE = """Patch example {example_number}:
Expand Down
21 changes: 21 additions & 0 deletions bugbug/tools/code_review/prompts/first_message.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Here is a summary of the patch:

<patch_summary>
{patch_summarization}
</patch_summary>
{external_context}


Here are examples of good code review comments to guide your style and approach:

<examples>
{comment_examples}
{approved_examples}
</examples>


Here is the patch you need to review:

<patch>
{patch}
</patch>
Loading