Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit. Hold Shift + click to select a range.
28f80fe
Add live BCQuality consumption building blocks for code-review
Jun 25, 2026
15c3feb
code-review: wire live BCQuality path into copilot agent + tests
Jun 26, 2026
196f302
code-review: add BCApps pre-#8700 inline review knowledge as old base…
Jun 26, 2026
058a5e1
code-review: markdown formatting in BCApps AGENTS.md (blank line befo…
Jun 26, 2026
2fa1ccd
Merge main into code-review-live-bcquality (sync leaderboard + main u…
Jun 26, 2026
e8713f7
code-review docs: add Experiment Leaderboard table (vanilla / inline …
Jun 26, 2026
b19f7e5
code-review docs: add Agent column, drop Vanilla reference from Exper…
Jun 26, 2026
fd275cd
Fix pre-commit whitespace in instruction files; rename F1 column to M…
Jun 26, 2026
7da69c5
code-review: address self-review (reuse review.json constant, determi…
Jun 26, 2026
5bf1745
code-review: reuse review.json constant + deterministic BCQuality sev…
Jun 26, 2026
edd6dbd
code-review: cache BCQuality clone per-SHA (clone once, copy+filter p…
Jun 26, 2026
b07213b
code-review: drop BCQuality clone cache (clone is cheap); keep git st…
Jun 26, 2026
72f7c51
code-review: externalize BCQuality bootstrap prompt to config.yaml Ji…
Jun 26, 2026
0dc121c
code-review: add super-skill execution-discipline / progress markers …
Jun 26, 2026
5a3b5a7
Merge branch 'main' into private/wenjiefan/code-review-live-bcquality
gggdttt Jun 26, 2026
4c6c104
code-review: make BCQuality task-context goal/inputs-available config…
Jun 26, 2026
88b3ea7
Merge branch 'main' into private/wenjiefan/code-review-live-bcquality
gggdttt Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 47 additions & 15 deletions src/bcbench/agent/copilot/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

from bcbench.agent.copilot.metrics import parse_metrics
from bcbench.agent.shared import build_al_lsp_plugin, build_mcp_config, build_prompt, parse_tool_usage_from_hooks
from bcbench.agent.shared.codereview_bcquality import parse_bcquality_config, prepare_bcquality_workspace
from bcbench.config import get_config
from bcbench.dataset import BaseDatasetEntry
from bcbench.evaluate.codereview import REVIEW_OUTPUT_FILE
from bcbench.exceptions import AgentError, AgentTimeoutError
from bcbench.logger import get_logger
from bcbench.operations import setup_agent_skills, setup_custom_agent, setup_hooks, setup_instructions_from_config
Expand Down Expand Up @@ -41,22 +43,49 @@ def run_copilot_agent(

logger.info(f"Running GitHub Copilot CLI on: {entry.instance_id}")

prompt: str = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp, container_name=container_name)
lsp_plugin_dir: Path | None = build_al_lsp_plugin(entry, category, repo_path, AgentType.COPILOT, al_lsp=al_lsp, container_name=container_name)
instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
skills_enabled: bool = setup_agent_skills(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
tool_log_path: Path = setup_hooks(repo_path, AgentType.COPILOT, output_dir)
config = ExperimentConfiguration(
mcp_servers=mcp_server_names,
al_lsp_enabled=lsp_plugin_dir is not None,
custom_instructions=instructions_enabled,
skills_enabled=skills_enabled,
custom_agent=custom_agent,
)

logger.info(f"Executing Copilot CLI in directory: {repo_path}")

bcquality_config = parse_bcquality_config(copilot_config)
bcquality_live: bool = category == EvaluationCategory.CODE_REVIEW and bcquality_config is not None and bcquality_config.enabled

if bcquality_live:
assert bcquality_config is not None
# Live BCQuality consumption: clone+filter BCQuality and route the agent through skills/entry.md.
# The filtered clone (not the repo) becomes the Copilot CLI working directory; the repo under
# review is granted via --add-dir. No static instruction/skill/agent injection in this mode.
bootstrap_template: str = copilot_config["prompt"]["bcquality-bootstrap-template"]
bcquality_root, prompt = prepare_bcquality_workspace(bcquality_config, output_dir / "bcquality-clone", repo_path, REVIEW_OUTPUT_FILE, bootstrap_template)
work_dir: Path = bcquality_root
instructions_enabled: bool = False
skills_enabled: bool = False
custom_agent: str | None = None
# Copilot reads hooks from the CWD's .github/hooks, so install them into the clone to keep tool-usage metrics.
tool_log_path: Path = setup_hooks(bcquality_root, AgentType.COPILOT, output_dir)
config = ExperimentConfiguration(
mcp_servers=mcp_server_names,
al_lsp_enabled=lsp_plugin_dir is not None,
custom_instructions=False,
skills_enabled=False,
custom_agent=None,
bcquality=True,
)
else:
prompt = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
work_dir = repo_path
instructions_enabled = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
skills_enabled = setup_agent_skills(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
custom_agent = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
tool_log_path = setup_hooks(repo_path, AgentType.COPILOT, output_dir)
config = ExperimentConfiguration(
mcp_servers=mcp_server_names,
al_lsp_enabled=lsp_plugin_dir is not None,
custom_instructions=instructions_enabled,
skills_enabled=skills_enabled,
custom_agent=custom_agent,
)

logger.info(f"Executing Copilot CLI in directory: {work_dir}")
logger.debug(f"Using prompt:\n{prompt}")

# Prefer copilot.exe over copilot.bat/copilot.cmd shims on Windows: the .bat shim invokes PowerShell,
Expand All @@ -83,12 +112,15 @@ def run_copilot_agent(
cmd_args.append(f"--plugin-dir={lsp_plugin_dir}")
if custom_agent:
cmd_args.append(f"--agent={custom_agent}")
if bcquality_live:
# Grant the agent access to the repo under review (it lives outside the BCQuality CWD).
cmd_args.extend(["--add-dir", str(repo_path)])

logger.debug(f"Copilot command args: {cmd_args}")

result = subprocess.run(
cmd_args,
cwd=str(repo_path),
cwd=str(work_dir),
env={
**os.environ,
"GITHUB_COPILOT_PROMPT_MODE_REPO_HOOKS": "true",
Expand Down
Loading
Loading