Skip to content

Commit b3dc033

Browse files
authored
Merge pull request #1685 from codeflash-ai/feat/subagent-low-effort-defaults
feat: set low effort and skip review/explanation in subagent mode
2 parents: c7d4e5b + dea6710 — commit b3dc033

4 files changed

Lines changed: 63 additions & 36 deletions

File tree

codeflash/cli_cmds/cli.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@ def parse_args() -> Namespace:
148148
args.yes = True
149149
args.no_pr = True
150150
args.worktree = True
151+
args.effort = "low"
151152
return process_and_validate_cmd_args(args)
152153

153154

codeflash/cli_cmds/console.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import contextlib
34
import logging
45
from collections import deque
56
from contextlib import contextmanager
@@ -408,6 +409,7 @@ def subagent_log_optimization_result(
408409
new_code: dict[Path, str],
409410
review: str,
410411
test_results: TestResults,
412+
project_root: Path | None = None,
411413
) -> None:
412414
import sys
413415
from xml.sax.saxutils import escape
@@ -421,7 +423,11 @@ def subagent_log_optimization_result(
421423
old = original_code.get(path, "")
422424
new = new_code.get(path, "")
423425
if old != new:
424-
diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
426+
display_path = str(path)
427+
if project_root is not None:
428+
with contextlib.suppress(ValueError):
429+
display_path = str(path.relative_to(project_root))
430+
diff = unified_diff_strings(old, new, fromfile=display_path, tofile=display_path)
425431
if diff:
426432
diff_parts.append(diff)
427433

@@ -468,18 +474,17 @@ def subagent_log_optimization_result(
468474
if new_code[path] != original_code.get(path, ""):
469475
xml.append(f' <optimized-code file="{escape(str(path))}">{escape(new_code[path])}</optimized-code>')
470476
xml.append(" <action>")
471-
xml.append(" First, present a short summary: function name, performance improvement, and explanation.")
472-
xml.append(" If you are running inside an IDE (VS Code, Cursor, Windsurf, etc.):")
473-
xml.append(" For each optimized-code element, use the Write tool to write the optimized code to the file.")
474-
xml.append(" This will surface the diff in the IDE's native diff view for the user to accept or reject.")
475-
xml.append(" If you are running in a terminal without an IDE connection:")
476-
xml.append(" Present this optimization to the user using AskUserQuestion.")
477-
xml.append(" Show the function name, performance improvement, and explanation as the question text.")
478-
xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
477+
xml.append(" 1. Review the diff and optimized code yourself. Write a brief assessment (2-3 sentences) covering:")
478+
xml.append(" - Whether the optimization is correct and preserves behavior")
479+
xml.append(" - What technique was used (e.g. algorithmic improvement, caching, vectorization)")
480+
xml.append(" - Any risks or trade-offs (e.g. increased memory usage, reduced readability)")
481+
xml.append(" 2. Present this optimization to the user using AskUserQuestion.")
482+
xml.append(" Show the function name, performance improvement, and your review as the question text.")
483+
xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
479484
xml.append(
480-
" Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
485+
" Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
481486
)
482-
xml.append(" If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
487+
xml.append(" 3. If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
483488
xml.append(" </action>")
484489
xml.append("</codeflash-optimization>")
485490

codeflash/optimization/function_optimizer.py

Lines changed: 44 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1018,16 +1018,18 @@ def select_best_optimization(
10181018
runtimes_list.append(new_best_opt.runtime)
10191019

10201020
if len(optimization_ids) > 1:
1021-
future_ranking = self.executor.submit(
1022-
ai_service_client.generate_ranking,
1023-
diffs=diff_strs,
1024-
optimization_ids=optimization_ids,
1025-
speedups=speedups_list,
1026-
trace_id=self.get_trace_id(exp_type),
1027-
function_references=function_references,
1028-
)
1029-
concurrent.futures.wait([future_ranking])
1030-
ranking = future_ranking.result()
1021+
ranking = None
1022+
if not is_subagent_mode():
1023+
future_ranking = self.executor.submit(
1024+
ai_service_client.generate_ranking,
1025+
diffs=diff_strs,
1026+
optimization_ids=optimization_ids,
1027+
speedups=speedups_list,
1028+
trace_id=self.get_trace_id(exp_type),
1029+
function_references=function_references,
1030+
)
1031+
concurrent.futures.wait([future_ranking])
1032+
ranking = future_ranking.result()
10311033
if ranking:
10321034
min_key = ranking[0]
10331035
else:
@@ -2390,6 +2392,25 @@ def process_review(
23902392
code_context: CodeOptimizationContext,
23912393
function_references: str,
23922394
) -> None:
2395+
if is_subagent_mode():
2396+
subagent_log_optimization_result(
2397+
function_name=explanation.function_name,
2398+
file_path=explanation.file_path,
2399+
perf_improvement_line=explanation.perf_improvement_line,
2400+
original_runtime_ns=explanation.original_runtime_ns,
2401+
best_runtime_ns=explanation.best_runtime_ns,
2402+
raw_explanation=explanation.raw_explanation_message,
2403+
original_code=original_code_combined,
2404+
new_code=new_code_combined,
2405+
review="",
2406+
test_results=explanation.winning_behavior_test_results,
2407+
project_root=self.project_root,
2408+
)
2409+
mark_optimization_success(
2410+
trace_id=self.function_trace_id, is_optimization_found=best_optimization is not None
2411+
)
2412+
return
2413+
23932414
coverage_message = (
23942415
original_code_baseline.coverage_results.build_message()
23952416
if original_code_baseline.coverage_results
@@ -2537,20 +2558,7 @@ def process_review(
25372558
self.optimization_review = opt_review_result.review
25382559

25392560
# Display the reviewer result to the user
2540-
if is_subagent_mode():
2541-
subagent_log_optimization_result(
2542-
function_name=new_explanation.function_name,
2543-
file_path=new_explanation.file_path,
2544-
perf_improvement_line=new_explanation.perf_improvement_line,
2545-
original_runtime_ns=new_explanation.original_runtime_ns,
2546-
best_runtime_ns=new_explanation.best_runtime_ns,
2547-
raw_explanation=new_explanation.raw_explanation_message,
2548-
original_code=original_code_combined,
2549-
new_code=new_code_combined,
2550-
review=opt_review_result.review,
2551-
test_results=new_explanation.winning_behavior_test_results,
2552-
)
2553-
elif opt_review_result.review:
2561+
if opt_review_result.review:
25542562
review_display = {
25552563
"high": ("[bold green]High[/bold green]", "green", "Recommended to merge"),
25562564
"medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"),
@@ -2667,12 +2675,15 @@ def establish_original_code_baseline(
26672675
logger.debug(
26682676
f"[PIPELINE] Test file {idx}: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}"
26692677
)
2678+
total_looping_time = (
2679+
TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
2680+
)
26702681
behavioral_results, coverage_results = self.run_and_parse_tests(
26712682
testing_type=TestingMode.BEHAVIOR,
26722683
test_env=test_env,
26732684
test_files=self.test_files,
26742685
optimization_iteration=0,
2675-
testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
2686+
testing_time=total_looping_time,
26762687
enable_coverage=True,
26772688
code_context=code_context,
26782689
)
@@ -2713,6 +2724,7 @@ def establish_original_code_baseline(
27132724
self.instrument_async_for_mode(TestingMode.PERFORMANCE)
27142725

27152726
try:
2727+
subagent = is_subagent_mode()
27162728
benchmarking_results, _ = self.run_and_parse_tests(
27172729
testing_type=TestingMode.PERFORMANCE,
27182730
test_env=test_env,
@@ -2721,6 +2733,7 @@ def establish_original_code_baseline(
27212733
testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
27222734
enable_coverage=False,
27232735
code_context=code_context,
2736+
**({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
27242737
)
27252738
logger.debug(f"[BENCHMARK-DONE] Got {len(benchmarking_results.test_results)} benchmark results")
27262739
finally:
@@ -2871,6 +2884,10 @@ def run_optimized_candidate(
28712884

28722885
try:
28732886
self.instrument_capture(file_path_to_helper_classes)
2887+
2888+
total_looping_time = (
2889+
TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
2890+
)
28742891
candidate_behavior_results, _ = self.run_and_parse_tests(
28752892
testing_type=TestingMode.BEHAVIOR,
28762893
test_env=test_env,
@@ -2911,13 +2928,15 @@ def run_optimized_candidate(
29112928
self.instrument_async_for_mode(TestingMode.PERFORMANCE)
29122929

29132930
try:
2931+
subagent = is_subagent_mode()
29142932
candidate_benchmarking_results, _ = self.run_and_parse_tests(
29152933
testing_type=TestingMode.PERFORMANCE,
29162934
test_env=test_env,
29172935
test_files=self.test_files,
29182936
optimization_iteration=optimization_candidate_index,
29192937
testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
29202938
enable_coverage=False,
2939+
**({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
29212940
)
29222941
finally:
29232942
if self.function_to_optimize.is_async:

tests/test_languages/test_javascript_test_runner.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Tests for JavaScript/Jest test runner functionality."""
22

3+
import sys
34
import tempfile
45
from pathlib import Path
56
from unittest.mock import patch, MagicMock
@@ -896,6 +897,7 @@ def test_line_profile_command_uses_bundled_reporter(self):
896897
reporter_args = [a for a in cmd if "--reporters=codeflash/jest-reporter" in a]
897898
assert len(reporter_args) == 1
898899

900+
@pytest.mark.skipif(sys.platform == "win32", reason="Node.js subprocess pipe behavior unreliable on Windows CI")
899901
def test_reporter_produces_valid_junit_xml(self):
900902
"""The reporter JS should produce JUnit XML parseable by junitparser."""
901903
import subprocess

0 commit comments

Comments
 (0)