Merge pull request #1685 from codeflash-ai/feat/subagent-low-effort-defaults

aseembits93 · web-flow · commit b3dc0339e03b · 2026-03-09T13:49:41.000-07:00
feat: set low effort and skip review/explanation in subagent mode
diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
@@ -148,6 +148,7 @@ def parse_args() -> Namespace:
         args.yes = True
         args.no_pr = True
         args.worktree = True
+        args.effort = "low"
     return process_and_validate_cmd_args(args)
 
 
diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import contextlib
 import logging
 from collections import deque
 from contextlib import contextmanager
@@ -408,6 +409,7 @@ def subagent_log_optimization_result(
     new_code: dict[Path, str],
     review: str,
     test_results: TestResults,
+    project_root: Path | None = None,
 ) -> None:
     import sys
     from xml.sax.saxutils import escape
@@ -421,7 +423,11 @@ def subagent_log_optimization_result(
         old = original_code.get(path, "")
         new = new_code.get(path, "")
         if old != new:
-            diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
+            display_path = str(path)
+            if project_root is not None:
+                with contextlib.suppress(ValueError):
+                    display_path = str(path.relative_to(project_root))
+            diff = unified_diff_strings(old, new, fromfile=display_path, tofile=display_path)
             if diff:
                 diff_parts.append(diff)
 
@@ -468,18 +474,17 @@ def subagent_log_optimization_result(
         if new_code[path] != original_code.get(path, ""):
             xml.append(f'  <optimized-code file="{escape(str(path))}">{escape(new_code[path])}</optimized-code>')
     xml.append("  <action>")
-    xml.append("    First, present a short summary: function name, performance improvement, and explanation.")
-    xml.append("    If you are running inside an IDE (VS Code, Cursor, Windsurf, etc.):")
-    xml.append("      For each optimized-code element, use the Write tool to write the optimized code to the file.")
-    xml.append("      This will surface the diff in the IDE's native diff view for the user to accept or reject.")
-    xml.append("    If you are running in a terminal without an IDE connection:")
-    xml.append("      Present this optimization to the user using AskUserQuestion.")
-    xml.append("      Show the function name, performance improvement, and explanation as the question text.")
-    xml.append("      Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
+    xml.append("    1. Review the diff and optimized code yourself. Write a brief assessment (2-3 sentences) covering:")
+    xml.append("       - Whether the optimization is correct and preserves behavior")
+    xml.append("       - What technique was used (e.g. algorithmic improvement, caching, vectorization)")
+    xml.append("       - Any risks or trade-offs (e.g. increased memory usage, reduced readability)")
+    xml.append("    2. Present this optimization to the user using AskUserQuestion.")
+    xml.append("       Show the function name, performance improvement, and your review as the question text.")
+    xml.append("       Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
     xml.append(
-        "      Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
+        "       Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
     )
-    xml.append("      If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
+    xml.append("    3. If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
     xml.append("  </action>")
     xml.append("</codeflash-optimization>")
 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -1018,16 +1018,18 @@ def select_best_optimization(
             runtimes_list.append(new_best_opt.runtime)
 
         if len(optimization_ids) > 1:
-            future_ranking = self.executor.submit(
-                ai_service_client.generate_ranking,
-                diffs=diff_strs,
-                optimization_ids=optimization_ids,
-                speedups=speedups_list,
-                trace_id=self.get_trace_id(exp_type),
-                function_references=function_references,
-            )
-            concurrent.futures.wait([future_ranking])
-            ranking = future_ranking.result()
+            ranking = None
+            if not is_subagent_mode():
+                future_ranking = self.executor.submit(
+                    ai_service_client.generate_ranking,
+                    diffs=diff_strs,
+                    optimization_ids=optimization_ids,
+                    speedups=speedups_list,
+                    trace_id=self.get_trace_id(exp_type),
+                    function_references=function_references,
+                )
+                concurrent.futures.wait([future_ranking])
+                ranking = future_ranking.result()
             if ranking:
                 min_key = ranking[0]
             else:
@@ -2390,6 +2392,25 @@ def process_review(
         code_context: CodeOptimizationContext,
         function_references: str,
     ) -> None:
+        if is_subagent_mode():
+            subagent_log_optimization_result(
+                function_name=explanation.function_name,
+                file_path=explanation.file_path,
+                perf_improvement_line=explanation.perf_improvement_line,
+                original_runtime_ns=explanation.original_runtime_ns,
+                best_runtime_ns=explanation.best_runtime_ns,
+                raw_explanation=explanation.raw_explanation_message,
+                original_code=original_code_combined,
+                new_code=new_code_combined,
+                review="",
+                test_results=explanation.winning_behavior_test_results,
+                project_root=self.project_root,
+            )
+            mark_optimization_success(
+                trace_id=self.function_trace_id, is_optimization_found=best_optimization is not None
+            )
+            return
+
         coverage_message = (
             original_code_baseline.coverage_results.build_message()
             if original_code_baseline.coverage_results
@@ -2537,20 +2558,7 @@ def process_review(
         self.optimization_review = opt_review_result.review
 
         # Display the reviewer result to the user
-        if is_subagent_mode():
-            subagent_log_optimization_result(
-                function_name=new_explanation.function_name,
-                file_path=new_explanation.file_path,
-                perf_improvement_line=new_explanation.perf_improvement_line,
-                original_runtime_ns=new_explanation.original_runtime_ns,
-                best_runtime_ns=new_explanation.best_runtime_ns,
-                raw_explanation=new_explanation.raw_explanation_message,
-                original_code=original_code_combined,
-                new_code=new_code_combined,
-                review=opt_review_result.review,
-                test_results=new_explanation.winning_behavior_test_results,
-            )
-        elif opt_review_result.review:
+        if opt_review_result.review:
             review_display = {
                 "high": ("[bold green]High[/bold green]", "green", "Recommended to merge"),
                 "medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"),
@@ -2667,12 +2675,15 @@ def establish_original_code_baseline(
                         logger.debug(
                             f"[PIPELINE] Test file {idx}: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}"
                         )
+                    total_looping_time = (
+                        TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
+                    )
                     behavioral_results, coverage_results = self.run_and_parse_tests(
                         testing_type=TestingMode.BEHAVIOR,
                         test_env=test_env,
                         test_files=self.test_files,
                         optimization_iteration=0,
-                        testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
+                        testing_time=total_looping_time,
                         enable_coverage=True,
                         code_context=code_context,
                     )
@@ -2713,6 +2724,7 @@ def establish_original_code_baseline(
                 self.instrument_async_for_mode(TestingMode.PERFORMANCE)
 
             try:
+                subagent = is_subagent_mode()
                 benchmarking_results, _ = self.run_and_parse_tests(
                     testing_type=TestingMode.PERFORMANCE,
                     test_env=test_env,
@@ -2721,6 +2733,7 @@ def establish_original_code_baseline(
                     testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
                     enable_coverage=False,
                     code_context=code_context,
+                    **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
                 )
                 logger.debug(f"[BENCHMARK-DONE] Got {len(benchmarking_results.test_results)} benchmark results")
             finally:
@@ -2871,6 +2884,10 @@ def run_optimized_candidate(
 
             try:
                 self.instrument_capture(file_path_to_helper_classes)
+
+                total_looping_time = (
+                    TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
+                )
                 candidate_behavior_results, _ = self.run_and_parse_tests(
                     testing_type=TestingMode.BEHAVIOR,
                     test_env=test_env,
@@ -2911,13 +2928,15 @@ def run_optimized_candidate(
                 self.instrument_async_for_mode(TestingMode.PERFORMANCE)
 
             try:
+                subagent = is_subagent_mode()
                 candidate_benchmarking_results, _ = self.run_and_parse_tests(
                     testing_type=TestingMode.PERFORMANCE,
                     test_env=test_env,
                     test_files=self.test_files,
                     optimization_iteration=optimization_candidate_index,
                     testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
                     enable_coverage=False,
+                    **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
                 )
             finally:
                 if self.function_to_optimize.is_async:
diff --git a/tests/test_languages/test_javascript_test_runner.py b/tests/test_languages/test_javascript_test_runner.py
@@ -1,5 +1,6 @@
 """Tests for JavaScript/Jest test runner functionality."""
 
+import sys
 import tempfile
 from pathlib import Path
 from unittest.mock import patch, MagicMock
@@ -896,6 +897,7 @@ def test_line_profile_command_uses_bundled_reporter(self):
                     reporter_args = [a for a in cmd if "--reporters=codeflash/jest-reporter" in a]
                     assert len(reporter_args) == 1
 
+    @pytest.mark.skipif(sys.platform == "win32", reason="Node.js subprocess pipe behavior unreliable on Windows CI")
     def test_reporter_produces_valid_junit_xml(self):
         """The reporter JS should produce JUnit XML parseable by junitparser."""
         import subprocess