Skip to content

Commit f92d93e

Browse files
mashraf-222claude
andcommitted
feat: add multi-language orchestration loop with per-language config discovery
Adds the ability for the codeflash CLI to discover and optimize multiple languages in a single run. When configs for Python, Java, and/or JS/TS are found, the optimizer runs once per language with isolated args.

- Add LanguageConfig dataclass and find_all_config_files() for upward walk + monorepo subdirectory scanning (Python/JS/Java)
- Extract normalize_toml_config() from inline parse_config_file logic
- Add apply_language_config() for per-language arg setup in cli.py
- Set language singleton early in process_pyproject_config()
- Add orchestration loop in main.py with --file language filtering, per-language error isolation, and summary logging
- Use build_config_strategy.parse_java_project_config() (from #1906)
- Make get_git_diff tests language-agnostic (no singleton dependency)

Intentionally omits auto_configure_language and detect_unconfigured_languages per Linear CF-1075 review.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e073592 commit f92d93e

7 files changed

Lines changed: 1428 additions & 100 deletions

File tree

codeflash/cli_cmds/cli.py

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
from codeflash.cli_cmds.console import apologize_and_exit, logger
1010
from codeflash.code_utils import env_utils
1111
from codeflash.code_utils.code_utils import exit_with_message, normalize_ignore_paths
12-
from codeflash.code_utils.config_parser import parse_config_file
12+
from codeflash.code_utils.config_parser import LanguageConfig, parse_config_file
13+
from codeflash.languages import set_current_language
14+
from codeflash.languages.language_enum import Language
1315
from codeflash.languages.test_framework import set_current_test_framework
1416
from codeflash.lsp.helpers import is_LSP_enabled
1517
from codeflash.version import __version__ as version
@@ -110,11 +112,14 @@ def process_pyproject_config(args: Namespace) -> Namespace:
110112
assert args.module_root is not None, "--module-root must be specified"
111113
assert Path(args.module_root).is_dir(), f"--module-root {args.module_root} must be a valid directory"
112114

113-
# For JS/TS projects, tests_root is optional (Jest auto-discovers tests)
114-
# Default to module_root if not specified
115115
is_js_ts_project = pyproject_config.get("language") in ("javascript", "typescript")
116116
is_java_project = pyproject_config.get("language") == "java"
117117

118+
# Set the language singleton early so downstream code (e.g. get_git_diff)
119+
# can use current_language_support() before function discovery.
120+
if pyproject_config.get("language"):
121+
set_current_language(pyproject_config["language"])
122+
118123
# Set the test framework singleton for JS/TS projects
119124
if is_js_ts_project and pyproject_config.get("test_framework"):
120125
set_current_test_framework(pyproject_config["test_framework"])
@@ -221,6 +226,83 @@ def project_root_from_module_root(module_root: Path, pyproject_file_path: Path)
221226
return module_root.parent.resolve()
222227

223228

229+
def _default_java_tests_root(module_root) -> str:
    """Choose a tests directory for a Java project that did not configure one.

    Args:
        module_root: The configured module root (str or Path).

    Returns:
        The first existing candidate directory as a string, or
        ``<cwd>/src/test/java`` when none of the candidates exists.
    """
    for test_dir in ["src/test/java", "test", "tests"]:
        # NOTE(review): the nested candidate is looked up beside module_root's parent,
        # the flat ones relative to the working directory — confirm this is intended.
        test_path = Path(module_root).parent / test_dir if "/" in test_dir else Path(test_dir)
        if not test_path.is_absolute():
            test_path = Path.cwd() / test_path
        if test_path.is_dir():
            return str(test_path)
    return str(Path.cwd() / "src" / "test" / "java")


def _default_js_tests_root(module_root):
    """Choose a tests directory for a JS/TS project that did not configure one.

    Candidates are checked relative to the working directory first, then
    inside ``module_root``; ``module_root`` itself is the last resort.
    """
    candidates = ["test", "tests", "__tests__"]
    for test_dir in candidates:
        if Path(test_dir).is_dir():
            return test_dir
    if module_root:
        for test_dir in candidates:
            test_path = Path(module_root) / test_dir
            if test_path.is_dir():
                return str(test_path)
    return module_root


def apply_language_config(args: Namespace, lang_config: LanguageConfig) -> Namespace:
    """Populate ``args`` from one language's config and activate that language.

    Values from ``lang_config.config`` only fill arguments that are missing
    or ``None`` on ``args``, so values already present on ``args`` win. The
    language singleton (and, for JS/TS, the test framework singleton) is set
    as a side effect. ``args`` is mutated in place and also returned.

    Args:
        args: Parsed CLI namespace to complete.
        lang_config: Discovered configuration for a single language.

    Returns:
        The same ``args`` object with resolved ``module_root``, ``tests_root``,
        ``project_root`` and ``test_project_root``.

    Raises:
        AssertionError: If the module root or tests root is missing or is not
            an existing directory.
    """
    config = lang_config.config
    config_path = lang_config.config_path

    supported_keys = [
        "module_root",
        "tests_root",
        "benchmarks_root",
        "ignore_paths",
        "pytest_cmd",
        "formatter_cmds",
        "disable_telemetry",
        "disable_imports_sorting",
        "git_remote",
        "override_fixtures",
    ]
    for key in supported_keys:
        # A missing attribute is treated the same as an explicit None.
        if key in config and getattr(args, key, None) is None:
            setattr(args, key, config[key])

    # Raised explicitly (not via `assert`) so validation still runs under `python -O`.
    module_root = getattr(args, "module_root", None)
    if module_root is None:
        raise AssertionError("--module-root must be specified")
    if not Path(module_root).is_dir():
        raise AssertionError(f"--module-root {module_root} must be a valid directory")

    set_current_language(lang_config.language)

    is_js_ts = lang_config.language in (Language.JAVASCRIPT, Language.TYPESCRIPT)
    if is_js_ts and config.get("test_framework"):
        set_current_test_framework(config["test_framework"])

    is_java = lang_config.language == Language.JAVA
    if getattr(args, "tests_root", None) is None:
        if is_java:
            args.tests_root = _default_java_tests_root(module_root)
        elif is_js_ts:
            args.tests_root = _default_js_tests_root(module_root)
        else:
            raise AssertionError("--tests-root must be specified")

    if not Path(args.tests_root).is_dir():
        raise AssertionError(f"--tests-root {args.tests_root} must be a valid directory")

    args.module_root = Path(module_root).resolve()
    if getattr(args, "ignore_paths", None) is not None:
        args.ignore_paths = normalize_ignore_paths(args.ignore_paths, base_path=args.module_root)
    args.project_root = project_root_from_module_root(args.module_root, config_path)
    args.tests_root = Path(args.tests_root).resolve()
    # getattr: benchmarks_root may be absent when neither the CLI nor the config supplied it.
    if getattr(args, "benchmarks_root", None):
        args.benchmarks_root = Path(args.benchmarks_root).resolve()
    args.test_project_root = project_root_from_module_root(args.tests_root, config_path)

    if is_java and config_path.is_dir():
        # For Java projects, config_path IS the project root directory (from build-tool detection).
        args.project_root = config_path.resolve()
        args.test_project_root = config_path.resolve()

    return args
304+
305+
224306
def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace:
225307
if hasattr(args, "all") or (hasattr(args, "file") and args.file):
226308
no_pr = getattr(args, "no_pr", False)
@@ -391,7 +473,7 @@ def _build_parser() -> ArgumentParser:
391473
compare_parser.add_argument("--timeout", type=int, default=600, help="Benchmark timeout in seconds (default: 600)")
392474
compare_parser.add_argument("--config-file", type=str, dest="config_file", help="Path to pyproject.toml")
393475

394-
trace_optimize = subparsers.add_parser("optimize", help="Trace and optimize your project.")
476+
trace_optimize = subparsers.add_parser("optimize", help="Trace and optimize your project.", add_help=False)
395477

396478
trace_optimize.add_argument(
397479
"--max-function-count",

codeflash/code_utils/config_parser.py

Lines changed: 155 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
from __future__ import annotations
22

3+
from dataclasses import dataclass
34
from pathlib import Path
45
from typing import Any
56

67
import tomlkit
78

89
from codeflash.code_utils.config_js import find_package_json, parse_package_json_config
10+
from codeflash.languages.language_enum import Language
911
from codeflash.lsp.helpers import is_LSP_enabled
1012

1113
PYPROJECT_TOML_CACHE: dict[Path, Path] = {}
1214
ALL_CONFIG_FILES: dict[Path, dict[str, Path]] = {}
1315

1416

17+
@dataclass
class LanguageConfig:
    """A codeflash configuration discovered for one language."""

    # Parsed configuration values for this language.
    config: dict[str, Any]
    # Where the configuration came from: pyproject.toml for Python, the path returned by
    # the package.json parser for JavaScript, or the project directory for Java.
    config_path: Path
    # Language this configuration applies to.
    language: Language
22+
23+
1524
def _try_parse_java_build_config() -> tuple[dict[str, Any], Path] | None:
1625
"""Detect Java project from build files and parse config from pom.xml/gradle.properties.
1726
@@ -103,6 +112,149 @@ def find_conftest_files(test_paths: list[Path]) -> list[Path]:
103112
return list(list_of_conftest_files)
104113

105114

115+
def normalize_toml_config(config: dict[str, Any], config_file_path: Path) -> dict[str, Any]:
    """Fill defaults, resolve paths and underscore the keys of a codeflash config table.

    ``config`` is modified in place and also returned.

    Args:
        config: Raw configuration using hyphenated keys (e.g. ``module-root``).
        config_file_path: Path of the file the config came from; relative
            paths in the config are resolved against its parent directory.

    Returns:
        The same dict with string, boolean and list options coerced and
        defaulted, path options made absolute, and every hyphenated key
        renamed to use underscores.
    """
    # Accept a plain string as well as a Path, as the previous inline code did.
    base_dir = Path(config_file_path).parent

    def as_list(value: Any) -> list[Any]:
        # A bare string means "one entry"; iterating it would yield single characters.
        return [value] if isinstance(value, str) else list(value)

    for key, default_text in (("pytest-cmd", "pytest"), ("git-remote", "origin")):
        config[key] = str(config[key]) if key in config else default_text

    for key in ("override-fixtures", "disable-telemetry", "disable-imports-sorting", "benchmark"):
        config[key] = bool(config[key]) if key in config else False

    # Path options have no default: they stay absent when not configured.
    for key in ("module-root", "tests-root", "benchmarks-root"):
        if key in config:
            config[key] = str((base_dir / Path(config[key])).resolve())

    if "formatter-cmds" in config:
        config["formatter-cmds"] = [str(cmd) for cmd in as_list(config["formatter-cmds"])]
    else:
        config["formatter-cmds"] = []

    if "ignore-paths" in config:
        config["ignore-paths"] = [str((base_dir / path).resolve()) for path in as_list(config["ignore-paths"])]
    else:
        config["ignore-paths"] = []

    # Convert hyphenated keys to underscored keys (snapshot first: the dict is mutated).
    for key in [name for name in config if "-" in name]:
        config[key.replace("-", "_")] = config.pop(key)

    return config
158+
159+
160+
def _parse_java_config_for_dir(dir_path: Path) -> dict[str, Any] | None:
    """Return the Java project config parsed for ``dir_path``, or ``None``.

    Delegates to ``build_config_strategy.parse_java_project_config``.
    """
    # Imported at call time rather than at module import
    # (presumably to keep Java support off the import path until needed — TODO confirm).
    from codeflash.languages.java import build_config_strategy

    return build_config_strategy.parse_java_project_config(dir_path)
164+
165+
166+
# Names of immediate subdirectories that the monorepo scan never inspects for configs.
_SUBDIR_SKIP = frozenset(
    (
        # version-control metadata
        ".git",
        ".hg",
        ".svn",
        # installed dependencies and virtual environments
        "node_modules",
        ".venv",
        "venv",
        # bytecode and build output
        "__pycache__",
        "target",
        "build",
        "dist",
        # tool caches
        ".tox",
        ".mypy_cache",
        ".ruff_cache",
        ".pytest_cache",
    )
)
184+
185+
186+
def _check_dir_for_configs(dir_path: Path, configs: list[LanguageConfig], seen_languages: set[Language]) -> None:
    """Look in ``dir_path`` for a config of each language not found yet.

    Every language is checked at most once: a language already in
    ``seen_languages`` is skipped. Detection is best-effort, so a config that
    cannot be read or parsed is skipped; the failure is logged at debug level
    instead of being silently discarded.

    Args:
        dir_path: Directory to inspect (not searched recursively).
        configs: Output list; a ``LanguageConfig`` is appended per config found.
        seen_languages: Languages already discovered; updated in place.
    """
    import logging

    log = logging.getLogger(__name__)

    if Language.PYTHON not in seen_languages:
        pyproject = dir_path / "pyproject.toml"
        if pyproject.exists():
            try:
                with pyproject.open("rb") as f:
                    data = tomlkit.parse(f.read())
                tool = data.get("tool", {})
                # Only a pyproject.toml with a [tool.codeflash] table counts as a config.
                if isinstance(tool, dict) and "codeflash" in tool:
                    raw_config = dict(tool["codeflash"])
                    normalized = normalize_toml_config(raw_config, pyproject)
                    seen_languages.add(Language.PYTHON)
                    configs.append(LanguageConfig(config=normalized, config_path=pyproject, language=Language.PYTHON))
            except Exception:
                log.debug("Skipping Python config at %s: could not be parsed", pyproject, exc_info=True)

    if Language.JAVASCRIPT not in seen_languages:
        package_json = dir_path / "package.json"
        if package_json.exists():
            try:
                result = parse_package_json_config(package_json)
                if result is not None:
                    config, path = result
                    seen_languages.add(Language.JAVASCRIPT)
                    configs.append(LanguageConfig(config=config, config_path=path, language=Language.JAVASCRIPT))
            except Exception:
                log.debug("Skipping JavaScript config at %s: could not be parsed", package_json, exc_info=True)

    if Language.JAVA not in seen_languages:
        has_build_file = any((dir_path / name).exists() for name in ("pom.xml", "build.gradle", "build.gradle.kts"))
        if has_build_file:
            try:
                java_config = _parse_java_config_for_dir(dir_path)
                if java_config is not None:
                    seen_languages.add(Language.JAVA)
                    # For Java the directory itself is recorded as the config path.
                    configs.append(LanguageConfig(config=java_config, config_path=dir_path, language=Language.JAVA))
            except Exception:
                log.debug("Skipping Java config in %s: could not be parsed", dir_path, exc_info=True)
227+
228+
229+
def find_all_config_files(start_dir: Path | None = None) -> list[LanguageConfig]:
    """Discover at most one codeflash config per language around ``start_dir``.

    The search first walks from ``start_dir`` (default: the working directory)
    up to the filesystem root, then inspects the immediate subdirectories of
    ``start_dir`` in sorted order, skipping names in ``_SUBDIR_SKIP``. The
    first config found for a language is the one kept.

    Returns:
        The discovered configs, in discovery order.
    """
    origin = (Path.cwd() if start_dir is None else start_dir).resolve()
    found: list[LanguageConfig] = []
    languages_seen: set[Language] = set()

    # Upward pass: the starting directory, then each ancestor (closest config wins per language).
    for candidate in (origin, *origin.parents):
        _check_dir_for_configs(candidate, found, languages_seen)

    # Downward pass: one level only, for monorepo language subprojects.
    try:
        children = sorted(entry for entry in origin.iterdir() if entry.is_dir() and entry.name not in _SUBDIR_SKIP)
    except OSError:
        children = []
    for child in children:
        _check_dir_for_configs(child, found, languages_seen)

    return found
256+
257+
106258
def parse_config_file(
107259
config_file_path: Path | None = None, override_formatter_check: bool = False
108260
) -> tuple[dict[str, Any], Path]:
@@ -174,55 +326,13 @@ def parse_config_file(
174326
if config == {} and lsp_mode:
175327
return {}, config_file_path
176328

177-
# Preserve language field if present (important for JS/TS projects)
178-
# default values:
179-
path_keys = ["module-root", "tests-root", "benchmarks-root"]
180-
path_list_keys = ["ignore-paths"]
181-
str_keys = {"pytest-cmd": "pytest", "git-remote": "origin"}
182-
bool_keys = {
183-
"override-fixtures": False,
184-
"disable-telemetry": False,
185-
"disable-imports-sorting": False,
186-
"benchmark": False,
187-
}
188-
# Note: formatter-cmds defaults to empty list. For Python projects, black is typically
189-
# detected by the project detector. For Java projects, no formatter is supported yet.
190-
list_str_keys = {"formatter-cmds": []}
191-
192-
for key, default_value in str_keys.items():
193-
if key in config:
194-
config[key] = str(config[key])
195-
else:
196-
config[key] = default_value
197-
for key, default_value in bool_keys.items():
198-
if key in config:
199-
config[key] = bool(config[key])
200-
else:
201-
config[key] = default_value
202-
for key in path_keys:
203-
if key in config:
204-
config[key] = str((Path(config_file_path).parent / Path(config[key])).resolve())
205-
for key, default_value in list_str_keys.items():
206-
if key in config:
207-
config[key] = [str(cmd) for cmd in config[key]]
208-
else:
209-
config[key] = default_value
210-
211-
for key in path_list_keys:
212-
if key in config:
213-
config[key] = [str((Path(config_file_path).parent / path).resolve()) for path in config[key]]
214-
else:
215-
config[key] = []
329+
config = normalize_toml_config(config, config_file_path)
216330

217331
# see if this is happening during GitHub actions setup
218-
if config.get("formatter-cmds") and len(config.get("formatter-cmds")) > 0 and not override_formatter_check:
219-
assert config.get("formatter-cmds")[0] != "your-formatter $file", (
332+
if config.get("formatter_cmds") and len(config.get("formatter_cmds")) > 0 and not override_formatter_check:
333+
assert config.get("formatter_cmds")[0] != "your-formatter $file", (
220334
"The formatter command is not set correctly in pyproject.toml. Please set the "
221335
"formatter command in the 'formatter-cmds' key. More info - https://docs.codeflash.ai/configuration"
222336
)
223-
for key in list(config.keys()):
224-
if "-" in key:
225-
config[key.replace("-", "_")] = config[key]
226-
del config[key]
227337

228338
return config, config_file_path

0 commit comments

Comments
 (0)