Skip to content

Commit 9acac52

Browse files
committed
fix(cli,bench): stabilize metrics mode and baseline path handling
- neutralize repo quality gates in benchmark runs
- resolve pyproject baseline paths from the analysis root
- treat --api-surface as a metrics-mode request
- refresh CLI regressions and targeted coverage tests
- document relative baseline path resolution
1 parent 49b3d7c commit 9acac52

10 files changed

Lines changed: 172 additions & 9 deletions

benchmarks/run_benchmark.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,30 @@
2525
from codeclone.baseline import current_python_tag
2626

2727
BENCHMARK_SCHEMA_VERSION = "1.0"
28+
BENCHMARK_NEUTRAL_ARGS: tuple[str, ...] = (
29+
"--no-fail-on-new",
30+
"--no-fail-on-new-metrics",
31+
"--no-fail-cycles",
32+
"--no-fail-dead-code",
33+
"--no-fail-on-typing-regression",
34+
"--no-fail-on-docstring-regression",
35+
"--no-fail-on-api-break",
36+
"--no-fail-on-untested-hotspots",
37+
"--fail-threshold",
38+
"-1",
39+
"--fail-complexity",
40+
"-1",
41+
"--fail-coupling",
42+
"-1",
43+
"--fail-cohesion",
44+
"-1",
45+
"--fail-health",
46+
"-1",
47+
"--min-typing-coverage",
48+
"-1",
49+
"--min-docstring-coverage",
50+
"-1",
51+
)
2852

2953

3054
@dataclass(frozen=True)
@@ -139,6 +163,7 @@ def _run_cli_once(
139163
"-m",
140164
"codeclone.cli",
141165
str(target),
166+
*BENCHMARK_NEUTRAL_ARGS,
142167
"--json",
143168
str(report_path),
144169
"--cache-path",

codeclone/_cli_runtime.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def _metrics_flags_requested(args: _RuntimeArgs) -> bool:
101101
or args.fail_on_untested_hotspots
102102
or args.min_typing_coverage >= 0
103103
or args.min_docstring_coverage >= 0
104+
or args.api_surface
104105
or args.update_metrics_baseline
105106
or bool(getattr(args, "coverage_xml", None))
106107
)

codeclone/cli.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,17 @@ def _main_impl() -> None:
11501150
analysis_started_at_utc = _current_report_timestamp_utc()
11511151
ap = build_parser(__version__)
11521152

1153+
def _resolve_runtime_path_arg(
1154+
*,
1155+
root_path: Path,
1156+
raw_path: str,
1157+
from_cli: bool,
1158+
) -> Path:
1159+
candidate_path = Path(raw_path).expanduser()
1160+
if from_cli or candidate_path.is_absolute():
1161+
return candidate_path.resolve()
1162+
return (root_path / candidate_path).resolve()
1163+
11531164
def _prepare_run_inputs() -> tuple[
11541165
Namespace,
11551166
Path,
@@ -1174,6 +1185,9 @@ def _prepare_run_inputs() -> tuple[
11741185
or arg.startswith(("--cache-dir=", "--cache-path="))
11751186
for arg in sys.argv
11761187
)
1188+
baseline_path_from_args = any(
1189+
arg == "--baseline" or arg.startswith("--baseline=") for arg in sys.argv
1190+
)
11771191
metrics_path_from_args = any(
11781192
arg == "--metrics-baseline" or arg.startswith("--metrics-baseline=")
11791193
for arg in sys.argv
@@ -1235,7 +1249,11 @@ def _prepare_run_inputs() -> tuple[
12351249

12361250
baseline_arg_path = Path(args.baseline).expanduser()
12371251
try:
1238-
baseline_path = baseline_arg_path.resolve()
1252+
baseline_path = _resolve_runtime_path_arg(
1253+
root_path=root_path,
1254+
raw_path=args.baseline,
1255+
from_cli=baseline_path_from_args,
1256+
)
12391257
baseline_exists = baseline_path.exists()
12401258
except OSError as exc:
12411259
console.print(
@@ -1254,7 +1272,13 @@ def _prepare_run_inputs() -> tuple[
12541272
args.metrics_baseline if metrics_path_overridden else args.baseline
12551273
).expanduser()
12561274
try:
1257-
metrics_baseline_path = metrics_baseline_arg_path.resolve()
1275+
metrics_baseline_path = _resolve_runtime_path_arg(
1276+
root_path=root_path,
1277+
raw_path=(
1278+
args.metrics_baseline if metrics_path_overridden else args.baseline
1279+
),
1280+
from_cli=metrics_path_from_args,
1281+
)
12581282
if metrics_baseline_path == baseline_path:
12591283
probe = _probe_metrics_baseline_section(metrics_baseline_path)
12601284
metrics_baseline_exists = probe.has_metrics_section

docs/book/04-config-and-defaults.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ Current-run coverage join config:
166166

167167
Metrics baseline path selection contract:
168168

169+
- Relative `baseline` / `metrics_baseline` paths coming from defaults or
170+
`pyproject.toml` resolve from the analysis root.
169171
- If `--metrics-baseline` is explicitly set, that path is used.
170172
- If `metrics_baseline` in `pyproject.toml` differs from parser default, that
171173
configured path is used even without explicit CLI flag.

docs/book/09-cli.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ Refs:
108108
`api_surface` data.
109109
- `--coverage` is a current-run external Cobertura input. It does not update or
110110
compare against `codeclone.baseline.json`.
111+
- Relative clone-baseline and metrics-baseline paths from defaults or
112+
`pyproject.toml` resolve from the analysis root. Explicit CLI paths are used
113+
as provided.
111114
- Invalid Cobertura XML is warning-only in normal runs: CLI prints
112115
`Coverage join ignored`, keeps exit `0`, and shows `Coverage` as unavailable
113116
in the normal `Metrics` block. It becomes a contract error only when

tests/test_benchmark.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from benchmarks.run_benchmark import (
12+
BENCHMARK_NEUTRAL_ARGS,
1213
RunMeasurement,
1314
Scenario,
1415
_validate_inventory_sample,
@@ -43,6 +44,17 @@ def test_benchmark_inventory_validation_accepts_valid_cold_and_warm_samples() ->
4344
)
4445

4546

47+
def test_benchmark_neutral_args_disable_repo_quality_gates() -> None:
48+
assert "--no-fail-on-new" in BENCHMARK_NEUTRAL_ARGS
49+
assert "--no-fail-on-new-metrics" in BENCHMARK_NEUTRAL_ARGS
50+
assert "--no-fail-cycles" in BENCHMARK_NEUTRAL_ARGS
51+
assert "--no-fail-dead-code" in BENCHMARK_NEUTRAL_ARGS
52+
assert "--fail-health" in BENCHMARK_NEUTRAL_ARGS
53+
assert "--min-typing-coverage" in BENCHMARK_NEUTRAL_ARGS
54+
assert "--min-docstring-coverage" in BENCHMARK_NEUTRAL_ARGS
55+
assert "--skip-metrics" not in BENCHMARK_NEUTRAL_ARGS
56+
57+
4658
@pytest.mark.parametrize(
4759
("scenario", "measurement", "message"),
4860
(

tests/test_cli_config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ def test_validate_config_value_accepts_expected_types(
169169
("min_loc", True, "expected int"),
170170
("baseline", 1, "expected str"),
171171
("golden_fixture_paths", "tests/fixtures/golden_*", "expected list\\[str\\]"),
172+
(
173+
"golden_fixture_paths",
174+
["tests/fixtures/golden_*", 1],
175+
"expected list\\[str\\]",
176+
),
172177
("golden_fixture_paths", ["pkg/*"], "must target tests/"),
173178
],
174179
)

tests/test_cli_inprocess.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3367,9 +3367,9 @@ def test_cli_summary_format_stable(
33673367
out = capsys.readouterr().out
33683368
assert "Summary" in out
33693369
assert out.count("Summary") == 1
3370-
assert "Metrics" in out
3371-
assert "Adoption" in out
3372-
assert "Overloaded" in out
3370+
assert "Metrics" not in out
3371+
assert "Adoption" not in out
3372+
assert "Overloaded" not in out
33733373
assert "callables" in out
33743374
assert "Files parsed" not in out
33753375
assert "Input" not in out
@@ -3383,6 +3383,39 @@ def test_cli_summary_format_stable(
33833383
assert _summary_metric(out, "New vs baseline") >= 0
33843384

33853385

3386+
def test_cli_summary_with_metrics_baseline_shows_metrics_section(
3387+
tmp_path: Path,
3388+
monkeypatch: pytest.MonkeyPatch,
3389+
capsys: pytest.CaptureFixture[str],
3390+
) -> None:
3391+
src = tmp_path / "a.py"
3392+
metrics_baseline_path = tmp_path / "metrics-baseline.json"
3393+
src.write_text("def f(value: int) -> int:\n return value\n", "utf-8")
3394+
_patch_parallel(monkeypatch)
3395+
_run_main(
3396+
monkeypatch,
3397+
[
3398+
str(tmp_path),
3399+
"--no-progress",
3400+
"--metrics-baseline",
3401+
str(metrics_baseline_path),
3402+
"--update-metrics-baseline",
3403+
],
3404+
)
3405+
_ = capsys.readouterr()
3406+
_run_main(
3407+
monkeypatch,
3408+
[
3409+
str(tmp_path),
3410+
"--no-progress",
3411+
"--metrics-baseline",
3412+
str(metrics_baseline_path),
3413+
],
3414+
)
3415+
out = capsys.readouterr().out
3416+
assert_contains_all(out, "Metrics", "Adoption", "Overloaded")
3417+
3418+
33863419
def test_cli_summary_with_api_surface_shows_public_api_line(
33873420
tmp_path: Path,
33883421
monkeypatch: pytest.MonkeyPatch,
@@ -3436,10 +3469,7 @@ def test_cli_ci_summary_includes_adoption_and_public_api_lines(
34363469
],
34373470
)
34383471
out = capsys.readouterr().out
3439-
assert "Adoption" in out
3440-
assert "Public API" in out
3441-
assert "symbols=" in out
3442-
assert "docstrings=" in out
3472+
assert_contains_all(out, "Adoption", "Public API", "symbols=", "docstrings=")
34433473

34443474

34453475
def test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups(
@@ -3450,6 +3480,7 @@ def test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups(
34503480
fixtures_dir.mkdir(parents=True)
34513481
_write_duplicate_function_module(fixtures_dir, "a.py")
34523482
_write_duplicate_function_module(fixtures_dir, "b.py")
3483+
_write_current_python_baseline(tmp_path / "codeclone.baseline.json")
34533484
report_path = tmp_path / "report.json"
34543485
(tmp_path / "pyproject.toml").write_text(
34553486
"""

tests/test_cli_unit.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,34 @@ def test_configure_metrics_mode_does_not_force_api_surface_for_baseline_update()
13791379
assert args.api_surface is False
13801380

13811381

1382+
def test_configure_metrics_mode_forces_api_surface_for_api_break_gate() -> None:
1383+
args = Namespace(
1384+
skip_metrics=False,
1385+
fail_complexity=-1,
1386+
fail_coupling=-1,
1387+
fail_cohesion=-1,
1388+
fail_cycles=False,
1389+
fail_dead_code=False,
1390+
fail_health=-1,
1391+
fail_on_new_metrics=False,
1392+
fail_on_typing_regression=False,
1393+
fail_on_docstring_regression=False,
1394+
fail_on_api_break=True,
1395+
fail_on_untested_hotspots=False,
1396+
min_typing_coverage=-1,
1397+
min_docstring_coverage=-1,
1398+
update_metrics_baseline=False,
1399+
skip_dead_code=False,
1400+
skip_dependencies=False,
1401+
api_surface=False,
1402+
coverage_xml=None,
1403+
)
1404+
1405+
cli._configure_metrics_mode(args=args, metrics_baseline_exists=True)
1406+
1407+
assert args.api_surface is True
1408+
1409+
13821410
def test_probe_metrics_baseline_section_for_non_object_payload(tmp_path: Path) -> None:
13831411
path = tmp_path / "baseline.json"
13841412
path.write_text("[]", "utf-8")

tests/test_html_report_helpers.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,3 +546,35 @@ def test_meta_snippet_and_assembly_helpers_cover_empty_optional_paths(
546546
report_document={},
547547
)
548548
assert '[data-theme="light"] .codebox span' not in html_without_light_rules
549+
550+
551+
def test_render_meta_panel_covers_status_tones_and_runtime_mismatch() -> None:
552+
meta_html = render_meta_panel(
553+
cast(
554+
Any,
555+
SimpleNamespace(
556+
meta={
557+
"python_tag": "cp313",
558+
"baseline_python_tag": "cp312",
559+
"cache_status": "stale",
560+
"metrics_baseline_loaded": True,
561+
"metrics_baseline_payload_sha256_verified": True,
562+
},
563+
baseline_meta={"status": "FAILED"},
564+
cache_meta={},
565+
metrics_baseline_meta={},
566+
runtime_meta={},
567+
integrity_map={},
568+
report_schema_version="2.8",
569+
report_generated_at="2026-04-15T12:00:00Z",
570+
),
571+
)
572+
)
573+
assert "meta-status--err" in meta_html
574+
assert ">FAILED<" in meta_html
575+
assert "meta-status--neutral" in meta_html
576+
assert ">stale<" in meta_html
577+
assert "prov-match--mismatch" in meta_html
578+
assert "differs from runtime (cp313)" in meta_html
579+
assert '<span class="prov-badge-val">verified</span>' in meta_html
580+
assert '<span class="prov-badge-lbl">Metrics baseline</span>' in meta_html

0 commit comments

Comments (0)