From fed40abbf6420224877beac8fc5edfd245335ab7 Mon Sep 17 00:00:00 2001 From: "sds.rs" Date: Wed, 10 Jun 2026 02:25:07 +0800 Subject: [PATCH] =?UTF-8?q?feat(metrics):=20deny=E2=86=92use=20conversion?= =?UTF-8?q?=20measurement=20+=20honest=20dark-metric=20visibility?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: make the recommend→use conversion metric honest - stats/health-check surface DARK state instead of silently skipping (absent|empty|live) - stop plugin_e2e metrics leaking into the real usage.jsonl (spawn_server .git marker) - purge of 8 historical test-leak entries done in gitignored data (not in this commit) feat: measure whether the DENY stick actually converts - per-session deny→use funnel via window-join (SessionMetrics.started_at + recs field) - doctor.js stale-path detection (present-but-old-version hook → metric silently dark) - spec: docs/ADOPTION-DENY-MEASUREMENT-PLAN.md (gitignored, local) Co-Authored-By: Claude Opus 4.8 (1M context) --- claude-plugin/scripts/doctor.js | 37 ++++++-- claude-plugin/scripts/doctor.test.js | 37 +++++++- src/cli.rs | 132 ++++++++++++++++++++++++++- src/mcp/metrics.rs | 67 +++++++++++++- tests/cli_e2e.rs | 65 +++++++++++++ tests/plugin_e2e.rs | 45 +++++++++ 6 files changed, 374 insertions(+), 9 deletions(-) diff --git a/claude-plugin/scripts/doctor.js b/claude-plugin/scripts/doctor.js index 19127aa..7e1288c 100644 --- a/claude-plugin/scripts/doctor.js +++ b/claude-plugin/scripts/doctor.js @@ -206,19 +206,28 @@ function runDiagnostics() { try { const settings = readJson(settingsPath()) || {}; const cov = surveyHookCoverage(settings); - if (cov.missing.length === 0) { + if (cov.missing.length === 0 && cov.stale.length === 0) { results.push({ name: 'Hook coverage', status: 'ok', detail: `settings.json has all ${cov.expected.length} expected entries`, }); - } else { + } else if (cov.missing.length > 0) { results.push({ name: 'Hook coverage', status: 'warn', detail: `missing ${cov.missing.length}/${cov.expected.length} settings.json entries: ${cov.missing.join(', ')}`, fixId: 'missing-hooks-in-settings', }); + } else { + // Present but stale path(s) — re-register rewrites them to the current + // version. A stale PreToolUse hook can keep the conversion metric dark. + results.push({ + name: 'Hook coverage', + status: 'warn', + detail: `${cov.stale.length}/${cov.expected.length} settings.json entries point at a stale path (re-register to current version): ${cov.stale.join(', ')}`, + fixId: 'missing-hooks-in-settings', + }); } } catch { /* probe failed — skip */ } @@ -230,26 +239,42 @@ function runDiagnostics() { function surveyHookCoverage(settings) { const desired = buildSettingsHookEntries(); const expected = []; + const desiredCmd = {}; // key -> command string we would write now for (const [event, entries] of Object.entries(desired)) { for (const e of entries) { - expected.push(`${event}:${e.matcher || '*'}`); + const key = `${event}:${e.matcher || '*'}`; + expected.push(key); + desiredCmd[key] = e.hooks && e.hooks[0] && e.hooks[0].command; } } const present = new Set(); + const presentCmd = {}; // key -> command currently registered if (settings && settings.hooks) { for (const [event, entries] of Object.entries(settings.hooks)) { if (!Array.isArray(entries)) continue; for (const entry of entries) { if (isOurHookEntry(entry)) { - present.add(`${event}:${entry.matcher || '*'}`); + const key = `${event}:${entry.matcher || '*'}`; + present.add(key); + if (entry.hooks && entry.hooks[0] && entry.hooks[0].command) { + presentCmd[key] = entry.hooks[0].command; + } } } } } const missing = expected.filter(k => !present.has(k)); - return { expected, present: [...present], missing }; + // Stale = present but the registered command no longer matches what we'd write + // now (points at an old plugin-cache version dir / moved path). A stale path can + // run pre-recordRecommendation hook code, so the hook fires but the conversion + // metric stays dark — invisible to a present/absent check. This is the + // 0.45.1-registered-while-0.45.4-active case the RCA surfaced. + const stale = expected.filter(k => + present.has(k) && desiredCmd[k] && presentCmd[k] && presentCmd[k] !== desiredCmd[k] + ); + return { expected, present: [...present], missing, stale }; } // ── Report Formatting ───────────────────────────────────── @@ -449,7 +474,7 @@ function runDoctor(opts = {}) { return { results, issueCount: issues.length }; } -module.exports = { runDiagnostics, formatReport, runRepairs, runDoctor }; +module.exports = { runDiagnostics, formatReport, runRepairs, runDoctor, surveyHookCoverage }; if (require.main === module) { const args = process.argv.slice(2); diff --git a/claude-plugin/scripts/doctor.test.js b/claude-plugin/scripts/doctor.test.js index c52c666..9487388 100644 --- a/claude-plugin/scripts/doctor.test.js +++ b/claude-plugin/scripts/doctor.test.js @@ -2,7 +2,18 @@ const test = require('node:test'); const assert = require('node:assert/strict'); -const { runDiagnostics, formatReport } = require('./doctor'); +const { runDiagnostics, formatReport, surveyHookCoverage } = require('./doctor'); +const { buildSettingsHookEntries } = require('./lifecycle'); + +// Build a settings.json whose hooks exactly mirror what we'd register now. +function settingsWithCurrentHooks() { + const desired = buildSettingsHookEntries(); + const hooks = {}; + for (const [event, entries] of Object.entries(desired)) { + hooks[event] = entries.map(e => JSON.parse(JSON.stringify(e))); + } + return { hooks }; +} test('runDiagnostics returns an array of check results', () => { const results = runDiagnostics(); @@ -45,3 +56,27 @@ test('formatReport shows all-clear when no problems', () => { const output = formatReport(results); assert.ok(output.includes('All checks passed') || output.includes('0 issues')); }); + +test('surveyHookCoverage reports clean when all entries are current', () => { + const cov = surveyHookCoverage(settingsWithCurrentHooks()); + assert.equal(cov.missing.length, 0, 'no missing entries'); + assert.equal(cov.stale.length, 0, 'no stale entries'); +}); + +test('surveyHookCoverage flags a present-but-stale hook path', () => { + const settings = settingsWithCurrentHooks(); + // Repoint one PreToolUse entry at an old plugin-cache version dir — present, + // recognized as ours (description unchanged), but command no longer current. + const bash = settings.hooks.PreToolUse.find(e => e.matcher === 'Bash'); + bash.hooks[0].command = bash.hooks[0].command.replace('/scripts/', '/0.0.1-old/scripts/'); + const cov = surveyHookCoverage(settings); + assert.equal(cov.missing.length, 0, 'entry is present, not missing'); + assert.ok(cov.stale.includes('PreToolUse:Bash'), + `stale Bash path should be flagged; got stale=${JSON.stringify(cov.stale)}`); +}); + +test('surveyHookCoverage flags missing entries when settings empty', () => { + const cov = surveyHookCoverage({}); + assert.ok(cov.missing.length === cov.expected.length, 'all expected entries missing'); + assert.equal(cov.stale.length, 0, 'nothing present to be stale'); +}); diff --git a/src/cli.rs b/src/cli.rs index 51f9988..2c6e9d8 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -551,6 +551,23 @@ impl HealthCheckArgs { } } +/// Recording-side state of the recommend→use conversion metric, surfaced by +/// `stats` and `health-check` so a dark metric is a visible signal rather than +/// silence. `"absent"` = `recommendations.jsonl` missing (the PreToolUse hooks +/// that record recommendations are not active in this project — e.g. it runs a +/// dev `.mcp.json` server with the marketplace plugin disabled, so the metric is +/// structurally dark); `"empty"` = file present, no recommendations yet; +/// `"live"` = recommendations recorded. +pub fn recommendation_metric_state(project_root: &Path) -> &'static str { + let p = project_root.join(CODE_GRAPH_DIR).join("recommendations.jsonl"); + match std::fs::read_to_string(&p) { + Err(_) => "absent", + Ok(c) => { + if aggregate_recommendations_jsonl(&c).total > 0 { "live" } else { "empty" } + } + } +} + /// Run health check and print status, including index freshness. pub fn cmd_health_check(project_root: &Path, format: &str) -> Result<()> { // JSON callers (doctor.js, scripts, MCP UIs) need a parseable response @@ -678,6 +695,7 @@ pub fn cmd_health_check(project_root: &Path, format: &str) -> Result<()> { "embedding_status": embedding_status, "model_available": model_available, "snapshot": snapshot_block, + "conversion_metric": recommendation_metric_state(project_root), }); if let Some(ts) = status.last_indexed_at { json["last_indexed_at"] = serde_json::json!(ts); @@ -706,6 +724,11 @@ pub fn cmd_health_check(project_root: &Path, format: &str) -> Result<()> { status.nodes_count, status.edges_count, status.files_count, age_info ); println!("Snapshot: {}", snapshot_status); + println!("Conversion metric: {}", match recommendation_metric_state(project_root) { + "live" => "live (recommendations recorded)", + "empty" => "active, no recommendations recorded yet", + _ => "DARK (no recommendations.jsonl — PreToolUse hooks not recording here)", + }); } else if !schema_ok { eprintln!( "UNHEALTHY: schema version mismatch (got {}, expected {})", @@ -746,6 +769,11 @@ pub struct UsageSummary { pub versions: std::collections::BTreeSet, pub first_ts: Option, pub last_ts: Option, + /// Recommend→use funnel (per-session, window-joined from `recs` field). + pub sessions_with_deny: u64, + pub sessions_with_deny_and_cg: u64, + pub sessions_with_hint: u64, + pub sessions_with_hint_and_cg: u64, } impl UsageSummary { @@ -754,6 +782,27 @@ impl UsageSummary { } } +/// Code-understanding cg tools the DENY hook steers grep toward. Housekeeping +/// tools (start/stop_watch, get_index_status, rebuild_index) are excluded so the +/// funnel measures real "used cg instead of grep" substitution, not background +/// bookkeeping. Kept in sync by hand with the `src/mcp/tools.rs` registry. +const CG_QUERY_TOOLS: &[&str] = &[ + "get_call_graph", "get_ast_node", "module_overview", "semantic_code_search", + "ast_search", "find_references", "project_map", "impact_analysis", + "trace_http_chain", "dependency_graph", "find_similar_code", "find_dead_code", + "find_http_route", "read_snippet", +]; + +/// Per-session funnel conversion = `num/denom` rounded to 2 decimals, or JSON +/// `null` when the bucket is empty (avoids a misleading 0.0 for "no data"). +fn session_conversion(num: u64, denom: u64) -> serde_json::Value { + if denom == 0 { + serde_json::Value::Null + } else { + serde_json::json!((num as f64 / denom as f64 * 100.0).round() / 100.0) + } +} + /// Parse and aggregate `.code-graph/usage.jsonl` content. /// Pure function: no IO, no panics — malformed lines are counted, not fatal. /// `last_n`: if Some, keep only the last N records before aggregating. @@ -791,6 +840,10 @@ pub fn aggregate_usage_jsonl(content: &str, last_n: Option) -> UsageSumma versions: std::collections::BTreeSet::new(), first_ts: None, last_ts: None, + sessions_with_deny: 0, + sessions_with_deny_and_cg: 0, + sessions_with_hint: 0, + sessions_with_hint_and_cg: 0, }; for rec in &records { @@ -831,6 +884,25 @@ pub fn aggregate_usage_jsonl(content: &str, last_n: Option) -> UsageSumma summary.incr_count += idx.get("incr").and_then(|v| v.as_u64()).unwrap_or(0); summary.files_indexed += idx.get("files").and_then(|v| v.as_u64()).unwrap_or(0); } + // Recommend→use funnel: per-session, did a session that saw a deny/hint + // (window-joined into the `recs` field at flush) also call a cg query tool? + let used_cg = rec.get("tools").and_then(|v| v.as_object()).is_some_and(|tools| { + CG_QUERY_TOOLS.iter().any(|t| { + tools.get(*t).and_then(|s| s.get("n")).and_then(|n| n.as_u64()).unwrap_or(0) > 0 + }) + }); + if let Some(recs) = rec.get("recs") { + let deny = recs.get("deny").and_then(|v| v.as_u64()).unwrap_or(0); + let hint = recs.get("hint").and_then(|v| v.as_u64()).unwrap_or(0); + if deny > 0 { + summary.sessions_with_deny += 1; + if used_cg { summary.sessions_with_deny_and_cg += 1; } + } + if hint > 0 { + summary.sessions_with_hint += 1; + if used_cg { summary.sessions_with_hint_and_cg += 1; } + } + } } summary } @@ -940,9 +1012,14 @@ pub fn cmd_stats(project_root: &Path, args: StatsArgs) -> Result<()> { // has no per-session boundary, so it is aggregated whole (last_n applies only // to usage sessions). Absent file → empty (default) summary. let rec_path = project_root.join(CODE_GRAPH_DIR).join("recommendations.jsonl"); + let rec_exists = rec_path.exists(); let recs = std::fs::read_to_string(&rec_path).ok() .map(|c| aggregate_recommendations_jsonl(&c)) .unwrap_or_default(); + // Recording-side state of the conversion metric, made explicit so a dark + // metric (file absent → PreToolUse hooks not recording here) is never + // silently indistinguishable from "feature absent" or "no data yet". + let rec_state = if recs.total > 0 { "live" } else if rec_exists { "empty" } else { "absent" }; if summary.sessions == 0 { if json_mode { @@ -988,6 +1065,7 @@ pub fn cmd_stats(project_root: &Path, args: StatsArgs) -> Result<()> { "files_indexed": summary.files_indexed, }, "recommendations": { + "state": rec_state, "total": recs.total, "by_action": recs.by_action.iter().map(|(k, v)| (k.clone(), serde_json::json!(v))) .collect::>(), @@ -997,6 +1075,15 @@ pub fn cmd_stats(project_root: &Path, args: StatsArgs) -> Result<()> { "conversion_ratio": if recs.total > 0 { (summary.total_tool_calls() as f64 / recs.total as f64 * 100.0).round() / 100.0 } else { 0.0 }, + // Per-session deny→use / hint→use funnel (window-joined attribution). + "funnel": { + "deny_sessions": summary.sessions_with_deny, + "deny_then_cg": summary.sessions_with_deny_and_cg, + "deny_conversion": session_conversion(summary.sessions_with_deny_and_cg, summary.sessions_with_deny), + "hint_sessions": summary.sessions_with_hint, + "hint_then_cg": summary.sessions_with_hint_and_cg, + "hint_conversion": session_conversion(summary.sessions_with_hint_and_cg, summary.sessions_with_hint), + }, }, })); } else { @@ -1046,8 +1133,8 @@ pub fn cmd_stats(project_root: &Path, args: StatsArgs) -> Result<()> { summary.full_index_count, full_part, summary.incr_count, summary.files_indexed); } + println!(); if recs.total > 0 { - println!(); let actions: Vec = recs.by_action.iter().map(|(k, v)| format!("{v} {k}")).collect(); let ratio = summary.total_tool_calls() as f64 / recs.total as f64; println!("Recommendations: {} emitted ({})", recs.total, actions.join(", ")); @@ -1055,6 +1142,29 @@ pub fn cmd_stats(project_root: &Path, args: StatsArgs) -> Result<()> { // cg tool calls vs hook recommendations. ≪1 = recommendations ignored. println!("Conversion (proxy): {} cg tool calls / {} recommendations = {ratio:.2}", summary.total_tool_calls(), recs.total); + } else if rec_exists { + // File present but empty: hooks are wired and recording, just no + // recommendation has fired yet. + println!("Recommendations: 0 recorded (PreToolUse hooks active; conversion metric live, no data yet)"); + } else { + // No file at all: the recording hooks are not active in this project + // (e.g. a dev `.mcp.json` server with the marketplace plugin's + // PreToolUse hooks disabled). Surface the dark state instead of + // printing nothing — silence reads as "feature absent". + println!("Conversion metric: DARK — no recommendations.jsonl. PreToolUse hooks are not"); + println!(" recording here, so recommend→use conversion cannot be measured in this project."); + } + // Per-session funnel: of sessions that saw a deny/hint, how many also called + // a cg query tool. This is the deny→use attribution the aggregate ratio can't give. + if summary.sessions_with_deny > 0 { + let pct = (summary.sessions_with_deny_and_cg as f64 / summary.sessions_with_deny as f64 * 100.0).round() as u64; + println!("Deny→use: {}/{} deny-sessions also called cg = {}%", + summary.sessions_with_deny_and_cg, summary.sessions_with_deny, pct); + } + if summary.sessions_with_hint > 0 { + let pct = (summary.sessions_with_hint_and_cg as f64 / summary.sessions_with_hint as f64 * 100.0).round() as u64; + println!("Hint→use: {}/{} hint-sessions also called cg = {}%", + summary.sessions_with_hint_and_cg, summary.sessions_with_hint, pct); } } @@ -4214,6 +4324,26 @@ mod tests { assert_eq!(s.last_ts.as_deref(), Some("2026-04-20T10:00:00Z")); } + #[test] + fn test_aggregate_funnel_deny_and_hint_to_use() { + // s1: deny + called cg (converted). s2: deny + NO cg (not converted). + // s3: hint + called cg. s4: no recs (ignored by funnel). s5: deny but only + // a housekeeping tool (get_index_status) → NOT counted as cg use. + let s1 = r#"{"ts":"2026-06-10T10:00:00Z","v":"0.45.4","tools":{"get_call_graph":{"n":1,"ms":5,"err":0,"max_ms":5}},"recs":{"deny":2,"hint":0}}"#; + let s2 = r#"{"ts":"2026-06-10T11:00:00Z","v":"0.45.4","tools":{},"recs":{"deny":1,"hint":1}}"#; + let s3 = r#"{"ts":"2026-06-10T12:00:00Z","v":"0.45.4","tools":{"find_references":{"n":3,"ms":9,"err":0,"max_ms":4}},"recs":{"deny":0,"hint":1}}"#; + let s4 = r#"{"ts":"2026-06-10T13:00:00Z","v":"0.45.4","tools":{"get_call_graph":{"n":1,"ms":5,"err":0,"max_ms":5}}}"#; + let s5 = r#"{"ts":"2026-06-10T14:00:00Z","v":"0.45.4","tools":{"get_index_status":{"n":1,"ms":0,"err":0,"max_ms":0}},"recs":{"deny":1,"hint":0}}"#; + let content = format!("{s1}\n{s2}\n{s3}\n{s4}\n{s5}\n"); + let s = aggregate_usage_jsonl(&content, None); + // deny sessions: s1, s2, s5 = 3; of those, only s1 called a cg query tool. + assert_eq!(s.sessions_with_deny, 3, "s1+s2+s5 saw a deny"); + assert_eq!(s.sessions_with_deny_and_cg, 1, "only s1 called a cg query tool (s5's get_index_status is housekeeping)"); + // hint sessions: s2, s3 = 2; of those, only s3 called cg. + assert_eq!(s.sessions_with_hint, 2); + assert_eq!(s.sessions_with_hint_and_cg, 1); + } + #[test] fn test_version_sort_key_is_numeric_not_lexical() { // Regression: the stats `versions:` list is stored in a BTreeSet (lexical), diff --git a/src/mcp/metrics.rs b/src/mcp/metrics.rs index 490efdd..13c9440 100644 --- a/src/mcp/metrics.rs +++ b/src/mcp/metrics.rs @@ -88,6 +88,10 @@ pub struct SearchMetrics { /// Lightweight session metrics — append-only JSONL flush at session end. pub struct SessionMetrics { start: Instant, + /// Wallclock session-start (ISO 8601 UTC), captured at construction. Used to + /// window-join PreToolUse recommendations (`recommendations.jsonl`) that fired + /// during this session, so the recommend→use funnel can attribute per-session. + started_at: String, tools: HashMap, search: SearchMetrics, pub full_index_ms: Option, @@ -105,6 +109,7 @@ impl SessionMetrics { pub fn new() -> Self { Self { start: Instant::now(), + started_at: iso8601_now(), tools: HashMap::new(), search: SearchMetrics { total_queries: 0, @@ -231,7 +236,19 @@ impl SessionMetrics { // CODE_GRAPH_DOGFOOD=1 tags this session as dev self-test traffic so it // can be filtered out of real-adoption metrics (audit §7). let dogfood = std::env::var("CODE_GRAPH_DOGFOOD").ok().as_deref() == Some("1"); - let record = self.build_record(version, dogfood); + let mut record = self.build_record(version, dogfood); + + // Window-join PreToolUse recommendations that fired during this session so + // the recommend→use funnel can attribute per-session (P#5211). Additive + + // only-when-non-zero: older readers ignore it, success lines stay compact. + if let Some(dir) = usage_path.parent() { + if let Ok(content) = std::fs::read_to_string(dir.join("recommendations.jsonl")) { + let (deny, hint) = count_recs_in_window(&content, &self.started_at); + if deny > 0 || hint > 0 { + record["recs"] = serde_json::json!({ "deny": deny, "hint": hint }); + } + } + } let line = match serde_json::to_string(&record) { Ok(s) => s, @@ -288,6 +305,34 @@ impl SessionMetrics { } } +/// Count PreToolUse recommendation events (`recommendations.jsonl` content) whose +/// `ts` falls inside the current session window `[started_at, ∞)`. Returns +/// `(deny, hint)`. Pure: ISO-8601 UTC strings compare lexicographically at second +/// granularity (a sub-second boundary event may be off by one — accepted per spec). +pub(crate) fn count_recs_in_window(rec_content: &str, started_at: &str) -> (u64, u64) { + let mut deny = 0u64; + let mut hint = 0u64; + for line in rec_content.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + let Ok(v) = serde_json::from_str::(line) else { + continue; + }; + let ts = v.get("ts").and_then(|t| t.as_str()).unwrap_or(""); + if ts < started_at { + continue; + } + match v.get("action").and_then(|a| a.as_str()) { + Some("deny") => deny += 1, + Some("hint") => hint += 1, + _ => {} + } + } + (deny, hint) +} + /// Generate an ISO 8601 timestamp from SystemTime (no chrono dependency). fn iso8601_now() -> String { use std::time::SystemTime; @@ -343,6 +388,26 @@ mod tests { assert!(m.full_index_ms.is_none()); } + #[test] + fn test_count_recs_in_window() { + let content = "\ +{\"ts\":\"2026-06-10T00:00:00Z\",\"hook\":\"grep\",\"action\":\"deny\"} +{\"ts\":\"2026-06-10T01:00:00Z\",\"hook\":\"grep\",\"action\":\"deny\"} +{\"ts\":\"2026-06-10T01:00:05Z\",\"hook\":\"read\",\"action\":\"hint\"} +{\"ts\":\"2026-06-10T01:00:09Z\",\"hook\":\"grep\",\"action\":\"hint\"} +not json +{\"ts\":\"2026-06-10T02:00:00Z\",\"hook\":\"grep\",\"action\":\"deny\"} +"; + // Window starts at 01:00:00 → the 00:00:00 deny is excluded; 2 denies + 2 hints remain. + let (deny, hint) = count_recs_in_window(content, "2026-06-10T01:00:00Z"); + assert_eq!((deny, hint), (2, 2), "in-window deny/hint count, pre-window excluded, malformed skipped"); + + // Window after everything → nothing in range. + assert_eq!(count_recs_in_window(content, "2026-06-10T03:00:00Z"), (0, 0)); + // Empty content → zero, no panic. + assert_eq!(count_recs_in_window("", "2026-06-10T01:00:00Z"), (0, 0)); + } + #[test] fn test_record_tool_call_basic() { let mut m = SessionMetrics::new(); diff --git a/tests/cli_e2e.rs b/tests/cli_e2e.rs index abef4e4..56341c7 100644 --- a/tests/cli_e2e.rs +++ b/tests/cli_e2e.rs @@ -733,6 +733,71 @@ fn test_cli_stats_unknown_flag_errors() { "clap should name the unknown flag; got: {stderr:?}"); } +// Dark-metric visibility: when usage data is present but recommendations.jsonl +// is absent, `stats` must SAY so (the recording hooks aren't active here) rather +// than silently skipping the block — that silence is what hid the dark metric. +#[test] +fn test_cli_stats_recommendations_dark_when_absent() { + let project = setup_indexed_project(); + // One real session so stats reaches the conversion block (sessions==0 bails). + std::fs::write( + project.path().join(code_graph_mcp::domain::CODE_GRAPH_DIR).join("usage.jsonl"), + "{\"ts\":\"2026-06-01T00:00:00Z\",\"v\":\"0.45.4\",\"tools\":{\"get_call_graph\":{\"n\":1,\"ms\":5,\"err\":0,\"max_ms\":5}}}\n", + ).unwrap(); + // No recommendations.jsonl written → dark. + let (stdout, _, code) = run_cli(&project, &["stats"]); + assert_eq!(code, 0); + assert!(stdout.contains("DARK") && stdout.contains("recommendations.jsonl"), + "stats must surface the dark conversion metric when recommendations.jsonl is absent; got: {stdout:?}"); + + let (jstdout, _, jcode) = run_cli(&project, &["stats", "--json"]); + assert_eq!(jcode, 0); + let v: serde_json::Value = serde_json::from_str(jstdout.trim()).unwrap(); + assert_eq!(v["recommendations"]["state"], "absent", + "JSON stats must mark recommendations.state=absent; got: {jstdout:?}"); +} + +#[test] +fn test_cli_stats_recommendations_empty_distinct_from_absent() { + let project = setup_indexed_project(); + let cg = project.path().join(code_graph_mcp::domain::CODE_GRAPH_DIR); + std::fs::write( + cg.join("usage.jsonl"), + "{\"ts\":\"2026-06-01T00:00:00Z\",\"v\":\"0.45.4\",\"tools\":{\"get_call_graph\":{\"n\":1,\"ms\":5,\"err\":0,\"max_ms\":5}}}\n", + ).unwrap(); + // Present but empty → "live but no data", NOT dark. + std::fs::write(cg.join("recommendations.jsonl"), "").unwrap(); + let (jstdout, _, jcode) = run_cli(&project, &["stats", "--json"]); + assert_eq!(jcode, 0); + let v: serde_json::Value = serde_json::from_str(jstdout.trim()).unwrap(); + assert_eq!(v["recommendations"]["state"], "empty", + "an empty recommendations.jsonl is 'empty', distinct from 'absent'; got: {jstdout:?}"); +} + +// Deny→use funnel: stats must print the per-session attribution line when usage +// records carry the window-joined `recs` field. +#[test] +fn test_cli_stats_deny_to_use_funnel() { + let project = setup_indexed_project(); + let cg = project.path().join(code_graph_mcp::domain::CODE_GRAPH_DIR); + // Two deny-sessions, one of which called a cg query tool → 1/2 = 50%. + let s1 = "{\"ts\":\"2026-06-10T10:00:00Z\",\"v\":\"0.45.4\",\"tools\":{\"get_call_graph\":{\"n\":1,\"ms\":5,\"err\":0,\"max_ms\":5}},\"recs\":{\"deny\":1,\"hint\":0}}"; + let s2 = "{\"ts\":\"2026-06-10T11:00:00Z\",\"v\":\"0.45.4\",\"tools\":{},\"recs\":{\"deny\":1,\"hint\":0}}"; + std::fs::write(cg.join("usage.jsonl"), format!("{s1}\n{s2}\n")).unwrap(); + let (stdout, _, code) = run_cli(&project, &["stats"]); + assert_eq!(code, 0); + assert!(stdout.contains("Deny→use: 1/2 deny-sessions also called cg = 50%"), + "stats must print the deny→use funnel; got: {stdout:?}"); + + let (jstdout, _, jcode) = run_cli(&project, &["stats", "--json"]); + assert_eq!(jcode, 0); + let v: serde_json::Value = serde_json::from_str(jstdout.trim()).unwrap(); + let funnel = &v["recommendations"]["funnel"]; + assert_eq!(funnel["deny_sessions"], 2); + assert_eq!(funnel["deny_then_cg"], 1); + assert_eq!(funnel["deny_conversion"], 0.5); +} + // ============================================================ // benchmark (clap-migrated, audit #4) — contract lock // ============================================================ diff --git a/tests/plugin_e2e.rs b/tests/plugin_e2e.rs index dcc5023..17ed86d 100644 --- a/tests/plugin_e2e.rs +++ b/tests/plugin_e2e.rs @@ -22,6 +22,11 @@ fn spawn_server(cwd: &std::path::Path) -> std::process::Child { cwd.join("Cargo.toml"), "[package]\nname = \"e2e-fixture\"\nversion = \"0.0.0\"\nedition = \"2021\"\n", ); + // A `.git` marker anchors `cli::resolve_project_root_from` to this fixture dir + // so the server's metrics flush stays local. Without it, a fixture spawned + // under a real repo (e.g. `target/`, or any TMPDIR nested in a checkout) + // walks up to the ancestor `.git` and pollutes the real `usage.jsonl`. + let _ = std::fs::create_dir_all(cwd.join(".git")); Command::new(binary_path()) .current_dir(cwd) .stdin(Stdio::piped()) @@ -331,3 +336,43 @@ fn test_stdio_unknown_tool() { drop(stdin); let _ = child.wait(); } + +/// Regression: a server spawned in a project dir that lives UNDER another git +/// repo must flush its metrics into ITS OWN `.code-graph`, never walk up to the +/// ancestor repo's `usage.jsonl`. Before `spawn_server` dropped a `.git` marker, +/// `cli::resolve_project_root_from` walked from the bare fixture dir up to the +/// real code-graph-mcp repo's `.git` and appended test metrics (`nonexistent_tool`, +/// `dur_s:0`) into the real `usage.jsonl`, polluting the recommend→use conversion +/// denominator. A `/tmp` TempDir has no ancestor `.git`, so reproducing the leak +/// requires putting the fixture under the repo (`target/` is gitignored). +#[test] +fn test_stdio_metrics_isolated_from_ancestor_repo() { + let base = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("target"); + let dir = base.join(format!("metrics-iso-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + + let mut child = spawn_server(&dir); + let mut stdin = child.stdin.take().unwrap(); + let rx = spawn_reader(child.stdout.take().unwrap()); + + send(&mut stdin, &initialize_msg()); + let _ = read_response(&rx, TIMEOUT).expect("no response to initialize"); + // One (error) tool call makes the session non-empty so flush_metrics writes. + send(&mut stdin, &tool_call_msg(2, "nonexistent_tool", serde_json::json!({}))); + let _ = read_response(&rx, TIMEOUT); + + drop(stdin); + let _ = child.wait(); + + // Metrics must land in the fixture's own .code-graph, proving the server did + // NOT adopt the ancestor repo as its project root. + let local_usage = dir.join(".code-graph").join("usage.jsonl"); + let isolated = local_usage.exists(); + let _ = std::fs::remove_dir_all(&dir); + assert!( + isolated, + "metrics leaked: a server spawned under the repo did not write its own \ + .code-graph/usage.jsonl — resolve_project_root walked up to the ancestor .git" + ); +}