From d03f5641c2a9a1e82e20426428cc4912621799e8 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Sat, 27 Jun 2026 09:44:29 -0700 Subject: [PATCH] fix: trace_path returns full caller set across .d.ts-duplicated symbol nodes Fixes #546 Signed-off-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> --- src/mcp/mcp.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++- tests/test_mcp.c | 197 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 429 insertions(+), 4 deletions(-) diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index d3aa3bf3d..2768ce311 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -2202,6 +2202,205 @@ static yyjson_mut_val *bfs_to_json_array(yyjson_mut_doc *doc, cbm_traverse_resul return arr; } +static bool trace_is_callable_node(const cbm_node_t *n) { + return n && n->label && (strcmp(n->label, "Function") == 0 || strcmp(n->label, "Method") == 0); +} + +static bool trace_is_bodyless_declaration(const cbm_node_t *n) { + return n && n->start_line == n->end_line; +} + +static int trace_label_rank(const cbm_node_t *n) { + if (!n || !n->label) { + return 0; + } + if (strcmp(n->label, "Function") == 0 || strcmp(n->label, "Method") == 0) { + return 3; + } + if (strcmp(n->label, "Route") == 0) { + return 2; + } + if (strcmp(n->label, "Class") == 0 || strcmp(n->label, "Interface") == 0) { + return 1; + } + return 0; +} + +static int trace_node_resolution_score(const cbm_node_t *n) { + int span = n && n->end_line > n->start_line ? n->end_line - n->start_line : 0; + return trace_label_rank(n) * 1000 + span; +} + +static int pick_trace_resolved_node(const cbm_node_t *nodes, int node_count) { + int sel = 0; + int best = trace_node_resolution_score(&nodes[0]); + for (int i = 1; i < node_count; i++) { + int score = trace_node_resolution_score(&nodes[i]); + if (score > best) { + best = score; + sel = i; + } + } + return sel; +} + +static int trace_real_callable_count(const cbm_node_t *nodes, int node_count) { + int count = 0; + for (int i = 0; i < node_count; i++) { + if (trace_is_callable_node(&nodes[i]) && !trace_is_bodyless_declaration(&nodes[i])) { + count++; + } + } + return count; +} + +static char *trace_node_signature_dup(const cbm_node_t *n) { + if (!n || !n->properties_json || n->properties_json[0] == '\0') { + return NULL; + } + + yyjson_doc *doc = yyjson_read(n->properties_json, strlen(n->properties_json), 0); + if (!doc) { + return NULL; + } + yyjson_val *root = yyjson_doc_get_root(doc); + yyjson_val *sig = root && yyjson_is_obj(root) ? yyjson_obj_get(root, "signature") : NULL; + const char *sig_str = sig && yyjson_is_str(sig) ? yyjson_get_str(sig) : NULL; + char *copy = sig_str && sig_str[0] ? heap_strdup(sig_str) : NULL; + yyjson_doc_free(doc); + return copy; +} + +static bool trace_signatures_compatible(const cbm_node_t *a, const cbm_node_t *b) { + char *sig_a = trace_node_signature_dup(a); + char *sig_b = trace_node_signature_dup(b); + bool compatible = true; + if (sig_a && sig_b) { + compatible = strcmp(sig_a, sig_b) == 0; + } + free(sig_a); + free(sig_b); + return compatible; +} + +static bool trace_same_symbol_declaration(const cbm_node_t *canonical, + const cbm_node_t *candidate) { + return canonical && candidate && candidate->id != canonical->id && + trace_is_callable_node(canonical) && trace_is_callable_node(candidate) && + trace_is_bodyless_declaration(candidate) && canonical->name && candidate->name && + strcmp(canonical->name, candidate->name) == 0 && + trace_signatures_compatible(canonical, candidate); +} + +static int collect_trace_root_ids(const cbm_node_t *nodes, int node_count, int selected, + int64_t *root_ids) { + int root_count = 0; + const cbm_node_t *canonical = &nodes[selected]; + root_ids[root_count++] = canonical->id; + + for (int i = 0; i < node_count; i++) { + if (trace_same_symbol_declaration(canonical, &nodes[i])) { + root_ids[root_count++] = nodes[i].id; + } + } + return root_count; +} + +static void trace_copy_node(const cbm_node_t *src, cbm_node_t *dst) { + dst->id = src->id; + dst->project = heap_strdup(src->project); + dst->label = heap_strdup(src->label); + dst->name = heap_strdup(src->name); + dst->qualified_name = heap_strdup(src->qualified_name); + dst->file_path = heap_strdup(src->file_path); + dst->start_line = src->start_line; + dst->end_line = src->end_line; + dst->properties_json = src->properties_json ? heap_strdup(src->properties_json) : NULL; +} + +static void trace_merge_visited(cbm_traverse_result_t *dst, int *dst_cap, + const cbm_traverse_result_t *src) { + enum { TRACE_INIT_CAP = 16, TRACE_GROWTH = 2 }; + + for (int i = 0; i < src->visited_count; i++) { + const cbm_node_hop_t *hop = &src->visited[i]; + bool seen = false; + for (int j = 0; j < dst->visited_count; j++) { + if (dst->visited[j].node.id == hop->node.id) { + if (hop->hop < dst->visited[j].hop) { + dst->visited[j].hop = hop->hop; + } + seen = true; + break; + } + } + if (seen) { + continue; + } + + if (dst->visited_count >= *dst_cap) { + *dst_cap = *dst_cap == 0 ? TRACE_INIT_CAP : *dst_cap * TRACE_GROWTH; + dst->visited = safe_realloc(dst->visited, *dst_cap * sizeof(cbm_node_hop_t)); + } + trace_copy_node(&hop->node, &dst->visited[dst->visited_count].node); + dst->visited[dst->visited_count].hop = hop->hop; + dst->visited_count++; + } +} + +static void trace_bfs_from_roots(cbm_store_t *store, const int64_t *root_ids, int root_count, + const char *direction, const char **edge_types, + int edge_type_count, int depth, cbm_traverse_result_t *merged) { + int merged_cap = 0; + for (int i = 0; i < root_count; i++) { + cbm_traverse_result_t current = {0}; + if (cbm_store_bfs(store, root_ids[i], direction, edge_types, edge_type_count, depth, + MCP_BFS_LIMIT, ¤t) == CBM_STORE_OK) { + trace_merge_visited(merged, &merged_cap, ¤t); + } + cbm_store_traverse_free(¤t); + } +} + +static char *trace_ambiguous_suggestions(const char *input, cbm_node_t *nodes, int count) { + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); + yyjson_mut_val *root = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, root); + + yyjson_mut_obj_add_str(doc, root, "status", "ambiguous"); + yyjson_mut_obj_add_str(doc, root, "error", "trace_ambiguous"); + + char msg[CBM_SZ_512]; + snprintf(msg, sizeof(msg), + "%d real callable matches for \"%s\". Pick a qualified_name from suggestions below.", + trace_real_callable_count(nodes, count), input); + yyjson_mut_obj_add_strcpy(doc, root, "message", msg); + + yyjson_mut_val *arr = yyjson_mut_arr(doc); + for (int i = 0; i < count; i++) { + if (!trace_is_callable_node(&nodes[i]) || trace_is_bodyless_declaration(&nodes[i])) { + continue; + } + yyjson_mut_val *s = yyjson_mut_obj(doc); + yyjson_mut_obj_add_str(doc, s, "qualified_name", + nodes[i].qualified_name ? nodes[i].qualified_name : ""); + yyjson_mut_obj_add_str(doc, s, "name", nodes[i].name ? nodes[i].name : ""); + yyjson_mut_obj_add_str(doc, s, "label", nodes[i].label ? nodes[i].label : ""); + yyjson_mut_obj_add_str(doc, s, "file_path", nodes[i].file_path ? nodes[i].file_path : ""); + yyjson_mut_obj_add_int(doc, s, "start_line", nodes[i].start_line); + yyjson_mut_obj_add_int(doc, s, "end_line", nodes[i].end_line); + yyjson_mut_arr_append(arr, s); + } + yyjson_mut_obj_add_val(doc, root, "suggestions", arr); + + char *json = yy_doc_to_str(doc); + yyjson_mut_doc_free(doc); + + char *result = cbm_mcp_text_result(json, false); + free(json); + return result; +} + static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { char *func_name = cbm_mcp_get_string_arg(args, "function_name"); char *project = cbm_mcp_get_string_arg(args, "project"); @@ -2286,6 +2485,29 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { return cbm_mcp_text_result(hint, true); } + if (node_count > 1 && trace_real_callable_count(nodes, node_count) > 1) { + char *result = trace_ambiguous_suggestions(func_name, nodes, node_count); + free(func_name); + free(project); + free(direction); + free(mode); + free(param_name); + cbm_store_free_nodes(nodes, node_count); + return result; + } + + int selected = pick_trace_resolved_node(nodes, node_count); + int64_t fallback_root_id = nodes[selected].id; + bool root_ids_heap = true; + int64_t *root_ids = malloc((size_t)node_count * sizeof(int64_t)); + int root_count = 1; + if (root_ids) { + root_count = collect_trace_root_ids(nodes, node_count, selected, root_ids); + } else { + root_ids = &fallback_root_id; + root_ids_heap = false; + } + yyjson_mut_doc *doc = yyjson_mut_doc_new(NULL); yyjson_mut_val *root = yyjson_mut_obj(doc); yyjson_mut_doc_set_root(doc, root); @@ -2295,6 +2517,9 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { if (mode) { yyjson_mut_obj_add_str(doc, root, "mode", mode); } + if (root_count > 1) { + yyjson_mut_obj_add_bool(doc, root, "fragmented_symbol", true); + } /* Edge types: explicit > mode-based > default */ const char *edge_types[MCP_COL_16]; @@ -2311,15 +2536,15 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { cbm_traverse_result_t tr_in = {0}; if (do_outbound) { - cbm_store_bfs(store, nodes[0].id, "outbound", edge_types, edge_type_count, depth, - MCP_BFS_LIMIT, &tr_out); + trace_bfs_from_roots(store, root_ids, root_count, "outbound", edge_types, edge_type_count, + depth, &tr_out); yyjson_mut_obj_add_val(doc, root, "callees", bfs_to_json_array(doc, &tr_out, risk_labels, include_tests)); } if (do_inbound) { - cbm_store_bfs(store, nodes[0].id, "inbound", edge_types, edge_type_count, depth, - MCP_BFS_LIMIT, &tr_in); + trace_bfs_from_roots(store, root_ids, root_count, "inbound", edge_types, edge_type_count, + depth, &tr_in); yyjson_mut_obj_add_val(doc, root, "callers", bfs_to_json_array(doc, &tr_in, risk_labels, include_tests)); } @@ -2337,6 +2562,9 @@ static char *handle_trace_call_path(cbm_mcp_server_t *srv, const char *args) { } cbm_store_free_nodes(nodes, node_count); + if (root_ids_heap) { + free(root_ids); + } free(func_name); free(project); free(direction); diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 61c71b897..a24b30b5b 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -532,6 +532,199 @@ TEST(tool_trace_missing_function_name) { PASS(); } +static int64_t insert_trace_test_node(cbm_store_t *st, const char *project, const char *label, + const char *name, const char *qualified_name, + const char *file_path, int start_line, int end_line, + const char *properties_json) { + cbm_node_t n = {0}; + n.project = project; + n.label = label; + n.name = name; + n.qualified_name = qualified_name; + n.file_path = file_path; + n.start_line = start_line; + n.end_line = end_line; + n.properties_json = properties_json; + return cbm_store_upsert_node(st, &n); +} + +static void insert_trace_test_call(cbm_store_t *st, const char *project, int64_t source_id, + int64_t target_id) { + cbm_edge_t e = { + .project = project, .source_id = source_id, .target_id = target_id, .type = "CALLS"}; + cbm_store_insert_edge(st, &e); +} + +static char *call_trace_path(cbm_mcp_server_t *srv, const char *args_json) { + char *raw = cbm_mcp_handle_tool(srv, "trace_path", args_json); + char *text = extract_text_content(raw); + free(raw); + return text; +} + +static int count_occurrences(const char *haystack, const char *needle) { + int count = 0; + const char *p = haystack; + size_t needle_len = strlen(needle); + while (p && (p = strstr(p, needle)) != NULL) { + count++; + p += needle_len; + } + return count; +} + +static cbm_mcp_server_t *setup_trace_fragmented_server(void) { + const char *proj = "trace-dts-fragment"; + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/trace-dts-fragment"); + + const char *sig = "{\"signature\":\"function alignToEdge(edge: Edge): void\"}"; + int64_t impl = insert_trace_test_node(st, proj, "Function", "alignToEdge", + "src.scroll.alignToEdge", "src/scroll.ts", 10, 18, sig); + int64_t shim = + insert_trace_test_node(st, proj, "Function", "alignToEdge", "types.widget-shim.alignToEdge", + "types/widget-shim.d.ts", 3, 3, sig); + int64_t internal = + insert_trace_test_node(st, proj, "Function", "internalConsumer", "src.internalConsumer", + "src/internalConsumer.ts", 20, 25, NULL); + int64_t external = + insert_trace_test_node(st, proj, "Function", "externalConsumer", "src.externalConsumer", + "src/externalConsumer.ts", 30, 35, NULL); + int64_t dual = insert_trace_test_node(st, proj, "Function", "dualConsumer", "src.dualConsumer", + "src/dualConsumer.ts", 40, 45, NULL); + + insert_trace_test_call(st, proj, internal, impl); + insert_trace_test_call(st, proj, external, shim); + insert_trace_test_call(st, proj, dual, impl); + insert_trace_test_call(st, proj, dual, shim); + return srv; +} + +TEST(tool_trace_path_dts_fragmented_callers_union) { + cbm_mcp_server_t *srv = setup_trace_fragmented_server(); + ASSERT_NOT_NULL(srv); + + char *resp = call_trace_path(srv, "{\"function_name\":\"alignToEdge\"," + "\"project\":\"trace-dts-fragment\"," + "\"mode\":\"calls\",\"direction\":\"inbound\",\"depth\":1}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"callers\"")); + ASSERT_NOT_NULL(strstr(resp, "\"fragmented_symbol\":true")); + ASSERT_EQ(count_occurrences(resp, "\"name\":\"internalConsumer\""), 1); + ASSERT_EQ(count_occurrences(resp, "\"name\":\"externalConsumer\""), 1); + ASSERT_EQ(count_occurrences(resp, "\"name\":\"dualConsumer\""), 1); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + +static cbm_mcp_server_t *setup_trace_ambiguous_server(void) { + const char *proj = "trace-ambiguous"; + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/trace-ambiguous"); + insert_trace_test_node(st, proj, "Function", "parse", "src.moduleA.parse", "src/moduleA.ts", 5, + 12, "{\"signature\":\"function parse(input: string): A\"}"); + insert_trace_test_node(st, proj, "Function", "parse", "src.moduleB.parse", "src/moduleB.ts", 20, + 29, "{\"signature\":\"function parse(input: string): B\"}"); + return srv; +} + +TEST(tool_trace_path_same_name_real_functions_stay_ambiguous) { + cbm_mcp_server_t *srv = setup_trace_ambiguous_server(); + ASSERT_NOT_NULL(srv); + + char *resp = call_trace_path(srv, "{\"function_name\":\"parse\"," + "\"project\":\"trace-ambiguous\"," + "\"mode\":\"calls\",\"direction\":\"inbound\",\"depth\":1}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"status\":\"ambiguous\"")); + ASSERT_NOT_NULL(strstr(resp, "trace_ambiguous")); + ASSERT_NOT_NULL(strstr(resp, "src.moduleA.parse")); + ASSERT_NOT_NULL(strstr(resp, "src.moduleB.parse")); + ASSERT_NULL(strstr(resp, "\"callers\"")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + +static cbm_mcp_server_t *setup_trace_single_server(void) { + const char *proj = "trace-single"; + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + if (!srv) { + return NULL; + } + cbm_store_t *st = cbm_mcp_server_store(srv); + if (!st) { + cbm_mcp_server_free(srv); + return NULL; + } + + cbm_mcp_server_set_project(srv, proj); + cbm_store_upsert_project(st, proj, "/tmp/trace-single"); + int64_t target = insert_trace_test_node(st, proj, "Function", "uniqueTarget", + "src.uniqueTarget", "src/unique.ts", 8, 15, NULL); + int64_t caller = + insert_trace_test_node(st, proj, "Function", "singleCaller", "src.singleCaller", + "src/singleCaller.ts", 20, 25, NULL); + insert_trace_test_call(st, proj, caller, target); + return srv; +} + +TEST(tool_trace_path_single_node_unchanged) { + cbm_mcp_server_t *srv = setup_trace_single_server(); + ASSERT_NOT_NULL(srv); + + char *resp = call_trace_path(srv, "{\"function_name\":\"uniqueTarget\"," + "\"project\":\"trace-single\"," + "\"mode\":\"calls\",\"direction\":\"inbound\",\"depth\":1}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"callers\"")); + ASSERT_EQ(count_occurrences(resp, "\"name\":\"singleCaller\""), 1); + ASSERT_NULL(strstr(resp, "\"fragmented_symbol\"")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + +TEST(tool_trace_path_unknown_function_still_not_found) { + cbm_mcp_server_t *srv = setup_trace_single_server(); + ASSERT_NOT_NULL(srv); + + char *resp = call_trace_path(srv, "{\"function_name\":\"missingTarget\"," + "\"project\":\"trace-single\"," + "\"mode\":\"calls\",\"direction\":\"inbound\",\"depth\":1}"); + ASSERT_NOT_NULL(resp); + ASSERT_NOT_NULL(strstr(resp, "\"error\":\"function not found\"")); + ASSERT_NOT_NULL(strstr(resp, "search_graph")); + free(resp); + + cbm_mcp_server_free(srv); + PASS(); +} + TEST(tool_delete_project_not_found) { cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); @@ -2044,6 +2237,10 @@ SUITE(mcp) { /* Tool handlers with validation */ RUN_TEST(tool_trace_call_path_not_found); RUN_TEST(tool_trace_missing_function_name); + RUN_TEST(tool_trace_path_dts_fragmented_callers_union); + RUN_TEST(tool_trace_path_same_name_real_functions_stay_ambiguous); + RUN_TEST(tool_trace_path_single_node_unchanged); + RUN_TEST(tool_trace_path_unknown_function_still_not_found); RUN_TEST(tool_delete_project_not_found); RUN_TEST(tool_get_architecture_empty); RUN_TEST(tool_get_architecture_emits_populated_sections);