From d10701dcedfc206bb82b2d930a1018eefd79b331 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Mon, 29 Jun 2026 01:33:40 +0200 Subject: [PATCH] fix(lsp): seal shared Tier-2 cross-registry against O(n^2) resolve hang The per-file C cross-LSP resolver mutated the finalized, project-wide cross registry that is shared read-only across the parallel resolve workers. Each post-finalize add landed in a tail the hash index does not cover, so every lookup linear-scanned an ever-growing tail -> O(files*defs) on large C codebases. The Linux-kernel full index hung at "[4/9] Resolving" on 11 cores for >6 min and never finished, plus a heap data race across workers. Seal every shared cross-registry: CBMTypeRegistry gains a read_only flag, set by all five {c,py,cs,ts,go}_build_cross_registry builders right after finalize, and cbm_registry_add_func/_type no-op on a sealed registry. One chokepoint guards every language, robust to any resolver mutation site. The plain-C function-registration site is also skipped directly when the registry is shared. Add six cross-language invariant tests (c, c++, py, c#, ts, go): resolving against a finalized shared registry must leave func_count/type_count unchanged -- RED before the fix (C +1, C++ +2 post-finalize adds), GREEN after. Verified: full suite 5720 passed / 0 failed; Linux-kernel full index now completes all 9 phases in 643s (4.88M nodes, 11.9M edges) instead of hanging at phase 4. 
Signed-off-by: Martin Vogel --- internal/cbm/lsp/c_lsp.c | 12 ++- internal/cbm/lsp/cs_lsp.c | 1 + internal/cbm/lsp/go_lsp.c | 1 + internal/cbm/lsp/py_lsp.c | 1 + internal/cbm/lsp/ts_lsp.c | 1 + internal/cbm/lsp/type_registry.c | 8 ++ internal/cbm/lsp/type_registry.h | 11 +++ tests/test_c_lsp.c | 157 +++++++++++++++++++++++++++++++ 8 files changed, 190 insertions(+), 2 deletions(-) diff --git a/internal/cbm/lsp/c_lsp.c b/internal/cbm/lsp/c_lsp.c index f7598c7bd..1050b86c5 100644 --- a/internal/cbm/lsp/c_lsp.c +++ b/internal/cbm/lsp/c_lsp.c @@ -4432,8 +4432,15 @@ static void c_process_body_child(CLSPContext *ctx, TSNode child) { !strstr(func_qn, ctx->current_namespace)) func_qn = cbm_arena_sprintf(ctx->arena, "%s.%s", ctx->current_namespace, fname); - // Only register if not already registered - if (!cbm_registry_lookup_func(ctx->registry, func_qn)) { + // Only register if not already registered. On the + // shared Tier-2 registry, skip entirely: it is + // finalized + read-only, the def is already present + // from the project-wide build, and the lookup + add + // would otherwise hit/grow the post-finalize tail -> + // O(files*defs) on large C codebases (the Linux-kernel + // full-index hang) plus a cross-worker heap race. 
+ if (!ctx->registry_shared && + !cbm_registry_lookup_func(ctx->registry, func_qn)) { const CBMType **rets = (const CBMType **)cbm_arena_alloc( ctx->arena, 2 * sizeof(const CBMType *)); rets[0] = ret_type; @@ -5315,6 +5322,7 @@ CBMTypeRegistry *cbm_c_build_cross_registry(CBMArena *arena, CBMLSPDef *defs, in c_register_lsp_defs(arena, reg, "", d, 1); } cbm_registry_finalize(reg); + reg->read_only = true; /* seal: shared Tier-2 registry is read-only during resolve */ return reg; } diff --git a/internal/cbm/lsp/cs_lsp.c b/internal/cbm/lsp/cs_lsp.c index 077ef499b..bac42378c 100644 --- a/internal/cbm/lsp/cs_lsp.c +++ b/internal/cbm/lsp/cs_lsp.c @@ -2954,6 +2954,7 @@ CBMTypeRegistry *cbm_cs_build_cross_registry(CBMArena *arena, CBMLSPDef *defs, i cs_register_lsp_defs(arena, reg, &defs[i], 1); } cbm_registry_finalize(reg); + reg->read_only = true; /* seal: shared Tier-2 registry is read-only during resolve */ return reg; } diff --git a/internal/cbm/lsp/go_lsp.c b/internal/cbm/lsp/go_lsp.c index 6090b8830..b104a9b83 100644 --- a/internal/cbm/lsp/go_lsp.c +++ b/internal/cbm/lsp/go_lsp.c @@ -2835,6 +2835,7 @@ CBMTypeRegistry* cbm_go_build_cross_registry( } cbm_registry_finalize(reg); + reg->read_only = true; /* seal: shared Tier-2 registry is read-only during resolve */ return reg; } diff --git a/internal/cbm/lsp/py_lsp.c b/internal/cbm/lsp/py_lsp.c index 6741f76e8..83725efad 100644 --- a/internal/cbm/lsp/py_lsp.c +++ b/internal/cbm/lsp/py_lsp.c @@ -3607,6 +3607,7 @@ CBMTypeRegistry *cbm_py_build_cross_registry(CBMArena *arena, CBMLSPDef *defs, i } cbm_registry_finalize(reg); + reg->read_only = true; /* seal: shared Tier-2 registry is read-only during resolve */ return reg; } diff --git a/internal/cbm/lsp/ts_lsp.c b/internal/cbm/lsp/ts_lsp.c index 8ee26ba71..cc233c700 100644 --- a/internal/cbm/lsp/ts_lsp.c +++ b/internal/cbm/lsp/ts_lsp.c @@ -4986,6 +4986,7 @@ CBMTypeRegistry *cbm_ts_build_cross_registry(CBMArena *arena, CBMLSPDef *defs, i ts_register_lsp_defs(arena, reg, d, 
1); } cbm_registry_finalize(reg); + reg->read_only = true; /* seal: shared Tier-2 registry is read-only during resolve */ return reg; } diff --git a/internal/cbm/lsp/type_registry.c b/internal/cbm/lsp/type_registry.c index 2c2b2df89..fc17295b9 100644 --- a/internal/cbm/lsp/type_registry.c +++ b/internal/cbm/lsp/type_registry.c @@ -135,6 +135,10 @@ void cbm_registry_init(CBMTypeRegistry *reg, CBMArena *arena) { } void cbm_registry_add_func(CBMTypeRegistry *reg, CBMRegisteredFunc func) { + if (reg->read_only) { + return; /* sealed Tier-2 shared registry: refuse post-finalize mutation (O(n^2)+race guard) + */ + } if (reg->func_count >= reg->func_cap) { int new_cap = reg->func_cap == 0 ? 64 : reg->func_cap * 2; CBMRegisteredFunc *new_items = (CBMRegisteredFunc *)cbm_arena_alloc( @@ -151,6 +155,10 @@ void cbm_registry_add_func(CBMTypeRegistry *reg, CBMRegisteredFunc func) { } void cbm_registry_add_type(CBMTypeRegistry *reg, CBMRegisteredType type) { + if (reg->read_only) { + return; /* sealed Tier-2 shared registry: refuse post-finalize mutation (O(n^2)+race guard) + */ + } if (reg->type_count >= reg->type_cap) { int new_cap = reg->type_cap == 0 ? 64 : reg->type_cap * 2; CBMRegisteredType *new_items = (CBMRegisteredType *)cbm_arena_alloc( diff --git a/internal/cbm/lsp/type_registry.h b/internal/cbm/lsp/type_registry.h index bf723ed8d..2a4dbaf1d 100644 --- a/internal/cbm/lsp/type_registry.h +++ b/internal/cbm/lsp/type_registry.h @@ -3,6 +3,7 @@ #include "type_rep.h" #include "../arena.h" +#include <stdbool.h> // Decorator-derived flags (Python). Added at struct tail so existing // callers that memset to zero before populating other fields keep working. @@ -99,6 +100,16 @@ typedef struct CBMTypeRegistry { CBMRegistryHashEntry *method_entries; int method_bucket_count; int method_entry_count; + + /* Sealed / read-only. 
Set true by the cbm_X_build_cross_registry builders + * (c/cpp, python, c#, ts, go) right after finalize: a Tier-2 cross-registry + * is built ONCE and shared READ-ONLY across the parallel resolve workers. + * cbm_registry_add_func/_type no-op on a sealed registry, so a per-file + * resolver can never mutate the shared, finalized registry. Without this, + * post-finalize adds accumulate in a tail the hash index does not cover -> + * every lookup linear-scans it -> O(files*defs) (the Linux-kernel full-index + * hang) plus a heap data race across workers. */ + bool read_only; } CBMTypeRegistry; // Initialize a registry. diff --git a/tests/test_c_lsp.c b/tests/test_c_lsp.c index 171a5e838..0202e3c18 100644 --- a/tests/test_c_lsp.c +++ b/tests/test_c_lsp.c @@ -20,6 +20,13 @@ */ #include "test_framework.h" #include "cbm.h" +#include "lsp/c_lsp.h" +#include "lsp/py_lsp.h" +#include "lsp/cs_lsp.h" +#include "lsp/ts_lsp.h" +#include "lsp/go_lsp.h" +#include "lsp/type_registry.h" +#include "arena.h" #include #include @@ -15182,9 +15189,159 @@ TEST(clsp_easy_win_sfinaeconditional_return) { PASS(); } +/* Reproduce-first guard for the Linux-kernel full-index O(n^2) hang. + * + * In `full` mode the pipeline builds ONE project-wide C cross-registry + * (cbm_c_build_cross_registry), FINALIZES it (O(1) hash lookups), then shares it + * READ-ONLY across the parallel resolve workers (ctx.registry_shared = true). + * cbm_run_c_lsp_cross_with_registry must therefore NOT mutate it. + * + * Bug: c_lsp.c:4323 (and 4628/4201/4426) ignore registry_shared and add_func into + * the shared, already-finalized registry. Each post-finalize add lands in a tail + * the hash index does not cover, so lookup_func_self/lookup_method_self linear-scan + * that ever-growing tail on every lookup (type_registry.c:280-286, 186-195). Across + * 89k kernel files doing millions of lookups => O(files * defs) (the >6-min hang), + * plus an 11-thread heap race (c_lsp.c:4146-4152 warns of the SIGSEGV). 
+ * + * INVARIANT (green <=> fixed): resolve leaves the finalized shared registry's + * func_count/type_count unchanged. RED on the unguarded code; GREEN once every + * mutation site honors !registry_shared. */ +TEST(clsp_tier2_shared_registry_readonly_c) { + CBMArena arena; + cbm_arena_init(&arena); + /* stdlib-only project registry, finalized inside the builder */ + CBMTypeRegistry *reg = cbm_c_build_cross_registry(&arena, NULL, 0); + ASSERT_NOT_NULL(reg); + int funcs_before = reg->func_count; + int types_before = reg->type_count; + /* A C translation unit defining functions absent from the shared registry; a + * correct read-only resolve must not register them into it. */ + const char *src = "struct Node { int v; };\n" + "struct Node *make_node(int v);\n" + "int helper(int x) { return x + 1; }\n" + "int caller(void) { return helper(make_node(7)->v); }\n"; + CBMResolvedCallArray out = {0}; + cbm_run_c_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", + /*cpp_mode=*/false, reg, /*include_paths=*/NULL, + /*include_ns_qns=*/NULL, /*include_count=*/0, + /*cached_tree=*/NULL, &out); + ASSERT_EQ(reg->func_count, funcs_before); + ASSERT_EQ(reg->type_count, types_before); + cbm_arena_destroy(&arena); + PASS(); +} + /* ── Suite ─────────────────────────────────────────────────────── */ +/* C++ sibling guard: the same shared-registry read-only invariant for the + * C++-only mutation sites — method registration (c_lsp.c:4760), template + * type-param scan (4558), default-arg min_params scan (4333). RED while those + * sites ignore registry_shared; GREEN once guarded. (Latent O(n^2)+race for large + * C++ codebases like LLVM/bitcoin — not the kernel, which is C.) 
*/ +TEST(clsp_tier2_shared_registry_readonly_cpp) { + CBMArena arena; + cbm_arena_init(&arena); + CBMTypeRegistry *reg = cbm_c_build_cross_registry(&arena, NULL, 0); + ASSERT_NOT_NULL(reg); + int funcs_before = reg->func_count; + int types_before = reg->type_count; + const char *src = "template <typename T> struct Holder { T get(); };\n" + "struct Box { int unwrap(); };\n" + "int Box::unwrap() { return 0; }\n" + "int with_default(int a, int b = 2) { return a + b; }\n" + "int caller(Box *b) { return b->unwrap() + with_default(1); }\n"; + CBMResolvedCallArray out = {0}; + cbm_run_c_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", + /*cpp_mode=*/true, reg, /*include_paths=*/NULL, + /*include_ns_qns=*/NULL, /*include_count=*/0, + /*cached_tree=*/NULL, &out); + ASSERT_EQ(reg->func_count, funcs_before); + ASSERT_EQ(reg->type_count, types_before); + cbm_arena_destroy(&arena); + PASS(); +} + +/* Cross-language check of the same Tier-2 shared-registry read-only invariant for + * the other languages that build a shared cross-registry (py, c#, ts, go). The + * registry-level seal (type_registry.c: add_func/_type no-op when reg->read_only) + * guards all of them at one chokepoint. Each test: build+finalize the shared + * registry, resolve a source, assert func_count/type_count are unchanged. (TS + * deliberately mutates a per-file OVERLAY chained to the base — the base must + * still be untouched.) 
*/ +TEST(seal_py_shared_registry_readonly) { + CBMArena arena; + cbm_arena_init(&arena); + CBMTypeRegistry *reg = cbm_py_build_cross_registry(&arena, NULL, 0); + ASSERT_NOT_NULL(reg); + int fb = reg->func_count, tb = reg->type_count; + const char *src = "def helper(x):\n return x + 1\n\ndef caller():\n return helper(1)\n"; + CBMResolvedCallArray out = {0}; + cbm_run_py_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", reg, NULL, NULL, + 0, NULL, &out); + ASSERT_EQ(reg->func_count, fb); + ASSERT_EQ(reg->type_count, tb); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(seal_cs_shared_registry_readonly) { + CBMArena arena; + cbm_arena_init(&arena); + CBMTypeRegistry *reg = cbm_cs_build_cross_registry(&arena, NULL, 0); + ASSERT_NOT_NULL(reg); + int fb = reg->func_count, tb = reg->type_count; + const char *src = "namespace N { class Box { int Unwrap() { return 0; }\n" + " int Caller() { return Unwrap(); } } }\n"; + CBMResolvedCallArray out = {0}; + cbm_run_cs_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", reg, NULL, 0, + NULL, &out); + ASSERT_EQ(reg->func_count, fb); + ASSERT_EQ(reg->type_count, tb); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(seal_ts_shared_registry_readonly) { + CBMArena arena; + cbm_arena_init(&arena); + CBMTypeRegistry *reg = cbm_ts_build_cross_registry(&arena, NULL, 0); + ASSERT_NOT_NULL(reg); + int fb = reg->func_count, tb = reg->type_count; + const char *src = "class Box { unwrap(): number { return 0; } }\n" + "function caller(b: Box): number { return b.unwrap(); }\n"; + CBMResolvedCallArray out = {0}; + cbm_run_ts_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", false, false, + false, reg, NULL, 0, NULL, NULL, 0, NULL, &out); + ASSERT_EQ(reg->func_count, fb); + ASSERT_EQ(reg->type_count, tb); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(seal_go_shared_registry_readonly) { + CBMArena arena; + cbm_arena_init(&arena); + CBMTypeRegistry *reg = cbm_go_build_cross_registry(&arena, 
NULL, 0); + ASSERT_NOT_NULL(reg); + int fb = reg->func_count, tb = reg->type_count; + const char *src = "package main\nfunc helper(x int) int { return x + 1 }\n" + "func caller() int { return helper(1) }\n"; + CBMResolvedCallArray out = {0}; + cbm_run_go_lsp_cross_with_registry(&arena, src, (int)strlen(src), "test.mod", reg, NULL, NULL, + 0, NULL, &out); + ASSERT_EQ(reg->func_count, fb); + ASSERT_EQ(reg->type_count, tb); + cbm_arena_destroy(&arena); + PASS(); +} + SUITE(c_lsp) { + RUN_TEST(clsp_tier2_shared_registry_readonly_c); + RUN_TEST(clsp_tier2_shared_registry_readonly_cpp); + RUN_TEST(seal_py_shared_registry_readonly); + RUN_TEST(seal_cs_shared_registry_readonly); + RUN_TEST(seal_ts_shared_registry_readonly); + RUN_TEST(seal_go_shared_registry_readonly); RUN_TEST(clsp_simple_var_decl); RUN_TEST(clsp_pointer_arrow); RUN_TEST(clsp_dot_access);