Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 55 additions & 5 deletions src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,17 +485,67 @@ static void cbm_pipeline_process_infra_bindings(cbm_gbuf_t *gbuf, const cbm_file
}
}

static bool is_infra_file(const char *fp) {
return fp != NULL &&
(strstr(fp, ".yaml") != NULL || strstr(fp, ".yml") != NULL ||
strstr(fp, ".tf") != NULL || strstr(fp, ".hcl") != NULL || strstr(fp, ".toml") != NULL);
/* Return a pointer to the final component of `fp`: the text that follows the
 * last '/' or '\\' separator, or `fp` itself when it contains no separator.
 * The result points into `fp`; nothing is copied or allocated.
 * `fp` is dereferenced unconditionally, so it must not be NULL. */
static const char *infra_basename(const char *fp) {
    const char *tail = fp;
    const char *cur = fp;
    while (*cur != '\0') {
        const char ch = *cur++;
        if (ch == '/' || ch == '\\') {
            /* `cur` already sits one past the separator just consumed. */
            tail = cur;
        }
    }
    return tail;
}

/* Report whether `name` ends with `suffix`, compared byte-for-byte
 * (case-sensitive). An empty `suffix` matches every `name`.
 * Both arguments are passed to strlen(), so neither may be NULL. */
static bool infra_ends_with(const char *name, const char *suffix) {
    const size_t name_len = strlen(name);
    const size_t suffix_len = strlen(suffix);
    if (suffix_len > name_len) {
        /* The suffix is longer than the whole name: it cannot match. */
        return false;
    }
    const char *tail = name + (name_len - suffix_len);
    return strcmp(tail, suffix) == 0;
}

/* Decide whether URL string literals found in the file at `fp` should be
 * harvested as infra Route nodes.
 *
 * Only Infrastructure-as-Code files qualify: the basename of `fp` must end in
 * ".tf", ".tf.json" or ".hcl" (Terraform / HCL module sources, backends and
 * provider endpoints). Everything else is rejected, including generic
 * application config (config.yaml), dependency manifests (dependabot.yaml),
 * container orchestration (compose.yaml) and Kubernetes / Kustomize manifests.
 * The URL-like strings those files hold (package registries, JWKS discovery
 * endpoints, upstream hosts, healthcheck shell commands) are not routes the
 * service serves, and harvesting them inflated the Route set that
 * get_architecture and cross-repo matching rely on (issue #521). Structured
 * topic->endpoint bindings in config files still flow through
 * cbm_pipeline_process_infra_bindings().
 *
 * Returns false for a NULL `fp`. */
bool cbm_is_infra_route_source_file(const char *fp) {
    /* Accepted basename suffixes, tried in this order. */
    static const char *const route_suffixes[] = {".tf", ".tf.json", ".hcl"};

    if (!fp) {
        return false;
    }

    /* Match on the last path component only, never on directory names. */
    const char *leaf = infra_basename(fp);
    for (size_t i = 0; i < sizeof(route_suffixes) / sizeof(route_suffixes[0]); i++) {
        if (infra_ends_with(leaf, route_suffixes[i])) {
            return true;
        }
    }
    return false;
}

/* Report whether `value` is a bare URL string, i.e. one that can denote a
 * single network endpoint.
 *
 * A value qualifies when it is non-NULL, non-empty and contains no byte less
 * than or equal to ' ' (space and the ASCII control characters, which include
 * tab and newline). That rejects healthcheck/command strings such as
 * "curl --fail http://localhost:9000/ || exit 1" that merely contain a URL,
 * while query-string URLs (which use '&', ';') are still accepted. */
bool cbm_is_bare_endpoint_url(const char *value) {
    if (value == NULL) {
        return false;
    }

    /* Scan as unsigned bytes so bytes >= 0x80 never compare below ' '. */
    const unsigned char *cur = (const unsigned char *)value;
    if (*cur == '\0') {
        return false;
    }
    while (*cur != '\0') {
        if (*cur <= ' ') {
            return false;
        }
        cur++;
    }
    return true;
}

/* Try to create an infra Route node from one string_ref. */
static void try_upsert_infra_route(cbm_gbuf_t *gbuf, const CBMStringRef *sr, const char *fp) {
if (sr->kind != CBM_STRREF_URL || !sr->value || !strstr(sr->value, "://")) {
return;
}
if (!cbm_is_bare_endpoint_url(sr->value)) {
return;
}
char route_qn[CBM_ROUTE_QN_SIZE];
snprintf(route_qn, sizeof(route_qn), "__route__infra__%s", sr->value);
char route_props[CBM_SZ_512];
Expand All @@ -511,7 +561,7 @@ static void try_upsert_infra_route(cbm_gbuf_t *gbuf, const CBMStringRef *sr, con
static void cbm_pipeline_extract_infra_routes(cbm_gbuf_t *gbuf, const cbm_file_info_t *files,
CBMFileResult **result_cache, int file_count) {
for (int i = 0; i < file_count; i++) {
if (!result_cache[i] || !is_infra_file(files[i].rel_path)) {
if (!result_cache[i] || !cbm_is_infra_route_source_file(files[i].rel_path)) {
continue;
}
for (int si = 0; si < result_cache[i]->string_refs.count; si++) {
Expand Down
12 changes: 12 additions & 0 deletions src/pipeline/pipeline_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,18 @@ bool cbm_is_k8s_manifest(const char *name, const char *content);
bool cbm_is_secret_binding(const char *key, const char *value);
bool cbm_is_secret_value(const char *value);

/* ── Infra Route extraction gating (pipeline.c, issue #521) ───────── */

/* True only for Infrastructure-as-Code files (Terraform / HCL) whose URL string
* literals denote real service endpoints. Generic config, compose, dependabot
* and Kubernetes / Kustomize manifests are excluded so their URL-like strings
* are not harvested into spurious Route nodes. */
bool cbm_is_infra_route_source_file(const char *fp);

/* True when `value` is a non-empty bare URL (no whitespace or control
 * characters), as opposed to a command string that merely embeds a URL. */
bool cbm_is_bare_endpoint_url(const char *value);

/* Clean JSON array brackets from CMD/ENTRYPOINT values.
* E.g. ["./app", "--flag"] → ./app --flag
* Writes result to out (up to out_sz). */
Expand Down
37 changes: 37 additions & 0 deletions tests/test_pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -3422,6 +3422,41 @@ TEST(infra_is_compose_file) {
PASS();
}

/* Issue #521: only Terraform / HCL files are infra-Route sources. Generic
 * config, dependabot, compose and k8s/kustomize manifests must be excluded so
 * their URL-like strings do not become spurious Route nodes.
 * Exercises cbm_is_infra_route_source_file() on accepted paths, rejected
 * paths, a NULL path and a near-miss extension. */
TEST(infra_route_source_file_gate) {
/* Infrastructure-as-Code: URL literals are real endpoints. Covers a bare
 * filename, paths with directory components, and the ".tf.json" variant. */
ASSERT(cbm_is_infra_route_source_file("main.tf"));
ASSERT(cbm_is_infra_route_source_file("infra/backend.tf"));
ASSERT(cbm_is_infra_route_source_file("modules/net/main.tf.json"));
ASSERT(cbm_is_infra_route_source_file("config.hcl"));
/* Config / orchestration / dependency manifests: NOT route sources. */
ASSERT(!cbm_is_infra_route_source_file(".github/dependabot.yaml"));
ASSERT(!cbm_is_infra_route_source_file("config.yaml"));
ASSERT(!cbm_is_infra_route_source_file("compose.yaml"));
ASSERT(!cbm_is_infra_route_source_file("docker-compose.yml"));
ASSERT(!cbm_is_infra_route_source_file("k8s/deployment.yaml"));
ASSERT(!cbm_is_infra_route_source_file("kustomization.yaml"));
ASSERT(!cbm_is_infra_route_source_file("settings.toml"));
/* A NULL path must be reported as "not a route source". */
ASSERT(!cbm_is_infra_route_source_file(NULL));
/* Don't false-match Terraform var files: ".tfvars" contains ".tf" as a
 * substring but does not end with it. */
ASSERT(!cbm_is_infra_route_source_file("prod.tfvars"));
PASS();
}

/* Issue #521: a healthcheck/command string that merely embeds a URL is not a
 * bare endpoint and must not become a Route node.
 * Exercises cbm_is_bare_endpoint_url() on plain URLs, command strings, and
 * empty / NULL input. */
TEST(infra_bare_endpoint_url_gate) {
/* Plain URLs with no whitespace are accepted, including one with a port,
 * a path and a "{id}" placeholder. */
ASSERT(cbm_is_bare_endpoint_url("https://app.terraform.io"));
ASSERT(cbm_is_bare_endpoint_url("http://order-service:8080/v2/orders/{id}"));
/* Shell command strings contain spaces around the URL and are rejected. */
ASSERT(!cbm_is_bare_endpoint_url("curl --fail http://localhost:9000/ || exit 1"));
ASSERT(!cbm_is_bare_endpoint_url("wget http://localhost:8080/health && true"));
/* Empty and NULL values are never endpoints. */
ASSERT(!cbm_is_bare_endpoint_url(""));
ASSERT(!cbm_is_bare_endpoint_url(NULL));
PASS();
}

TEST(infra_is_cloudbuild_file) {
/* Port of TestIsCloudbuildFile (5 cases) */
ASSERT(cbm_is_cloudbuild_file("cloudbuild.yaml"));
Expand Down Expand Up @@ -6254,6 +6289,8 @@ SUITE(pipeline) {
RUN_TEST(compile_commands_parse_invalid);
/* Infrascan helpers */
RUN_TEST(infra_is_compose_file);
RUN_TEST(infra_route_source_file_gate);
RUN_TEST(infra_bare_endpoint_url_gate);
RUN_TEST(infra_is_cloudbuild_file);
RUN_TEST(infra_is_shell_script);
RUN_TEST(infra_is_dockerfile);
Expand Down
Loading