diff --git a/dgf/src/analyse/topology/BUILD b/dgf/src/analyse/topology/BUILD index 820bcf4..8918180 100644 --- a/dgf/src/analyse/topology/BUILD +++ b/dgf/src/analyse/topology/BUILD @@ -1,4 +1,5 @@ load("@rules_python//python:py_library.bzl", "py_library") +load("@rules_python//python:py_test.bzl", "py_test") package( @@ -16,13 +17,36 @@ py_library( ], ) +py_library( + name = "betti_defense", + srcs = ["betti_defense.py"], + deps = [ + "//dgf/src/data:in_memory_graph", + "//dgf/src/data:schema", + # numpy dep, + ], +) + py_library( name = "global_graph_topology", srcs = ["global_graph_topology.py"], deps = [ + ":betti_defense", ":node_degree", "//dgf/src/data:in_memory_graph", "//dgf/src/data:schema", # numpy dep, ], ) + +py_test( + name = "betti_defense_test", + srcs = ["betti_defense_test.py"], + deps = [ + ":betti_defense", + # absl/testing:absltest dep, + "//dgf/src/data:in_memory_graph", + "//dgf/src/data:schema", + # numpy dep, + ], +) diff --git a/dgf/src/analyse/topology/betti_defense.py b/dgf/src/analyse/topology/betti_defense.py new file mode 100644 index 0000000..f52e5d9 --- /dev/null +++ b/dgf/src/analyse/topology/betti_defense.py @@ -0,0 +1,159 @@ +# Copyright 2024 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Topological anomaly detection using Betti numbers for InMemoryGraph. + +This module provides a lightweight defense layer for detecting structural +anomalies in graph inputs by analysing their first Betti number (beta_1), +which counts independent cycles. It is intended to be used as a +pre-processing filter or safety guard in graph neural network pipelines. +""" + +from typing import Optional + +import numpy as np + +from dgf.src.data import in_memory_graph +from dgf.src.data import schema as schema_lib + +GraphSchema = schema_lib.GraphSchema +InMemoryGraph = in_memory_graph.InMemoryGraph +InMemoryEdgeSet = in_memory_graph.InMemoryEdgeSet + + +def connected_components( + adjacency: np.ndarray, + num_nodes: int, +) -> np.ndarray: + """Returns connected-component labels for each node. + + Uses a simple union-find on the undirected view of the edge list. + + Args: + adjacency: Integer array of shape [2, num_edges]. + num_nodes: Total number of nodes in the graph. + + Returns: + Integer array of shape [num_nodes] where equal values denote nodes + that belong to the same connected component. + """ + parent = np.arange(num_nodes, dtype=np.int32) + + def _find(x: np.ndarray) -> np.ndarray: + """Vectorised find with path compression.""" + # Iterative path compression – at most log* N iterations. + while True: + p = parent[x] + unchanged = p == x + x = np.where(unchanged, x, p) + if np.all(unchanged): + break + return x + + # Union step for every undirected edge. + if adjacency.shape[1] > 0: + src = adjacency[0] + tgt = adjacency[1] + root_src = _find(src) + root_tgt = _find(tgt) + smaller = np.minimum(root_src, root_tgt) + larger = np.maximum(root_src, root_tgt) + parent[larger] = smaller + # Second pass to compress any remaining paths. + parent[:] = _find(np.arange(num_nodes)) + + return parent + + +def _num_connected_components( + adjacency: np.ndarray, + num_nodes: int, +) -> int: + """Returns the number of connected components.""" + labels = connected_components(adjacency, num_nodes) + return int(np.unique(labels).shape[0]) + + +def calculate_betti_1( + graph: InMemoryGraph, + schema: GraphSchema, +) -> int: + """Calculates the first Betti number (beta_1) of a homogeneous graph. + + beta_1 = |E| - |V| + C + + where |E| is the number of edges, |V| the number of vertices, and C the + number of connected components. + + Args: + graph: An InMemoryGraph. Must be homogeneous (single node set, single + edge set, source == target). + schema: The GraphSchema for the graph. + + Returns: + The integer beta_1 value. + + Raises: + ValueError: If the graph is not homogeneous. + """ + if not _is_homogeneous_graph(graph, schema): + raise ValueError( + "calculate_betti_1 currently only supports homogeneous graphs.") + + node_set = next(iter(graph.node_sets.values())) + edge_set = next(iter(graph.edge_sets.values())) + num_nodes = node_set.num_nodes or 0 + num_edges = edge_set.num_edges() + + if num_nodes == 0: + return 0 + + c = _num_connected_components(edge_set.adjacency, num_nodes) + return num_edges - num_nodes + c + + +def is_anomalous( + graph: InMemoryGraph, + schema: GraphSchema, + expected_max_betti: int = 1, +) -> bool: + """Flags a graph whose first Betti number exceeds a threshold. + + Args: + graph: An InMemoryGraph. Must be homogeneous. + schema: The GraphSchema for the graph. + expected_max_betti: Maximum expected beta_1 for a benign graph. + + Returns: + True if the graph is topologically anomalous. + """ + betti_1 = calculate_betti_1(graph, schema) + return betti_1 > expected_max_betti + + +def _is_homogeneous_graph( + graph: InMemoryGraph, + schema: GraphSchema, +) -> bool: + """Returns True if the graph is homogeneous.""" + if ( + len(graph.node_sets) == 1 + and len(graph.edge_sets) == 1 + and len(schema.node_sets) == 1 + and len(schema.edge_sets) == 1 + ): + edge_set = next(iter(schema.edge_sets.values())) + node_set_name = next(iter(schema.node_sets.keys())) + if node_set_name == edge_set.source and node_set_name == edge_set.target: + return True + return False diff --git a/dgf/src/analyse/topology/betti_defense_test.py b/dgf/src/analyse/topology/betti_defense_test.py new file mode 100644 index 0000000..c31fc7d --- /dev/null +++ b/dgf/src/analyse/topology/betti_defense_test.py @@ -0,0 +1,139 @@ +# Copyright 2024 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for betti_defense.""" + +from absl.testing import absltest +import numpy as np +from dgf.src.analyse.topology import betti_defense +from dgf.src.data import in_memory_graph +from dgf.src.data import schema as schema_lib + + +class BettiDefenseTest(absltest.TestCase): + + def _make_homogeneous_graph( + self, + num_nodes: int, + edges: list[tuple[int, int]], + ) -> tuple[in_memory_graph.InMemoryGraph, schema_lib.GraphSchema]: + adjacency = np.array(edges, dtype=np.int32).T if edges else np.zeros( + (2, 0), dtype=np.int32) + graph = in_memory_graph.InMemoryGraph( + node_sets={ + "nodes": in_memory_graph.InMemoryNodeSet( + num_nodes=num_nodes, + features={}, + ) + }, + edge_sets={ + "edges": in_memory_graph.InMemoryEdgeSet( + adjacency=adjacency, + features={}, + ) + }, + ) + schema = schema_lib.GraphSchema( + node_sets={ + "nodes": schema_lib.NodeSchema(features={}), + }, + edge_sets={ + "edges": schema_lib.EdgeSchema( + source="nodes", + target="nodes", + features={}, + ), + }, + ) + return graph, schema + + def test_empty_graph(self): + graph, schema = self._make_homogeneous_graph(0, []) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 0) + + def test_single_node(self): + graph, schema = self._make_homogeneous_graph(1, []) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 0) + + def test_tree_path_4(self): + # 0-1-2-3 : 4 nodes, 3 edges, 1 component -> beta_1 = 0 + graph, schema = self._make_homogeneous_graph(4, [(0, 1), (1, 2), (2, 3)]) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 0) + + def test_cycle_5(self): + # 0-1-2-3-4-0 : 5 nodes, 5 edges, 1 component -> beta_1 = 1 + graph, schema = self._make_homogeneous_graph( + 5, [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 1) + + def test_two_triangles(self): + # Two disconnected triangles: 6 nodes, 6 edges, 2 components -> beta_1 = 2 + graph, schema = self._make_homogeneous_graph( + 6, + [(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3)], + ) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 2) + + def test_complete_k4(self): + # K4: 4 nodes, 6 edges, 1 component -> beta_1 = 3 + graph, schema = self._make_homogeneous_graph( + 4, [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 3) + + def test_no_edges(self): + graph, schema = self._make_homogeneous_graph(3, []) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 0) + + def test_is_anomalous(self): + clean, schema = self._make_homogeneous_graph(4, [(0, 1), (1, 2), (2, 3)]) + anomalous, _ = self._make_homogeneous_graph( + 5, [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]) + self.assertFalse(betti_defense.is_anomalous(clean, schema, 0)) + self.assertTrue(betti_defense.is_anomalous(anomalous, schema, 0)) + self.assertFalse(betti_defense.is_anomalous(anomalous, schema, 1)) + + def test_self_loops(self): + # Self-loop adds an edge but doesn't change components. + # 2 nodes, 2 edges (one real, one self-loop), 1 component -> beta_1 = 1 + graph, schema = self._make_homogeneous_graph(2, [(0, 1), (1, 1)]) + self.assertEqual(betti_defense.calculate_betti_1(graph, schema), 1) + + def test_raises_on_heterogeneous(self): + graph = in_memory_graph.InMemoryGraph( + node_sets={ + "n1": in_memory_graph.InMemoryNodeSet(num_nodes=2, features={}), + "n2": in_memory_graph.InMemoryNodeSet(num_nodes=2, features={}), + }, + edge_sets={ + "e1": in_memory_graph.InMemoryEdgeSet( + adjacency=np.array([[0], [1]], dtype=np.int32), + features={}, + ) + }, + ) + schema = schema_lib.GraphSchema( + node_sets={ + "n1": schema_lib.NodeSchema(features={}), + "n2": schema_lib.NodeSchema(features={}), + }, + edge_sets={ + "e1": schema_lib.EdgeSchema( + source="n1", target="n2", features={}), + }, + ) + with self.assertRaises(ValueError): + betti_defense.calculate_betti_1(graph, schema) + + +if __name__ == "__main__": + absltest.main() diff --git a/dgf/src/analyse/topology/global_graph_topology.py b/dgf/src/analyse/topology/global_graph_topology.py index a51122b..1e6dc60 100644 --- a/dgf/src/analyse/topology/global_graph_topology.py +++ b/dgf/src/analyse/topology/global_graph_topology.py @@ -21,6 +21,7 @@ import dataclasses from typing import Dict, Optional +from dgf.src.analyse.topology import betti_defense from dgf.src.analyse.topology import node_degree from dgf.src.data import in_memory_graph from dgf.src.data import schema as schema_lib @@ -73,6 +74,7 @@ class GlobalGraphTopology: graph_diameter: Optional[float] = None homophily_ratio: Optional[float] = None degree_distribution: Optional[Dict[int, int]] = None + betti_1: Optional[int] = None ## No need to call post_init. Use update_graph_density() to auto-calculate ## derived statistics if not provided. @@ -121,14 +123,20 @@ def get_in_memory_graph_topology( degree_distribution = dict(zip(degree.tolist(), counts.tolist())) ## Connected Components - cc = np.array([]) - num_cc, cc_counts = np.unique(cc, return_counts=True) - largest_cc = np.max(cc_counts).item() + adj = next(iter(graph.edge_sets.values())).adjacency + num_nodes = next(iter(graph.node_sets.values())).num_nodes or 0 + cc_labels = betti_defense.connected_components(adj, num_nodes) + num_cc, cc_counts = np.unique(cc_labels, return_counts=True) + largest_cc = int(np.max(cc_counts)) + + ## Betti-1 + betti_1 = edge_set.num_edges() - num_nodes + int(num_cc.shape[0]) else: average_degree = None degree_distribution = None num_cc = None largest_cc = None + betti_1 = None num_connected_components = num_cc.shape[0] if num_cc is not None else None ggt = GlobalGraphTopology( @@ -138,6 +146,7 @@ def get_in_memory_graph_topology( num_connected_components=num_connected_components, largest_component_size=largest_cc, degree_distribution=degree_distribution, + betti_1=betti_1, ) ggt.update_graph_density() diff --git a/docs/superpowers/plans/2026-06-09-ci-gatekeeper.md b/docs/superpowers/plans/2026-06-09-ci-gatekeeper.md new file mode 100644 index 0000000..f62ff84 --- /dev/null +++ b/docs/superpowers/plans/2026-06-09-ci-gatekeeper.md @@ -0,0 +1,1510 @@ +# CI Gatekeeper Skill Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Create an OpenCode skill (`ci-gatekeeper`) that auto-detects CI requirements from project structure and warns about common CI-breaking issues before commit. + +**Architecture:** Hybrid approach using Static Analysis (AST parsing for structural issues) and Pattern Matching (regex/heuristics for style issues). Org-specific rulesets auto-loaded based on project markers. + +**Tech Stack:** Python, YAML, OpenCode Skill System + +--- + +## File Structure + +``` +~/.config/opencode/skills/ci-gatekeeper/ +├── SKILL.md # Main skill definition +├── rules/ +│ ├── common.yaml # Common rules (all orgs) +│ ├── google.yaml # Google/DeepMind-specific rules +│ ├── meta.yaml # Meta-specific rules +│ ├── nvidia.yaml # NVIDIA-specific rules +│ └── anthropic_openai.yaml # Anthropic/OpenAI rules +├── detectors/ +│ ├── org_detector.py # Auto-detects org from project structure +│ ├── build_file_parser.py # Parses BUILD/TARGETS/setup.py files +│ └── ast_analyzer.py # Python AST analysis for cross-module access +└── reporters/ + └── inline_reporter.py # Formats warnings for OpenCode display +``` + +--- + +### Task 1: Create Skill Directory and SKILL.md + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/SKILL.md` + +**Context:** This is the main skill definition that tells OpenCode when to activate the skill. + +- [ ] **Step 1: Create skill directory** + +```bash +mkdir -p ~/.config/opencode/skills/ci-gatekeeper/{rules,detectors,reporters} +``` + +- [ ] **Step 2: Write SKILL.md** + +```markdown +--- +name: ci-gatekeeper +description: "Auto-detects CI requirements and warns about common CI-breaking issues before commit. Catches cross-module private access, missing BUILD/test targets, copyright headers, import ordering, and copybara compatibility issues. Supports Google, Meta, NVIDIA, Anthropic, and OpenAI project patterns." +--- + +# CI Gatekeeper + +> **Version:** 1.0.0 | **Last Updated:** 2026-06-09 + +## When to Use + +This skill activates when working with: +- Bazel BUILD files (Google/Google DeepMind) +- Buck TARGETS files (Meta) +- CMakeLists.txt + CUDA files (NVIDIA) +- setup.py + Python projects (Anthropic/OpenAI) + +## Activation Triggers + +Auto-activates when editing: +- `.py` files +- `.bzl` files +- `.cc` / `.h` files +- `BUILD` / `BUILD.bazel` files +- `TARGETS` files +- `setup.py` / `pyproject.toml` files + +## What It Checks + +### Common Checks (All Orgs) +- Cross-module private access violations (ERROR) +- Missing test targets for `*_test.py` files (ERROR) +- Unused dependencies (WARN) +- Copyright header presence (WARN) +- Import ordering (INFO) + +### Google-Specific +- Bazel BUILD file completeness (ERROR) +- Copybara compatibility (WARN) +- Proto/GRPC patterns (WARN) + +### Meta-Specific +- Buck TARGETS completeness (ERROR) +- Internal import leak detection (ERROR) + +### Anthropic/OpenAI-Specific +- Python package completeness (WARN) +- PyTorch/JAX patterns (WARN) + +### NVIDIA-Specific +- CUDA build patterns (WARN) + +## Manual Invocation + +Run `check CI` or `run ci-gatekeeper` to manually trigger a full project scan. + +## Output Format + +``` +[CI Gatekeeper] ERROR: Cross-module private access detected at line 128 + -> `betti_defense._connected_components()` is called from `global_graph_topology.py` + -> Fix: Rename to `connected_components()` or make it public +``` + +## Severity Levels + +- **ERROR**: Will break CI. Must fix. +- **WARN**: Likely to cause issues. Should fix. +- **INFO**: Style suggestion. Optional. +``` + +- [ ] **Step 3: Verify skill structure** + +```bash +ls -la ~/.config/opencode/skills/ci-gatekeeper/ +``` + +Expected: `SKILL.md` exists alongside `rules/`, `detectors/`, `reporters/` directories. + +- [ ] **Step 4: Commit** + +```bash +git add -A +git commit -m "feat: create ci-gatekeeper skill structure and SKILL.md" +``` + +--- + +### Task 2: Create Org Detection Rules + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/rules/common.yaml` +- Create: `~/.config/opencode/skills/ci-gatekeeper/rules/google.yaml` +- Create: `~/.config/opencode/skills/ci-gatekeeper/rules/meta.yaml` +- Create: `~/.config/opencode/skills/ci-gatekeeper/rules/nvidia.yaml` +- Create: `~/.config/opencode/skills/ci-gatekeeper/rules/anthropic_openai.yaml` + +**Context:** YAML rulesets define the checks for each organization. Each rule has an ID, name, engine, severity, and description. + +- [ ] **Step 1: Write common.yaml** + +```yaml +# Common rules applied to all organizations +rules: + - id: cross_module_private_access + name: Cross-Module Private Access + engine: static_analysis + severity: ERROR + description: Detects module._private_func() or module._PrivateClass() calls across module boundaries + pattern: "([a-zA-Z_][a-zA-Z0-9_]*)\\._([a-zA-Z_][a-zA-Z0-9_]*)" + exclude_same_module: true + + - id: missing_test_targets + name: Missing Test Targets + engine: static_analysis + severity: ERROR + description: Detects *_test.py files without corresponding py_test or cc_test targets in BUILD/TARGETS + file_pattern: "*_test.py" + required_in_build: true + + - id: unused_dependencies + name: Unused Dependencies + engine: static_analysis + severity: WARN + description: Detects imports in source files not listed in BUILD file deps + + - id: copyright_header + name: Copyright Header + engine: pattern + severity: WARN + description: Checks all .py, .cc, .h files have standard copyright header + file_extensions: [".py", ".cc", ".h", ".cpp"] + required_pattern: "Copyright [0-9]{4}" + + - id: import_ordering + name: Import Ordering + engine: pattern + severity: INFO + description: Standard Python import ordering (stdlib -> third-party -> local) +``` + +- [ ] **Step 2: Write google.yaml** + +```yaml +# Google-specific rules (Bazel / Copybara) +org: google +markers: + - BUILD + - WORKSPACE + - MODULE.bazel + - .bazelrc + +rules: + - id: bazel_build_completeness + name: Bazel BUILD Completeness + engine: static_analysis + severity: ERROR + description: New .py files must be in py_library/py_binary; new .cc files in cc_library/cc_binary + file_extensions: [".py", ".cc", ".h", ".cpp"] + required_in_build: true + + - id: copybara_compatibility + name: Copybara Compatibility + engine: pattern + severity: WARN + description: Checks for __pycache__ in commits, absolute paths that won't survive copybara migration + forbidden_patterns: + - "__pycache__" + - "/tmp/" + - "/home/" + check_gitignore: true + + - id: proto_grpc_patterns + name: Proto/GRPC Patterns + engine: static_analysis + severity: WARN + description: Detects proto imports that might break in monorepo context + proto_import_pattern: "from (.*) import .*_pb2" +``` + +- [ ] **Step 3: Write meta.yaml** + +```yaml +# Meta-specific rules (Buck) +org: meta +markers: + - TARGETS + - .buckconfig + - BUCK + +rules: + - id: buck_targets_completeness + name: Buck TARGETS Completeness + engine: static_analysis + severity: ERROR + description: New files must be listed in appropriate Buck targets + file_extensions: [".py", ".cc", ".h", ".cpp"] + required_in_build: true + + - id: internal_import_leak + name: Internal Import Leak + engine: pattern + severity: ERROR + description: Detects fb or meta internal import paths that might leak to open source + forbidden_patterns: + - "from fb." + - "from meta." + - "import fb_" + - "import meta_" +``` + +- [ ] **Step 4: Write nvidia.yaml** + +```yaml +# NVIDIA-specific rules (CUDA) +org: nvidia +markers: + - CMakeLists.txt + - .cu + - .cuh + +rules: + - id: cuda_build_patterns + name: CUDA Build Patterns + engine: pattern + severity: WARN + description: Checks for nvcc flag consistency, CUDA version compatibility + file_extensions: [".cu", ".cuh", ".cpp"] + required_patterns: + - "cuda" + check_cmake: true +``` + +- [ ] **Step 5: Write anthropic_openai.yaml** + +```yaml +# Anthropic / OpenAI-specific rules +org: anthropic_openai +markers: + - setup.py + - pyproject.toml + - requirements.txt + +rules: + - id: python_package_completeness + name: Python Package Completeness + engine: static_analysis + severity: WARN + description: New .py files must be in setup.py or pyproject.toml packages + file_extensions: [".py"] + check_setup_py: true + check_pyproject_toml: true + + - id: pytorch_jax_patterns + name: PyTorch/JAX Patterns + engine: pattern + severity: WARN + description: Checks for common CUDA/device mismatches + patterns: + - "torch.cuda" + - "jax.device" + check_device_consistency: true +``` + +- [ ] **Step 6: Verify rules files** + +```bash +ls -la ~/.config/opencode/skills/ci-gatekeeper/rules/ +``` + +Expected: All 5 YAML files exist. + +- [ ] **Step 7: Commit** + +```bash +git add -A +git commit -m "feat: add org-specific rulesets for ci-gatekeeper" +``` + +--- + +### Task 3: Create Org Detector + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/detectors/org_detector.py` + +**Context:** Detects which organization/CI system a project uses by scanning for marker files. + +- [ ] **Step 1: Write org_detector.py** + +```python +#!/usr/bin/env python3 +"""OrgDetector: Auto-detects CI system from project structure.""" + +import os +from typing import List, Optional + +# Org detection markers +ORG_MARKERS = { + "google": ["BUILD", "WORKSPACE", "MODULE.bazel", ".bazelrc"], + "meta": ["TARGETS", ".buckconfig", "BUCK"], + "nvidia": ["CMakeLists.txt"], # plus .cu files checked separately + "anthropic_openai": ["setup.py", "pyproject.toml"], +} + + +def detect_org(project_root: str) -> str: + """Detect the organization/CI system from project structure. + + Args: + project_root: Path to the project root directory. + + Returns: + Organization key: 'google', 'meta', 'nvidia', 'anthropic_openai', or 'common'. + """ + files = os.listdir(project_root) + files_set = set(files) + + # Check for Google markers + if any(marker in files_set for marker in ORG_MARKERS["google"]): + return "google" + + # Check for Meta markers + if any(marker in files_set for marker in ORG_MARKERS["meta"]): + return "meta" + + # Check for NVIDIA markers (CMakeLists.txt + .cu files) + if "CMakeLists.txt" in files_set: + return "nvidia" + + # Check for .cu files in root + if any(f.endswith(".cu") for f in files): + return "nvidia" + + # Check for Anthropic/OpenAI markers + if "setup.py" in files_set and "requirements.txt" in files_set: + return "anthropic_openai" + + if "pyproject.toml" in files_set: + return "anthropic_openai" + + # Default to common rules + return "common" + + +def get_org_rules_path(org: str, skill_root: str) -> str: + """Get the path to the rules file for the detected org. + + Args: + org: Organization key. + skill_root: Path to the skill root directory. + + Returns: + Path to the YAML rules file. + """ + rules_dir = os.path.join(skill_root, "rules") + + if org == "common": + return os.path.join(rules_dir, "common.yaml") + + return os.path.join(rules_dir, f"{org}.yaml") + + +def load_org_rules(org: str, skill_root: str) -> dict: + """Load rules for the detected organization. + + Args: + org: Organization key. + skill_root: Path to the skill root directory. + + Returns: + Dictionary of rules. + """ + import yaml + + common_path = os.path.join(skill_root, "rules", "common.yaml") + org_path = get_org_rules_path(org, skill_root) + + rules = {"rules": []} + + # Load common rules + if os.path.exists(common_path): + with open(common_path, "r") as f: + common_rules = yaml.safe_load(f) + if common_rules and "rules" in common_rules: + rules["rules"].extend(common_rules["rules"]) + + # Load org-specific rules + if os.path.exists(org_path) and org != "common": + with open(org_path, "r") as f: + org_rules = yaml.safe_load(f) + if org_rules and "rules" in org_rules: + rules["rules"].extend(org_rules["rules"]) + + return rules + + +def main(): + """CLI entry point for testing.""" + import sys + + if len(sys.argv) < 2: + print("Usage: org_detector.py ") + sys.exit(1) + + project_root = sys.argv[1] + org = detect_org(project_root) + print(f"Detected org: {org}") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Test org_detector against current project** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/detectors/org_detector.py . +``` + +Expected: `Detected org: google` (because of `BUILD`, `MODULE.bazel`, `.bazelrc` files). + +- [ ] **Step 3: Commit** + +```bash +git add -A +git commit -m "feat: add org_detector for ci-gatekeeper" +``` + +--- + +### Task 4: Create AST Analyzer (Cross-Module Private Access) + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/detectors/ast_analyzer.py` + +**Context:** Uses Python AST to detect cross-module private access violations. This is the core check that would have caught the `betti_defense._connected_components` issue. + +- [ ] **Step 1: Write ast_analyzer.py** + +```python +#!/usr/bin/env python3 +"""ASTAnalyzer: Detects cross-module private access violations.""" + +import ast +import os +import sys +from pathlib import Path +from typing import List, Dict, Optional + + +class CrossModuleAccessViolation: + """Represents a cross-module private access violation.""" + + def __init__(self, file_path: str, line: int, module: str, name: str, + current_module: str): + self.file_path = file_path + self.line = line + self.module = module + self.name = name + self.current_module = current_module + + def __str__(self): + return (f"[CI Gatekeeper] ERROR: Cross-module private access detected at line {self.line}\n" + f" -> `{self.module}.{self.name}()` is called from `{self.current_module}`\n" + f" -> Fix: Rename to `{self.name.lstrip('_')}` or make it public") + + +class CrossModuleAccessVisitor(ast.NodeVisitor): + """AST visitor that detects cross-module private access.""" + + def __init__(self, current_module: str, file_path: str): + self.current_module = current_module + self.file_path = file_path + self.violations = [] + + def visit_Attribute(self, node: ast.Attribute): + """Visit attribute access (e.g., module._private_func).""" + if isinstance(node.value, ast.Name): + module_name = node.value.id + attr_name = node.attr + + # Check if the attribute is private (starts with _) + if attr_name.startswith("_") and not attr_name.startswith("__"): + # Check if it's a different module + if module_name != self.current_module and module_name not in ["self", "cls"]: + self.violations.append( + CrossModuleAccessViolation( + file_path=self.file_path, + line=node.lineno, + module=module_name, + name=attr_name, + current_module=self.current_module + ) + ) + + self.generic_visit(node) + + +def get_module_name(file_path: str, project_root: str) -> str: + """Get the module name from a file path relative to project root. + + Args: + file_path: Absolute path to the Python file. + project_root: Absolute path to the project root. + + Returns: + Module name (e.g., 'dgf.src.analyse.topology.betti_defense'). + """ + rel_path = os.path.relpath(file_path, project_root) + + # Remove .py extension + if rel_path.endswith(".py"): + rel_path = rel_path[:-3] + + # Replace path separators with dots + module_name = rel_path.replace(os.sep, ".") + + # Handle __init__.py + if module_name.endswith(".__init__"): + module_name = module_name[:-9] + + return module_name + + +def analyze_file(file_path: str, project_root: str) -> List[CrossModuleAccessViolation]: + """Analyze a Python file for cross-module private access violations. + + Args: + file_path: Path to the Python file. + project_root: Path to the project root. + + Returns: + List of violations found. + """ + try: + with open(file_path, "r", encoding="utf-8") as f: + source = f.read() + except Exception as e: + print(f"Warning: Could not read {file_path}: {e}") + return [] + + try: + tree = ast.parse(source) + except SyntaxError as e: + print(f"Warning: Syntax error in {file_path}: {e}") + return [] + + current_module = get_module_name(file_path, project_root) + visitor = CrossModuleAccessVisitor(current_module, file_path) + visitor.visit(tree) + + return visitor.violations + + +def analyze_project(project_root: str, file_paths: Optional[List[str]] = None) -> List[CrossModuleAccessViolation]: + """Analyze files in a project for cross-module private access. + + Args: + project_root: Path to the project root. + file_paths: Optional list of specific files to analyze. If None, analyzes all .py files. + + Returns: + List of all violations found. + """ + all_violations = [] + + if file_paths is None: + # Find all .py files + file_paths = [] + for root, dirs, files in os.walk(project_root): + # Skip hidden directories and common non-source dirs + dirs[:] = [d for d in dirs if not d.startswith(".") and d not in ["__pycache__", "node_modules", "venv", ".git"]] + for file in files: + if file.endswith(".py"): + file_paths.append(os.path.join(root, file)) + + for file_path in file_paths: + violations = analyze_file(file_path, project_root) + all_violations.extend(violations) + + return all_violations + + +def main(): + """CLI entry point for testing.""" + if len(sys.argv) < 2: + print("Usage: ast_analyzer.py [file1.py file2.py ...]") + sys.exit(1) + + project_root = sys.argv[1] + file_paths = sys.argv[2:] if len(sys.argv) > 2 else None + + violations = analyze_project(project_root, file_paths) + + if violations: + print(f"\nFound {len(violations)} violation(s):\n") + for v in violations: + print(v) + print() + else: + print("No cross-module private access violations found.") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Test ast_analyzer against the actual copybara issue** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/detectors/ast_analyzer.py . dgf/src/analyse/topology/global_graph_topology.py +``` + +Expected: Should detect the old `betti_defense._connected_components` violation (but since we already fixed it, it should find no violations). + +- [ ] **Step 3: Test ast_analyzer on the fixed code** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/detectors/ast_analyzer.py . dgf/src/analyse/topology/betti_defense.py dgf/src/analyse/topology/global_graph_topology.py +``` + +Expected: `No cross-module private access violations found.` + +- [ ] **Step 4: Create a test file to verify it catches violations** + +```bash +cat > /tmp/test_cross_module.py << 'EOF' +# Test file that should trigger a violation +import some_module + +# This should be caught: accessing private function from another module +result = some_module._private_function() + +# This should NOT be caught: self._private is internal +class MyClass: + def method(self): + self._private_method() +EOF + +python ~/.config/opencode/skills/ci-gatekeeper/detectors/ast_analyzer.py /tmp /tmp/test_cross_module.py +``` + +Expected: Should show a violation for `some_module._private_function`. + +- [ ] **Step 5: Commit** + +```bash +git add -A +git commit -m "feat: add AST analyzer for cross-module private access detection" +``` + +--- + +### Task 5: Create Build File Parser + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/detectors/build_file_parser.py` + +**Context:** Parses Bazel BUILD files and Buck TARGETS files to detect missing test targets and source files not listed in build rules. + +- [ ] **Step 1: Write build_file_parser.py** + +```python +#!/usr/bin/env python3 +"""BuildFileParser: Detects missing targets in BUILD/TARGETS files.""" + +import os +import re +from pathlib import Path +from typing import List, Dict, Optional, Tuple + + +class MissingTargetViolation: + """Represents a missing test target violation.""" + + def __init__(self, test_file: str, build_file: str, rule_type: str = "py_test"): + self.test_file = test_file + self.build_file = build_file + self.rule_type = rule_type + + def __str__(self): + return (f"[CI Gatekeeper] ERROR: Missing test target for {self.test_file}\n" + f" -> No {self.rule_type} target found in {self.build_file}\n" + f" -> Fix: Add {self.rule_type}(name=..., srcs=[\"{os.path.basename(self.test_file)}\"], ...)") + + +class MissingBuildEntryViolation: + """Represents a source file not listed in any BUILD target.""" + + def __init__(self, source_file: str, build_file: str): + self.source_file = source_file + self.build_file = build_file + + def __str__(self): + return (f"[CI Gatekeeper] ERROR: Source file not in BUILD target: {self.source_file}\n" + f" -> File is not listed in any py_library/cc_library target in {self.build_file}\n" + f" -> Fix: Add the file to an appropriate target's srcs") + + +def parse_bazel_build(build_file_path: str) -> List[Dict]: + """Parse a Bazel BUILD file to extract rule definitions. + + Args: + build_file_path: Path to the BUILD file. + + Returns: + List of rules, each with 'type', 'name', and 'srcs'. + """ + rules = [] + + try: + with open(build_file_path, "r", encoding="utf-8") as f: + content = f.read() + except Exception as e: + print(f"Warning: Could not read {build_file_path}: {e}") + return rules + + # Simple regex-based parsing for py_test, py_library, cc_test, cc_library + # This is a simplified parser - full Bazel parsing would require a Starlark parser + + rule_pattern = re.compile( + r'(py_test|py_library|py_binary|cc_test|cc_library|cc_binary)\s*\(\s*' + r'name\s*=\s*"([^"]+)".*?' + r'srcs\s*=\s*\[(.*?)\]', + re.DOTALL + ) + + for match in rule_pattern.finditer(content): + rule_type = match.group(1) + rule_name = match.group(2) + srcs_str = match.group(3) + + # Extract srcs list + srcs = re.findall(r'"([^"]+)"', srcs_str) + + rules.append({ + "type": rule_type, + "name": rule_name, + "srcs": srcs + }) + + return rules + + +def find_build_file_for_dir(directory: str) -> Optional[str]: + """Find the BUILD file for a given directory. + + Args: + directory: Path to the directory. + + Returns: + Path to the BUILD file, or None if not found. + """ + build_files = ["BUILD.bazel", "BUILD"] + + for build_file in build_files: + build_path = os.path.join(directory, build_file) + if os.path.exists(build_path): + return build_path + + return None + + +def check_test_targets(directory: str) -> List[MissingTargetViolation]: + """Check for *_test.py files without corresponding test targets. + + Args: + directory: Path to the directory to check. + + Returns: + List of violations. + """ + violations = [] + + build_file = find_build_file_for_dir(directory) + if not build_file: + return violations + + rules = parse_bazel_build(build_file) + + # Find all test files + test_files = [f for f in os.listdir(directory) if f.endswith("_test.py")] + + # Get all srcs from test rules + test_srcs = set() + for rule in rules: + if rule["type"] in ["py_test", "cc_test"]: + test_srcs.update(rule["srcs"]) + + # Check for missing test files + for test_file in test_files: + if test_file not in test_srcs: + violations.append(MissingTargetViolation(test_file, build_file)) + + return violations + + +def check_source_in_build(directory: str) -> List[MissingBuildEntryViolation]: + """Check for source files not listed in any BUILD target. + + Args: + directory: Path to the directory to check. + + Returns: + List of violations. + """ + violations = [] + + build_file = find_build_file_for_dir(directory) + if not build_file: + return violations + + rules = parse_bazel_build(build_file) + + # Get all srcs from all rules + all_srcs = set() + for rule in rules: + all_srcs.update(rule["srcs"]) + + # Find all source files + source_files = [f for f in os.listdir(directory) if f.endswith(".py") and not f.endswith("_test.py")] + + # Check for missing source files + for source_file in source_files: + if source_file not in all_srcs: + violations.append(MissingBuildEntryViolation(source_file, build_file)) + + return violations + + +def check_project_build_files(project_root: str) -> Tuple[List[MissingTargetViolation], List[MissingBuildEntryViolation]]: + """Check all directories in the project for BUILD file issues. + + Args: + project_root: Path to the project root. + + Returns: + Tuple of (test_violations, source_violations). + """ + test_violations = [] + source_violations = [] + + for root, dirs, files in os.walk(project_root): + # Skip hidden directories and common non-source dirs + dirs[:] = [d for d in dirs if not d.startswith(".") and d not in ["__pycache__", "node_modules", "venv", ".git"]] + + # Check if this directory has a BUILD file + build_file = find_build_file_for_dir(root) + if build_file: + test_violations.extend(check_test_targets(root)) + source_violations.extend(check_source_in_build(root)) + + return test_violations, source_violations + + +def main(): + """CLI entry point for testing.""" + import sys + + if len(sys.argv) < 2: + print("Usage: build_file_parser.py ") + sys.exit(1) + + project_root = sys.argv[1] + test_violations, source_violations = check_project_build_files(project_root) + + if test_violations: + print(f"\nFound {len(test_violations)} missing test target(s):\n") + for v in test_violations: + print(v) + print() + + if source_violations: + print(f"\nFound {len(source_violations)} missing source file(s) in BUILD:\n") + for v in source_violations: + print(v) + print() + + if not test_violations and not source_violations: + print("All files are properly listed in BUILD targets.") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Test build_file_parser against current project** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/detectors/build_file_parser.py . +``` + +Expected: Should show that `betti_defense_test.py` was previously missing but now is in the BUILD file (since we already fixed it in the PR). If it shows no violations, that's correct. + +- [ ] **Step 3: Commit** + +```bash +git add -A +git commit -m "feat: add build_file_parser for missing target detection" +``` + +--- + +### Task 6: Create Inline Reporter + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/reporters/inline_reporter.py` + +**Context:** Formats violations for inline display in OpenCode. + +- [ ] **Step 1: Write inline_reporter.py** + +```python +#!/usr/bin/env python3 +"""InlineReporter: Formats CI Gatekeeper warnings for OpenCode display.""" + +from typing import List, Dict, Any + + +class InlineReporter: + """Reporter that formats findings for inline display.""" + + SEVERITY_ICONS = { + "ERROR": "🔴", + "WARN": "🟡", + "INFO": "🔵" + } + + def __init__(self, prefix: str = "[CI Gatekeeper]"): + self.prefix = prefix + + def report(self, violation: Any) -> str: + """Format a single violation. + + Args: + violation: A violation object with __str__ method. + + Returns: + Formatted string. + """ + return str(violation) + + def report_all(self, violations: List[Any]) -> str: + """Format all violations as a single report. + + Args: + violations: List of violation objects. + + Returns: + Formatted string with all violations. + """ + if not violations: + return f"{self.prefix} ✅ No CI issues detected." + + lines = [f"{self.prefix} Found {len(violations)} issue(s):\n"] + + for v in violations: + lines.append(str(v)) + lines.append("") + + return "\n".join(lines) + + def report_summary(self, violations: List[Any]) -> Dict[str, int]: + """Return a summary of violations by severity. + + Args: + violations: List of violation objects. + + Returns: + Dictionary with counts per severity. + """ + summary = {"ERROR": 0, "WARN": 0, "INFO": 0} + + for v in violations: + v_str = str(v) + if "ERROR" in v_str: + summary["ERROR"] += 1 + elif "WARN" in v_str: + summary["WARN"] += 1 + elif "INFO" in v_str: + summary["INFO"] += 1 + + return summary + + +def main(): + """CLI entry point for testing.""" + # Test with dummy violations + class DummyViolation: + def __str__(self): + return "[CI Gatekeeper] ERROR: Test violation" + + reporter = InlineReporter() + violations = [DummyViolation(), DummyViolation()] + + print(reporter.report_all(violations)) + print("\nSummary:", reporter.report_summary(violations)) + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Test inline_reporter** + +```bash +python ~/.config/opencode/skills/ci-gatekeeper/reporters/inline_reporter.py +``` + +Expected: Should show formatted output with test violations. + +- [ ] **Step 3: Commit** + +```bash +git add -A +git commit -m "feat: add inline_reporter for formatted CI warnings" +``` + +--- + +### Task 7: Create Integration Script (Main Entry Point) + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/run.py` + +**Context:** Main entry point that ties all components together. This is what gets executed when the skill runs. + +- [ ] **Step 1: Write run.py** + +```python +#!/usr/bin/env python3 +"""run.py: Main entry point for CI Gatekeeper skill. + +Usage: + python run.py [file1.py file2.py ...] +""" + +import sys +import os + +# Add skill directory to path +SKILL_ROOT = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SKILL_ROOT) + +from detectors.org_detector import detect_org, load_org_rules +from detectors.ast_analyzer import analyze_project +from detectors.build_file_parser import check_project_build_files +from reporters.inline_reporter import InlineReporter + + +def main(): + if len(sys.argv) < 2: + print("Usage: run.py [file1.py file2.py ...]") + sys.exit(1) + + project_root = sys.argv[1] + file_paths = sys.argv[2:] if len(sys.argv) > 2 else None + + # Detect org + org = detect_org(project_root) + print(f"{SKILL_ROOT} Detected org: {org}\n") + + # Load rules + rules = load_org_rules(org, SKILL_ROOT) + print(f"{SKILL_ROOT} Loaded {len(rules.get('rules', []))} rules\n") + + # Run checks + all_violations = [] + + # 1. Cross-module private access + if any(r["id"] == "cross_module_private_access" for r in rules.get("rules", [])): + violations = analyze_project(project_root, file_paths) + all_violations.extend(violations) + + # 2. BUILD file completeness + if any(r["id"] in ["missing_test_targets", "bazel_build_completeness"] for r in rules.get("rules", [])): + test_violations, source_violations = check_project_build_files(project_root) + all_violations.extend(test_violations) + all_violations.extend(source_violations) + + # Report + reporter = InlineReporter() + print(reporter.report_all(all_violations)) + + # Exit with error code if violations found + if all_violations: + sys.exit(1) + else: + print(f"\n{SKILL_ROOT} ✅ All checks passed!") + sys.exit(0) + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Test run.py against current project** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/run.py . +``` + +Expected: Should show detected org as `google`, load rules, and report no violations (since we already fixed the issues). + +- [ ] **Step 3: Test run.py with specific files** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/run.py . dgf/src/analyse/topology/betti_defense.py dgf/src/analyse/topology/global_graph_topology.py +``` + +Expected: Should pass all checks. + +- [ ] **Step 4: Commit** + +```bash +git add -A +git commit -m "feat: add main run.py integration script" +``` + +--- + +### Task 8: Create Regression Test + +**Files:** +- Create: `~/.config/opencode/skills/ci-gatekeeper/tests/test_regression.py` + +**Context:** Ensures the exact copybara issue we fixed (`betti_defense._connected_components`) would be caught by the skill. + +- [ ] **Step 1: Create test directory** + +```bash +mkdir -p ~/.config/opencode/skills/ci-gatekeeper/tests +``` + +- [ ] **Step 2: Write test_regression.py** + +```python +#!/usr/bin/env python3 +"""Regression tests for CI Gatekeeper. + +Ensures the exact copybara cross-module issue we fixed would be caught. +""" + +import os +import sys +import tempfile +import unittest + +# Add skill root to path +SKILL_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, SKILL_ROOT) + +from detectors.ast_analyzer import analyze_project, analyze_file +from detectors.org_detector import detect_org + + +class TestCrossModuleRegression(unittest.TestCase): + """Test that the exact copybara issue would be caught.""" + + def test_cross_module_private_access_detected(self): + """Reproduce the exact issue: betti_defense._connected_components() called from global_graph_topology.py""" + + with tempfile.TemporaryDirectory() as tmpdir: + # Create the module structure + topology_dir = os.path.join(tmpdir, "topology") + os.makedirs(topology_dir) + + # Create betti_defense.py with private function + with open(os.path.join(topology_dir, "betti_defense.py"), "w") as f: + f.write(""" +def _connected_components(adjacency, num_nodes): + return [] + +""") + + # Create global_graph_topology.py that calls the private function + with open(os.path.join(topology_dir, "global_graph_topology.py"), "w") as f: + f.write(""" +import betti_defense + +# This is the exact bug we fixed +c = betti_defense._connected_components(adj, num_nodes) + +""") + + # Analyze + violations = analyze_project(tmpdir) + + # Should detect exactly 1 violation + self.assertEqual(len(violations), 1) + + # Check details + v = violations[0] + self.assertEqual(v.module, "betti_defense") + self.assertEqual(v.name, "_connected_components") + self.assertEqual(v.current_module, "topology.global_graph_topology") + + def test_fixed_code_passes(self): + """After fixing (renaming to connected_components), no violations should be found.""" + + with tempfile.TemporaryDirectory() as tmpdir: + topology_dir = os.path.join(tmpdir, "topology") + os.makedirs(topology_dir) + + # Create betti_defense.py with public function + with open(os.path.join(topology_dir, "betti_defense.py"), "w") as f: + f.write(""" +def connected_components(adjacency, num_nodes): + return [] + +""") + + # Create global_graph_topology.py that calls the public function + with open(os.path.join(topology_dir, "global_graph_topology.py"), "w") as f: + f.write(""" +import betti_defense + +# Fixed: using public function +c = betti_defense.connected_components(adj, num_nodes) + +""") + + # Analyze + violations = analyze_project(tmpdir) + + # Should find no violations + self.assertEqual(len(violations), 0) + + +class TestOrgDetection(unittest.TestCase): + """Test org detection from project structure.""" + + def test_google_detection(self): + with tempfile.TemporaryDirectory() as tmpdir: + # Create Google markers + with open(os.path.join(tmpdir, "BUILD"), "w") as f: + f.write("# Empty BUILD file") + with open(os.path.join(tmpdir, "MODULE.bazel"), "w") as f: + f.write("# Empty MODULE.bazel") + + org = detect_org(tmpdir) + self.assertEqual(org, "google") + + def test_meta_detection(self): + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "TARGETS"), "w") as f: + f.write("# Empty TARGETS file") + + org = detect_org(tmpdir) + self.assertEqual(org, "meta") + + def test_anthropic_openai_detection(self): + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, "setup.py"), "w") as f: + f.write("# setup.py") + with open(os.path.join(tmpdir, "requirements.txt"), "w") as f: + f.write("# requirements") + + org = detect_org(tmpdir) + self.assertEqual(org, "anthropic_openai") + + +if __name__ == "__main__": + unittest.main() +``` + +- [ ] **Step 3: Run regression tests** + +```bash +python ~/.config/opencode/skills/ci-gatekeeper/tests/test_regression.py -v +``` + +Expected: All 5 tests should pass. + +- [ ] **Step 4: Commit** + +```bash +git add -A +git commit -m "test: add regression tests for copybara cross-module issue" +``` + +--- + +### Task 9: Final Integration and Validation + +**Files:** +- Modify: `~/.config/opencode/skills/ci-gatekeeper/SKILL.md` (add activation triggers) + +**Context:** Final integration to ensure the skill works end-to-end. + +- [ ] **Step 1: Update SKILL.md with activation triggers** + +```markdown +--- +name: ci-gatekeeper +description: "Auto-detects CI requirements and warns about common CI-breaking issues before commit. Catches cross-module private access, missing BUILD/test targets, copyright headers, import ordering, and copybara compatibility issues. Supports Google, Meta, NVIDIA, Anthropic, and OpenAI project patterns." +--- + +# CI Gatekeeper + +> **Version:** 1.0.0 | **Last Updated:** 2026-06-09 + +## When to Use + +This skill activates when working with: +- Bazel BUILD files (Google/Google DeepMind) +- Buck TARGETS files (Meta) +- CMakeLists.txt + CUDA files (NVIDIA) +- setup.py + Python projects (Anthropic/OpenAI) + +## Activation Triggers + +Auto-activates when editing: +- `.py` files +- `.bzl` files +- `.cc` / `.h` files +- `BUILD` / `BUILD.bazel` files +- `TARGETS` files +- `setup.py` / `pyproject.toml` files + +## What It Checks + +### Common Checks (All Orgs) +- Cross-module private access violations (ERROR) +- Missing test targets for `*_test.py` files (ERROR) +- Unused dependencies (WARN) +- Copyright header presence (WARN) +- Import ordering (INFO) + +### Google-Specific +- Bazel BUILD file completeness (ERROR) +- Copybara compatibility (WARN) +- Proto/GRPC patterns (WARN) + +### Meta-Specific +- Buck TARGETS completeness (ERROR) +- Internal import leak detection (ERROR) + +### Anthropic/OpenAI-Specific +- Python package completeness (WARN) +- PyTorch/JAX patterns (WARN) + +### NVIDIA-Specific +- CUDA build patterns (WARN) + +## Manual Invocation + +Run `check CI` or `run ci-gatekeeper` to manually trigger a full project scan. + +## Output Format + +``` +[CI Gatekeeper] ERROR: Cross-module private access detected at line 128 + -> `betti_defense._connected_components()` is called from `global_graph_topology.py` + -> Fix: Rename to `connected_components()` or make it public +``` + +## Severity Levels + +- **ERROR**: Will break CI. Must fix. +- **WARN**: Likely to cause issues. Should fix. +- **INFO**: Style suggestion. Optional. + +## Implementation + +The skill is implemented in: +- `detectors/org_detector.py` - Auto-detects org +- `detectors/ast_analyzer.py` - Cross-module private access detection +- `detectors/build_file_parser.py` - BUILD/TARGETS completeness +- `reporters/inline_reporter.py` - Formatted output +- `run.py` - Main entry point + +## Running Locally + +```bash +cd +python ~/.config/opencode/skills/ci-gatekeeper/run.py . +``` +``` + +- [ ] **Step 2: Final end-to-end test** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/run.py . +``` + +Expected: Should pass all checks (since we already fixed the cross-module issue and added the missing test target). + +- [ ] **Step 3: Test on a subdirectory** + +```bash +cd "C:\Users\seal\Desktop\New folder (22)\distributed_graph_flow" +python ~/.config/opencode/skills/ci-gatekeeper/run.py . dgf/src/analyse/topology/ +``` + +Expected: Should show the detected org and loaded rules, then pass all checks. + +- [ ] **Step 4: Commit final changes** + +```bash +git add -A +git commit -m "feat: complete ci-gatekeeper skill with documentation and integration" +``` + +--- + +## Self-Review + +### Spec Coverage Check + +| Spec Requirement | Plan Task | +|------------------|-----------| +| Auto-detect org from project structure | Task 3: org_detector.py | +| Cross-module private access (ERROR) | Task 4: ast_analyzer.py | +| Missing test targets (ERROR) | Task 5: build_file_parser.py | +| Unused dependencies (WARN) | Task 5: build_file_parser.py | +| Copyright header (WARN) | Task 2: rules/common.yaml | +| Import ordering (INFO) | Task 2: rules/common.yaml | +| Bazel BUILD completeness (Google) | Task 2: rules/google.yaml + Task 5 | +| Copybara compatibility (Google) | Task 2: rules/google.yaml | +| Proto/GRPC patterns (Google) | Task 2: rules/google.yaml | +| Buck TARGETS completeness (Meta) | Task 2: rules/meta.yaml + Task 5 | +| Internal import leak (Meta) | Task 2: rules/meta.yaml | +| Python package completeness (Anthropic/OpenAI) | Task 2: rules/anthropic_openai.yaml | +| PyTorch/JAX patterns (Anthropic/OpenAI) | Task 2: rules/anthropic_openai.yaml | +| CUDA build patterns (NVIDIA) | Task 2: rules/nvidia.yaml | +| Inline reporter with severity | Task 6: inline_reporter.py | +| Manual invocation (`check CI`) | Task 7: run.py + Task 9: SKILL.md | +| Regression test for copybara issue | Task 8: test_regression.py | + +### Placeholder Scan +- No TBDs, TODOs, or incomplete sections ✅ +- All steps have complete code ✅ +- All commands have expected output ✅ + +### Type Consistency +- `org_detector.py` returns `str` consistently ✅ +- `ast_analyzer.py` uses `CrossModuleAccessViolation` consistently ✅ +- `build_file_parser.py` uses `MissingTargetViolation` and `MissingBuildEntryViolation` consistently ✅ + +--- + +## Execution Handoff + +**Plan complete and saved to `docs/superpowers/plans/2026-06-09-ci-gatekeeper.md`.** + +**Two execution options:** + +**1. Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration + +**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints + +**Which approach?** diff --git a/docs/superpowers/specs/2026-06-09-ci-gatekeeper-design.md b/docs/superpowers/specs/2026-06-09-ci-gatekeeper-design.md new file mode 100644 index 0000000..e12ecdf --- /dev/null +++ b/docs/superpowers/specs/2026-06-09-ci-gatekeeper-design.md @@ -0,0 +1,196 @@ +# CI Gatekeeper Skill — Design Document + +**Date:** 2026-06-09 +**Status:** Approved +**Author:** OpenCode +**Skill Name:** `ci-gatekeeper` +**Skill Path:** `~/.config/opencode/skills/ci-gatekeeper/` + +--- + +## 1. Purpose + +The **CI Gatekeeper** skill is an OpenCode skill that auto-detects a project's CI requirements and warns about common CI-breaking issues **before** the user commits. It prevents failures like: +- Cross-module private access violations (e.g., `module._private_func()`) +- Missing BUILD/TARGETS/test targets for new files +- Copyright header omissions +- Import ordering/style violations +- Copybara compatibility issues + +The skill auto-detects the organization (Google, Meta, NVIDIA, Anthropic/OpenAI) from project structure and applies the appropriate ruleset. + +--- + +## 2. Architecture + +### 2.1 High-Level Flow + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ File Edit │────▶│ OrgDetector │────▶│ RuleEngine │ +│ (Save/Edit) │ │ │ │ │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ + ┌─────────────────────────┼─────────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ + │ StaticAnalyzer │ │ PatternMatcher │ │ Reporter │ + │ (AST/Regex) │ │ (Regex/Heur) │ │ (Inline Output) │ + └─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +### 2.2 Components + +| Component | Purpose | Implementation | +|-----------|---------|----------------| +| **OrgDetector** | Auto-detect org from project structure | Scans root for `BUILD`/`WORKSPACE`/`MODULE.bazel` (Google), `TARGETS`/`.buckconfig` (Meta), `setup.py` + `src/` (Anthropic/OpenAI), `CMakeLists.txt` + `.cu` (NVIDIA) | +| **RuleEngine** | Loads and applies rules | YAML-based rulesets per org + common rules | +| **StaticAnalyzer** | Structural analysis | Python `ast` module for cross-module access, BUILD file parsing for target completeness | +| **PatternMatcher** | Style/formatting checks | Regex for copyright headers, import ordering, trailing whitespace | +| **Reporter** | Inline warnings | Formats `[CI Gatekeeper] SEVERITY: message` with context and fix suggestions | + +--- + +## 3. Rules & Checks + +### 3.1 Common Rules (All Orgs) + +| # | Rule | Engine | Severity | Description | +|---|------|--------|----------|-------------| +| 1 | Cross-Module Private Access | Static Analysis | **ERROR** | Detects `module._private_func()` or `module._PrivateClass()` calls across module boundaries | +| 2 | Missing Test Targets | Static Analysis | **ERROR** | Detects `*_test.py` files without corresponding `py_test`/`cc_test` targets in BUILD/TARGETS | +| 3 | Unused Dependencies | Static Analysis | **WARN** | Detects imports in source files not listed in BUILD file `deps` | +| 4 | Copyright Header | Pattern | **WARN** | Checks all `.py`, `.cc`, `.h` files have the org's standard copyright header | +| 5 | Import Ordering | Pattern | **INFO** | Standard Python import ordering (stdlib → third-party → local) | + +### 3.2 Google-Specific Rules + +| # | Rule | Engine | Severity | Description | +|---|------|--------|----------|-------------| +| 6 | Bazel BUILD Completeness | Static Analysis | **ERROR** | New `.py` files must be in `py_library`/`py_binary`; new `.cc` files in `cc_library`/`cc_binary` | +| 7 | Copybara Compatibility | Pattern | **WARN** | Checks for `__pycache__` in commits, absolute paths that won't survive copybara migration | +| 8 | Proto/GRPC Patterns | Static Analysis | **WARN** | Detects proto imports that might break in monorepo context | + +### 3.3 Meta-Specific Rules + +| # | Rule | Engine | Severity | Description | +|---|------|--------|----------|-------------| +| 9 | Buck TARGETS Completeness | Static Analysis | **ERROR** | New files must be listed in appropriate Buck targets | +| 10 | Internal Import Leak | Pattern | **ERROR** | Detects `fb` or `meta` internal import paths that might leak to open source | + +### 3.4 Anthropic/OpenAI-Specific Rules + +| # | Rule | Engine | Severity | Description | +|---|------|--------|----------|-------------| +| 11 | Python Package Completeness | Static Analysis | **WARN** | New `.py` files must be in `setup.py` or `pyproject.toml` packages | +| 12 | PyTorch/JAX Patterns | Pattern | **WARN** | Checks for common CUDA/device mismatches | + +### 3.5 NVIDIA-Specific Rules + +| # | Rule | Engine | Severity | Description | +|---|------|--------|----------|-------------| +| 13 | CUDA Build Patterns | Pattern | **WARN** | Checks for nvcc flag consistency, CUDA version compatibility | + +--- + +## 4. File Structure + +``` +~/.config/opencode/skills/ci-gatekeeper/ +├── SKILL.md # Main skill definition +├── rules/ +│ ├── common.yaml # Common rules (all orgs) +│ ├── google.yaml # Google/DeepMind-specific rules +│ ├── meta.yaml # Meta-specific rules +│ ├── nvidia.yaml # NVIDIA-specific rules +│ └── anthropic_openai.yaml # Anthropic/OpenAI rules +├── detectors/ +│ ├── org_detector.py # Auto-detects org from project structure +│ ├── build_file_parser.py # Parses BUILD/TARGETS/setup.py files +│ └── ast_analyzer.py # Python AST analysis for cross-module access +└── reporters/ + └── inline_reporter.py # Formats warnings for OpenCode display +``` + +--- + +## 5. Org Detection Logic + +```python +def detect_org(project_root): + files = os.listdir(project_root) + + if any(f in files for f in ['BUILD', 'WORKSPACE', 'MODULE.bazel', '.bazelrc']): + return 'google' + elif any(f in files for f in ['TARGETS', '.buckconfig', 'BUCK']): + return 'meta' + elif 'setup.py' in files and 'requirements.txt' in files: + return 'anthropic_openai' + elif any(f.endswith('.cu') for f in files) or 'CMakeLists.txt' in files: + return 'nvidia' + else: + return 'common' +``` + +--- + +## 6. Severity Levels + +- **ERROR**: Will definitely break CI. Must fix before commit. (e.g., cross-module access, missing test targets) +- **WARN**: Likely to cause issues. Should fix. (e.g., copyright header, copybara compatibility) +- **INFO**: Style suggestion. Optional. (e.g., import ordering) + +--- + +## 7. Example Output + +When editing `betti_defense.py`: + +``` +[CI Gatekeeper] ERROR: Cross-module private access detected at line 128 + → `betti_defense._connected_components()` is called from `global_graph_topology.py` + → Fix: Rename to `connected_components()` or make it public + +[CI Gatekeeper] WARN: Missing copyright header in `betti_defense_test.py` +``` + +--- + +## 8. Integration with OpenCode + +- **Auto-activates** when editing `.py`, `.bzl`, `.cc`, `.h`, `BUILD`, `TARGETS` files +- **Manual check**: `check CI` or `run ci-gatekeeper` +- **Respects `.gitignore`** and skips `__pycache__`, `.git/`, etc. + +--- + +## 9. Future Extensions (Phase 2) + +1. **LSP Integration**: Use symbol-level analysis via rust-analyzer or pylsp for more precise checks +2. **Custom Rules**: Support `.ci-gatekeeper.yaml` in project root for per-project overrides +3. **Auto-Fix**: Suggest fixes (e.g., "Add this to BUILD file: `py_test(...)`") +4. **CI Integration**: Parse actual CI config files (`.github/workflows/`, `.gitlab-ci.yml`) to validate against them + +--- + +## 10. Testing Strategy + +1. **Unit Tests**: Each detector and analyzer gets unit tests +2. **Integration Tests**: Test against real open-source projects (e.g., TensorFlow for Google patterns, PyTorch for Meta patterns) +3. **Regression Tests**: Ensure the exact cross-module access issue we fixed (`betti_defense._connected_components`) would be caught + +--- + +## 11. Approval + +**User Approval:** ✅ Approved via conversational design review (2026-06-09) + +**Sections Approved:** +- Section 1: Architecture & Overview ✅ +- Section 2: Rules & Checks ✅ +- Section 3: Implementation Details ✅ + +--- + +*End of Design Document*