From d4ea814121c3b519cb857cec0372cf5e1c313340 Mon Sep 17 00:00:00 2001 From: Ram Dwivedi Date: Sun, 21 Jun 2026 23:26:58 -0400 Subject: [PATCH] docs: sweep remaining internal design-doc (SADD) references; fix stale semantic-analyzer doc Completes the internal-reference cleanup this PR started. The reviewer asked to fold in the remaining models.py case "so none remain"; grepping surfaced several more, all neutralized here while preserving meaning: - models.py: AnalyzerPlugin docstring describes the protocol instead of citing the internal spec section. - behavioral_taint_tracking.py (+ its test): cite the public rule family (TT1-TT5) rather than the internal section number. - mcp_rug_pull.py: keep the RP1-RP3 roadmap TODO, drop the internal-doc pointer. - semantic_developer_intent.py / semantic_quality_policy.py: drop section numbers from the module docstrings. - tests/nodes/analyzers/test_registry.py: reword docstrings/comment and rename test_analyzer_node_ids_match_sadd_spec -> test_analyzer_node_ids_match_expected. - docs/LLM_ANALYZER_BASE_GUIDE.md: the "Semantic Analyzer Stubs" table both called the now-implemented semantic analyzers stubs "ready to be implemented" and carried a "SADD Reference" column. Retitle to "Semantic Analyzers", note they emit only when use_llm is enabled, and drop the internal-reference column -- consistent with the DEVELOPMENT.md correction in this PR. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Ram Dwivedi --- docs/LLM_ANALYZER_BASE_GUIDE.md | 14 +++++++------- src/skillspector/models.py | 2 +- .../nodes/analyzers/behavioral_taint_tracking.py | 2 +- src/skillspector/nodes/analyzers/mcp_rug_pull.py | 2 +- .../nodes/analyzers/semantic_developer_intent.py | 2 +- .../nodes/analyzers/semantic_quality_policy.py | 2 +- .../analyzers/test_behavioral_taint_tracking.py | 2 +- tests/nodes/analyzers/test_registry.py | 10 +++++----- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/LLM_ANALYZER_BASE_GUIDE.md b/docs/LLM_ANALYZER_BASE_GUIDE.md index fd590cc3..9742341d 100644 --- a/docs/LLM_ANALYZER_BASE_GUIDE.md +++ b/docs/LLM_ANALYZER_BASE_GUIDE.md @@ -347,15 +347,15 @@ evaluates *existing* static findings rather than discovering new ones: - Overrides `parse_response` to return dicts (not `Finding` objects) - Adds `apply_filter` to match LLM results back to originals by `(file, rule_id)` -### Semantic Analyzer Stubs +### Semantic Analyzers -These are ready to be implemented using `LLMAnalyzerBase`: +These are implemented on top of `LLMAnalyzerBase` and emit findings only when `use_llm` is enabled: -| Stub | SADD Reference | Purpose | -|------|---------------|---------| -| `semantic_security_discovery` | B.4.1 | Intent and attack-phrasing risks | -| `semantic_developer_intent` | B.4.2 | Description-behavior mismatch | -| `semantic_quality_policy` | B.4.3 | Quality/safety rubric violations | +| Analyzer | Purpose | +|----------|---------| +| `semantic_security_discovery` | Intent and attack-phrasing risks | +| `semantic_developer_intent` | Description-behavior mismatch | +| `semantic_quality_policy` | Quality/safety rubric violations | --- diff --git a/src/skillspector/models.py b/src/skillspector/models.py index a26a78be..5dff48f5 100644 --- a/src/skillspector/models.py +++ b/src/skillspector/models.py @@ -108,7 +108,7 @@ def __str__(self) -> str: class AnalyzerPlugin(Protocol): - """Analyzer protocol from SADD A.1.1.""" + """Analyzer plugin protocol: name/stage/availability and an ``analyze`` entry point.""" name: str stage: str diff --git a/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py b/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py index 90c7e248..d1cab5b9 100644 --- a/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py +++ b/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Behavioral taint-tracking analyzer (SADD B.2.2): sources -> sinks data-flow analysis. +"""Behavioral taint-tracking analyzer (TT1–TT5): sources -> sinks data-flow analysis. Parses Python AST to identify data sources (env vars, file reads, network input) and sinks (network output, exec, file writes), then tracks flows between them diff --git a/src/skillspector/nodes/analyzers/mcp_rug_pull.py b/src/skillspector/nodes/analyzers/mcp_rug_pull.py index 44f9c5e4..45d74d3a 100644 --- a/src/skillspector/nodes/analyzers/mcp_rug_pull.py +++ b/src/skillspector/nodes/analyzers/mcp_rug_pull.py @@ -15,7 +15,7 @@ """MCP rug-pull analyzer stub node.""" -# TODO(SADD B.3.3): Compare current vs previous manifest; emit RP1–RP3 when previous manifest available. See SADD for skillspector § B.3.3. +# TODO: Compare current vs previous manifest; emit RP1–RP3 when a previous manifest is available. from __future__ import annotations diff --git a/src/skillspector/nodes/analyzers/semantic_developer_intent.py b/src/skillspector/nodes/analyzers/semantic_developer_intent.py index a3a54be2..c583cc38 100644 --- a/src/skillspector/nodes/analyzers/semantic_developer_intent.py +++ b/src/skillspector/nodes/analyzers/semantic_developer_intent.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Semantic developer-intent analyzer node (SADD B.4.2). +"""Semantic developer-intent analyzer node. Detects context-dependent risk and semantic description–behavior mismatches by comparing the skill's manifest (name, description, permissions) against diff --git a/src/skillspector/nodes/analyzers/semantic_quality_policy.py b/src/skillspector/nodes/analyzers/semantic_quality_policy.py index 3140334e..b039e51a 100644 --- a/src/skillspector/nodes/analyzers/semantic_quality_policy.py +++ b/src/skillspector/nodes/analyzers/semantic_quality_policy.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Semantic quality-policy analyzer node (SADD B.4.3). +"""Semantic quality-policy analyzer node. Evaluates AI agent skill files against a quality and safety rubric using LLM-based discovery. Flags vague triggers, missing user warnings, and diff --git a/tests/nodes/analyzers/test_behavioral_taint_tracking.py b/tests/nodes/analyzers/test_behavioral_taint_tracking.py index 77cc211b..b40e7c85 100644 --- a/tests/nodes/analyzers/test_behavioral_taint_tracking.py +++ b/tests/nodes/analyzers/test_behavioral_taint_tracking.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for behavioral_taint_tracking analyzer (SADD B.2.2): source→sink data-flow.""" +"""Tests for behavioral_taint_tracking analyzer (TT1–TT5): source→sink data-flow.""" from __future__ import annotations diff --git a/tests/nodes/analyzers/test_registry.py b/tests/nodes/analyzers/test_registry.py index 0459901c..2801da46 100644 --- a/tests/nodes/analyzers/test_registry.py +++ b/tests/nodes/analyzers/test_registry.py @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for analyzer node registry alignment with SADD spec.""" +"""Tests for analyzer node registry alignment with the workflow reference table.""" from __future__ import annotations from skillspector.nodes.analyzers import ANALYZER_NODE_IDS, ANALYZER_NODES -# Expected analyzer node IDs per SADD spec workflow reference table. +# Expected analyzer node IDs per the workflow reference table. # Order: static (12), behavioral (2), mcp (3), semantic (3). EXPECTED_ANALYZER_NODE_IDS: list[str] = [ "static_patterns_prompt_injection", @@ -46,10 +46,10 @@ class TestAnalyzerRegistry: - """Registry matches SADD spec node set and order.""" + """Registry matches the expected node set and order.""" - def test_analyzer_node_ids_match_sadd_spec(self): - """ANALYZER_NODE_IDS equals expected list from SADD spec.""" + def test_analyzer_node_ids_match_expected(self): + """ANALYZER_NODE_IDS equals the expected list.""" assert ANALYZER_NODE_IDS == EXPECTED_ANALYZER_NODE_IDS def test_analyzer_nodes_has_entry_for_every_id(self):