From aa4b3e5665f8be61668218b12e39de1b212f6962 Mon Sep 17 00:00:00 2001 From: Michael Heller <21163552+mdheller@users.noreply.github.com> Date: Thu, 11 Jun 2026 20:33:35 -0400 Subject: [PATCH] feat: lawful metadata harvest smoke bundle, run artifact schema, and CI gate (#160) Adds lawful-metadata-harvest-run-artifact.schema.v0.1.json: run artifact for OAI-PMH-style three-batch harvest. Fields: artifact_id, run_ref, harvest_batches (verb enum: Identify/ListRecords/etc), envelope_ref, replay_ref, promotion_decision (explicit, not implied), token_loop_anomaly, and no_live_network_harvest (const: true). Compatible with ProCybernetica lawful-metadata-harvest-envelope contract structure. Adds bundles/lawful-metadata-harvest-smoke/bundle.json: smoke bundle descriptor with ProCybernetica and SocioSphere upstream anchors. No live network harvesting. Adds harvest-run-artifact.valid.json (three-batch run, promotion deferred, no anomaly) and reject_token-loop-anomaly.json (resumption_loop detected). Adds tools/validate_lawful_metadata_harvest.py with policy gates: no live harvest, explicit promotion, token loop anomaly invalidates replay; 4 checks pass. Wires validate-lawful-metadata-harvest into make validate. 
Closes #160 --- Makefile | 7 +- .../lawful-metadata-harvest-smoke/bundle.json | 45 ++++++++ ...data-harvest-run-artifact.schema.v0.1.json | 102 ++++++++++++++++++ .../harvest-run-artifact.valid.json | 51 +++++++++ .../reject_token-loop-anomaly.json | 33 ++++++ tools/validate_lawful_metadata_harvest.py | 94 ++++++++++++++++ 6 files changed, 330 insertions(+), 2 deletions(-) create mode 100644 bundles/lawful-metadata-harvest-smoke/bundle.json create mode 100644 schemas/lawful-metadata-harvest-run-artifact.schema.v0.1.json create mode 100644 tests/fixtures/lawful-metadata-harvest/harvest-run-artifact.valid.json create mode 100644 tests/fixtures/lawful-metadata-harvest/reject_token-loop-anomaly.json create mode 100644 tools/validate_lawful_metadata_harvest.py diff --git a/Makefile b/Makefile index 8856a654..a9040d33 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -.PHONY: validate test validate-agent-cycle-health validate-authority-dependency-evidence validate-prometheus-sr validate-reasoning-failure-traces validate-governance-context validate-lattice-data-governai-execution-refs validate-lattice-runtime-profile-refs validate-network-native-assistant-evidence validate-guardrail-evidence-artifacts validate-stop-gate-evaluator validate-guarded-workcell-artifact validate-guarded-workcell-executor validate-guarded-invocation-artifact validate-guarded-invocation validate-agentic-pr-work-order validate-semantic-enterprise-agent-boundary validate-ops-history-contracts validate-action-contracts validate-agent-operation-contract validate-superconscious-reasoning-import validate-agent-harness-runtime-contracts validate-bounded-action-loop agentplane-evidence-receipt-composition-tier2-binding-ci lawful-learning-phase9-contract-ci validate-evidence-receipt-binding validate-semantic-activation-receipt validate-governed-run-contract validate-preflight-receipt validate-attempt-admission-receipt validate-verification-execution-receipt validate-synthetic-verification-receipt 
validate-governed-runner-v0-2-contract-chain validate-budget-settlement-receipt validate-rollback-receipts validate-run-dossier validate-governed-runner-readonly validate-workroom-context-evidence validate-wallguard-collaboration-admission validate-prophet-mesh-agentplane-adapter +.PHONY: validate test validate-agent-cycle-health validate-authority-dependency-evidence validate-prometheus-sr validate-reasoning-failure-traces validate-governance-context validate-lattice-data-governai-execution-refs validate-lattice-runtime-profile-refs validate-network-native-assistant-evidence validate-guardrail-evidence-artifacts validate-stop-gate-evaluator validate-guarded-workcell-artifact validate-guarded-workcell-executor validate-guarded-invocation-artifact validate-guarded-invocation validate-agentic-pr-work-order validate-semantic-enterprise-agent-boundary validate-ops-history-contracts validate-action-contracts validate-agent-operation-contract validate-superconscious-reasoning-import validate-agent-harness-runtime-contracts validate-bounded-action-loop agentplane-evidence-receipt-composition-tier2-binding-ci lawful-learning-phase9-contract-ci validate-evidence-receipt-binding validate-semantic-activation-receipt validate-governed-run-contract validate-preflight-receipt validate-attempt-admission-receipt validate-verification-execution-receipt validate-synthetic-verification-receipt validate-governed-runner-v0-2-contract-chain validate-budget-settlement-receipt validate-rollback-receipts validate-run-dossier validate-governed-runner-readonly validate-workroom-context-evidence validate-wallguard-collaboration-admission validate-prophet-mesh-agentplane-adapter validate-lawful-metadata-harvest -validate: validate-agent-cycle-health validate-authority-dependency-evidence validate-prometheus-sr validate-reasoning-failure-traces validate-governance-context validate-lattice-data-governai-execution-refs validate-lattice-runtime-profile-refs 
validate-network-native-assistant-evidence validate-guardrail-evidence-artifacts validate-stop-gate-evaluator validate-guarded-workcell-artifact validate-guarded-workcell-executor validate-guarded-invocation-artifact validate-guarded-invocation validate-agentic-pr-work-order validate-semantic-enterprise-agent-boundary validate-ops-history-contracts validate-action-contracts validate-agent-operation-contract validate-superconscious-reasoning-import validate-agent-harness-runtime-contracts validate-bounded-action-loop agentplane-evidence-receipt-composition-tier2-binding-ci lawful-learning-phase9-contract-ci validate-evidence-receipt-binding validate-semantic-activation-receipt validate-governed-run-contract validate-preflight-receipt validate-attempt-admission-receipt validate-verification-execution-receipt validate-synthetic-verification-receipt validate-governed-runner-v0-2-contract-chain validate-budget-settlement-receipt validate-rollback-receipts validate-run-dossier validate-governed-runner-readonly validate-workroom-context-evidence validate-wallguard-collaboration-admission validate-prophet-mesh-agentplane-adapter +validate: validate-agent-cycle-health validate-authority-dependency-evidence validate-prometheus-sr validate-reasoning-failure-traces validate-governance-context validate-lattice-data-governai-execution-refs validate-lattice-runtime-profile-refs validate-network-native-assistant-evidence validate-guardrail-evidence-artifacts validate-stop-gate-evaluator validate-guarded-workcell-artifact validate-guarded-workcell-executor validate-guarded-invocation-artifact validate-guarded-invocation validate-agentic-pr-work-order validate-semantic-enterprise-agent-boundary validate-ops-history-contracts validate-action-contracts validate-agent-operation-contract validate-superconscious-reasoning-import validate-agent-harness-runtime-contracts validate-bounded-action-loop agentplane-evidence-receipt-composition-tier2-binding-ci lawful-learning-phase9-contract-ci 
validate-evidence-receipt-binding validate-semantic-activation-receipt validate-governed-run-contract validate-preflight-receipt validate-attempt-admission-receipt validate-verification-execution-receipt validate-synthetic-verification-receipt validate-governed-runner-v0-2-contract-chain validate-budget-settlement-receipt validate-rollback-receipts validate-run-dossier validate-governed-runner-readonly validate-workroom-context-evidence validate-wallguard-collaboration-admission validate-prophet-mesh-agentplane-adapter validate-lawful-metadata-harvest python3 tools/validate_execution_timing.py validate-governance-context: @@ -248,6 +248,9 @@ validate-prophet-mesh-agentplane-adapter: python3 -m json.tool contracts/prophet-mesh/prophet-mesh-agentplane-adapter.v0.1.json >/dev/null python3 tools/validate_prophet_mesh_agentplane_adapter.py +validate-lawful-metadata-harvest: + python3 tools/validate_lawful_metadata_harvest.py + validate-agent-cycle-health: python3 tools/validate_agent_cycle_health.py diff --git a/bundles/lawful-metadata-harvest-smoke/bundle.json b/bundles/lawful-metadata-harvest-smoke/bundle.json new file mode 100644 index 00000000..99e23787 --- /dev/null +++ b/bundles/lawful-metadata-harvest-smoke/bundle.json @@ -0,0 +1,45 @@ +{ + "apiVersion": "agentplane.socioprophet.org/v0.1", + "kind": "Bundle", + "metadata": { + "name": "lawful-metadata-harvest-smoke", + "version": "0.1.0", + "createdAt": "2026-06-11T00:00:00Z", + "licensePolicy": { + "allowAGPL": false, + "notes": "Lawful metadata harvest bundle is evidence-forward. No live network harvesting." 
+ }, + "source": { + "git": { "dirty": false, "rev": "UNSET" } + } + }, + "spec": { + "upstreamAnchors": [ + "SocioProphet/ProCybernetica:lawful-metadata-harvest-envelope", + "SocioProphet/sociosphere#322:epistemic-governance" + ], + "nonClaims": [ + "This bundle performs no live network harvesting.", + "This bundle does not constitute a harvesting right or legal authorization.", + "This bundle does not certify the authenticity of harvested metadata." + ], + "artifacts": { + "outDir": "./artifacts/lawful-metadata-harvest-smoke" + }, + "policy": { + "lane": "smoke", + "humanGateRequired": false, + "failOnTimeout": true, + "maxRunSeconds": 60, + "policyPackRef": "policy://agentplane/lawful-harvest-smoke-v1", + "policyPackHash": "UNSET" + }, + "smoke": { + "script": "bundles/lawful-metadata-harvest-smoke/smoke.sh" + }, + "secrets": { + "required": [], + "secretRefRoot": "secrets://tenant" + } + } +} diff --git a/schemas/lawful-metadata-harvest-run-artifact.schema.v0.1.json b/schemas/lawful-metadata-harvest-run-artifact.schema.v0.1.json new file mode 100644 index 00000000..fe64d369 --- /dev/null +++ b/schemas/lawful-metadata-harvest-run-artifact.schema.v0.1.json @@ -0,0 +1,102 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://socioprophet.io/schemas/agentplane/lawful-metadata-harvest-run-artifact/v0.1", + "title": "LawfulMetadataHarvestRunArtifact", + "description": "AgentPlane run artifact for a lawful metadata harvest bundle. Compatible with ProCybernetica lawful-metadata-harvest-envelope contract. 
No live network harvesting is performed; all harvest state is synthetic.", + "type": "object", + "required": [ + "kind", + "artifact_id", + "run_ref", + "harvest_batches", + "envelope_ref", + "replay_ref", + "promotion_decision", + "no_live_network_harvest", + "policy_ref", + "issued_at" + ], + "additionalProperties": false, + "properties": { + "kind": { "type": "string", "const": "LawfulMetadataHarvestRunArtifact" }, + "artifact_id": { "type": "string", "minLength": 1 }, + "run_ref": { "type": "string", "minLength": 1 }, + "harvest_batches": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/HarvestBatch" } + }, + "envelope_ref": { + "type": "string", + "description": "Reference to the lawful-metadata-harvest-envelope compatible with ProCybernetica contract" + }, + "replay_ref": { + "type": "string", + "description": "Reference to non-mutating replay artifact" + }, + "promotion_decision": { "$ref": "#/$defs/PromotionDecision" }, + "token_loop_anomaly": { "$ref": "#/$defs/TokenLoopAnomaly" }, + "no_live_network_harvest": { + "type": "boolean", + "const": true + }, + "policy_ref": { "type": "string", "minLength": 1 }, + "procybernetica_protocol_ref": { + "type": "string", + "description": "Reference to ProCybernetica lawful-metadata-harvest-envelope contract" + }, + "issued_at": { "type": "string", "format": "date-time" } + }, + "$defs": { + "HarvestBatch": { + "type": "object", + "required": ["batch_id", "verb", "records_harvested", "records_valid", "batch_status"], + "additionalProperties": false, + "properties": { + "batch_id": { "type": "string", "minLength": 1 }, + "verb": { + "type": "string", + "enum": ["Identify", "ListIdentifiers", "ListRecords", "GetRecord", "ListMetadataFormats", "ListSets"] + }, + "records_harvested": { "type": "integer", "minimum": 0 }, + "records_valid": { "type": "integer", "minimum": 0 }, + "batch_status": { + "type": "string", + "enum": ["complete", "partial", "error", "empty"] + }, + "resumption_token": { "type": 
["string", "null"] }, + "evidence_ref": { "type": "string" } + } + }, + "PromotionDecision": { + "type": "object", + "required": ["decision", "decided_by", "promotion_is_explicit"], + "additionalProperties": false, + "properties": { + "decision": { + "type": "string", + "enum": ["promote", "defer", "reject"] + }, + "decided_by": { "type": "string", "minLength": 1 }, + "promotion_is_explicit": { "type": "boolean", "const": true }, + "reason": { "type": "string" } + } + }, + "TokenLoopAnomaly": { + "type": "object", + "required": ["detected", "anomaly_type"], + "additionalProperties": false, + "properties": { + "detected": { "type": "boolean" }, + "anomaly_type": { + "type": "string", + "enum": ["none", "resumption_loop", "duplicate_token", "stale_token", "unexpected_terminal"] + }, + "affected_batch_ids": { + "type": "array", + "items": { "type": "string" } + } + } + } + } +} diff --git a/tests/fixtures/lawful-metadata-harvest/harvest-run-artifact.valid.json b/tests/fixtures/lawful-metadata-harvest/harvest-run-artifact.valid.json new file mode 100644 index 00000000..f9008e14 --- /dev/null +++ b/tests/fixtures/lawful-metadata-harvest/harvest-run-artifact.valid.json @@ -0,0 +1,51 @@ +{ + "kind": "LawfulMetadataHarvestRunArtifact", + "artifact_id": "lmh_run_artifact_20260611_001", + "run_ref": "agentplane://run/lawful-metadata-harvest-smoke/run_20260611_001", + "harvest_batches": [ + { + "batch_id": "batch_001_identify", + "verb": "Identify", + "records_harvested": 1, + "records_valid": 1, + "batch_status": "complete", + "resumption_token": null, + "evidence_ref": "evidence://agentplane/lmh/batch_001_identify" + }, + { + "batch_id": "batch_002_list_records", + "verb": "ListRecords", + "records_harvested": 50, + "records_valid": 48, + "batch_status": "complete", + "resumption_token": null, + "evidence_ref": "evidence://agentplane/lmh/batch_002_list_records" + }, + { + "batch_id": "batch_003_list_records_page2", + "verb": "ListRecords", + "records_harvested": 22, + 
"records_valid": 22, + "batch_status": "complete", + "resumption_token": null, + "evidence_ref": "evidence://agentplane/lmh/batch_003_list_records_page2" + } + ], + "envelope_ref": "procybernetica://lawful-metadata-harvest-envelope/lmh_20260611_001", + "replay_ref": "agentplane://replay/lawful-metadata-harvest-smoke/replay_20260611_001", + "promotion_decision": { + "decision": "defer", + "decided_by": "agentplane/lawful-harvest-promotion-gate", + "promotion_is_explicit": true, + "reason": "Awaiting ProCybernetica validator confirmation before promotion" + }, + "token_loop_anomaly": { + "detected": false, + "anomaly_type": "none", + "affected_batch_ids": [] + }, + "no_live_network_harvest": true, + "policy_ref": "policy://agentplane/lawful-harvest-smoke-v1", + "procybernetica_protocol_ref": "procybernetica://protocol/lawful-metadata-harvest-envelope/v1", + "issued_at": "2026-06-11T10:00:00Z" +} diff --git a/tests/fixtures/lawful-metadata-harvest/reject_token-loop-anomaly.json b/tests/fixtures/lawful-metadata-harvest/reject_token-loop-anomaly.json new file mode 100644 index 00000000..feb5fce0 --- /dev/null +++ b/tests/fixtures/lawful-metadata-harvest/reject_token-loop-anomaly.json @@ -0,0 +1,33 @@ +{ + "_reject_reason": "token_loop_anomaly.detected=true with anomaly_type=resumption_loop — validator must reject this run artifact as it indicates a harvesting anomaly that invalidates replay", + "kind": "LawfulMetadataHarvestRunArtifact", + "artifact_id": "lmh_run_artifact_reject_001", + "run_ref": "agentplane://run/lawful-metadata-harvest-smoke/run_reject_001", + "harvest_batches": [ + { + "batch_id": "batch_reject_001", + "verb": "ListRecords", + "records_harvested": 50, + "records_valid": 30, + "batch_status": "partial", + "resumption_token": "TOKEN_LOOP_001", + "evidence_ref": "evidence://agentplane/lmh/batch_reject_001" + } + ], + "envelope_ref": "procybernetica://lawful-metadata-harvest-envelope/lmh_reject_001", + "replay_ref": 
#!/usr/bin/env python3
"""Validate LawfulMetadataHarvestRunArtifact fixtures.

Policy gates:
- no_live_network_harvest must be true
- promotion_decision.promotion_is_explicit must be true (promotion is never implied by harvest completion)
- token_loop_anomaly.detected=true invalidates replay — run must be rejected

Fixture convention: every ``*.json`` file in the fixture directory is checked.
Files whose name starts with ``reject_`` must fail schema validation or a
policy gate; all other files must pass both.

Exit status is 0 when every check passes and 1 otherwise.
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
SCHEMA_PATH = ROOT / "schemas" / "lawful-metadata-harvest-run-artifact.schema.v0.1.json"
FIXTURES = ROOT / "tests" / "fixtures" / "lawful-metadata-harvest"

# Fixture-only annotation keys. The schema sets additionalProperties=false at
# the top level, so leaving these in place would make every reject fixture fail
# on the annotation itself and hide whether the artifact content is rejected.
REJECT_ANNOTATION_KEYS = frozenset({"_reject_reason"})

# Accumulated across the whole run: failure messages and one bool per check.
errors: list[str] = []
results: list[bool] = []


def ok(label: str) -> None:
    """Record a passing check and report it on stdout."""
    print(f"PASS {label}")
    results.append(True)


def fail(label: str, reason: str) -> None:
    """Record a failing check; the message is printed at the end of the run."""
    errors.append(f"FAIL {label}: {reason}")
    results.append(False)


def policy_gate_errors(data: dict) -> list[str]:
    """Return the policy-gate violations found in a run artifact.

    Args:
        data: The parsed run artifact. Non-dict input is reported as a
            violation instead of raising.

    Returns:
        A list of human-readable violation messages; empty when every gate
        passes.
    """
    if not isinstance(data, dict):
        return ["run artifact must be a JSON object"]

    errs: list[str] = []

    if data.get("no_live_network_harvest") is not True:
        errs.append("no_live_network_harvest must be true")

    decision = data.get("promotion_decision")
    if not isinstance(decision, dict) or decision.get("promotion_is_explicit") is not True:
        errs.append("promotion_decision.promotion_is_explicit must be true")

    # token_loop_anomaly is optional in the schema; only inspect it if present.
    anomaly = data.get("token_loop_anomaly")
    if anomaly is not None:
        if not isinstance(anomaly, dict):
            errs.append("token_loop_anomaly must be an object")
        elif anomaly.get("detected"):
            # Any detected anomaly invalidates replay, whatever anomaly_type
            # says: detected=true paired with anomaly_type="none" is
            # contradictory and must not slip through the gate.
            errs.append(
                f"token_loop_anomaly detected ({anomaly.get('anomaly_type')}) — run artifact is invalid; replay is not trusted"
            )

    return errs


def _strip_reject_annotations(data: object) -> object:
    """Return *data* without fixture annotation keys (non-dicts pass through)."""
    if not isinstance(data, dict):
        return data
    return {k: v for k, v in data.items() if k not in REJECT_ANNOTATION_KEYS}


def _check_fixture(validator, path: Path) -> None:
    """Run the JSON-parse, schema and policy-gate checks for one fixture file."""
    label = path.name
    is_reject = label.startswith("reject_")

    try:
        # Fixtures contain non-ASCII text, so do not rely on the locale encoding.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        fail(f"json-parse {label}", str(exc))
        return

    ok(f"json-parse {label}")

    if is_reject:
        data = _strip_reject_annotations(data)

    schema_errs = [err.message for err in validator.iter_errors(data)]
    policy_errs = policy_gate_errors(data)

    if is_reject:
        if schema_errs or policy_errs:
            ok(f"reject-expected {label}")
        else:
            fail(f"reject-fixture {label}", "expected failure but fixture appears valid")
        return

    for message in schema_errs:
        fail(f"schema {label}", message)
    for message in policy_errs:
        fail(f"policy-gate {label}", message)
    if not schema_errs and not policy_errs:
        ok(f"valid {label}")


def main() -> int:
    """Validate every fixture and return the process exit status."""
    # Imported here so the pure helpers above stay importable (and testable)
    # without the third-party dependency.
    try:
        import jsonschema
    except ImportError:
        print("ERROR: jsonschema not installed", file=sys.stderr)
        return 1

    schema = json.loads(SCHEMA_PATH.read_text(encoding="utf-8"))
    validator = jsonschema.Draft202012Validator(schema)

    fixture_paths = sorted(FIXTURES.glob("*.json"))
    if not fixture_paths:
        # A gate that checks nothing must not report success.
        fail("fixtures", f"no *.json fixtures found under {FIXTURES}")

    for path in fixture_paths:
        _check_fixture(validator, path)

    passed = sum(results)
    if errors:
        print(file=sys.stderr)
        for message in errors:
            print(message, file=sys.stderr)
        print(f"\n{passed} passed, {len(errors)} failed", file=sys.stderr)
        return 1

    print(f"\n{passed} lawful-metadata-harvest checks passed")
    return 0


if __name__ == "__main__":
    sys.exit(main())