From 350abac441ec1dd693ec1968667c5959cd33d74d Mon Sep 17 00:00:00 2001 From: CamiloCod3 Date: Wed, 17 Jun 2026 15:42:12 +0200 Subject: [PATCH] Polish JSON schema metadata --- README.md | 4 +-- activerecon/modules/json_report.py | 40 ++++++++++++++++++++++++------ tests/test_json_report.py | 15 ++++++++--- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6f0bdca..647f7fd 100644 --- a/README.md +++ b/README.md @@ -313,7 +313,7 @@ The JSON report uses a simple stable wrapper: ```json { - "schema_version": "1.0", + "schema_version": "1.1", "generated_at": "2026-06-17T09:08:07Z", "target": "example.com", "metadata": { @@ -337,7 +337,7 @@ Attention Interesting Signals ``` -Markdown reports use the heading `Interesting Signals`. JSON output keeps the `Attention` key for compatibility and also includes `Interesting Signals` as an alias. +Markdown reports use the heading `Interesting Signals`. JSON output keeps `results["Attention"]` for backwards compatibility. New JSON consumers should prefer `results["Interesting Signals"]`. When the `web` profile is used, reports also include `Endpoint Discovery` with the original endpoint list plus summary counts and categorized endpoint groups. diff --git a/activerecon/modules/json_report.py b/activerecon/modules/json_report.py index 5007b85..9cfe6f0 100644 --- a/activerecon/modules/json_report.py +++ b/activerecon/modules/json_report.py @@ -5,7 +5,7 @@ from pathlib import Path, PurePosixPath -SCHEMA_VERSION = "1.0" +SCHEMA_VERSION = "1.1" TOOL_NAME = "ActiveRecon" AUTHORIZED_USE_NOTICE = True HSTS_HEADER = "strict-transport-security" @@ -37,7 +37,14 @@ "/api-docs", "/ftp", } -CATEGORY_KEYS = ("api_like", "frontend_routes", "static_assets", "well_known", "header_discovered") +CATEGORY_KEYS = ( + "api_like", + "frontend_routes", + "static_assets", + "well_known", + "header_discovered", + "realtime_services", +) def _as_list(value): @@ -87,11 +94,11 @@ def _endpoint_groups(results): def _endpoint_count(results): - total = 0 + paths = set() for group in _endpoint_groups(results): if isinstance(group, dict): - total += len(_as_list(group.get("endpoints", []))) - return total + paths.update(_unique_endpoint_paths(_as_list(group.get("endpoints", [])))) + return len(paths) def build_json_summary(results): @@ -164,9 +171,28 @@ def _is_static_asset(path): return PurePosixPath(clean_path).suffix in STATIC_ASSET_EXTENSIONS or "chunk" in filename +def _is_realtime_service(path): + clean_path = _path_without_query(path).lower().rstrip("/") + return ( + clean_path == "/socket.io" + or clean_path == "/engine.io" + or clean_path.startswith(("/socket.io/", "/engine.io/")) + ) + + +def _unique_endpoint_paths(endpoints): + return { + endpoint.get("path") + for endpoint in endpoints + if isinstance(endpoint, dict) and endpoint.get("path") + } + + def _primary_endpoint_category(endpoint): path = endpoint.get("path", "") lower_path = _path_without_query(path).lower() + if _is_realtime_service(path): + return "realtime_services" if _is_static_asset(path): return "static_assets" if _is_api_like(path): @@ -185,9 +211,9 @@ def _endpoint_categories(endpoints): if str(endpoint.get("source", "")).startswith("response-header"): categories["header_discovered"].append(endpoint) - summary = {"endpoint_count": len(endpoints)} + summary = {"endpoint_count": len(_unique_endpoint_paths(endpoints))} for key in CATEGORY_KEYS: - summary[key] = len(categories[key]) + summary[key] = len(_unique_endpoint_paths(categories[key])) return summary, categories diff --git a/tests/test_json_report.py b/tests/test_json_report.py index b4e6a11..7b77021 100644 --- a/tests/test_json_report.py +++ b/tests/test_json_report.py @@ -6,7 +6,7 @@ def test_build_json_payload_wraps_results_with_schema(): payload = build_json_payload("example.com", {"Nmap Scan": {"ports": []}}, "2026-06-17T09:08:07Z") - assert payload["schema_version"] == "1.0" + assert payload["schema_version"] == "1.1" assert payload["generated_at"] == "2026-06-17T09:08:07Z" assert payload["target"] == "example.com" assert payload["metadata"]["tool"] == "ActiveRecon" @@ -42,10 +42,12 @@ def test_build_json_payload_adds_summary_metadata_alias_and_endpoint_categories( "base_url": "http://127.0.0.1", "endpoints": [ {"path": "/api", "source": "well-known"}, + {"path": "/api", "source": "javascript"}, {"path": "/login", "source": "html:href"}, {"path": "/app.js", "source": "html:script-src"}, {"path": "/robots.txt", "source": "well-known"}, {"path": "/#/jobs", "source": "response-header:X-Recruiting"}, + {"path": "/socket.io/?EIO=4", "source": "javascript"}, ], } ], @@ -76,26 +78,31 @@ def test_build_json_payload_adds_summary_metadata_alias_and_endpoint_categories( "tls_results": 1, "dns_records": 1, "interesting_signals": 1, - "endpoint_count": 5, + "endpoint_count": 6, } + assert "Attention" in payload["results"] + assert "Interesting Signals" in payload["results"] assert payload["results"]["Interesting Signals"] == payload["results"]["Attention"] assert payload["results"]["HTTP Analysis"][0]["missing_security_headers"] == ["content-security-policy"] endpoint_group = payload["results"]["Endpoint Discovery"][0] - assert len(endpoint_group["endpoints"]) == 5 + assert len(endpoint_group["endpoints"]) == 7 assert endpoint_group["summary"] == { - "endpoint_count": 5, + "endpoint_count": 6, "api_like": 1, "frontend_routes": 2, "static_assets": 1, "well_known": 1, "header_discovered": 1, + "realtime_services": 1, } assert endpoint_group["categories"]["api_like"][0]["path"] == "/api" + assert len(endpoint_group["categories"]["api_like"]) == 2 assert endpoint_group["categories"]["frontend_routes"][0]["path"] == "/login" assert endpoint_group["categories"]["static_assets"][0]["path"] == "/app.js" assert endpoint_group["categories"]["well_known"][0]["path"] == "/robots.txt" assert endpoint_group["categories"]["header_discovered"][0]["path"] == "/#/jobs" + assert endpoint_group["categories"]["realtime_services"][0]["path"] == "/socket.io/?EIO=4" def test_generate_json_report_writes_file(tmp_path):