diff --git a/README.md b/README.md index 9eaaf84..6f0bdca 100644 --- a/README.md +++ b/README.md @@ -316,6 +316,12 @@ The JSON report uses a simple stable wrapper: "schema_version": "1.0", "generated_at": "2026-06-17T09:08:07Z", "target": "example.com", + "metadata": { + "tool": "ActiveRecon", + "scan_profile": "web", + "authorized_use_notice": true + }, + "summary": {}, "results": {} } ``` @@ -328,11 +334,12 @@ HTTP Analysis TLS Analysis DNS Analysis Attention +Interesting Signals ``` -Markdown reports use the heading `Interesting Signals`. JSON output keeps the `Attention` key for compatibility. +Markdown reports use the heading `Interesting Signals`. JSON output keeps the `Attention` key for compatibility and also includes `Interesting Signals` as an alias. -When the `web` profile is used, reports also include `Endpoint Discovery`. +When the `web` profile is used, reports also include `Endpoint Discovery` with the original endpoint list plus summary counts and categorized endpoint groups. 
--- diff --git a/activerecon/main.py b/activerecon/main.py index 6ae571b..85f36ac 100644 --- a/activerecon/main.py +++ b/activerecon/main.py @@ -245,7 +245,9 @@ def main(): logging.error(f"Error during DNS analysis: {e}") results["DNS Analysis"] = {"error": f"DNS analysis failed: {e}"} - results["Attention"] = generate_attention_findings(results) + interesting_signals = generate_attention_findings(results) + results["Attention"] = interesting_signals + results["Interesting Signals"] = interesting_signals if markdown_output: try: @@ -256,7 +258,7 @@ def main(): if json_output: try: - generate_json_report(target, results, json_output) + generate_json_report(target, results, json_output, scan_profile=chosen_profile) logging.info(f"JSON report saved to {json_output}") except Exception as e: logging.error(f"Error during JSON report generation: {e}") diff --git a/activerecon/modules/http_enum.py b/activerecon/modules/http_enum.py index a659b44..cd09272 100644 --- a/activerecon/modules/http_enum.py +++ b/activerecon/modules/http_enum.py @@ -32,6 +32,12 @@ def _security_headers(headers): return present, missing +def _filter_missing_headers_for_url(missing_headers, url): + if str(url or "").lower().startswith("https://"): + return missing_headers + return [header for header in missing_headers if header != "strict-transport-security"] + + def _technology_hints(headers): hints = [] server = headers.get("Server") or headers.get("server") @@ -81,9 +87,11 @@ def analyze_http(target, config, http_ports): response = requests.get(url, timeout=timeout) headers = dict(response.headers) present_headers, missing_headers = _security_headers(headers) + final_url = getattr(response, "url", url) + missing_headers = _filter_missing_headers_for_url(missing_headers, final_url) results.append({ "url": url, - "final_url": getattr(response, "url", url), + "final_url": final_url, "port": portid, "service": service or scheme, "status": response.status_code, diff --git 
a/activerecon/modules/json_report.py b/activerecon/modules/json_report.py index 5f8ca00..5007b85 100644 --- a/activerecon/modules/json_report.py +++ b/activerecon/modules/json_report.py @@ -1,24 +1,239 @@ +import copy +import ipaddress import json from datetime import datetime -from pathlib import Path +from pathlib import Path, PurePosixPath SCHEMA_VERSION = "1.0" +TOOL_NAME = "ActiveRecon" +AUTHORIZED_USE_NOTICE = True +HSTS_HEADER = "strict-transport-security" +SCAN_CONTEXT_NOTE = ( + "Target appears to be local or private. Results may include local system, " + "development, Docker, virtualization, or lab services." +) +STATIC_ASSET_EXTENSIONS = { + ".css", + ".eot", + ".gif", + ".ico", + ".jpeg", + ".jpg", + ".js", + ".map", + ".png", + ".svg", + ".ttf", + ".webp", + ".woff", + ".woff2", +} +WELL_KNOWN_PATHS = { + "/robots.txt", + "/sitemap.xml", + "/.well-known/security.txt", + "/swagger", + "/api-docs", + "/ftp", +} +CATEGORY_KEYS = ("api_like", "frontend_routes", "static_assets", "well_known", "header_discovered") -def build_json_payload(target, results, generated_at=None): +def _as_list(value): + return value if isinstance(value, list) else [] + + +def _nmap_results(results): + return results.get("Nmap Scan", results) if isinstance(results, dict) else {} + + +def _is_https_http_item(item): + url = str(item.get("final_url") or item.get("url") or "").lower() + return url.startswith("https://") + + +def _filter_http_security_headers(results): + http_results = results.get("HTTP Analysis", []) + if not isinstance(http_results, list): + return + + for item in http_results: + if not isinstance(item, dict) or _is_https_http_item(item): + continue + missing_headers = item.get("missing_security_headers", []) + if isinstance(missing_headers, list): + item["missing_security_headers"] = [ + header + for header in missing_headers + if str(header).lower() != HSTS_HEADER + ] + + +def _dns_record_count(results): + dns_results = results.get("DNS Analysis", {}) + if not 
def build_json_summary(results):
    """Build the top-level ``summary`` counters for the JSON report.

    Args:
        results: Scan results mapping keyed by section name
            (``"Nmap Scan"``, ``"HTTP Analysis"``, ``"TLS Analysis"``,
            ``"DNS Analysis"``, ``"Endpoint Discovery"``, ``"Attention"`` /
            ``"Interesting Signals"``). A non-dict is treated as empty.

    Returns:
        A dict with ``host_status`` (the Nmap ``status.state`` or
        ``"Unknown"``) and integer counters ``total_ports_listed``,
        ``open_ports``, ``http_services``, ``tls_results``, ``dns_records``,
        ``interesting_signals`` and ``endpoint_count``.
    """
    if not isinstance(results, dict):
        results = {}

    nmap_results = _nmap_results(results)
    if not isinstance(nmap_results, dict):
        nmap_results = {}

    # "status" may be present but not a mapping (e.g. None after a failed
    # scan); chaining .get() on it would raise, so fall back to "Unknown".
    status = nmap_results.get("status")
    host_status = status.get("state", "Unknown") if isinstance(status, dict) else "Unknown"

    ports = _as_list(nmap_results.get("ports", []))
    # "Attention" is the primary key; "Interesting Signals" is only consulted
    # when "Attention" is absent.
    signals = _as_list(results.get("Attention", results.get("Interesting Signals", [])))

    return {
        "host_status": host_status,
        "total_ports_listed": len(ports),
        "open_ports": sum(
            1
            for port in ports
            if isinstance(port, dict) and port.get("state") == "open"
        ),
        "http_services": len(_as_list(results.get("HTTP Analysis", []))),
        "tls_results": len(_as_list(results.get("TLS Analysis", []))),
        "dns_records": _dns_record_count(results),
        "interesting_signals": len(signals),
        "endpoint_count": _endpoint_count(results),
    }
metadata["scan_profile"] = scan_profile + + context = scan_context or _scan_context(target, results) + if context: + metadata["scan_context"] = context + return metadata + + +def _path_without_query(path): + return str(path or "/").split("?", 1)[0].split("#", 1)[0] + + +def _is_api_like(path): + lower_path = str(path or "").lower() + return lower_path == "/api" or lower_path == "/rest" or lower_path.startswith("/api/") or lower_path.startswith("/rest/") + + +def _is_static_asset(path): + clean_path = _path_without_query(path).lower() + filename = PurePosixPath(clean_path).name + return PurePosixPath(clean_path).suffix in STATIC_ASSET_EXTENSIONS or "chunk" in filename + + +def _primary_endpoint_category(endpoint): + path = endpoint.get("path", "") + lower_path = _path_without_query(path).lower() + if _is_static_asset(path): + return "static_assets" + if _is_api_like(path): + return "api_like" + if lower_path in WELL_KNOWN_PATHS: + return "well_known" + return "frontend_routes" + + +def _endpoint_categories(endpoints): + categories = {key: [] for key in CATEGORY_KEYS} + for endpoint in endpoints: + if not isinstance(endpoint, dict): + continue + categories[_primary_endpoint_category(endpoint)].append(endpoint) + if str(endpoint.get("source", "")).startswith("response-header"): + categories["header_discovered"].append(endpoint) + + summary = {"endpoint_count": len(endpoints)} + for key in CATEGORY_KEYS: + summary[key] = len(categories[key]) + return summary, categories + + +def _enrich_endpoint_discovery(results): + for group in _endpoint_groups(results): + if not isinstance(group, dict): + continue + endpoints = _as_list(group.get("endpoints", [])) + summary, categories = _endpoint_categories(endpoints) + group["summary"] = summary + group["categories"] = categories + + +def normalize_results_for_json(results): + normalized = copy.deepcopy(results if isinstance(results, dict) else {}) + signals = normalized.get("Attention", normalized.get("Interesting Signals", [])) 
def build_json_payload(target, results, generated_at=None, scan_profile=None, scan_context=None):
    """Assemble the complete JSON report document.

    Args:
        target: Scan target as given by the caller; emitted verbatim.
        results: Raw scan results; they are passed through
            ``normalize_results_for_json`` and the normalized copy is what
            gets reported.
        generated_at: Optional timestamp string. When falsy, the current UTC
            time is used, formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
        scan_profile: Optional profile name forwarded to the metadata builder.
        scan_context: Optional context note forwarded to the metadata builder.

    Returns:
        dict with ``schema_version``, ``generated_at``, ``target``,
        ``metadata``, ``summary`` and ``results``.
    """
    if not generated_at:
        # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
        # timestamp instead and format it to the same "...Z" string.
        # Local import because the module only imports `datetime` itself.
        from datetime import timezone

        generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    normalized_results = normalize_results_for_json(results)
    return {
        "schema_version": SCHEMA_VERSION,
        "generated_at": generated_at,
        "target": target,
        "metadata": build_json_metadata(target, normalized_results, scan_profile, scan_context),
        "summary": build_json_summary(normalized_results),
        "results": normalized_results,
    }
"HTTPS" + + def fake_get(url, timeout): + if url.startswith("https://"): + return SecureResponse() + return PlainResponse() + + monkeypatch.setattr(requests, "get", fake_get) + + results = analyze_http( + "example.com", + {}, + [ + {"portid": "80", "service": "http"}, + {"portid": "443", "service": "https"}, + ], + ) + + assert "strict-transport-security" not in results[0]["missing_security_headers"] + assert "content-security-policy" in results[0]["missing_security_headers"] + assert "strict-transport-security" in results[1]["missing_security_headers"] diff --git a/tests/test_json_report.py b/tests/test_json_report.py index 9175acc..b4e6a11 100644 --- a/tests/test_json_report.py +++ b/tests/test_json_report.py @@ -9,14 +9,102 @@ def test_build_json_payload_wraps_results_with_schema(): assert payload["schema_version"] == "1.0" assert payload["generated_at"] == "2026-06-17T09:08:07Z" assert payload["target"] == "example.com" + assert payload["metadata"]["tool"] == "ActiveRecon" + assert payload["metadata"]["authorized_use_notice"] is True + assert payload["summary"]["total_ports_listed"] == 0 assert payload["results"]["Nmap Scan"]["ports"] == [] +def test_build_json_payload_adds_summary_metadata_alias_and_endpoint_categories(): + results = { + "Nmap Scan": { + "status": {"state": "up"}, + "host": "127.0.0.1", + "ports": [ + {"portid": "80", "state": "open"}, + {"portid": "25", "state": "filtered"}, + ], + }, + "HTTP Analysis": [ + { + "url": "http://127.0.0.1:80", + "status": 200, + "missing_security_headers": [ + "strict-transport-security", + "content-security-policy", + ], + } + ], + "TLS Analysis": [{"host": "127.0.0.1", "port": 443}], + "DNS Analysis": {"A": ["127.0.0.1"], "MX": [], "TXT": []}, + "Endpoint Discovery": [ + { + "base_url": "http://127.0.0.1", + "endpoints": [ + {"path": "/api", "source": "well-known"}, + {"path": "/login", "source": "html:href"}, + {"path": "/app.js", "source": "html:script-src"}, + {"path": "/robots.txt", "source": 
"well-known"}, + {"path": "/#/jobs", "source": "response-header:X-Recruiting"}, + ], + } + ], + "Attention": [ + { + "severity": "info", + "category": "endpoint", + "message": "API-like endpoint discovered; follow-up recommended", + "evidence": "http://127.0.0.1/api", + } + ], + } + + payload = build_json_payload( + "127.0.0.1", + results, + "2026-06-17T09:08:07Z", + scan_profile="web", + ) + + assert payload["metadata"]["scan_profile"] == "web" + assert "local or private" in payload["metadata"]["scan_context"] + assert payload["summary"] == { + "host_status": "up", + "total_ports_listed": 2, + "open_ports": 1, + "http_services": 1, + "tls_results": 1, + "dns_records": 1, + "interesting_signals": 1, + "endpoint_count": 5, + } + assert payload["results"]["Interesting Signals"] == payload["results"]["Attention"] + assert payload["results"]["HTTP Analysis"][0]["missing_security_headers"] == ["content-security-policy"] + + endpoint_group = payload["results"]["Endpoint Discovery"][0] + assert len(endpoint_group["endpoints"]) == 5 + assert endpoint_group["summary"] == { + "endpoint_count": 5, + "api_like": 1, + "frontend_routes": 2, + "static_assets": 1, + "well_known": 1, + "header_discovered": 1, + } + assert endpoint_group["categories"]["api_like"][0]["path"] == "/api" + assert endpoint_group["categories"]["frontend_routes"][0]["path"] == "/login" + assert endpoint_group["categories"]["static_assets"][0]["path"] == "/app.js" + assert endpoint_group["categories"]["well_known"][0]["path"] == "/robots.txt" + assert endpoint_group["categories"]["header_discovered"][0]["path"] == "/#/jobs" + + def test_generate_json_report_writes_file(tmp_path): output = tmp_path / "reports" / "example.json" - generate_json_report("example.com", {"Attention": []}, str(output), "2026-06-17T09:08:07Z") + generate_json_report("example.com", {"Attention": []}, str(output), "2026-06-17T09:08:07Z", scan_profile="fast") data = json.loads(output.read_text(encoding="utf-8")) assert data["target"] 
== "example.com" + assert data["metadata"]["scan_profile"] == "fast" assert data["results"]["Attention"] == [] + assert data["results"]["Interesting Signals"] == [] diff --git a/tests/test_main.py b/tests/test_main.py index 9a55d90..565e851 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -67,8 +67,9 @@ def fake_report(target, results, output_file): captured["results"] = results captured["output_file"] = output_file - def fake_json_report(target, results, output_file): + def fake_json_report(target, results, output_file, **kwargs): captured["json_output_file"] = output_file + captured["json_kwargs"] = kwargs monkeypatch.setattr(main_module, "CONFIG", {"scan_profiles": {"fast": "-Pn"}, "http_timeout": 5}) monkeypatch.setattr(main_module, "run_nmap_scan", fake_nmap) @@ -93,8 +94,10 @@ def fake_json_report(target, results, output_file): assert captured["http_ports"] == [{"portid": "80", "protocol": "tcp", "state": "open", "service": "http"}] assert captured["results"]["Nmap Scan"]["status"]["state"] == "up" + assert captured["results"]["Interesting Signals"] == captured["results"]["Attention"] assert captured["output_file"] == str(tmp_path / "report_20260617_090807.md") assert captured["json_output_file"] == str(tmp_path / "report_20260617_090807.json") + assert captured["json_kwargs"]["scan_profile"] == "fast" def test_main_handles_failed_nmap_without_http(monkeypatch, tmp_path): @@ -107,7 +110,7 @@ def fake_http(target, config, http_ports): def fake_report(target, results, output_file): captured["results"] = results - def fake_json_report(target, results, output_file): + def fake_json_report(target, results, output_file, **kwargs): captured["json_output_file"] = output_file monkeypatch.setattr(main_module, "CONFIG", {"scan_profiles": {"fast": "-Pn"}, "http_timeout": 5}) @@ -181,7 +184,7 @@ def test_main_uses_timestamped_default_output(monkeypatch): def fake_report(target, results, output_file): captured["output_file"] = output_file - def 
fake_json_report(target, results, output_file): + def fake_json_report(target, results, output_file, **kwargs): captured["json_output_file"] = output_file monkeypatch.setattr(main_module, "CONFIG", {"scan_profiles": {"fast": "-Pn"}, "http_timeout": 5}) @@ -267,7 +270,7 @@ def fake_report(target, results, output_file): ) monkeypatch.setattr(main_module, "generate_attention_findings", lambda results: []) monkeypatch.setattr(main_module, "generate_report", fake_report) - monkeypatch.setattr(main_module, "generate_json_report", lambda target, results, output_file: None) + monkeypatch.setattr(main_module, "generate_json_report", lambda target, results, output_file, **kwargs: None) monkeypatch.setattr( sys, "argv", @@ -326,7 +329,7 @@ def fake_report(target, results, output_file): monkeypatch.setattr(main_module, "analyze_dns", lambda target: {"A": [], "MX": [], "TXT": []}) monkeypatch.setattr(main_module, "generate_attention_findings", lambda results: []) monkeypatch.setattr(main_module, "generate_report", fake_report) - monkeypatch.setattr(main_module, "generate_json_report", lambda target, results, output_file: None) + monkeypatch.setattr(main_module, "generate_json_report", lambda target, results, output_file, **kwargs: None) monkeypatch.setattr( sys, "argv",