From bb54b5bdd2cda3c405ad37fa0c0fe92b61542a57 Mon Sep 17 00:00:00 2001 From: CamiloCod3 Date: Wed, 17 Jun 2026 15:03:40 +0200 Subject: [PATCH] Add web profile endpoint discovery --- README.md | 25 ++ activerecon/main.py | 15 ++ activerecon/modules/config/config.yaml | 19 +- activerecon/modules/endpoint_discovery.py | 293 ++++++++++++++++++++++ activerecon/modules/report_generator.py | 41 +++ activerecon/modules/risk_analysis.py | 52 ++++ tests/test_endpoint_discovery.py | 90 +++++++ tests/test_main.py | 53 ++++ tests/test_report_generator.py | 13 + tests/test_risk_analysis.py | 28 +++ 10 files changed, 628 insertions(+), 1 deletion(-) create mode 100644 activerecon/modules/endpoint_discovery.py create mode 100644 tests/test_endpoint_discovery.py diff --git a/README.md b/README.md index 17b4d2c..9eaaf84 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Instead of manually running separate commands and collecting notes from differen * collect HTTP status, headers, redirects, page titles, and technology hints * collect TLS certificate metadata for HTTPS services * query common DNS records +* run a richer web reconnaissance workflow from the `web` scan profile * generate Markdown and JSON reports * highlight interesting signals for follow-up review @@ -75,6 +76,7 @@ ActiveRecon currently supports: | HTTP | Status codes, titles, redirects, headers, technology hints | | TLS | TLS version, cipher, certificate metadata | | DNS | A, MX, and TXT lookups | +| Web | Endpoint discovery from HTML, headers, JavaScript, and safe well-known paths | | Reporting | Markdown and JSON output | | Safety | Scope guard, dry-run mode, doctor checks | | Analysis | Interesting signals for follow-up review | @@ -153,6 +155,7 @@ Generated Markdown reports include sections such as: ## Scan Information ## Port Scan Results ## HTTP Analysis +## Endpoint Discovery ## TLS Analysis ## DNS Analysis ## Interesting Signals @@ -254,6 +257,25 @@ scan_profiles: standard: "-Pn -n -sT -sV -sC -T3" full: "-Pn -n -sT -p- -sV -sC -T4" udp: "-Pn -n -sU --top-ports 100 -sC --script-timeout 5m" + +web_recon: + enabled_profiles: + - web + endpoint_probe_limit: 50 + fetch_javascript: true + same_origin_only: true + well_known_paths: + - /robots.txt + - /sitemap.xml + - /.well-known/security.txt + - /api + - /rest + - /ftp + - /admin + - /login + - /debug + - /swagger + - /api-docs ``` --- @@ -310,6 +332,8 @@ Attention Markdown reports use the heading `Interesting Signals`. JSON output keeps the `Attention` key for compatibility. +When the `web` profile is used, reports also include `Endpoint Discovery`. + --- ## Project Structure @@ -324,6 +348,7 @@ ActiveRecon/ | |-- config_loader.py | |-- dns_analysis.py | |-- doctor.py +| |-- endpoint_discovery.py | |-- http_enum.py | |-- json_report.py | |-- nmap_scan.py diff --git a/activerecon/main.py b/activerecon/main.py index 6d6b0e0..6ae571b 100644 --- a/activerecon/main.py +++ b/activerecon/main.py @@ -12,6 +12,7 @@ from .modules.json_report import generate_json_report from .modules.config_loader import load_config from .modules.doctor import run_doctor +from .modules.endpoint_discovery import discover_endpoints from .modules.risk_analysis import generate_attention_findings from .modules.scope_guard import is_target_in_scope from .modules.tls_analysis import analyze_tls @@ -81,6 +82,12 @@ def _dns_skip_result(): } +def _web_recon_enabled(config, scan_profile): + web_recon = config.get("web_recon", {}) if isinstance(config, dict) else {} + enabled_profiles = web_recon.get("enabled_profiles", []) + return scan_profile in enabled_profiles + + def _safe_report_name(target): safe_name = re.sub(r"[^A-Za-z0-9_.-]+", "_", target).strip("._-") return safe_name or "target" @@ -219,6 +226,14 @@ def main(): logging.error(f"Error during TLS analysis: {e}") results["TLS Analysis"] = {"error": f"TLS analysis failed: {e}"} + if _web_recon_enabled(config, chosen_profile): + try: + logging.info("Running endpoint discovery.") + results["Endpoint Discovery"] = discover_endpoints(results["HTTP Analysis"], config) + except Exception as e: + logging.error(f"Error during endpoint discovery: {e}") + results["Endpoint Discovery"] = {"error": f"Endpoint discovery failed: {e}"} + if _is_ip_target(target): logging.info(DNS_IP_SKIP_REASON) results["DNS Analysis"] = _dns_skip_result() diff --git a/activerecon/modules/config/config.yaml b/activerecon/modules/config/config.yaml index d0b83f9..45db610 100644 --- a/activerecon/modules/config/config.yaml +++ b/activerecon/modules/config/config.yaml @@ -4,8 +4,25 @@ scan_profiles: full: "-Pn -n -sT -p- -sV -sC -T4" udp: "-Pn -n -sU --top-ports 100 -sC --script-timeout 5m" web: "-Pn -n -sT -p 80,443,3000,5000,8000,8080,8443,9000,9443 -sV -T3" - http_timeout: 5 nmap_timeout: 300 # Optional override if Nmap is installed outside PATH. # nmap_executable: "C:\\Program Files\\Nmap\\nmap.exe" +web_recon: + enabled_profiles: + - web + endpoint_probe_limit: 50 + fetch_javascript: true + same_origin_only: true + well_known_paths: + - /robots.txt + - /sitemap.xml + - /.well-known/security.txt + - /api + - /rest + - /ftp + - /admin + - /login + - /debug + - /swagger + - /api-docs diff --git a/activerecon/modules/endpoint_discovery.py b/activerecon/modules/endpoint_discovery.py new file mode 100644 index 0000000..d519fc8 --- /dev/null +++ b/activerecon/modules/endpoint_discovery.py @@ -0,0 +1,293 @@ +import logging +import re +from html.parser import HTMLParser +from urllib.parse import urljoin, urlparse + +import requests + + +DEFAULT_WELL_KNOWN_PATHS = [ + "/robots.txt", + "/sitemap.xml", + "/.well-known/security.txt", + "/api", + "/rest", + "/ftp", + "/admin", + "/login", + "/debug", + "/swagger", + "/api-docs", +] +DEFAULT_ENDPOINT_LIMIT = 50 +DEFAULT_HTTP_TIMEOUT = 5 +PATH_STRING_RE = re.compile(r"""["'`](/[A-Za-z0-9._~:/?#\[\]@!$&()*+,;=%-]{1,200})["'`]""") + + +class EndpointHTMLParser(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.links = [] + self.script_srcs = [] + + def handle_starttag(self, tag, attrs): + attrs_dict = dict(attrs) + + if "href" in attrs_dict: + self.links.append((attrs_dict["href"], "html:href")) + if "src" in attrs_dict: + self.links.append((attrs_dict["src"], "html:src")) + if tag == "form" and attrs_dict.get("action"): + self.links.append((attrs_dict["action"], "html:form-action")) + if tag == "script" and attrs_dict.get("src"): + self.script_srcs.append(attrs_dict["src"]) + + +def _web_recon_settings(config): + web_recon = config.get("web_recon", {}) if isinstance(config, dict) else {} + return { + "endpoint_probe_limit": web_recon.get("endpoint_probe_limit", DEFAULT_ENDPOINT_LIMIT), + "fetch_javascript": web_recon.get("fetch_javascript", True), + "same_origin_only": web_recon.get("same_origin_only", True), + "well_known_paths": web_recon.get("well_known_paths", DEFAULT_WELL_KNOWN_PATHS), + } + + +def _timeout(config): + if isinstance(config, dict): + return config.get("http_timeout", DEFAULT_HTTP_TIMEOUT) + return DEFAULT_HTTP_TIMEOUT + + +def _limit(value): + try: + return max(0, int(value)) + except (TypeError, ValueError): + return DEFAULT_ENDPOINT_LIMIT + + +def _is_successful_http_result(item): + if not isinstance(item, dict) or item.get("error"): + return False + try: + status = int(item.get("status", 0)) + except (TypeError, ValueError): + return False + return 200 <= status < 400 + + +def _origin(url): + parsed = urlparse(url) + if not parsed.scheme or not parsed.netloc: + return "" + return f"{parsed.scheme}://{parsed.netloc}" + + +def _same_origin(url, base_url): + return _origin(url) == _origin(base_url) + + +def _path_from_url(url): + parsed = urlparse(url) + path = parsed.path or "/" + if parsed.query: + path = f"{path}?{parsed.query}" + if parsed.fragment: + path = f"{path}#{parsed.fragment}" + return path + + +def _normalize_candidate(value, base_url, same_origin_only=True): + if not value: + return None + + raw_value = str(value).strip() + if raw_value.startswith(("mailto:", "tel:", "javascript:", "data:")): + return None + + absolute = urljoin(base_url, raw_value) + parsed = urlparse(absolute) + if parsed.scheme not in {"http", "https"}: + return None + if same_origin_only and not _same_origin(absolute, base_url): + return None + + path = _path_from_url(absolute) + if not path.startswith("/") or path.startswith("//"): + return None + return path + + +def _confidence(source): + if source.startswith(("response-header", "well-known", "robots.txt", "html:")): + return "medium" + return "low" + + +def _add_endpoint(endpoints, path, source, limit, status_code=None, content_type=None): + if not path or not path.startswith("/") or path.startswith("//") or len(path) > 250: + return + if path not in endpoints and len(endpoints) >= limit: + return + + if path not in endpoints: + endpoints[path] = { + "path": path, + "source": source, + "confidence": _confidence(source), + } + + if status_code is not None: + endpoints[path]["status_code"] = status_code + if content_type: + endpoints[path]["content_type"] = content_type + + +def _extract_paths_from_text(text): + paths = [] + for match in PATH_STRING_RE.finditer(text or ""): + path = match.group(1).strip() + if path.startswith("/") and not path.startswith("//"): + paths.append(path) + return paths + + +def _path_like_header_values(value): + values = value if isinstance(value, (list, tuple, set)) else [value] + paths = [] + for raw_value in values: + for candidate in str(raw_value).split(","): + path = candidate.strip().strip("\"'") + if path.startswith("/") and not path.startswith("//") and len(path) > 1: + paths.append(path) + return paths + + +def _safe_get(url, timeout): + try: + return requests.get(url, timeout=timeout) + except requests.RequestException as e: + logging.debug(f"Endpoint discovery request failed for {url}: {e}") + return None + + +def _content_type(response): + return response.headers.get("Content-Type") or response.headers.get("content-type") or "" + + +def _is_found_probe(response): + if response is None: + return False + return response.status_code < 400 or response.status_code in {401, 403} + + +def _robots_disallow_paths(text): + paths = [] + for line in (text or "").splitlines(): + stripped = line.strip() + if not stripped.lower().startswith("disallow:"): + continue + path = stripped.split(":", 1)[1].strip() + if path.startswith("/") and not path.startswith("//"): + paths.append(path) + return paths + + +def _parse_html(text): + parser = EndpointHTMLParser() + try: + parser.feed(text or "") + except Exception as e: + logging.debug(f"Endpoint discovery HTML parsing failed: {e}") + return parser + + +def discover_endpoints(http_results, config=None): + """ + Discover a small set of interesting endpoints from HTTP results. + This intentionally avoids aggressive directory brute forcing. + """ + if not isinstance(http_results, list): + return [] + + settings = _web_recon_settings(config or {}) + endpoint_limit = _limit(settings["endpoint_probe_limit"]) + timeout = _timeout(config) + same_origin_only = bool(settings["same_origin_only"]) + fetch_javascript = bool(settings["fetch_javascript"]) + well_known_paths = settings["well_known_paths"] or DEFAULT_WELL_KNOWN_PATHS + groups = [] + + for item in http_results: + if not _is_successful_http_result(item): + continue + + base_url = item.get("final_url") or item.get("url") + if not base_url: + continue + base_origin = _origin(base_url) + if not base_origin: + continue + + endpoints = {} + requests_made = 0 + + def get_if_allowed(url): + nonlocal requests_made + if requests_made >= endpoint_limit: + return None + requests_made += 1 + return _safe_get(url, timeout) + + for header_name, header_value in (item.get("headers") or {}).items(): + for path in _path_like_header_values(header_value): + _add_endpoint(endpoints, path, f"response-header:{header_name}", endpoint_limit) + + page_response = get_if_allowed(base_url) + if page_response is not None and page_response.status_code < 400 and "html" in _content_type(page_response).lower(): + html_text = getattr(page_response, "text", "")[:200000] + parser = _parse_html(html_text) + for raw_link, source in parser.links: + path = _normalize_candidate(raw_link, base_url, same_origin_only) + if path: + _add_endpoint(endpoints, path, source, endpoint_limit) + for path in _extract_paths_from_text(html_text): + _add_endpoint(endpoints, path, "html-string", endpoint_limit) + + if fetch_javascript: + for script_src in parser.script_srcs: + script_url = urljoin(base_url, script_src) + if same_origin_only and not _same_origin(script_url, base_url): + continue + script_response = get_if_allowed(script_url) + if script_response is None or script_response.status_code >= 400: + continue + script_text = getattr(script_response, "text", "")[:200000] + for path in _extract_paths_from_text(script_text): + _add_endpoint(endpoints, path, "javascript", endpoint_limit) + + for path in well_known_paths[:endpoint_limit]: + if not str(path).startswith("/"): + continue + response = get_if_allowed(urljoin(base_origin, path)) + if not _is_found_probe(response): + continue + _add_endpoint( + endpoints, + path, + "well-known", + endpoint_limit, + status_code=response.status_code, + content_type=_content_type(response), + ) + if path == "/robots.txt" and response.status_code < 400: + for disallow_path in _robots_disallow_paths(getattr(response, "text", "")): + _add_endpoint(endpoints, disallow_path, "robots.txt", endpoint_limit) + + if endpoints: + groups.append({ + "base_url": base_origin, + "endpoints": list(endpoints.values()), + }) + + return groups diff --git a/activerecon/modules/report_generator.py b/activerecon/modules/report_generator.py index 8af5dd6..c225713 100644 --- a/activerecon/modules/report_generator.py +++ b/activerecon/modules/report_generator.py @@ -73,6 +73,43 @@ def _write_http_result(f, item): f.write(f" - `{key}`: {value}\n") +def _write_endpoint_discovery(f, endpoint_results): + f.write("## Endpoint Discovery\n\n") + if isinstance(endpoint_results, dict) and endpoint_results.get("error"): + f.write(f"**Error:** {endpoint_results['error']}\n") + f.write("---\n\n") + return + + groups = endpoint_results if isinstance(endpoint_results, list) else [] + if not groups: + f.write("No endpoints discovered.\n") + f.write("---\n\n") + return + + for group in groups: + f.write(f"### {group.get('base_url', 'Unknown base URL')}\n\n") + endpoints = group.get("endpoints", []) + if not endpoints: + f.write("- No endpoints discovered.\n\n") + continue + for endpoint in endpoints[:50]: + line = ( + f"- `{endpoint.get('path', '/')}` " + f"- **Source:** {endpoint.get('source', 'unknown')} " + f"- **Confidence:** {endpoint.get('confidence', 'low')}" + ) + if endpoint.get("status_code") is not None: + line += f" - **Status:** {endpoint['status_code']}" + if endpoint.get("content_type"): + line += f" - **Content-Type:** {endpoint['content_type']}" + f.write(f"{line}\n") + if len(endpoints) > 50: + f.write(f"- Output trimmed. {len(endpoints) - 50} additional endpoints omitted.\n") + f.write("\n") + + f.write("---\n\n") + + def build_report_summary(results): nmap_results = results.get("Nmap Scan", results) ports = _as_list(nmap_results.get("ports", [])) @@ -107,6 +144,7 @@ def generate_report(target, results, output_file): logging.info(f"Generating report to: {output_file}") nmap_results = results.get("Nmap Scan", results) http_results = results.get("HTTP Analysis", []) + endpoint_results = results.get("Endpoint Discovery") tls_results = results.get("TLS Analysis", []) dns_results = results.get("DNS Analysis", {}) attention_results = results.get("Attention", []) @@ -176,6 +214,9 @@ def generate_report(target, results, output_file): f.write("No HTTP services analyzed.\n") f.write("---\n\n") + if "Endpoint Discovery" in results: + _write_endpoint_discovery(f, endpoint_results) + f.write("## TLS Analysis\n\n") if isinstance(tls_results, dict) and tls_results.get("error"): f.write(f"**Error:** {tls_results['error']}\n") diff --git a/activerecon/modules/risk_analysis.py b/activerecon/modules/risk_analysis.py index 3448a33..1b916f7 100644 --- a/activerecon/modules/risk_analysis.py +++ b/activerecon/modules/risk_analysis.py @@ -49,6 +49,19 @@ def _dns_results(results): return dns_results if isinstance(dns_results, dict) else {} +def _endpoint_groups(results): + endpoint_results = results.get("Endpoint Discovery", []) + return endpoint_results if isinstance(endpoint_results, list) else [] + + +def _endpoint_items(results): + for group in _endpoint_groups(results): + base_url = group.get("base_url", "") + for endpoint in group.get("endpoints", []): + if isinstance(endpoint, dict): + yield base_url, endpoint + + def _is_https_result(item): url = str(item.get("final_url") or item.get("url") or "").lower() return url.startswith("https://") @@ -75,8 +88,26 @@ def _header_value_text(value): return str(value).strip() +def _is_api_like_path(path): + lower_path = str(path).lower() + return lower_path == "/api" or lower_path == "/rest" or lower_path.startswith("/api/") or lower_path.startswith("/rest/") + + +def _is_admin_debug_docs_path(path): + lower_path = str(path).lower() + return any(token in lower_path for token in ("/admin", "/debug", "/swagger", "/api-docs")) + + def generate_attention_findings(results, now=None): findings = [] + seen_endpoint_signals = set() + + def add_endpoint_signal(severity, category, message, evidence): + key = (category, message, evidence) + if key in seen_endpoint_signals: + return + seen_endpoint_signals.add(key) + findings.append(_finding(severity, category, message, evidence)) for port in _open_ports(results): service = str(port.get("service", "")).lower() @@ -123,6 +154,27 @@ def generate_attention_findings(results, now=None): if item.get("redirect_chain"): findings.append(_finding("info", "http", "HTTP redirects observed", " -> ".join(item["redirect_chain"]))) + for base_url, endpoint in _endpoint_items(results): + path = endpoint.get("path", "") + source = endpoint.get("source", "") + evidence = f"{base_url}{path}" if base_url else path + + if path == "/robots.txt" and endpoint.get("status_code") is not None: + add_endpoint_signal("info", "endpoint", "robots.txt found", evidence) + if source == "robots.txt": + add_endpoint_signal("info", "endpoint", "robots.txt contains Disallow paths", evidence) + if source.startswith("response-header"): + add_endpoint_signal("info", "endpoint", "Interesting endpoint from response header", evidence) + if _is_api_like_path(path): + if source.startswith("javascript"): + add_endpoint_signal("info", "endpoint", "JavaScript exposes API-like paths", evidence) + else: + add_endpoint_signal("info", "endpoint", "API-like endpoint discovered", evidence) + if _is_admin_debug_docs_path(path): + add_endpoint_signal("info", "endpoint", "Possible admin/debug/docs route discovered", evidence) + if str(path).lower() == "/ftp": + add_endpoint_signal("info", "endpoint", "/ftp endpoint discovered", evidence) + now = now or datetime.now(timezone.utc) comparable_now = now.replace(tzinfo=None) for item in _tls_results(results): diff --git a/tests/test_endpoint_discovery.py b/tests/test_endpoint_discovery.py new file mode 100644 index 0000000..9515ddf --- /dev/null +++ b/tests/test_endpoint_discovery.py @@ -0,0 +1,90 @@ +from activerecon.modules import endpoint_discovery + + +class Response: + def __init__(self, status_code=200, headers=None, text="", url="http://example.com:3000"): + self.status_code = status_code + self.headers = headers or {} + self.text = text + self.url = url + + +def test_discover_endpoints_extracts_html_js_headers_and_safe_probes(monkeypatch): + calls = [] + + def fake_get(url, timeout): + calls.append((url, timeout)) + if url == "http://example.com:3000": + return Response( + headers={"Content-Type": "text/html"}, + text=""" + login +
+ + + + """, + url=url, + ) + if url == "http://example.com:3000/app.js": + return Response(headers={"Content-Type": "application/javascript"}, text='const api = "/api/orders";', url=url) + if url == "http://example.com:3000/robots.txt": + return Response(headers={"Content-Type": "text/plain"}, text="Disallow: /hidden\n", url=url) + if url == "http://example.com:3000/api": + return Response(headers={"Content-Type": "application/json"}, text="{}", url=url) + if url == "http://example.com:3000/admin": + return Response(status_code=403, headers={"Content-Type": "text/html"}, text="", url=url) + return Response(status_code=404, headers={"Content-Type": "text/plain"}, text="", url=url) + + monkeypatch.setattr(endpoint_discovery.requests, "get", fake_get) + + results = endpoint_discovery.discover_endpoints( + [{ + "url": "http://example.com:3000", + "final_url": "http://example.com:3000", + "status": 200, + "headers": {"X-Recruiting": "/#/jobs"}, + }], + { + "http_timeout": 2, + "web_recon": { + "endpoint_probe_limit": 20, + "fetch_javascript": True, + "same_origin_only": True, + "well_known_paths": ["/robots.txt", "/api", "/admin"], + }, + }, + ) + + endpoints = {item["path"]: item for item in results[0]["endpoints"]} + + assert results[0]["base_url"] == "http://example.com:3000" + assert endpoints["/#/jobs"]["source"] == "response-header:X-Recruiting" + assert endpoints["/login"]["source"] == "html:href" + assert endpoints["/submit"]["source"] == "html:form-action" + assert endpoints["/rest/products"]["source"] == "html-string" + assert endpoints["/api/orders"]["source"] == "javascript" + assert endpoints["/robots.txt"]["status_code"] == 200 + assert endpoints["/robots.txt"]["content_type"] == "text/plain" + assert endpoints["/hidden"]["source"] == "robots.txt" + assert endpoints["/api"]["status_code"] == 200 + assert endpoints["/admin"]["status_code"] == 403 + assert not any("cdn.example.net" in url for url, timeout in calls) + + +def test_discover_endpoints_skips_unsuccessful_http_results(monkeypatch): + monkeypatch.setattr( + endpoint_discovery.requests, + "get", + lambda url, timeout: (_ for _ in ()).throw(AssertionError("No requests expected")), + ) + + results = endpoint_discovery.discover_endpoints( + [ + {"url": "http://example.com", "status": 500, "headers": {}}, + {"url": "http://example.com", "status": 200, "error": "timeout"}, + ], + {"web_recon": {"endpoint_probe_limit": 5}}, + ) + + assert results == [] diff --git a/tests/test_main.py b/tests/test_main.py index 9e2179b..9a55d90 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -286,6 +286,59 @@ def fake_report(target, results, output_file): } +def test_main_web_profile_runs_endpoint_discovery(monkeypatch, tmp_path): + output = tmp_path / "report.md" + captured = {} + + def fake_nmap(target, scan_command, config): + assert scan_command == "-web" + return { + "target": target, + "ports": [{"portid": "3000", "protocol": "tcp", "state": "open", "service": "ppp"}], + "status": {"state": "up"}, + "scan_info": {}, + "host": target, + } + + def fake_http(target, config, http_ports): + return [{"url": "http://example.com:3000", "status": 200, "headers": {}}] + + def fake_endpoints(http_results, config): + captured["endpoint_http_results"] = http_results + return [{"base_url": "http://example.com:3000", "endpoints": [{"path": "/api", "source": "well-known"}]}] + + def fake_report(target, results, output_file): + captured["results"] = results + + monkeypatch.setattr( + main_module, + "CONFIG", + { + "scan_profiles": {"web": "-web"}, + "http_timeout": 5, + "web_recon": {"enabled_profiles": ["web"]}, + }, + ) + monkeypatch.setattr(main_module, "run_nmap_scan", fake_nmap) + monkeypatch.setattr(main_module, "analyze_http", fake_http) + monkeypatch.setattr(main_module, "analyze_tls", lambda http_results, timeout: []) + monkeypatch.setattr(main_module, "discover_endpoints", fake_endpoints) + monkeypatch.setattr(main_module, "analyze_dns", lambda target: {"A": [], "MX": [], "TXT": []}) + monkeypatch.setattr(main_module, "generate_attention_findings", lambda results: []) + monkeypatch.setattr(main_module, "generate_report", fake_report) + monkeypatch.setattr(main_module, "generate_json_report", lambda target, results, output_file: None) + monkeypatch.setattr( + sys, + "argv", + ["activerecon", "--target", "example.com", "--scan-profile", "web", "--output", str(output)], + ) + + main_module.main() + + assert captured["endpoint_http_results"] == [{"url": "http://example.com:3000", "status": 200, "headers": {}}] + assert captured["results"]["Endpoint Discovery"][0]["endpoints"][0]["path"] == "/api" + + def test_main_rejects_target_outside_scope(monkeypatch, tmp_path): scope = tmp_path / "scope.txt" scope.write_text("allowed.example.com\n", encoding="utf-8") diff --git a/tests/test_report_generator.py b/tests/test_report_generator.py index 02590c9..836463e 100644 --- a/tests/test_report_generator.py +++ b/tests/test_report_generator.py @@ -50,6 +50,16 @@ def test_generate_report_writes_nested_results(tmp_path): "TXT": [], "errors": {"MX": "missing"}, }, + "Endpoint Discovery": [{ + "base_url": "http://example.com", + "endpoints": [{ + "path": "/api", + "source": "well-known", + "confidence": "medium", + "status_code": 200, + "content_type": "application/json", + }], + }], "Attention": [{ "severity": "low", "category": "http", @@ -87,6 +97,9 @@ def test_generate_report_writes_nested_results(tmp_path): assert " - `server:test`" in content assert "- **Response Headers:**" in content assert " - `Server`: test" in content + assert "## Endpoint Discovery" in content + assert "### http://example.com" in content + assert "`/api` - **Source:** well-known - **Confidence:** medium - **Status:** 200 - **Content-Type:** application/json" in content assert "## DNS Analysis" in content assert "## TLS Analysis" in content assert "TLSv1.3" in content diff --git a/tests/test_risk_analysis.py b/tests/test_risk_analysis.py index 7d30e63..4cd4839 100644 --- a/tests/test_risk_analysis.py +++ b/tests/test_risk_analysis.py @@ -131,6 +131,34 @@ def test_generate_attention_findings_reports_cors_and_header_paths_as_info(): assert all(item.get("evidence") != "no path here" for item in findings) +def test_generate_attention_findings_reports_endpoint_discovery_signals(): + results = { + "Endpoint Discovery": [{ + "base_url": "http://example.com", + "endpoints": [ + {"path": "/api", "source": "well-known", "status_code": 200}, + {"path": "/robots.txt", "source": "well-known", "status_code": 200}, + {"path": "/hidden", "source": "robots.txt"}, + {"path": "/#/jobs", "source": "response-header:X-Recruiting"}, + {"path": "/api/orders", "source": "javascript"}, + {"path": "/admin", "source": "well-known", "status_code": 403}, + {"path": "/ftp", "source": "well-known", "status_code": 200}, + ], + }], + } + + findings = generate_attention_findings(results) + messages = [item["message"] for item in findings] + + assert "API-like endpoint discovered" in messages + assert "robots.txt found" in messages + assert "robots.txt contains Disallow paths" in messages + assert "Interesting endpoint from response header" in messages + assert "JavaScript exposes API-like paths" in messages + assert "Possible admin/debug/docs route discovered" in messages + assert "/ftp endpoint discovered" in messages + + def test_generate_attention_findings_reports_expired_tls_certificates(): results = { "TLS Analysis": [{