diff --git a/README.md b/README.md index bda9038..7026fe0 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ Instead of manually running separate commands and collecting notes from differen * collect TLS certificate metadata for HTTPS services * query A, MX, and TXT DNS records, while skipping noisy DNS lookups for IP address targets * run endpoint discovery automatically from the `web` scan profile +* import, normalize, deduplicate, diff, and export target inventories without scanning * generate timestamped Markdown and JSON reports under `reports/` * highlight interesting signals for follow-up review @@ -77,6 +78,7 @@ ActiveRecon currently supports: | TLS | TLS version, cipher, subject, issuer, and certificate validity dates | | DNS | Separate A, MX, and TXT lookups, with clean IP-target skip behavior | | Web | Endpoint discovery from HTML, headers, JavaScript, robots.txt, and probes | +| Inventory | Target import, normalization, deduplication, diff, and scope export | | Reporting | Timestamped Markdown and JSON schema `1.1` reports | | Safety | Responsible-use notice, scope guard, dry-run mode, doctor checks | | Analysis | Low-noise interesting signals for follow-up review | @@ -149,6 +151,24 @@ Use a scope file: activerecon --target app.example.com --scope scope.txt --scan-profile standard ``` +Import target inventory without scanning: + +```bash +activerecon targets import --input targets.txt --output inventories/latest.json +``` + +Compare two inventories without scanning: + +```bash +activerecon targets diff --previous inventories/old.json --current inventories/latest.json +``` + +Export normalized inventory hosts to a scope file: + +```bash +activerecon targets export-scope --inventory inventories/latest.json --output scopes/latest.txt +``` + --- ## Example Report Output @@ -241,6 +261,9 @@ pip install -e . 
```bash activerecon --target --scan-profile [--output ] [--output-format md|json|both] [--verbose|--quiet] activerecon --doctor +activerecon targets import --input --output +activerecon targets diff --previous --current +activerecon targets export-scope --inventory --output ``` ### Arguments @@ -259,6 +282,39 @@ activerecon --doctor --- +## Target Inventory + +Inventory commands are intentionally separate from scanning. They do not run Nmap, HTTP checks, DNS lookups, endpoint discovery, or reports. + +Supported import formats: + +| Format | Behavior | +| ------- | ------------------------------------------------------------ | +| `.txt` | One target per line. Blank lines and `#` comments are ignored | +| `.json` | List of strings, list of objects, or inventory-like object | +| `.jsonl` | One string or object per line | + +For JSON objects, ActiveRecon reads the first useful field from: + +```text +target, url, host, domain, uri +``` + +Inventory files use schema version `1.0`: + +```json +{ + "schema_version": "1.0", + "generated_at": "2026-06-17T18:05:44Z", + "source": "targets.txt", + "targets": [] +} +``` + +Scope export writes one normalized host per line, compatible with the current `--scope` file behavior. 
+ +--- + ## Config Common config values live in: @@ -420,6 +476,11 @@ ActiveRecon/ | |-- risk_analysis.py | |-- scope_guard.py | `-- tls_analysis.py +| |-- targets/ +| | |-- parser.py +| | |-- target_diff.py +| | |-- target_inventory.py +| | `-- target_loader.py |-- reports/ |-- tests/ |-- .github/workflows/ diff --git a/activerecon/cli.py b/activerecon/cli.py index cc11b58..e7d7756 100644 --- a/activerecon/cli.py +++ b/activerecon/cli.py @@ -1,4 +1,5 @@ import argparse +import json import logging from .models import ReconOptions @@ -6,6 +7,14 @@ from .modules.json_report import build_json_summary from .output_paths import DEFAULT_REPORT_DIR from .runner import ReconValidationError, run_recon +from .targets.target_diff import diff_inventories +from .targets.target_inventory import ( + build_inventory, + export_scope_file, + load_inventory, + save_inventory, +) +from .targets.target_loader import load_targets LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" @@ -61,6 +70,22 @@ def build_parser(): default="fast", help="Choose a pre-defined Nmap profile from config.yaml", ) + + subparsers = parser.add_subparsers(dest="command") + targets_parser = subparsers.add_parser("targets", help="Import, diff, and export target inventories") + targets_subparsers = targets_parser.add_subparsers(dest="targets_action") + + import_parser = targets_subparsers.add_parser("import", help="Import targets into an inventory file") + import_parser.add_argument("--input", required=True, help="Input targets file (.txt, .json, or .jsonl)") + import_parser.add_argument("--output", required=True, help="Output inventory JSON file") + + diff_parser = targets_subparsers.add_parser("diff", help="Compare two inventory files") + diff_parser.add_argument("--previous", required=True, help="Previous inventory JSON file") + diff_parser.add_argument("--current", required=True, help="Current inventory JSON file") + + export_parser = targets_subparsers.add_parser("export-scope", help="Export inventory 
def _inventory_host_count(inventory):
    """Return how many distinct hosts the inventory contains.

    Hosts are normalized the same way the scope exporter normalizes them
    (stringified, stripped, lower-cased) so the number printed by the CLI
    matches the number of lines written to the scope file.

    Args:
        inventory: Inventory mapping; its ``"targets"`` value is expected to
            be a list of dicts that may carry a ``"host"`` entry.

    Returns:
        The count of unique, non-empty normalized hosts. Entries that are not
        dicts, or whose host is missing/empty/null, are ignored.
    """
    hosts = set()
    # ``or []`` covers both a missing key and an explicit JSON null.
    for item in inventory.get("targets") or []:
        if not isinstance(item, dict):
            continue
        host = str(item.get("host") or "").strip().lower()
        if host:
            hosts.add(host)
    return len(hosts)
def _spec_from_inventory_item(item):
    """Rebuild a target spec from one stored inventory entry.

    The entry's ``raw`` text (falling back to ``host``, then to an empty
    string) is parsed with ``parse_target``; the stored ``host``, ``scheme``,
    ``port`` and ``path`` values then overwrite the parsed ones so the
    resulting key reflects what was saved in the inventory.

    Args:
        item: One inventory entry (a dict).

    Returns:
        The spec object returned by ``parse_target`` with its fields updated.
    """
    raw = item.get("raw") or item.get("host") or ""
    target_spec = parse_target(raw)
    target_spec.host = str(item.get("host") or target_spec.host).lower()
    target_spec.scheme = item.get("scheme")
    target_spec.port = item.get("port")
    target_spec.path = item.get("path") or ""
    return target_spec


def _target_map(inventory):
    """Index an inventory's entries by their normalized target key.

    Args:
        inventory: Inventory mapping with an optional ``"targets"`` list.

    Returns:
        Dict mapping ``inventory_target_key(...)`` strings to the original
        entry dicts. Non-dict entries are skipped; when two entries share a
        key, the later one wins.

    Raises:
        ValueError: If *inventory* is not a mapping (for example a JSON list
            was loaded), so the CLI can report it instead of crashing.
    """
    if not isinstance(inventory, dict):
        raise ValueError(
            f"inventory must be a JSON object, got {type(inventory).__name__}"
        )

    targets = {}
    # ``or []`` also tolerates an explicit ``"targets": null``.
    for item in inventory.get("targets") or []:
        if not isinstance(item, dict):
            continue
        target_spec = _spec_from_inventory_item(item)
        targets[inventory_target_key(target_spec)] = item
    return targets


def diff_inventories(previous, current):
    """Compare two inventories and classify their targets.

    Args:
        previous: Older inventory mapping (``None``/empty is treated as an
            inventory without targets).
        current: Newer inventory mapping (same handling as *previous*).

    Returns:
        Dict with three lists of inventory entries, each ordered by target
        key: ``"added"`` (only in *current*), ``"removed"`` (only in
        *previous*) and ``"unchanged"`` (in both; entries are taken from
        *current*).

    Raises:
        ValueError: If either inventory is a non-empty value that is not a
            mapping.
    """
    previous_targets = _target_map(previous or {})
    current_targets = _target_map(current or {})

    previous_keys = set(previous_targets)
    current_keys = set(current_targets)

    return {
        "added": [current_targets[key] for key in sorted(current_keys - previous_keys)],
        "removed": [previous_targets[key] for key in sorted(previous_keys - current_keys)],
        "unchanged": [current_targets[key] for key in sorted(current_keys & previous_keys)],
    }
def load_inventory(input_file):
    """Read an inventory JSON document from *input_file*.

    Args:
        input_file: Path (``str`` or ``Path``) of the inventory file.

    Returns:
        The decoded inventory as a dict.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass) or the
            top-level value is not a JSON object. Callers in this package
            immediately call ``.get`` on the result, so anything else would
            otherwise fail later with an unrelated ``AttributeError``.
    """
    input_path = Path(input_file)
    with input_path.open("r", encoding="utf-8") as f:
        inventory = json.load(f)

    if not isinstance(inventory, dict):
        raise ValueError(
            f"{input_path}: inventory must be a JSON object, "
            f"got {type(inventory).__name__}"
        )
    return inventory


def export_scope_file(inventory, output_file):
    """Write the inventory's unique hosts to a scope file, one per line.

    Hosts are stringified, stripped and lower-cased, then de-duplicated while
    keeping first-seen order. Parent directories of *output_file* are created
    when missing.

    Args:
        inventory: Inventory mapping with an optional ``"targets"`` list.
        output_file: Destination path (``str`` or ``Path``).

    Raises:
        OSError: If the directory or file cannot be created/written.
    """
    # A dict is used as an insertion-ordered set.
    hosts = {}
    for item in inventory.get("targets") or []:
        if not isinstance(item, dict):
            continue
        # ``or ""`` matters: a JSON null host must be skipped, not written
        # out as the literal string "none".
        host = str(item.get("host") or "").strip().lower()
        if host:
            hosts.setdefault(host, None)

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as f:
        f.writelines(f"{host}\n" for host in hosts)
def load_targets(input_file):
    """Load raw target strings from *input_file*, choosing a parser by suffix.

    ``.json`` and ``.jsonl`` files (suffix compared case-insensitively) go to
    their dedicated loaders; every other suffix is read as plain text.
    """
    source = Path(input_file)
    loaders = {
        ".json": _load_json,
        ".jsonl": _load_jsonl,
    }
    # Unknown or missing suffixes fall back to the plain-text loader.
    loader = loaders.get(source.suffix.lower(), _load_txt)
    return loader(source)
assert "Duplicates removed: 1" in captured_output.out + assert "Scans run: 0" in captured_output.out + + inventory = json.loads(output_file.read_text(encoding="utf-8")) + assert [item["host"] for item in inventory["targets"]] == ["example.com", "api.example.com"] + + +def test_cli_targets_diff_does_not_scan(monkeypatch, tmp_path, capsys): + previous = tmp_path / "old.json" + current = tmp_path / "latest.json" + save_inventory(build_inventory(["example.com", "old.example.com"]), previous) + save_inventory(build_inventory(["example.com", "new.example.com"]), current) + monkeypatch.setattr(cli, "run_recon", lambda options: (_ for _ in ()).throw(AssertionError())) + + result = cli.main([ + "targets", + "diff", + "--previous", + str(previous), + "--current", + str(current), + ]) + captured_output = capsys.readouterr() + + assert result == 0 + assert "ActiveRecon target diff completed" in captured_output.out + assert "Added: 1" in captured_output.out + assert "Removed: 1" in captured_output.out + assert "Unchanged: 1" in captured_output.out + assert "Scans run: 0" in captured_output.out + + +def test_cli_targets_export_scope_does_not_scan(monkeypatch, tmp_path, capsys): + inventory_file = tmp_path / "latest.json" + scope_file = tmp_path / "scopes" / "latest.txt" + save_inventory(build_inventory(["https://api.example.com", "https://api.example.com/login"]), inventory_file) + monkeypatch.setattr(cli, "run_recon", lambda options: (_ for _ in ()).throw(AssertionError())) + + result = cli.main([ + "targets", + "export-scope", + "--inventory", + str(inventory_file), + "--output", + str(scope_file), + ]) + captured_output = capsys.readouterr() + + assert result == 0 + assert scope_file.read_text(encoding="utf-8").splitlines() == ["api.example.com"] + assert "ActiveRecon scope export completed" in captured_output.out + assert "Targets exported: 1" in captured_output.out + assert "Scans run: 0" in captured_output.out diff --git a/tests/test_target_diff.py 
b/tests/test_target_diff.py new file mode 100644 index 0000000..451bdd0 --- /dev/null +++ b/tests/test_target_diff.py @@ -0,0 +1,19 @@ +from activerecon.targets.target_diff import diff_inventories +from activerecon.targets.target_inventory import build_inventory + + +def test_diff_inventories_reports_added_removed_and_unchanged(): + previous = build_inventory( + ["example.com", "old.example.com", "https://api.example.com"], + generated_at="2026-06-17T18:00:00Z", + ) + current = build_inventory( + ["example.com", "new.example.com", "https://api.example.com"], + generated_at="2026-06-17T18:05:00Z", + ) + + diff = diff_inventories(previous, current) + + assert [item["host"] for item in diff["added"]] == ["new.example.com"] + assert [item["host"] for item in diff["removed"]] == ["old.example.com"] + assert [item["host"] for item in diff["unchanged"]] == ["api.example.com", "example.com"] diff --git a/tests/test_target_inventory.py b/tests/test_target_inventory.py new file mode 100644 index 0000000..babf73d --- /dev/null +++ b/tests/test_target_inventory.py @@ -0,0 +1,65 @@ +from activerecon.policies.scope_policy import ScopePolicy +from activerecon.targets.target_inventory import ( + build_inventory, + export_scope_file, + inventory_target_key, + load_inventory, + save_inventory, +) +from activerecon.targets.parser import parse_target + + +def test_build_inventory_normalizes_and_deduplicates_targets(): + inventory = build_inventory( + [ + "https://API.example.com/", + "https://api.example.com", + "http://api.example.com", + "", + ], + generated_at="2026-06-17T18:05:44Z", + ) + + assert inventory["schema_version"] == "1.0" + assert inventory["generated_at"] == "2026-06-17T18:05:44Z" + assert inventory["source"] == "manual" + assert len(inventory["targets"]) == 2 + assert inventory["targets"][0]["host"] == "api.example.com" + assert inventory["targets"][0]["scheme"] == "https" + assert inventory["targets"][0]["is_ip"] is False + assert inventory["targets"][1]["scheme"] 
== "http" + + +def test_inventory_target_key_is_stable_for_trailing_slash(): + first = parse_target("https://example.com/") + second = parse_target("https://example.com") + + assert inventory_target_key(first) == inventory_target_key(second) + + +def test_save_and_load_inventory(tmp_path): + output = tmp_path / "inventories" / "latest.json" + inventory = build_inventory(["example.com"], generated_at="2026-06-17T18:05:44Z") + + save_inventory(inventory, output) + + assert load_inventory(output) == inventory + + +def test_export_scope_file_works_with_scope_policy(tmp_path): + scope_file = tmp_path / "scopes" / "latest.txt" + inventory = build_inventory([ + "https://api.example.com", + "https://api.example.com/login", + "192.0.2.10", + ]) + + export_scope_file(inventory, scope_file) + + content = scope_file.read_text(encoding="utf-8").splitlines() + assert content == ["api.example.com", "192.0.2.10"] + + policy = ScopePolicy.from_file(scope_file) + assert policy.allows("api.example.com") + assert policy.allows("192.0.2.10") + assert not policy.allows("example.net") diff --git a/tests/test_target_loader.py b/tests/test_target_loader.py new file mode 100644 index 0000000..6721b0c --- /dev/null +++ b/tests/test_target_loader.py @@ -0,0 +1,62 @@ +import json + +from activerecon.targets.target_loader import load_targets + + +def test_load_targets_from_txt_ignores_comments_and_blanks(tmp_path): + input_file = tmp_path / "targets.txt" + input_file.write_text( + "\n# comment\nexample.com\napi.example.com # inline\n\n", + encoding="utf-8", + ) + + assert load_targets(input_file) == ["example.com", "api.example.com"] + + +def test_load_targets_from_json_strings_and_objects(tmp_path): + input_file = tmp_path / "targets.json" + input_file.write_text( + json.dumps([ + "example.com", + {"url": "https://api.example.com"}, + {"domain": "app.example.com"}, + {"unused": "ignored"}, + ]), + encoding="utf-8", + ) + + assert load_targets(input_file) == [ + "example.com", + 
"https://api.example.com", + "app.example.com", + ] + + +def test_load_targets_from_inventory_like_json(tmp_path): + input_file = tmp_path / "inventory.json" + input_file.write_text( + json.dumps({ + "schema_version": "1.0", + "targets": [ + {"host": "example.com"}, + {"target": "https://api.example.com"}, + ], + }), + encoding="utf-8", + ) + + assert load_targets(input_file) == ["example.com", "https://api.example.com"] + + +def test_load_targets_from_jsonl(tmp_path): + input_file = tmp_path / "targets.jsonl" + input_file.write_text( + '"example.com"\n{"host": "api.example.com"}\n{"uri": "https://app.example.com"}\n', + encoding="utf-8", + ) + + assert load_targets(input_file) == [ + "example.com", + "api.example.com", + "https://app.example.com", + ]