diff --git a/.agent-plan.md b/.agent-plan.md
index a2779df..d3e309d 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -44,7 +44,7 @@ First public dataset release: `leadforge-b2b-lead-scoring`. Three difficulty tie
 - [x] Verify three tiers produce different conversion rates (intro 41.5%, intermediate 20.1%, advanced 7.9%)
 - [x] Update release/README.md — remove stale "Known limitations", add conversion rates to dataset summary
 - [x] Update release/HF_DATASET_CARD.md — add conversion rates to summary table
-- [ ] Verify SHA-256 hash determinism (re-run build, compare hashes)
+- [x] Verify SHA-256 hash determinism (re-run build, compare hashes) — `scripts/verify_hash_determinism.py`; 73/73 files identical across two `build_public_release.py` runs (modulo `manifest.json`'s wall-clock `generation_timestamp`)
 - [ ] Upload to Kaggle and HuggingFace
 - [ ] Announce
 
diff --git a/.gitignore b/.gitignore
index 8347321..385be89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -216,3 +216,4 @@ release/intermediate/
 release/advanced/
 release/intermediate_instructor/
 release/LICENSE
+release/_determinism/
diff --git a/leadforge/render/manifests.py b/leadforge/render/manifests.py
index 03d6201..d43fade 100644
--- a/leadforge/render/manifests.py
+++ b/leadforge/render/manifests.py
@@ -22,6 +22,11 @@
 # Bump this whenever the bundle layout or manifest schema changes.
 BUNDLE_SCHEMA_VERSION = "2"
 
+# Manifest fields whose value is non-deterministic by design (wall-clock,
+# host metadata, etc.).  Determinism checks must ignore these fields when
+# comparing two bundles produced from the same (recipe, config, seed, version).
+NON_DETERMINISTIC_MANIFEST_FIELDS: tuple[str, ...] = ("generation_timestamp",)
+
 
 def build_manifest(
     config: GenerationConfig,
diff --git a/leadforge/validation/invariants.py b/leadforge/validation/invariants.py
index d06ac16..cb7caf1 100644
--- a/leadforge/validation/invariants.py
+++ b/leadforge/validation/invariants.py
@@ -9,9 +9,11 @@
 
 from __future__ import annotations
 
+import json
 from pathlib import Path
 
 from leadforge.core.hashing import file_sha256
+from leadforge.render.manifests import NON_DETERMINISTIC_MANIFEST_FIELDS
 
 
 def check_determinism(bundle_a: Path, bundle_b: Path) -> list[str]:
@@ -60,6 +62,69 @@ def check_determinism(bundle_a: Path, bundle_b: Path) -> list[str]:
     return errors
 
 
+def _manifest_payloads_match_modulo_non_deterministic(a: Path, b: Path) -> bool:
+    """Compare two manifest.json files after stripping non-deterministic fields.
+
+    Re-dumps both payloads with ``sort_keys=True`` so that a pure key
+    reordering does not count as a mismatch.
+    """
+    payload_a = json.loads(a.read_text())
+    payload_b = json.loads(b.read_text())
+    for field in NON_DETERMINISTIC_MANIFEST_FIELDS:
+        payload_a.pop(field, None)
+        payload_b.pop(field, None)
+    return json.dumps(payload_a, sort_keys=True) == json.dumps(payload_b, sort_keys=True)
+
+
+def compare_bundle_trees(bundle_a: Path, bundle_b: Path) -> list[str]:
+    """Full-tree byte-identical comparison of two bundle directories.
+
+    Walks every file under both roots and reports:
+
+    - files present in only one tree (``only in A:`` / ``only in B:``)
+    - files whose SHA-256 differs (``hash mismatch:``)
+
+    The bundle ``manifest.json`` is special-cased: it carries
+    ``generation_timestamp`` (wall-clock UTC, set by ``build_manifest()``),
+    which is expected to differ across runs unless the caller pinned it.
+    For that one file, if the raw hashes differ, the function re-compares the
+    payload with non-deterministic fields stripped (see
+    :data:`NON_DETERMINISTIC_MANIFEST_FIELDS`).  A mismatch *after* stripping
+    is still reported.
+
+    Use this for release-time integration checks; for the fast in-process
+    determinism property used in CI, see :func:`check_determinism`.
+    """
+    errors: list[str] = []
+
+    files_a = {p.relative_to(bundle_a) for p in bundle_a.rglob("*") if p.is_file()}
+    files_b = {p.relative_to(bundle_b) for p in bundle_b.rglob("*") if p.is_file()}
+
+    for rel in sorted(files_a - files_b):
+        errors.append(f"only in A: {rel}")
+    for rel in sorted(files_b - files_a):
+        errors.append(f"only in B: {rel}")
+
+    for rel in sorted(files_a & files_b):
+        path_a = bundle_a / rel
+        path_b = bundle_b / rel
+        if file_sha256(path_a) == file_sha256(path_b):
+            continue
+        if rel.name == "manifest.json" and rel.parent == Path():
+            if _manifest_payloads_match_modulo_non_deterministic(path_a, path_b):
+                continue
+            errors.append(
+                f"manifest payload mismatch (after stripping "
+                f"{list(NON_DETERMINISTIC_MANIFEST_FIELDS)}): {rel}"
+            )
+            continue
+        size_a = path_a.stat().st_size
+        size_b = path_b.stat().st_size
+        errors.append(f"hash mismatch: {rel} (sizes: A={size_a}B, B={size_b}B)")
+
+    return errors
+
+
 def check_exposure_monotonicity(student_bundle: Path, instructor_bundle: Path) -> list[str]:
     """Verify that student_public is a subset of research_instructor.
 
diff --git a/scripts/build_public_release.py b/scripts/build_public_release.py
index e348453..c4e72c2 100644
--- a/scripts/build_public_release.py
+++ b/scripts/build_public_release.py
@@ -2,7 +2,7 @@
 """Build the public release bundles for Kaggle/HuggingFace.
 
 Usage:
-    python scripts/build_public_release.py [OUTPUT_DIR]
+    python scripts/build_public_release.py [OUTPUT_DIR] [--generation-timestamp ISO8601]
 
 Generates four bundles:
 - intro/          (student_public, intro difficulty)
@@ -14,10 +14,16 @@
 (lead_scoring.csv) merging train/valid/test with a ``split`` column.
 
 All bundles are validated with ``leadforge validate`` after generation.
+
+The ``--generation-timestamp`` flag pins ``manifest.generation_timestamp`` to a
+caller-supplied ISO-8601 UTC string.  This is the supported way to produce
+byte-reproducible bundles (used by ``scripts/verify_hash_determinism.py``);
+the released bundles always use the wall-clock default.
 """
 
 from __future__ import annotations
 
+import argparse
 import json
 import shutil
 import sys
@@ -45,6 +51,7 @@ def generate_and_save(
     exposure_mode: str,
     difficulty: str,
     seed: int = SEED,
+    generation_timestamp: str | None = None,
 ) -> None:
     """Generate a bundle and write it to *out_dir*."""
     gen = Generator.from_recipe(
@@ -54,7 +61,7 @@ def generate_and_save(
         difficulty=difficulty,
     )
     bundle = gen.generate()
-    bundle.save(str(out_dir))
+    bundle.save(str(out_dir), generation_timestamp=generation_timestamp)
 
 
 # Columns to drop from the flat CSV convenience export.
@@ -111,7 +118,24 @@ def print_summary(bundle_dir: Path, name: str) -> None:
 
 
 def main() -> None:
-    output_root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("release")
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", maxsplit=1)[0])
+    parser.add_argument(
+        "output_dir",
+        nargs="?",
+        default="release",
+        help="Output directory (default: release/)",
+    )
+    parser.add_argument(
+        "--generation-timestamp",
+        default=None,
+        help=(
+            "ISO-8601 UTC string to pin manifest.generation_timestamp. "
+            "Default: wall-clock now. Use this for reproducible bundles."
+        ),
+    )
+    args = parser.parse_args()
+
+    output_root = Path(args.output_dir)
     output_root.mkdir(parents=True, exist_ok=True)
 
     # Copy LICENSE
@@ -122,7 +146,12 @@ def main() -> None:
     for dir_name, exposure_mode, difficulty in BUNDLES:
         bundle_dir = output_root / dir_name
         print(f"Generating {dir_name} ({exposure_mode}, {difficulty})...", file=sys.stderr)
-        generate_and_save(bundle_dir, exposure_mode, difficulty)
+        generate_and_save(
+            bundle_dir,
+            exposure_mode,
+            difficulty,
+            generation_timestamp=args.generation_timestamp,
+        )
 
         # Flat CSV for student_public bundles
         if exposure_mode == "student_public":
diff --git a/scripts/verify_hash_determinism.py b/scripts/verify_hash_determinism.py
new file mode 100755
index 0000000..000f8ff
--- /dev/null
+++ b/scripts/verify_hash_determinism.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""Verify SHA-256 hash determinism of the public release build.
+
+Runs ``scripts/build_public_release.py`` twice into two output directories with
+the same seed/config and a *pinned* manifest timestamp, then asserts every
+generated file hashes identically across runs.
+
+Pinning ``--generation-timestamp`` on the build script means the resulting
+``manifest.json`` is also byte-identical — no special-cased manifest stripping
+needed at compare time.  (For defence-in-depth, the underlying
+:func:`leadforge.validation.invariants.compare_bundle_trees` still tolerates
+a wall-clock-only manifest diff, but pinning is the supported workflow.)
+
+The architectural invariant being enforced is
+"generation is deterministic given (recipe, config, seed, version)".
+The corresponding fast in-process check lives in
+``tests/validation/test_invariants.py::TestDeterminism`` and runs in CI on
+every PR; this script is the slower release-time check that exercises the
+full ``build_public_release.py`` pipeline.
+
+On failure, output directories are preserved (NOT auto-cleaned) so the
+mismatching artifacts can be diffed directly.
+
+Exit code: 0 on PASS, 1 on FAIL.
+
+Usage:
+    python scripts/verify_hash_determinism.py [--out DIR] [--keep-on-success]
+"""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from leadforge.core.hashing import file_sha256
+from leadforge.validation.invariants import compare_bundle_trees
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+BUILD_SCRIPT = REPO_ROOT / "scripts" / "build_public_release.py"
+
+# Pinned timestamp for both runs.  Any fixed ISO-8601 UTC string works; using
+# the unix epoch makes it obvious that it's a sentinel, not a real run time.
+PINNED_TIMESTAMP = "1970-01-01T00:00:00+00:00"
+
+# Bundle subdirectories produced by build_public_release.py.  Hardcoded here
+# because the script's BUNDLES list is not exposed as a public API.  If the
+# build script grows new bundles, add them here.
+BUNDLE_DIRS = ("intro", "intermediate", "advanced", "intermediate_instructor")
+
+
+def run_build(out_dir: Path) -> None:
+    cmd = [
+        sys.executable,
+        str(BUILD_SCRIPT),
+        str(out_dir),
+        "--generation-timestamp",
+        PINNED_TIMESTAMP,
+    ]
+    print(f"  $ {' '.join(cmd)}")
+    subprocess.run(cmd, check=True, cwd=REPO_ROOT)  # noqa: S603
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", maxsplit=1)[0])
+    parser.add_argument(
+        "--out",
+        type=Path,
+        default=REPO_ROOT / "release" / "_determinism",
+        help="Base directory for both runs (will be wiped at start). "
+        "Default: release/_determinism/",
+    )
+    parser.add_argument(
+        "--keep-on-success",
+        action="store_true",
+        help="Keep output directories even on PASS (default: clean up on PASS, "
+        "always preserve on FAIL).",
+    )
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+
+    if not BUILD_SCRIPT.exists():
+        print(f"FAIL: build script not found at {BUILD_SCRIPT}", file=sys.stderr)
+        return 1
+
+    base = args.out
+    run_a = base / "run_a"
+    run_b = base / "run_b"
+
+    # Wipe and recreate.
+    if base.exists():
+        shutil.rmtree(base)
+    base.mkdir(parents=True)
+
+    print(f"Run A → {run_a}")
+    run_build(run_a)
+    print(f"Run B → {run_b}")
+    run_build(run_b)
+
+    # Per-bundle comparison so error messages stay scoped to a single bundle.
+    all_errors: list[tuple[str, list[str]]] = []
+    total_files = 0
+    for name in BUNDLE_DIRS:
+        bundle_a = run_a / name
+        bundle_b = run_b / name
+        if not bundle_a.exists() or not bundle_b.exists():
+            all_errors.append((name, [f"bundle directory missing: {name}"]))
+            continue
+        errors = compare_bundle_trees(bundle_a, bundle_b)
+        bundle_files = sum(1 for p in bundle_a.rglob("*") if p.is_file())
+        total_files += bundle_files
+        if errors:
+            all_errors.append((name, errors))
+
+    # Top-level files (LICENSE, etc.) — compare via hash directly.
+    top_a = {p.name for p in run_a.iterdir() if p.is_file()}
+    top_b = {p.name for p in run_b.iterdir() if p.is_file()}
+    top_errors: list[str] = []
+    for name in sorted(top_a - top_b):
+        top_errors.append(f"top-level file only in A: {name}")
+    for name in sorted(top_b - top_a):
+        top_errors.append(f"top-level file only in B: {name}")
+    for name in sorted(top_a & top_b):
+        if file_sha256(run_a / name) != file_sha256(run_b / name):
+            top_errors.append(f"top-level hash mismatch: {name}")
+    total_files += len(top_a)
+    if top_errors:
+        all_errors.append(("<top-level>", top_errors))
+
+    if not all_errors:
+        print(f"\nPASS: all {total_files} files hash identically across runs.")
+        if not args.keep_on_success:
+            shutil.rmtree(base)
+            print(f"(cleaned up {base})")
+        else:
+            print(f"(kept artifacts at {base})")
+        return 0
+
+    print(f"\nFAIL: mismatches in {len(all_errors)} bundle(s):")
+    for name, errors in all_errors:
+        print(f"  [{name}]")
+        for e in errors:
+            print(f"    - {e}")
+    print(f"\nArtifacts preserved for inspection:\n  A: {run_a}\n  B: {run_b}")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/validation/test_invariants.py b/tests/validation/test_invariants.py
index 3668ccf..078548c 100644
--- a/tests/validation/test_invariants.py
+++ b/tests/validation/test_invariants.py
@@ -2,12 +2,17 @@
 
 from __future__ import annotations
 
+import json
 from pathlib import Path
 
 import pytest
 
 from leadforge.api.generator import Generator
-from leadforge.validation.invariants import check_determinism, check_exposure_monotonicity
+from leadforge.validation.invariants import (
+    check_determinism,
+    check_exposure_monotonicity,
+    compare_bundle_trees,
+)
 
 _SMALL = {"n_leads": 20, "n_accounts": 10, "n_contacts": 30}
 
@@ -80,3 +85,111 @@ def test_instructor_without_metadata_fails(self, exposure_bundles: tuple[Path, P
         # Student as "instructor" lacks metadata/
         errors = check_exposure_monotonicity(student, student)
         assert any("missing metadata" in e for e in errors)
+
+
+def _make_synthetic_bundle(
+    root: Path,
+    files: dict[str, str | bytes],
+    manifest: dict | None = None,
+) -> Path:
+    """Write a fake bundle layout with the given files and optional manifest."""
+    root.mkdir(parents=True, exist_ok=True)
+    if manifest is not None:
+        (root / "manifest.json").write_text(json.dumps(manifest, indent=2))
+    for rel, content in files.items():
+        path = root / rel
+        path.parent.mkdir(parents=True, exist_ok=True)
+        if isinstance(content, bytes):
+            path.write_bytes(content)
+        else:
+            path.write_text(content)
+    return root
+
+
+class TestCompareBundleTrees:
+    """Synthetic-bundle unit tests for compare_bundle_trees.
+
+    These avoid running the full generator so the verifier's logic is exercised
+    independently of generation determinism.  Real end-to-end determinism is
+    covered by TestDeterminism above.
+    """
+
+    def test_identical_trees_no_errors(self, tmp_path: Path) -> None:
+        a = _make_synthetic_bundle(
+            tmp_path / "a",
+            files={"tables/x.parquet": b"\x01\x02", "dataset_card.md": "hello"},
+        )
+        b = _make_synthetic_bundle(
+            tmp_path / "b",
+            files={"tables/x.parquet": b"\x01\x02", "dataset_card.md": "hello"},
+        )
+        assert compare_bundle_trees(a, b) == []
+
+    def test_only_in_a_reported(self, tmp_path: Path) -> None:
+        a = _make_synthetic_bundle(
+            tmp_path / "a",
+            files={"tables/x.parquet": b"x", "tables/extra.parquet": b"y"},
+        )
+        b = _make_synthetic_bundle(tmp_path / "b", files={"tables/x.parquet": b"x"})
+        errors = compare_bundle_trees(a, b)
+        assert any("only in A" in e and "extra.parquet" in e for e in errors)
+
+    def test_only_in_b_reported(self, tmp_path: Path) -> None:
+        a = _make_synthetic_bundle(tmp_path / "a", files={"tables/x.parquet": b"x"})
+        b = _make_synthetic_bundle(
+            tmp_path / "b",
+            files={"tables/x.parquet": b"x", "metadata/world_spec.json": "{}"},
+        )
+        errors = compare_bundle_trees(a, b)
+        assert any("only in B" in e and "world_spec.json" in e for e in errors)
+
+    def test_hash_mismatch_reported_with_sizes(self, tmp_path: Path) -> None:
+        a = _make_synthetic_bundle(tmp_path / "a", files={"tables/x.parquet": b"abc"})
+        b = _make_synthetic_bundle(tmp_path / "b", files={"tables/x.parquet": b"abcd"})
+        errors = compare_bundle_trees(a, b)
+        assert len(errors) == 1
+        assert "hash mismatch" in errors[0]
+        assert "x.parquet" in errors[0]
+        assert "A=3B" in errors[0]
+        assert "B=4B" in errors[0]
+
+    def test_manifest_only_timestamp_diff_passes(self, tmp_path: Path) -> None:
+        manifest_a = {"seed": 42, "generation_timestamp": "2026-01-01T00:00:00+00:00"}
+        manifest_b = {"seed": 42, "generation_timestamp": "2026-12-31T23:59:59+00:00"}
+        a = _make_synthetic_bundle(tmp_path / "a", files={}, manifest=manifest_a)
+        b = _make_synthetic_bundle(tmp_path / "b", files={}, manifest=manifest_b)
+        assert compare_bundle_trees(a, b) == []
+
+    def test_manifest_real_diff_reported(self, tmp_path: Path) -> None:
+        manifest_a = {"seed": 42, "generation_timestamp": "2026-01-01T00:00:00+00:00"}
+        manifest_b = {"seed": 43, "generation_timestamp": "2026-01-01T00:00:00+00:00"}
+        a = _make_synthetic_bundle(tmp_path / "a", files={}, manifest=manifest_a)
+        b = _make_synthetic_bundle(tmp_path / "b", files={}, manifest=manifest_b)
+        errors = compare_bundle_trees(a, b)
+        assert len(errors) == 1
+        assert "manifest payload mismatch" in errors[0]
+
+    def test_manifest_key_reorder_only_passes(self, tmp_path: Path) -> None:
+        # Same logical payload, different on-disk key order — must NOT be flagged
+        # as a mismatch.  (json.dumps with sort_keys=True normalises both sides.)
+        a_root = tmp_path / "a"
+        b_root = tmp_path / "b"
+        a_root.mkdir()
+        b_root.mkdir()
+        (a_root / "manifest.json").write_text(json.dumps({"seed": 42, "n_leads": 100}, indent=2))
+        (b_root / "manifest.json").write_text(json.dumps({"n_leads": 100, "seed": 42}, indent=2))
+        assert compare_bundle_trees(a_root, b_root) == []
+
+    def test_nested_manifest_not_special_cased(self, tmp_path: Path) -> None:
+        # Only the top-level bundle manifest.json gets timestamp-stripping.
+        # A file named manifest.json deeper in the tree is compared byte-for-byte.
+        a = _make_synthetic_bundle(
+            tmp_path / "a",
+            files={"tasks/foo/manifest.json": '{"generation_timestamp": "T1"}'},
+        )
+        b = _make_synthetic_bundle(
+            tmp_path / "b",
+            files={"tasks/foo/manifest.json": '{"generation_timestamp": "T2"}'},
+        )
+        errors = compare_bundle_trees(a, b)
+        assert any("hash mismatch" in e for e in errors)