BASE-Laboratory · jameslehoux · May 17, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -10,8 +10,22 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # Static-analysis gate: the test job declares `needs: lint`, so the test
+  # matrix only starts once both Ruff steps here have passed.
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the version as a string rather than a float.
+          python-version: "3.12"
+      # No version specifier: pip resolves the newest Ruff release on each run.
+      - run: pip install ruff
+      # Lint rules over the package, tests and helper scripts.
+      - name: Ruff check
+        run: ruff check braggtrack/ tests/ scripts/
+      # `--check` only reports formatting differences; it does not rewrite files.
+      - name: Ruff format check
+        run: ruff format --check braggtrack/ tests/ scripts/
+
   test:
     runs-on: ubuntu-latest
+    needs: lint
     strategy:
       fail-fast: false
       matrix:
@@ -37,16 +51,25 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -e ".[notebook]"
+          python -m pip install pytest pytest-cov
 
       - name: Run pre-PR diagnostics
         run: python scripts/pre_pr_check.py
 
-      - name: Run unit tests
-        run: python -m unittest discover tests -v
+      - name: Run tests with coverage
+        run: |
+          python -m pytest tests/ -v --cov=braggtrack --cov-report=term-missing --cov-report=xml
 
       - name: Run acceptance gates
         run: python scripts/ci_report.py
 
+      - name: Upload coverage
+        if: matrix.python-version == '3.12'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage.xml
+
   notebook:
     runs-on: ubuntu-latest
     needs: test
@@ -79,4 +102,4 @@ jobs:
             --to notebook --execute \
             --ExecutePreprocessor.timeout=600 \
             notebooks/braggtrack_demo.ipynb \
-            --output /dev/null
+            --output-dir /tmp
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,43 @@
+# Changelog
+
+All notable changes to BraggTrack will be documented in this file.
+
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- Rolling-median threshold smoother (`smooth_thresholds`) for stable multi-frame segmentation
+- Outlier frame detection (`flag_outlier_frames`) via MAD-based statistics
+- Label projection by intensity (`label_projection_by_intensity`) — replaces broken `labels.max(axis=k)`
+- MIP-floor masking via 2-D Otsu (`otsu_floor_from_mip`)
+- Tri-axis segmented mask visualisation in demo notebook
+- Semantic MIP gallery and PCA embedding space plots
+- Google Colab support with auto-install cell and "Open in Colab" badge
+- CI matrix (Python 3.10/3.11/3.12) with pip caching and notebook execution job
+- Optional dependency groups: `[torch]`, `[notebook]`, `[dev]`
+- `BRAGGTRACK_DATA_ROOT` env var for custom data locations
+- Ruff linting and formatting configuration
+- PEP 561 `py.typed` marker
+
+### Changed
+- Seed floor now uses robust peak reference (p99.99) instead of absolute max
+- Default `seed_response_percentile` raised from 99.5 to 99.95
+- `torch` and `transformers` moved to optional `[torch]` extra (bare install is lightweight)
+
+### Fixed
+- Critical threshold domain mismatch: intensity Otsu was applied to LoG response domain
+- Spot count instability across scans (11/22/36 → 18/20/16 on bundled data)
+- Label projection picking highest label ID instead of brightest voxel's label
+
+## [0.1.0] - 2025-12-01
+
+### Added
+- Initial release: discovery, segmentation (Otsu + connected components), tracking (Hungarian + lifecycle DAG)
+- Week 1: beamline adapter, scan discovery, validation
+- Week 2: classical LoG segmentation, h-maxima seeds, seeded watershed
+- Week 3: position+shape cost, per-axis gating, NetworkX lifecycle graph
+- Week 4: multi-view MIPs, mock/DINOv2 encoder, geometry+semantic cost, alpha/beta ablation
+- CLI tools: inspect, validate, segment-synthetic, segment-dataset, track-dataset, embed-dataset
+- Bundled ESRF-ID03 sample data (3 scans, 100×250×250 uint16)
diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,27 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+title: "BraggTrack"
+type: software
+version: 0.1.0
+date-released: "2026-05-16"
+license: BSD-3-Clause
+repository-code: "https://github.com/BASE-Laboratory/BraggTrack"
+url: "https://github.com/BASE-Laboratory/BraggTrack"
+abstract: >-
+  Semantic 4D kinematics and fracture tracking for operando
+  X-ray diffraction. BraggTrack segments Bragg reflections in
+  3D reciprocal-space volumes and tracks them across time using
+  classical image processing and self-supervised vision-transformer
+  embeddings.
+keywords:
+  - X-ray diffraction
+  - operando crystallography
+  - 3D segmentation
+  - object tracking
+  - materials science
+  - Python
+authors:
+  - family-names: "Le Houx"
+    given-names: "James"
+    orcid: "https://orcid.org/0000-0000-0000-0000"  # TODO: all-zero placeholder; replace with the author's real ORCID iD
+    affiliation: "BASE Laboratory, School of Engineering, University of Greenwich"
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2026, BASE Laboratory
+Copyright (c) 2025-2026, BASE Laboratory
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+<img width="1774" height="887" alt="Social_banner" src="https://github.com/user-attachments/assets/4c73be10-eb60-4e6b-a0cd-4b6ac00af46b" />
+
 # BraggTrack
 
 Semantic 4D kinematics and fracture tracking for operando diffraction using foundation vision models.

diff --git a/braggtrack/__init__.py b/braggtrack/__init__.py
@@ -1,6 +1,10 @@
-"""BraggTrack package.
+"""BraggTrack — semantic 4D kinematics and fracture tracking for operando diffraction."""
 
-Initial scaffolding for operando diffraction tracking workflows.
-"""
+from importlib.metadata import PackageNotFoundError, version
 
-__all__ = ["io", "segmentation", "tracking"]
+# Prefer the version recorded in the installed distribution's metadata; fall
+# back to a dev placeholder when no distribution named "braggtrack" is found.
+try:
+    __version__ = version("braggtrack")
+except PackageNotFoundError:
+    __version__ = "0.1.0.dev0"
+
+__all__ = ["io", "segmentation", "semantic", "tracking"]
diff --git a/braggtrack/cli/_utils.py b/braggtrack/cli/_utils.py
@@ -0,0 +1,61 @@
+"""Shared CLI helpers for volume loading, CSV I/O, and synthetic fallback."""
+
+from __future__ import annotations
+
+import csv
+import hashlib
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+
+def synth_volume_from_file(path: Path, size: int = 24) -> np.ndarray:
+    """Generate a deterministic synthetic volume with Gaussian blobs.
+
+    The SHA-256 digest of the first 4096 bytes of ``path`` seeds the blob
+    centres and amplitudes, so files sharing that prefix always produce the
+    same volume.
+
+    Args:
+        path: File whose leading bytes seed the volume.
+        size: Edge length of the cubic volume, in voxels.
+
+    Returns:
+        A ``(size, size, size)`` float64 array: a constant background of 1.0
+        plus three Gaussian blobs with sigma 1.5 voxels.
+    """
+    # Read only the prefix that is hashed; ``read_bytes()`` would load the
+    # whole file into memory before slicing it.
+    with path.open("rb") as fh:
+        digest = hashlib.sha256(fh.read(4096)).digest()
+    seed_vals = list(digest[:12])
+    volume = np.ones((size, size, size), dtype=np.float64)
+    centers = [
+        (4 + seed_vals[0] % 8, 4 + seed_vals[1] % 8, 4 + seed_vals[2] % 8),
+        (10 + seed_vals[3] % 8, 10 + seed_vals[4] % 8, 10 + seed_vals[5] % 8),
+        (6 + seed_vals[6] % 10, 6 + seed_vals[7] % 10, 6 + seed_vals[8] % 10),
+    ]
+    zz, yy, xx = np.mgrid[0:size, 0:size, 0:size]
+    sigma_blob = 1.5  # same width for every blob, so set once outside the loop
+    for cz, cy, cx in centers:
+        # Amplitude in [10, 29], picked from the digest by the centre's coordinates.
+        amp = 10.0 + (seed_vals[(cz + cy + cx) % len(seed_vals)] % 20)
+        d2 = (zz - cz) ** 2 + (yy - cy) ** 2 + (xx - cx) ** 2
+        volume += amp * np.exp(-d2 / (2.0 * sigma_blob**2))
+    return volume
+
+
+def load_feature_csv(path: Path) -> list[dict[str, Any]]:
+    """Load a features.csv into a list of dicts with numeric types.
+
+    Each cell is converted to ``int`` when possible, otherwise to ``float``,
+    otherwise kept exactly as ``csv.DictReader`` produced it.
+
+    Args:
+        path: CSV file whose first row is the header.
+
+    Returns:
+        One dict per data row, keyed by column name. Cells missing from a
+        short row are ``None``.
+    """
+    rows: list[dict[str, Any]] = []
+    # newline="" lets the csv module do its own line-ending handling, as the
+    # csv documentation recommends for files passed to a reader.
+    with path.open(newline="") as fh:
+        for row in csv.DictReader(fh):
+            typed: dict[str, Any] = {}
+            for k, v in row.items():
+                # DictReader yields None for cells missing from a short row and
+                # a list for surplus cells; int()/float() raise TypeError on
+                # those, so catch it alongside ValueError and keep the raw value.
+                try:
+                    typed[k] = int(v)
+                except (TypeError, ValueError):
+                    try:
+                        typed[k] = float(v)
+                    except (TypeError, ValueError):
+                        typed[k] = v
+            rows.append(typed)
+    return rows
+
+
+def write_csv(path: Path, rows: list[dict[str, Any]]) -> None:
+    """Write a list of dicts to CSV with auto-detected fieldnames.
+
+    The header is the union of keys across all rows, in order of first
+    appearance, so a later row carrying an extra key does not make
+    ``csv.DictWriter`` raise ``ValueError``. Keys absent from a row are
+    written as empty cells.
+
+    Args:
+        path: Destination file; missing parent directories are created.
+        rows: Records to write. An empty list produces an empty file.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    if not rows:
+        # No row to derive a header from: still create or truncate the file.
+        path.write_bytes(b"")
+        return
+    # dict.fromkeys de-duplicates while preserving insertion order.
+    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
+    with path.open("w", newline="") as fh:
+        writer = csv.DictWriter(fh, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
diff --git a/braggtrack/cli/embed_dataset.py b/braggtrack/cli/embed_dataset.py
@@ -3,12 +3,12 @@
 from __future__ import annotations
 
 import argparse
-import hashlib
 import json
 from pathlib import Path
 
 import numpy as np
 
+from braggtrack.cli._utils import load_feature_csv, synth_volume_from_file
 from braggtrack.io import (
     MissingH5DependencyError,
     discover_operando_scans,
@@ -39,43 +39,6 @@ def build_parser() -> argparse.ArgumentParser:
     return p
 
 
-def _synth_volume_from_file(path: Path, size: int = 24) -> np.ndarray:
-    digest = hashlib.sha256(path.read_bytes()[:4096]).digest()
-    seed_vals = [b for b in digest[:12]]
-    volume = np.ones((size, size, size), dtype=np.float64)
-    centers = [
-        (4 + seed_vals[0] % 8, 4 + seed_vals[1] % 8, 4 + seed_vals[2] % 8),
-        (10 + seed_vals[3] % 8, 10 + seed_vals[4] % 8, 10 + seed_vals[5] % 8),
-        (6 + seed_vals[6] % 10, 6 + seed_vals[7] % 10, 6 + seed_vals[8] % 10),
-    ]
-    zz, yy, xx = np.mgrid[0:size, 0:size, 0:size]
-    for cz, cy, cx in centers:
-        amp = 10.0 + (seed_vals[(cz + cy + cx) % len(seed_vals)] % 20)
-        sigma_blob = 1.5
-        d2 = (zz - cz) ** 2 + (yy - cy) ** 2 + (xx - cx) ** 2
-        volume += amp * np.exp(-d2 / (2.0 * sigma_blob ** 2))
-    return volume
-
-
-def _load_feature_rows(path: Path) -> list[dict[str, object]]:
-    import csv
-
-    rows: list[dict[str, object]] = []
-    with path.open() as fh:
-        for row in csv.DictReader(fh):
-            typed: dict[str, object] = {}
-            for k, v in row.items():
-                try:
-                    typed[k] = int(v)
-                except ValueError:
-                    try:
-                        typed[k] = float(v)
-                    except ValueError:
-                        typed[k] = v
-            rows.append(typed)
-    return rows
-
-
 def main() -> int:
     args = build_parser().parse_args()
     root = resolve_dataset_root(args.root)
@@ -87,6 +50,11 @@ def main() -> int:
     scan_by_name = {s.scan_name: s for s in scans_fs}
     summaries: list[dict[str, object]] = []
 
+    enc = make_multiview_encoder(
+        args.backend,  # type: ignore[arg-type]
+        model_name=args.model,
+    )
+
     for scan_dir in sorted(d for d in segdir.iterdir() if d.is_dir() and d.name.startswith("scan")):
         name = scan_dir.name
         feat_path = scan_dir / "features.csv"
@@ -97,7 +65,7 @@ def main() -> int:
             print(json.dumps({"error": "Missing labels.npz — re-run segment_dataset", "scan": name}))
             return 1
 
-        rows = _load_feature_rows(feat_path)
+        rows = load_feature_csv(feat_path)
         labels_full = np.load(lab_path)["labels"]
 
         scan_file = scan_by_name.get(name)
@@ -107,10 +75,8 @@ def main() -> int:
 
         try:
             volume = load_primary_volume(scan_file.path)
-            if not isinstance(volume, np.ndarray):
-                volume = np.asarray(volume, dtype=np.float64)
         except (MissingH5DependencyError, KeyError, ValueError):
-            volume = _synth_volume_from_file(scan_file.path)
+            volume = synth_volume_from_file(scan_file.path)
 
         if volume.shape != labels_full.shape:
             print(
@@ -125,11 +91,6 @@ def main() -> int:
             )
             return 1
 
-        enc = make_multiview_encoder(
-            args.backend,  # type: ignore[arg-type]
-            model_name=args.model,
-        )
-
         label_ids: list[int] = []
         vectors: list[np.ndarray] = []
 

diff --git a/braggtrack/cli/inspect_datasets.py b/braggtrack/cli/inspect_datasets.py
@@ -4,7 +4,6 @@
 
 import argparse
 import json
-from pathlib import Path
 
 from braggtrack.io import (
     MissingH5DependencyError,