
Commit 0d0a8a4

Author: Henry Wallace

Use differentiable sorting for Spearman: diffsort fallback, log backend at train start

- loss_unified: when method=auto and torchsort is unavailable, use diffsort (HAS_DIFFSORT)
- get_spearman_backend() for logging; train_all_fronts prints the Spearman loss backend at start
- README/justfile/MAKING_IT_GOOD: recommend `uv sync --extra sorting`, document the backend order

1 parent 3e07098

5 files changed: 39 additions & 3 deletions

README.md (1 addition, 1 deletion)

````diff
@@ -8,7 +8,7 @@ ICF is normalized to \([0, 1]\): **0.0 = very common**, **1.0 = very rare**.
 
 ```bash
 uv sync --extra dev
-# Optional: uv sync --extra sorting (torchsort for differentiable Spearman in multi-task training)
+# Recommended for multi-task training: uv sync --extra sorting (torchsort or diffsort for differentiable Spearman; backend is logged at train start)
 
 # Train
 uv run tiny-icf-train --help
````

docs/guides/MAKING_IT_GOOD_MINIMAL_HEURISTICS.md (1 addition, 1 deletion)

```diff
@@ -20,7 +20,7 @@ Research-backed, low-heuristic improvements. No hand-picked anchor words or ad-h
 
 **Fix:** Use **differentiable Spearman** via soft sorting (Blondel et al., "Fast Differentiable Sorting and Ranking", ICML 2020; [arxiv 2002.08871](https://arxiv.org/abs/2002.08871)). Loss = \( \frac{1}{2}\|r - r_\Psi(\theta)\|^2 \) where \( r_\Psi \) are soft ranks. Implementations: **torchsort** (O(n log n), recommended), **diffsort** (O(n²(log n)²)).
 
-**Implemented:** `loss_unified.spearman_loss_tensor` prefers **torchsort** when `spearman_method` is `"auto"` (default) and torchsort is installed. Training uses `--spearman-method auto`; install with `uv sync --extra sorting` for O(n log n) differentiable Spearman. CLI: `--spearman-reg-strength 0.1`, `--spearman-method auto|torchsort|sigmoid`. Fallback is rank_relax or built-in sigmoid.
+**Implemented:** `loss_unified.spearman_loss_tensor` with `spearman_method="auto"` (default): use **torchsort** if available, else **diffsort**, else rank_relax or built-in soft_rank. All paths are differentiable. Install `uv sync --extra sorting` for torchsort and/or diffsort. At training start we log `Spearman loss backend: <torchsort|diffsort|rank_relax|built-in>`. CLI: `--spearman-reg-strength 0.1`, `--spearman-method auto|torchsort|diffsort|sigmoid`.
 
 ---
 
```
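The soft-rank construction underlying these backends can be sketched in plain Python. This is an illustrative, stdlib-only toy assuming sigmoid-relaxed pairwise comparisons; the project's actual `soft_rank_tensor` is a PyTorch implementation and may differ in detail:

```python
import math


def _sigmoid(z: float) -> float:
    """Numerically stable logistic sigmoid."""
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    ez = math.exp(z)
    return ez / (1.0 + ez)


def soft_rank(xs, steepness=20.0):
    """Soft 1-based ranks: r_i = 1 + sum_{j != i} sigmoid(steepness * (x_i - x_j)).

    Each pairwise comparison is relaxed to a sigmoid, so the ranks are smooth
    in xs; as steepness -> infinity they approach the true ranks (for distinct values).
    """
    return [
        1.0 + sum(_sigmoid(steepness * (xi - xj)) for j, xj in enumerate(xs) if j != i)
        for i, xi in enumerate(xs)
    ]


def soft_spearman(preds, targets, steepness=20.0):
    """Pearson correlation of soft ranks, approximating Spearman's rho.

    A training loss would use 1 - rho (or the squared-rank-error form from the
    Blondel et al. paper cited above).
    """
    rp, rt = soft_rank(preds, steepness), soft_rank(targets, steepness)
    n = len(rp)
    mp, mt = sum(rp) / n, sum(rt) / n
    cov = sum((a - mp) * (b - mt) for a, b in zip(rp, rt))
    vp = math.sqrt(sum((a - mp) ** 2 for a in rp))
    vt = math.sqrt(sum((b - mt) ** 2 for b in rt))
    return cov / (vp * vt)
```

With a large steepness the soft ranks snap to the hard ranks and the correlation of perfectly co-ordered lists approaches 1.0 (and -1.0 for reversed order), which is the quantity the Spearman regularizer pushes on during training.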

justfile (1 addition, 1 deletion)

```diff
@@ -64,8 +64,8 @@ sync-s3:
     aws s3 sync models/ s3://arclabs-backups/tiny-icf/models/ --exclude "*" --include "multitask_*.pt" --include "v3_base*.pt" --include "*.pt.cal.json"
 
 # English-only training (better "the"/"and", no lang prefix); uses frequency sampling + spearman-method auto
+# For differentiable Spearman: uv sync --extra sorting (torchsort or diffsort; backend logged at start)
 # For custom EPOCHS/SAMPLES run: uv run python scripts/train_all_fronts.py ... --epochs N --train-max-samples M
-# Background run: nohup uv run python scripts/train_all_fronts.py ... > models/all_fronts_en/train_en_30ep.log 2>&1 &
 train-en DATA="data/word_frequency.csv" EPOCHS="30" SAMPLES="200000":
     mkdir -p models/all_fronts_en
     uv run python scripts/train_all_fronts.py \
```

scripts/train_all_fronts.py (5 additions, 0 deletions)

```diff
@@ -278,6 +278,11 @@ def main() -> int:
         },
     }
 
+    from tiny_icf.loss_unified import get_spearman_backend
+
+    spearman_backend = get_spearman_backend(config.get("spearman_method", "auto"))
+    print(f"Spearman loss backend: {spearman_backend} (method={args.spearman_method})")
+
     module = FlexibleIDFLightningModule(config=config, learning_rate=args.lr, weight_decay=args.weight_decay)
 
     # Optional init-from: load a UniversalICF checkpoint into the base model.
```

src/tiny_icf/loss_unified.py (31 additions, 0 deletions)

```diff
@@ -38,6 +38,26 @@
     HAS_TORCHSORT = False
     spearman_loss_torchsort = None
 
+# Fallback: diffsort (differentiable sorting networks) when torchsort unavailable
+try:
+    from tiny_icf.loss import _try_import_diffsort, spearman_loss_diffsort
+
+    HAS_DIFFSORT = _try_import_diffsort() is not None
+except Exception:
+    HAS_DIFFSORT = False
+    spearman_loss_diffsort = None
+
+
+def get_spearman_backend(method: str = "auto") -> str:
+    """Return which backend will be used for Spearman loss (for logging)."""
+    if method in ("torchsort", "auto") and HAS_TORCHSORT and spearman_loss_torchsort is not None:
+        return "torchsort"
+    if method in ("diffsort", "auto") and HAS_DIFFSORT and spearman_loss_diffsort is not None:
+        return "diffsort"
+    if HAS_RANK_RELAX:
+        return "rank_relax"
+    return "built-in (soft_rank)"
+
 
 def _to_list(tensor: torch.Tensor) -> List[float]:
     """Convert tensor to Python list for rank-relax."""
@@ -177,6 +197,17 @@ def spearman_loss_tensor(
             predictions, targets, regularization_strength=regularization_strength
         )
 
+    # Differentiable sorting fallback: diffsort when method is auto or diffsort
+    use_diffsort = (
+        (method in ("diffsort", "auto"))
+        and HAS_DIFFSORT
+        and spearman_loss_diffsort is not None
+        and predictions.numel() >= 2
+    )
+    if use_diffsort:
+        steepness = max(1.0, min(20.0, regularization_strength * 5.0))
+        return spearman_loss_diffsort(predictions, targets, steepness=steepness)
+
     if not HAS_RANK_RELAX:
         # Fallback: use soft ranking and compute correlation manually
         pred_ranks = soft_rank_tensor(
```
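One detail worth noting in the diffsort branch: `regularization_strength` is translated into diffsort's `steepness` through a clamp. A minimal restatement of that mapping (the helper name is hypothetical; the diff inlines the expression):

```python
def diffsort_steepness(regularization_strength: float) -> float:
    """Steepness passed to diffsort, clamped to [1.0, 20.0].

    Mirrors the inline expression in spearman_loss_tensor:
    steepness = max(1.0, min(20.0, regularization_strength * 5.0))
    """
    return max(1.0, min(20.0, regularization_strength * 5.0))
```

With the documented default `--spearman-reg-strength 0.1`, the product 0.5 is clamped up to the floor of 1.0, so the default diffsort relaxation is at its softest setting.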
