import pytest
import numpy as np
import torch
from unittest.mock import patch

from codes.benchmark.bench_fcts import (
    evaluate_interpolation,
    evaluate_extrapolation,
    evaluate_sparse,
    evaluate_batchsize,
    evaluate_UQ,
)
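
# These tests drive each evaluate_* entry point with a stub model and assert
# (a) which model checkpoints get loaded and (b) which keys the returned
# metric dictionaries contain.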


# Dummy model that records load calls
class DummyModel:
    def __init__(self, device, n_quantities, n_timesteps, n_parameters, config):
        # Constructor args mirror the surrogate interface but are unused here.
        self._loads = []

    def load(self, training_id, surr_name, model_identifier):
        self._loads.append(model_identifier)

    def predict(self, data_loader):
        # Random predictions and targets, shape (batch=2, timesteps=4, quantities=1).
        preds = torch.rand(2, 4, 1)
        targets = torch.rand(2, 4, 1)
        return preds, targets


# Two standalone fakes: one for the heatmap plot (returns a tuple), one for
# all other plot functions (returns None).
def _fake_heatmap(*args, **kwargs):
    return ([], [])


def _fake_noop(*args, **kwargs):
    return None


@pytest.fixture(autouse=True)
def patch_plots():
    # Stub out every plot_* helper in bench_fcts so the tests run headless;
    # the heatmap fake returns a tuple because its callers unpack the result.
    import codes.benchmark.bench_fcts as bf

    fake_impl = {}
    for name in dir(bf):
        if not name.startswith("plot_"):
            continue
        if name == "plot_error_correlation_heatmap":
            fake_impl[name] = _fake_heatmap
        else:
            fake_impl[name] = _fake_noop

    with patch.multiple("codes.benchmark.bench_fcts", **fake_impl):
        yield
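

# Each parametrize row gives: the raw config values, the config section they
# populate, the evaluate_* function under test, the main batch size (batch
# scaling only), and the numbers expected in checkpoint ids and metric keys.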
@pytest.mark.parametrize(
    "raw_vals, cfg_key, func, main_bs, expected_nums",
    [
        ([2, 3, 5], "interpolation", evaluate_interpolation, None, [1, 2, 3, 5]),
        ([1, 2, 4], "extrapolation", evaluate_extrapolation, None, [1, 2, 4]),
        ([2, 4, 8], "sparse", evaluate_sparse, None, [1, 2, 4, 8]),
        ([0.5, 2], "batch_scaling", evaluate_batchsize, 8, [4, 8, 16]),
        (3, "uncertainty", evaluate_UQ, None, [0, 1, 2]),
    ],
)
def test_modality_variations(raw_vals, cfg_key, func, main_bs, expected_nums):
    surr = "TestSurr"
    cfg = {"training_id": "TID", "surrogates": [surr]}
    if cfg_key == "uncertainty":
        # UQ carries its own ensemble_size; the other modalities use a sub-key.
        cfg["uncertainty"] = {"enabled": True, "ensemble_size": raw_vals}
    else:
        cfg[cfg_key] = {"enabled": True}
        subkey = {
            "interpolation": "intervals",
            "extrapolation": "cutoffs",
            "sparse": "factors",
            "batch_scaling": "sizes",
        }[cfg_key]
        cfg[cfg_key][subkey] = raw_vals
        if cfg_key == "batch_scaling":
            cfg["batch_size"] = [main_bs]

    timesteps = np.arange(4)
    loader = object()  # never iterated; DummyModel.predict ignores it
    labels = ["q"] if func is evaluate_interpolation else None

    model = DummyModel(None, 1, len(timesteps), 0, {})

    # Invoke the function under test; signatures differ slightly per modality.
    if func is evaluate_interpolation or func is evaluate_extrapolation:
        metrics = func(model, surr, loader, timesteps, cfg, labels)
    elif func is evaluate_sparse:
        metrics = func(model, surr, loader, timesteps, n_train_samples=10, conf=cfg)
    elif func is evaluate_batchsize:
        metrics = func(model, surr, loader, timesteps, cfg)
    else:
        metrics = func(model, surr, loader, timesteps, cfg, labels=None)

    lower = surr.lower()
    # Build the checkpoint identifiers the evaluate_* call should have loaded.
    ids = []
    if cfg_key == "interpolation":
        for i in expected_nums:
            ids.append(f"{lower}_main" if i == 1 else f"{lower}_interpolation_{i}")
    elif cfg_key == "extrapolation":
        max_c = len(timesteps)
        for c in expected_nums:
            ids.append(f"{lower}_main" if c == max_c else f"{lower}_extrapolation_{c}")
    elif cfg_key == "sparse":
        for f in expected_nums:
            ids.append(f"{lower}_main" if f == 1 else f"{lower}_sparse_{f}")
    elif cfg_key == "batch_scaling":
        for bs in expected_nums:
            ids.append(f"{lower}_main" if bs == main_bs else f"{lower}_batchsize_{bs}")
    else:  # uncertainty
        for idx in expected_nums:
            ids.append(f"{lower}_main" if idx == 0 else f"{lower}_UQ_{idx}")

    assert model._loads == ids
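
    # Metric dicts key their sub-results as "<prefix> <value>"; the UQ path
    # reports aggregate keys such as "average_uncertainty" instead.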
    prefix = {
        "interpolation": "interval",
        "extrapolation": "cutoff",
        "sparse": "factor",
        "batch_scaling": "batch_size",
        "uncertainty": None,
    }[cfg_key]

    if cfg_key != "uncertainty":
        for num in expected_nums:
            assert f"{prefix} {num}" in metrics
    else:
        assert "average_uncertainty" in metrics
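
# Typical standalone invocation (file name assumed): pytest -q test_bench_fcts.py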