Skip to content

Commit 5e3b735

Browse files
Merge pull request #42 from robin-janssen/add-cloud-dataset
Add cloud dataset
2 parents 33e7a21 + f999399 commit 5e3b735

35 files changed

Lines changed: 484 additions & 232 deletions

codes/benchmark/bench_fcts.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from tabulate import tabulate
88
from torch.utils.data import DataLoader
99

10-
from codes.utils import check_and_load_data
10+
from codes.utils import batch_factor_to_float, check_and_load_data
1111

1212
from .bench_plots import inference_time_bar_plot # int_ext_sparse,
1313
from .bench_plots import ( # plot_generalization_errors,; rel_errors_and_uq,
@@ -98,7 +98,13 @@ def run_benchmark(surr_name: str, surrogate_class, conf: dict) -> dict[str, Any]
9898
n_quantities = train_data.shape[2]
9999
n_test_samples = n_timesteps * val_data.shape[0]
100100
n_params = train_params.shape[1] if train_params is not None else 0
101-
model = surrogate_class(device, n_quantities, n_timesteps, n_params, model_config)
101+
model = surrogate_class(
102+
device=device,
103+
n_quantities=n_quantities,
104+
n_timesteps=n_timesteps,
105+
n_parameters=n_params,
106+
config=model_config,
107+
)
102108

103109
# Placeholder for metrics
104110
metrics = {}
@@ -231,7 +237,10 @@ def evaluate_accuracy(
231237
# Calculate relative errors
232238
absolute_errors = np.abs(preds - targets)
233239
mean_absolute_error = np.mean(absolute_errors)
234-
relative_errors = np.abs(absolute_errors / targets)
240+
relative_error_threshold = float(conf.get("relative_error_threshold", 0.0))
241+
relative_errors = np.abs(
242+
absolute_errors / np.maximum(np.abs(targets), relative_error_threshold)
243+
)
235244

236245
# Plot relative errors over time
237246
plot_relative_errors_over_time(
@@ -729,13 +738,17 @@ def evaluate_batchsize(
729738
dict: A dictionary containing batch size training metrics.
730739
"""
731740
training_id = conf["training_id"]
732-
batch_sizes = conf["batch_scaling"]["sizes"].copy()
741+
batch_factors = conf["batch_scaling"]["sizes"].copy()
733742
batch_metrics = {}
734743

735744
# Identify the batch size of the main model
736745
model_idx = conf["surrogates"].index(surr_name)
737746
main_batch_size = conf["batch_size"][model_idx]
738747

748+
batch_sizes = [
749+
int(main_batch_size * batch_factor_to_float(bf)) for bf in batch_factors
750+
]
751+
739752
# Add main batch size to the list of batch sizes
740753
if main_batch_size not in batch_sizes:
741754
batch_sizes.append(main_batch_size)
@@ -840,12 +853,15 @@ def evaluate_UQ(
840853
errors_time = np.mean(errors, axis=(0, 2))
841854
avg_correlation, _ = pearsonr(errors.flatten(), preds_std.flatten())
842855
preds_std_time = np.mean(preds_std, axis=(0, 2))
843-
rel_errors = np.abs(errors / targets)
856+
rel_error_threshold = float(conf.get("relative_error_threshold", 0.0))
857+
rel_errors = np.abs(errors / np.maximum(np.abs(targets), rel_error_threshold))
844858

845859
# Compute a target-weighted, signed difference between predicted uncertainty and error.
846860
# Negative values indicate overconfidence (PU is too low compared to error),
847861
# positive values indicate underconfidence.
848-
weighted_diff = (preds_std - errors) / targets
862+
weighted_diff = (preds_std - errors) / np.maximum(
863+
np.abs(targets), rel_error_threshold
864+
)
849865

850866
# Plots (existing UQ plots)
851867
plot_example_predictions_with_uncertainty(
@@ -971,7 +987,11 @@ def compare_main_losses(metrics: dict, config: dict) -> None:
971987
n_params = metrics[surr_name]["n_params"]
972988
model_config = get_model_config(surr_name, config)
973989
model = surrogate_class(
974-
device, n_quantities, n_timesteps, n_params, model_config
990+
device=device,
991+
n_quantities=n_quantities,
992+
n_timesteps=n_timesteps,
993+
n_parameters=n_params,
994+
config=model_config,
975995
)
976996

977997
def load_losses(model_identifier: str):

codes/benchmark/bench_plots.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from matplotlib.gridspec import GridSpec
88
from scipy.ndimage import gaussian_filter1d
99

10+
from codes.utils import batch_factor_to_float
11+
1012
from .bench_utils import format_time
1113

1214
# Utility functions for plotting
@@ -157,6 +159,9 @@ def plot_relative_errors_over_time(
157159
plt.xlabel("Time")
158160
plt.ylabel("Relative Error")
159161
plt.xlim(timesteps[0], timesteps[-1])
162+
plt.ylim(bottom=1e-8)
163+
if conf["dataset"]["log_timesteps"]:
164+
plt.xscale("log")
160165
if show_title:
161166
plt.title(title)
162167
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
@@ -332,6 +337,8 @@ def plot_average_errors_over_time(
332337
plt.xlim(timesteps[0], timesteps[-1])
333338
plt.ylabel("Mean Absolute Error")
334339
plt.yscale("log")
340+
if conf["dataset"]["log_timesteps"]:
341+
plt.xscale("log")
335342
title = f"Mean Absolute Errors over Time ({mode.capitalize()}, {surr_name})"
336343
filename = f"{mode}_errors_over_time.png"
337344

@@ -450,6 +457,8 @@ def plot_example_mode_predictions(
450457

451458
# Set the x-axis limits based on the timesteps array
452459
ax.set_xlim(timesteps.min(), timesteps.max())
460+
if conf["dataset"]["log_timesteps"]:
461+
ax.set_xscale("log")
453462

454463
# Add a single x-axis label to the bottom of the figure
455464
fig.text(0.5, 0.04, "Time", ha="center", va="center", fontsize=12)
@@ -471,10 +480,10 @@ def plot_example_mode_predictions(
471480

472481
# Set the overall title with details depending on the mode
473482
if mode == "interpolation":
474-
title = f"DeepEnsemble: Example Predictions (Interpolation, {surr_name})\n"
483+
title = f"Interpolation: Example Predictions (Interpolation, {surr_name})\n"
475484
extra_info = f"Sample Index: {example_idx}, Training Interval: {metric}"
476485
elif mode == "extrapolation":
477-
title = f"DeepEnsemble: Example Predictions (Extrapolation, {surr_name})\n"
486+
title = f"Extrapolation: Example Predictions (Extrapolation, {surr_name})\n"
478487
extra_info = f"Sample Index: {example_idx}, Cutoff Timestep: {metric}"
479488
else:
480489
raise ValueError(
@@ -589,6 +598,8 @@ def plot_example_predictions_with_uncertainty(
589598

590599
# Set the x limit exactly from the lowest to the highest timestep
591600
ax.set_xlim(timesteps.min(), timesteps.max())
601+
if conf["dataset"]["log_timesteps"]:
602+
ax.set_xscale("log")
592603

593604
# Add a single x-axis label to the bottom plot
594605
fig.text(0.5, 0.04, "Time", ha="center", va="center", fontsize=12)
@@ -656,6 +667,8 @@ def plot_average_uncertainty_over_time(
656667
plt.xlabel("Time")
657668
plt.ylabel("Average Uncertainty / Mean Absolute Error")
658669
plt.xlim(timesteps[0], timesteps[-1])
670+
if conf["dataset"]["log_timesteps"]:
671+
plt.xscale("log")
659672
if show_title:
660673
plt.title("Average Uncertainty and Mean Absolute Error Over Time")
661674
plt.legend()
@@ -835,10 +848,16 @@ def load_losses(model_identifier: str):
835848

836849
# Batchsize losses
837850
if conf["batch_scaling"]["enabled"]:
838-
batch_sizes = conf["batch_scaling"]["sizes"]
851+
batch_factors = conf["batch_scaling"]["sizes"]
839852
batch_train_losses = []
840853
batch_test_losses = []
841-
for batch_size in batch_sizes:
854+
batch_sizes = []
855+
surr_index = conf["surrogates"].index(surr_name)
856+
main_model_bs = conf["batch_size"][surr_index]
857+
for batch_factor in batch_factors:
858+
batch_factor = batch_factor_to_float(batch_factor)
859+
batch_size = int(main_model_bs * batch_factor)
860+
batch_sizes.append(batch_size)
842861
train_loss, test_loss, epochs = load_losses(
843862
f"{surr_name.lower()}_batchsize_{batch_size}"
844863
)
@@ -966,7 +985,9 @@ def plot_error_distribution_per_quantity(
966985
fig.align_ylabels()
967986

968987
plt.xscale("log") # Log scale for error magnitudes
969-
plt.xlim(10**x_min, 10**x_max) # Set x-axis range based on log-space calculations
988+
plt.xlim(
989+
np.maximum(10**x_min, 1e-8), 10**x_max
990+
) # Set x-axis range based on log-space calculations
970991
plt.xlabel("Relative Error")
971992
if show_title:
972993
if num_plots > 1:
@@ -1423,6 +1444,8 @@ def plot_relative_errors(
14231444
plt.yscale("log")
14241445
if show_title:
14251446
plt.title("Comparison of Relative Errors Over Time")
1447+
if config["dataset"]["log_timesteps"]:
1448+
plt.xscale("log")
14261449
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
14271450

14281451
if save and config:
@@ -1483,6 +1506,8 @@ def plot_uncertainty_over_time_comparison(
14831506
plt.xlim(timesteps[0], timesteps[-1])
14841507
plt.ylabel("Uncertainty / MAE")
14851508
plt.yscale("log")
1509+
if config["dataset"]["log_timesteps"]:
1510+
plt.xscale("log")
14861511
if show_title:
14871512
plt.title("Comparison of Predictive Uncertainty and True MAE over Time")
14881513
plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
@@ -2266,7 +2291,7 @@ def plot_error_distribution_comparative(
22662291
)
22672292

22682293
plt.xscale("log") # Log scale for error magnitudes
2269-
plt.xlim(10**x_min, 10**x_max) # Set x-axis range based on log-space calculations
2294+
plt.xlim(np.maximum(10**x_min, 1e-8), 10**x_max) # Set x-axis range
22702295

22712296
if mode == "main":
22722297
title = "Distribution of Surrogate Relative Errors"

codes/benchmark/bench_utils.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -452,10 +452,7 @@ def write_metrics_to_yaml(surr_name: str, conf: dict, metrics: dict) -> None:
452452
write_metrics = convert_to_standard_types(write_metrics)
453453

454454
# Make results directory
455-
try:
456-
os.makedirs(f"results/{conf['training_id']}")
457-
except FileExistsError:
458-
pass
455+
os.makedirs(f"results/{conf['training_id']}", exist_ok=True)
459456

460457
with open(
461458
f"results/{conf['training_id']}/{surr_name.lower()}_metrics.yaml",

0 commit comments

Comments (0)