Skip to content

Commit 33e7a21

Browse files
Merge pull request #41 from robin-janssen/ode-parameters
Implement architecture variants for ODE parameters
2 parents 7304067 + 4416b49 commit 33e7a21

47 files changed

Lines changed: 2086 additions & 992 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

codes/benchmark/bench_fcts.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,34 @@ def run_benchmark(surr_name: str, surrogate_class, conf: dict) -> dict[str, Any]
7676
else:
7777
batch_size = conf["batch_size"]
7878

79-
train_data, test_data, val_data, timesteps, n_train_samples, _, labels = (
80-
check_and_load_data(
81-
conf["dataset"]["name"],
82-
verbose=False,
83-
log=conf["dataset"]["log10_transform"],
84-
normalisation_mode=conf["dataset"]["normalise"],
85-
)
79+
# Load full data and parameters
80+
(
81+
(train_data, test_data, val_data),
82+
(train_params, test_params, val_params),
83+
timesteps,
84+
n_train_samples,
85+
_,
86+
labels,
87+
) = check_and_load_data(
88+
conf["dataset"]["name"],
89+
verbose=False,
90+
log=conf["dataset"]["log10_transform"],
91+
log_params=conf.get("log10_transform_params", False),
92+
normalisation_mode=conf["dataset"]["normalise"],
93+
tolerance=conf["dataset"]["tolerance"],
8694
)
95+
8796
model_config = get_model_config(surr_name, conf)
8897
n_timesteps = train_data.shape[1]
8998
n_quantities = train_data.shape[2]
9099
n_test_samples = n_timesteps * val_data.shape[0]
91-
model = surrogate_class(device, n_quantities, n_timesteps, model_config)
100+
n_params = train_params.shape[1] if train_params is not None else 0
101+
model = surrogate_class(device, n_quantities, n_timesteps, n_params, model_config)
92102

93103
# Placeholder for metrics
94104
metrics = {}
95105
metrics["timesteps"] = timesteps
106+
metrics["n_params"] = n_params
96107

97108
# Create dataloader for the validation data
98109
_, _, val_loader = model.prepare_data(
@@ -101,7 +112,11 @@ def run_benchmark(surr_name: str, surrogate_class, conf: dict) -> dict[str, Any]
101112
dataset_val=val_data,
102113
timesteps=timesteps,
103114
batch_size=batch_size,
104-
shuffle=False,
115+
shuffle=True,
116+
dataset_train_params=train_params,
117+
dataset_test_params=test_params,
118+
dataset_val_params=val_params,
119+
dummy_timesteps=True,
105120
)
106121

107122
# Plot training losses
@@ -953,8 +968,11 @@ def compare_main_losses(metrics: dict, config: dict) -> None:
953968
surrogate_class = get_surrogate(surr_name)
954969
n_timesteps = metrics[surr_name]["timesteps"].shape[0]
955970
n_quantities = metrics[surr_name]["accuracy"]["absolute_errors"].shape[2]
971+
n_params = metrics[surr_name]["n_params"]
956972
model_config = get_model_config(surr_name, config)
957-
model = surrogate_class(device, n_quantities, n_timesteps, model_config)
973+
model = surrogate_class(
974+
device, n_quantities, n_timesteps, n_params, model_config
975+
)
958976

959977
def load_losses(model_identifier: str):
960978
model.load(training_id, surr_name, model_identifier=model_identifier)

codes/benchmark/bench_plots.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def save_plot(
2020
dpi: int = 300,
2121
base_dir: str = "plots", # Base directory for saving plots
2222
increase_count: bool = False, # Whether to increase the count for existing filenames
23-
format: str = "pdf", # Format for saving the plot
23+
format: str = "jpg", # Format for saving the plot
2424
) -> None:
2525
"""
2626
Save the plot to a file, creating necessary directories if they don't exist.
@@ -122,9 +122,9 @@ def plot_relative_errors_over_time(
122122
p99_lower = np.percentile(relative_errors, 0.5, axis=(0, 2))
123123

124124
plt.figure(figsize=(6, 4))
125-
mean_label = f"Mean Error\nMean={mean*100:.2f}%"
125+
mean_label = f"Mean Error\nMean={mean * 100:.2f}%"
126126
plt.plot(timesteps, mean_errors, label=mean_label, color="blue")
127-
median_label = f"Median Error\nMedian={median*100:.2f}%"
127+
median_label = f"Median Error\nMedian={median * 100:.2f}%"
128128
plt.plot(timesteps, median_errors, label=median_label, color="red")
129129

130130
# Shading areas
@@ -816,7 +816,9 @@ def load_losses(model_identifier: str):
816816
uq_train_losses = [main_train_loss]
817817
uq_test_losses = [main_test_loss]
818818
for i in range(n_models - 1):
819-
train_loss, test_loss, epochs = load_losses(f"{surr_name.lower()}_UQ_{i+1}")
819+
train_loss, test_loss, epochs = load_losses(
820+
f"{surr_name.lower()}_UQ_{i + 1}"
821+
)
820822
uq_train_losses.append(train_loss)
821823
uq_test_losses.append(test_loss)
822824
plot_losses(
@@ -1397,7 +1399,7 @@ def plot_relative_errors(
13971399

13981400
for i, surrogate in enumerate(mean_errors.keys()):
13991401
mean = np.mean(mean_errors[surrogate])
1400-
mean_label = f"{surrogate}\nMean = {mean*100:.2f}%"
1402+
mean_label = f"{surrogate}\nMean = {mean * 100:.2f}%"
14011403
plt.plot(
14021404
timesteps,
14031405
mean_errors[surrogate],
@@ -1406,7 +1408,7 @@ def plot_relative_errors(
14061408
linestyle=linestyles[0],
14071409
)
14081410
median = np.mean(median_errors[surrogate])
1409-
median_label = f"{surrogate}\nMedian = {median*100:.2f}%"
1411+
median_label = f"{surrogate}\nMedian = {median * 100:.2f}%"
14101412
plt.plot(
14111413
timesteps,
14121414
median_errors[surrogate],
@@ -2895,7 +2897,7 @@ def rel_errors_and_uq(
28952897

28962898
for i, surrogate in enumerate(mean_errors.keys()):
28972899
mean = np.mean(mean_errors[surrogate])
2898-
mean_label = f"{surrogate} Mean={mean*100:.2f} %"
2900+
mean_label = f"{surrogate} Mean={mean * 100:.2f} %"
28992901
ax1.plot(
29002902
timesteps,
29012903
mean_errors[surrogate],
@@ -2904,7 +2906,7 @@ def rel_errors_and_uq(
29042906
linestyle=linestyles[0],
29052907
)
29062908
median = np.mean(median_errors[surrogate])
2907-
median_label = f"{surrogate} Median={median*100:.2f} %"
2909+
median_label = f"{surrogate} Median={median * 100:.2f} %"
29082910
ax1.plot(
29092911
timesteps,
29102912
median_errors[surrogate],

codes/benchmark/bench_utils.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import importlib.util
33
import inspect
44
import os
5+
import time
56
from copy import deepcopy
67
from dataclasses import asdict
78

@@ -13,8 +14,6 @@
1314
from codes.surrogates import SurrogateModel, surrogate_classes
1415
from codes.utils import read_yaml_config
1516

16-
import time
17-
1817

1918
def check_surrogate(surrogate: str, conf: dict) -> None:
2019
"""
@@ -219,7 +218,7 @@ def get_required_models_list(surrogate: str, conf: dict) -> list:
219218
if conf["uncertainty"]["enabled"]:
220219
n_models = conf["uncertainty"]["ensemble_size"]
221220
required_models.extend(
222-
[f"{surrogate.lower()}_UQ_{i+1}.pth" for i in range(n_models - 1)]
221+
[f"{surrogate.lower()}_UQ_{i + 1}.pth" for i in range(n_models - 1)]
223222
)
224223

225224
return required_models
@@ -296,7 +295,7 @@ def measure_memory_footprint(model: torch.nn.Module, inputs: tuple) -> dict:
296295

297296
# Prepare inputs: move them to the target device
298297
if isinstance(inputs, (list, tuple)):
299-
inputs = tuple(i.to(device) for i in inputs)
298+
inputs = tuple((i.to(device) if i is not None else i) for i in inputs)
300299
else:
301300
inputs = inputs.to(device)
302301

@@ -640,10 +639,9 @@ def save_table_csv(headers: list, rows: list, config: dict) -> None:
640639
"""
641640
# Convert each cell to a string and remove asterisks
642641
cleaned_rows = [
643-
[str(cell).replace("*", "").strip() for cell in row]
644-
for row in rows
642+
[str(cell).replace("*", "").strip() for cell in row] for row in rows
645643
]
646-
644+
647645
csv_path = f"results/{config['training_id']}/metrics_table.csv"
648646
with open(csv_path, "w", newline="") as f:
649647
writer = csv.writer(f)

codes/surrogates/AbstractSurrogate/surrogates.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import dataclasses
22
import os
3+
import time
34
from abc import ABC, abstractmethod
45
from datetime import datetime
56
from typing import Any, TypeVar
67

78
import numpy as np
9+
import optuna
810
import torch
911
import yaml
1012
from torch import Tensor, nn
@@ -71,7 +73,7 @@ class AbstractSurrogateModel(ABC, nn.Module):
7173
model_name: str,
7274
subfolder: str,
7375
training_id: str,
74-
data_params: dict,
76+
data_info: dict,
7577
) -> None:
7678
Saves the model to disk.
7779
@@ -99,6 +101,7 @@ def __init__(
99101
device: str | None = None,
100102
n_quantities: int = 29,
101103
n_timesteps: int = 100,
104+
n_parameters: int = 0,
102105
config: dict | None = None,
103106
):
104107
super().__init__()
@@ -109,6 +112,7 @@ def __init__(
109112
self.device = device
110113
self.n_quantities = n_quantities
111114
self.n_timesteps = n_timesteps
115+
self.n_parameters = n_parameters
112116
self.L1 = nn.L1Loss()
113117
self.config = config if config is not None else {}
114118
self.train_duration = None
@@ -265,7 +269,7 @@ def save(
265269
model_name (str): The name of the model.
266270
subfolder (str): The subfolder to save the model in.
267271
training_id (str): The training identifier.
268-
data_params (dict): The data parameters.
272+
data_info (dict): The data parameters.
269273
"""
270274

271275
# Make the model directory
@@ -329,7 +333,7 @@ def save(
329333

330334
save_attributes = {
331335
k: v
332-
for k, v in self.__dict__.items()
336+
for k, v in self.__dict__.copy().items()
333337
if k != "state_dict" and not k.startswith("_")
334338
}
335339
model_dict = {"state_dict": self.state_dict(), "attributes": save_attributes}
@@ -392,6 +396,7 @@ def setup_progress_bar(self, epochs: int, position: int, description: str):
392396
Returns:
393397
tqdm: The progress bar.
394398
"""
399+
395400
bar_format = "{l_bar}{bar}| {n_fmt:>5}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt} {postfix}]"
396401
progress_bar = tqdm(
397402
range(epochs),
@@ -401,6 +406,9 @@ def setup_progress_bar(self, epochs: int, position: int, description: str):
401406
bar_format=bar_format,
402407
)
403408

409+
# Only used for time_pruning in multi objective optimisation
410+
self._trial_start_time = time.time()
411+
404412
return progress_bar
405413

406414
def denormalize(self, data: Tensor) -> Tensor:
@@ -430,5 +438,64 @@ def denormalize(self, data: Tensor) -> Tensor:
430438

431439
return data
432440

441+
def time_pruning(self, current_epoch: int, total_epochs: int) -> None:
    """
    Prune an Optuna trial whose projected total runtime exceeds the study's
    runtime threshold.

    A warmup period of ``max(50, 2% of total_epochs)`` epochs is honoured
    first: no pruning decision is made before it completes, so the per-epoch
    time estimate has had a chance to settle. After warmup, the average epoch
    duration (measured from ``self._trial_start_time``, which is set in
    ``setup_progress_bar``) is extrapolated to ``total_epochs`` and compared
    against the ``runtime_threshold`` user attribute of the trial's study.
    If no trial/study/threshold is available, the method never prunes.

    Args:
        current_epoch (int): The current epoch count.
        total_epochs (int): The planned total number of epochs.

    Raises:
        optuna.TrialPruned: If the projected total runtime exceeds the
            study's ``runtime_threshold``.
    """
    # Warmup: at least 50 epochs, or 2% of the planned epochs if that is
    # larger. Pruning before this point would extrapolate from too few
    # epoch timings to be reliable.
    warmup_epochs = max(50, int(total_epochs * 0.02))
    if current_epoch < warmup_epochs:
        return

    elapsed = time.time() - self._trial_start_time
    completed_epochs = max(current_epoch, 1)  # guard against division by zero
    average_epoch_time = elapsed / completed_epochs
    projected_total_time = average_epoch_time * total_epochs

    # The threshold lives on the study as a user attribute; without an
    # attached trial (e.g. a plain training run) there is nothing to prune.
    if self.optuna_trial is not None and hasattr(self.optuna_trial, "study"):
        threshold = self.optuna_trial.study.user_attrs.get(
            "runtime_threshold", None
        )
    else:
        threshold = None

    if threshold is not None and projected_total_time > threshold:
        if self.optuna_trial is not None:
            tqdm.write(
                f"[time_pruning] Projected total time {projected_total_time:.1f}s exceeds threshold {threshold:.1f}s. Pruning trial."
            )
            # Record the reason on the trial so it is visible in the study
            # dashboard / dataframe after pruning. Only touch the trial
            # inside the None-guard to avoid an AttributeError.
            self.optuna_trial.set_user_attr(
                "prune_reason",
                f"Projected runtime {projected_total_time:.1f}s exceeds threshold {threshold:.1f}s",
            )
        raise optuna.TrialPruned(
            f"Projected total time {projected_total_time:.1f}s exceeds threshold {threshold:.1f}s"
        )
433500

434501
SurrogateModel = TypeVar("SurrogateModel", bound=AbstractSurrogateModel)

0 commit comments

Comments
 (0)