
Commit 66622ed

Merge pull request #44 from robin-janssen/improve-losses-and-objectives
Improve losses and objectives
2 parents 9af1345 + fb91009 commit 66622ed

19 files changed: 1297 additions & 1231 deletions

codes/surrogates/AbstractSurrogate/abstract_config.py

Lines changed: 16 additions & 0 deletions
```diff
@@ -22,6 +22,8 @@ class AbstractSurrogateBaseConfig:
         poly_power (float): Power for polynomial decay scheduler (used only if scheduler == "poly").
         eta_min (float): Multiplier for minimum learning rate for cosine annealing scheduler (used only if scheduler == "cosine").
         activation (nn.Module): Activation function used in the model.
+        loss_function (nn.Module): Loss function used for training.
+        beta (float): Beta parameter for the loss function (used only if loss_function == nn.SmoothL1Loss()).
     """

     learning_rate: float = 3e-4
@@ -32,3 +34,17 @@ class AbstractSurrogateBaseConfig:
     poly_power: float = 0.9  # Used only if scheduler == "poly"
     eta_min: float = 1e-1  # Used only if scheduler == "cosine"
     activation: nn.Module = nn.ReLU()
+    loss_function: nn.Module = nn.MSELoss()  # Options: nn.MSELoss(), nn.SmoothL1Loss()
+    beta: float = 0.0  # Used only if loss_function == nn.SmoothL1Loss()
+
+    @property
+    def loss(self) -> nn.Module:
+        """
+        Returns the loss function to be used for training.
+
+        If the loss function is nn.SmoothL1Loss, returns a new instance with the
+        specified beta. Otherwise, returns the loss function as is.
+        """
+        if isinstance(self.loss_function, nn.SmoothL1Loss):
+            return nn.SmoothL1Loss(beta=self.beta)
+        return self.loss_function
```
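The `loss` property rebuilds `nn.SmoothL1Loss` with the configured `beta`, while any other loss is returned unchanged. A minimal, self-contained sketch of the pattern (the `LossConfig` class below is a hypothetical stand-in, not the repository's code):

```python
from dataclasses import dataclass, field

import torch
import torch.nn as nn


@dataclass
class LossConfig:
    """Hypothetical stand-in for the loss-related part of AbstractSurrogateBaseConfig."""

    loss_function: nn.Module = field(default_factory=nn.MSELoss)
    beta: float = 0.0  # only honoured for nn.SmoothL1Loss

    @property
    def loss(self) -> nn.Module:
        # Rebuild SmoothL1Loss so the configured beta is actually applied.
        if isinstance(self.loss_function, nn.SmoothL1Loss):
            return nn.SmoothL1Loss(beta=self.beta)
        return self.loss_function


config = LossConfig(loss_function=nn.SmoothL1Loss(), beta=0.1)
criterion = config.loss  # nn.SmoothL1Loss(beta=0.1)
value = criterion(torch.randn(8, 4), torch.randn(8, 4))  # scalar loss tensor
```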

codes/surrogates/AbstractSurrogate/abstract_surrogate.py

Lines changed: 22 additions & 12 deletions
```diff
@@ -215,23 +215,23 @@ def fit(
         pass

     def predict(
-        self, data_loader: DataLoader, denormalize: bool = True
+        self, data_loader: DataLoader, leave_log: bool = False
     ) -> tuple[Tensor, Tensor]:
         """
         Evaluate the model on the given dataloader.

         Args:
             data_loader (DataLoader): The DataLoader object containing the data the
                 model is evaluated on.
-            denormalize (bool): Whether to denormalize the predictions and targets.
+            leave_log (bool): If True, do not exponentiate the data even if log10_transform is True.

         Returns:
             tuple[Tensor, Tensor]: The predictions and targets.
         """
         # infer output size
         with torch.inference_mode():
             dummy_inputs = next(iter(data_loader))
-            dummy_outputs, _ = self.forward(dummy_inputs)
+            dummy_outputs, _ = self(dummy_inputs)
             batch_size, out_shape = (
                 dummy_outputs.shape[0],
                 dummy_outputs.shape[-(dummy_outputs.ndim - 1) :],
```
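Note the switch from `self.forward(...)` to `self(...)`: calling the module routes through `nn.Module.__call__`, so registered forward hooks fire. A small illustration:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
model.register_forward_hook(lambda mod, inp, out: print("hook fired"))

x = torch.randn(1, 4)
model(x)          # goes through nn.Module.__call__: prints "hook fired"
model.forward(x)  # bypasses __call__: the hook does not fire
```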
```diff
@@ -247,7 +247,11 @@ def predict(

         with torch.inference_mode():
             for inputs in data_loader:
-                preds, targs = self.forward(inputs)
+                inputs = [
+                    x.to(self.device, non_blocking=True) if isinstance(x, Tensor) else x
+                    for x in inputs
+                ]
+                preds, targs = self(inputs)
                 current_batch_size = preds.shape[0]  # get actual batch size
                 predictions[
                     processed_samples : processed_samples + current_batch_size, ...
```
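The loop now moves each tensor in the batch to the model's device before the forward pass, leaving non-tensor elements alone. The pattern in isolation (function name and batch layout below are assumptions for illustration):

```python
import torch
from torch import Tensor


def move_batch(inputs: list, device: torch.device) -> list:
    # Move only the tensors; leave non-tensor elements (e.g. metadata) untouched.
    return [
        x.to(device, non_blocking=True) if isinstance(x, Tensor) else x
        for x in inputs
    ]


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch = [torch.randn(32, 10), torch.randn(32, 1), "sample_ids"]
batch = move_batch(batch, device)
```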
```diff
@@ -261,9 +265,8 @@ def predict(
         predictions = predictions[:processed_samples, ...]
         targets = targets[:processed_samples, ...]

-        if denormalize:
-            predictions = self.denormalize(predictions)
-            targets = self.denormalize(targets)
+        predictions = self.denormalize(predictions, leave_log=leave_log)
+        targets = self.denormalize(targets, leave_log=leave_log)

         predictions = predictions.reshape(-1, self.n_timesteps, self.n_quantities)
         targets = targets.reshape(-1, self.n_timesteps, self.n_quantities)
```
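`predict` now always denormalizes and forwards `leave_log` to `denormalize`. The actual `denormalize` implementation is not part of this diff; a sketch of how such a flag plausibly interacts with a log10 transform (all names besides `leave_log` are assumptions):

```python
import torch
from torch import Tensor


def denormalize(
    x: Tensor, mean: Tensor, std: Tensor, log10_transform: bool, leave_log: bool = False
) -> Tensor:
    x = x * std + mean  # undo standardization
    if log10_transform and not leave_log:
        x = torch.pow(10.0, x)  # back to linear space
    return x  # with leave_log=True, values stay in log10 space
```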
```diff
@@ -499,7 +502,7 @@ def time_pruning(self, current_epoch: int, total_epochs: int) -> None:
             optuna.TrialPruned: If the projected runtime exceeds the threshold.
         """
         # Define warmup period based on 2% of total epochs (with a lower floor).
-        warmup_epochs = max(50, int(total_epochs * 0.02))
+        warmup_epochs = max(10, int(total_epochs * 0.02))
         if current_epoch < warmup_epochs:
             # Do not attempt to prune before the warmup period is complete.
             # print(
```
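The warmup floor drops from 50 to 10 epochs while the 2% scaling is kept, so short runs can now be time-pruned much earlier. A quick check of the arithmetic:

```python
def warmup_epochs(total_epochs: int) -> int:
    # Pruning is deferred until 2% of total epochs have passed, but never fewer than 10.
    return max(10, int(total_epochs * 0.02))


assert warmup_epochs(100) == 10    # previously max(50, 2) == 50
assert warmup_epochs(1000) == 20   # previously max(50, 20) == 50
assert warmup_epochs(5000) == 100  # unchanged: the 2% term dominates the floor
```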
```diff
@@ -645,7 +648,6 @@ def validate(
         epoch: int,
         train_loader: DataLoader,
         test_loader: DataLoader,
-        criterion: nn.Module,
         optimizer: torch.optim.Optimizer,
         progress_bar: tqdm,
         total_epochs: int,
```
```diff
@@ -665,6 +667,7 @@ def validate(
         - self.checkpoint(test_loss, epoch)

         Only runs if (epoch % self.update_epochs) == 0.
+        Main reporting metric is MAE in log10-space (i.e., Δdex). Additionally, MAE in linear space is computed.
         """

         # If it's not time to check yet, do nothing.
```
```diff
@@ -679,10 +682,11 @@ def validate(
         optimizer.eval() if hasattr(optimizer, "eval") else None

         # Compute losses
-        preds, targets = self.predict(train_loader)
-        self.train_loss[index] = criterion(preds, targets).item()
+        preds, targets = self.predict(train_loader, leave_log=True)
+        self.train_loss[index] = self.L1(preds, targets).item()
+        preds, targets = self.predict(test_loader, leave_log=True)
+        self.test_loss[index] = self.L1(preds, targets).item()
         preds, targets = self.predict(test_loader)
-        self.test_loss[index] = criterion(preds, targets).item()
         self.MAE[index] = self.L1(preds, targets).item()

         progress_bar.set_postfix(
```
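With `leave_log=True`, the train and test losses are now MAEs over log10-space values, i.e. Δdex, while the separate `MAE` entry stays in linear space. To illustrate the metric (the helper below is not from the repository):

```python
import torch


def delta_dex(preds_log10: torch.Tensor, targets_log10: torch.Tensor) -> torch.Tensor:
    # MAE in log10 space: a value of 0.1 means predictions are off by
    # a factor of 10**0.1 ≈ 1.26 on average.
    return (preds_log10 - targets_log10).abs().mean()


targets = torch.tensor([1.0, 2.0, 3.0])  # log10 of true quantities
preds = torch.tensor([1.1, 1.9, 3.0])    # log10 of predictions
print(delta_dex(preds, targets))         # tensor(0.0667), i.e. ≈ 0.067 dex
```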
```diff
@@ -699,6 +703,12 @@ def validate(
             self.optuna_trial.report(self.test_loss[index], step=epoch)
             if self.optuna_trial.should_prune():
                 raise optuna.TrialPruned()
+            elif np.isinf(self.test_loss[index]) or np.isnan(
+                self.test_loss[index]
+            ):
+                raise optuna.TrialPruned(
+                    "Test loss is NaN or Inf, pruning trial."
+                )

         self.checkpoint(self.test_loss[index], epoch)
```
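The new guard prunes a trial outright once the test loss has diverged, rather than waiting for Optuna's pruner to catch it. A minimal sketch of the same logic outside the class (using `math` instead of `numpy`):

```python
import math

import optuna


def report_or_prune(trial: optuna.Trial, test_loss: float, epoch: int) -> None:
    # Mirror of the guard added in this commit: prune on the pruner's schedule,
    # and also prune immediately if the loss is no longer finite.
    trial.report(test_loss, step=epoch)
    if trial.should_prune():
        raise optuna.TrialPruned()
    elif math.isinf(test_loss) or math.isnan(test_loss):
        raise optuna.TrialPruned("Test loss is NaN or Inf, pruning trial.")
```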
0 commit comments