Commit 2d73cf3

SamoraHunter committed
Implemented timeout at model fit method and trial level. Added user config values and defaults. Propagated failing trials to score save. Error and timeout failures propagated.
1 parent 13d8223 commit 2d73cf3

5 files changed

Lines changed: 194 additions & 106 deletions

config_hyperopt.yml

Lines changed: 3 additions & 1 deletion
@@ -11,6 +11,7 @@ global_params:
 n_iter: 2
 max_param_space_iter_value : 10
 force_second_cv: false # If True, forces a second cross-validation run even if cached results are available. Defaults to False.
+model_eval_time_limit: 3600

 # Experiment settings for the hyperopt run
 experiment:
@@ -103,4 +104,5 @@ hyperopt_search_space:

 # Hyperopt-specific settings
 hyperopt_settings:
-max_evals: 2 # Number of iterations per outcome variable
+max_evals: 2 # Number of iterations per outcome variable
+trial_timeout: 1120 # Timeout in seconds for a full trial (data prep + all models)
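
For reference, a minimal sketch of how these two new keys might be consumed, assuming PyYAML and the key layout shown in this diff; load_timeouts is a hypothetical helper, not part of this commit:

import yaml  # assumption: PyYAML is available

def load_timeouts(path="config_hyperopt.yml"):
    # Hypothetical helper: reads the two timeout settings added above.
    with open(path) as f:
        cfg = yaml.safe_load(f)
    # Per-model evaluation limit in seconds (global_params section).
    model_limit = cfg.get("global_params", {}).get("model_eval_time_limit")
    # Whole-trial limit in seconds (hyperopt_settings section).
    trial_limit = cfg.get("hyperopt_settings", {}).get("trial_timeout")
    return model_limit, trial_limit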

ml_grid/pipeline/grid_search_cross_validate.py

Lines changed: 47 additions & 32 deletions
@@ -400,6 +400,23 @@ def __init__(
 if self.global_parameters.verbose >= 3:
 self.logger.debug("Running hyperparameter search")

+# Define default scores early to handle timeouts in search phase
+default_scores = {
+"test_accuracy": np.array([0.5]),
+"test_f1": np.array([0.5]),
+"test_auc": np.array([0.5]),
+"fit_time": np.array([0]),
+"score_time": np.array([0]),
+"train_score": np.array([0.5]),
+"test_recall": np.array([0.5]),
+}
+
+failed = False
+scores = None
+
+# Initialize start_time early
+start_time = time.time()
+
 try:
 # Verify initial index alignment
 try:
@@ -435,6 +452,11 @@ def __init__(
 # Pass reset data to search
 current_algorithm = search.run_search(X_train_reset, y_train_reset)

+except TimeoutError:
+self.logger.warning("Timeout occurred during hyperparameter search.")
+failed = "Timeout"
+scores = default_scores
+
 except Exception as e:
 if "dual coefficients or intercepts are not finite" in str(e):
 self.logger.warning(
@@ -454,7 +476,7 @@ def __init__(
 # --- PERFORMANCE FIX for testing ---
 # If in test_mode, we have already verified that the search runs without crashing.
 # We can skip the final, slow cross-validation and return a dummy score.
-if getattr(self.global_parameters, "test_mode", False):
+if not failed and getattr(self.global_parameters, "test_mode", False):
 self.logger.info(
 "Test mode enabled. Skipping final cross-validation for speed."
 )
@@ -463,7 +485,7 @@ def __init__(
 self._shutdown_h2o_if_needed(current_algorithm)
 return

-if self.global_parameters.verbose >= 3:
+if not failed and self.global_parameters.verbose >= 3:
 self.logger.debug("Fitting final model")

 # In production, we re-fit the best estimator on the full training data before CV.
@@ -472,15 +494,14 @@ def __init__(

 metric_list = self.metric_list

-# Catch only one class present AUC not defined:
-
-if len(np.unique(self.y_train)) < 2:
+# Catch only one class present AUC not defined (check only if not already failed)
+if not failed and len(np.unique(self.y_train)) < 2:
 raise ValueError(
 "Only one class present in y_train. ROC AUC score is not defined "
 "in that case. grid_search_cross_validate>>>cross_validate"
 )

-if self.global_parameters.verbose >= 1:
+if not failed and self.global_parameters.verbose >= 1:
 self.logger.info("Getting cross validation scores")
 self.logger.debug(
 f"X_train shape: {self.X_train.shape}, y_train shape: {self.y_train.shape}"
@@ -490,27 +511,6 @@ def __init__(
 # Set a time threshold in seconds
 time_threshold = 60 # For example, 60 seconds

-start_time = time.time()
-
-# Define default scores (e.g., mean score of 0.5 for binary classification)
-# Default scores if cross-validation fails
-default_scores = {
-"test_accuracy": np.array(
-[0.5]
-), # Default to random classifier performance
-"test_f1": np.array(
-[0.5]
-), # Default F1 score (again, 0.5 for random classification)
-"test_auc": np.array(
-[0.5]
-), # Default ROC AUC score (0.5 for random classifier)
-"fit_time": np.array([0]), # No fitting time if the model fails
-"score_time": np.array([0]), # No scoring time if the model fails
-"train_score": np.array([0.5]), # Default train score
-"test_recall": np.array([0.5]),
-#'test_auc': [0.5] # ?
-}
-
 # --- CRITICAL FIX for H2O multiprocessing error ---
 # H2O models cannot be pickled and sent to other processes for parallel
 # execution with joblib. We must detect if the current algorithm is an
@@ -541,9 +541,10 @@ def __init__(
 "H2O or Keras model detected. Forcing n_jobs=1 for final cross-validation."
 )

-failed = False
-
 try:
+if failed:
+raise TimeoutError
+
 # H2O models require pandas DataFrames with column names, while other
 # sklearn models can benefit from using NumPy arrays.
 if isinstance(current_algorithm, h2o_model_types):
@@ -737,17 +738,19 @@ def __init__(
 )

 # Set default scores if the AdaBoostClassifier fails
+failed = True
 scores = default_scores # Use default scores

 else:
 self.logger.error(
 f"An unexpected ValueError occurred during cross-validation: {e}",
 exc_info=True,
 )
+failed = True
 scores = default_scores # Use default scores for other errors

 except RuntimeError as e:
-raise e # raise h2o errors to aid development
+# raise e # raise h2o errors to aid development
 # --- FIX for UnboundLocalError with H2OStackedEnsemble ---
 # Catch any RuntimeError, which can be raised by H2O models during fit
 # (e.g., base model training failure) or predict.
@@ -759,12 +762,18 @@ def __init__(
 failed = True
 scores = default_scores

+except TimeoutError:
+self.logger.warning("Timeout occurred during cross-validation.")
+failed = "Timeout"
+scores = default_scores
+
 except Exception as e:
 # Catch any other general exceptions and log them
 self.logger.error(
 f"An unexpected error occurred during cross-validation: {e}",
 exc_info=True,
 )
+failed = True
 scores = default_scores # Use default scores if an error occurs

 # End the timer
@@ -801,7 +810,10 @@ def __init__(
 # plot_auc_results(grid.best_estimator_, X_test_orig, self.y_test_orig, cv)

 # this should be x_test...?
-best_pred_orig = current_algorithm.predict(self.X_test) # exp
+try:
+best_pred_orig = current_algorithm.predict(self.X_test) # exp
+except Exception:
+best_pred_orig = np.zeros(len(self.X_test))

 # Call the update_score_log method on the provided instance
 if self.project_score_save_class_instance:
@@ -822,7 +834,10 @@ def __init__(
 )

 # calculate metric for optimisation
-auc = metrics.roc_auc_score(self.y_test, best_pred_orig)
+try:
+auc = metrics.roc_auc_score(self.y_test, best_pred_orig)
+except Exception:
+auc = 0.5

 self.grid_search_cross_validate_score_result = auc
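
The control flow above is hard to follow inside a diff, so here is a standalone sketch of the intended failure-handling pattern (not the actual class; run_search and run_cv are illustrative stand-ins): a timeout in the search or in cross-validation sets failed and substitutes default_scores, so the score log still receives a complete row.

import numpy as np

# Neutral fallback metrics, mirroring the default_scores dict added above.
default_scores = {
    "test_accuracy": np.array([0.5]),
    "test_f1": np.array([0.5]),
    "test_auc": np.array([0.5]),
    "fit_time": np.array([0]),
    "score_time": np.array([0]),
    "train_score": np.array([0.5]),
    "test_recall": np.array([0.5]),
}

def evaluate(run_search, run_cv):
    # Illustrative stand-ins for the search and cross-validation steps
    # performed inside grid_search_crossvalidate.
    failed, scores, model = False, None, None
    try:
        model = run_search()
    except TimeoutError:
        failed, scores = "Timeout", default_scores

    try:
        if failed:
            raise TimeoutError  # skip CV when the search already timed out
        scores = run_cv(model)
    except TimeoutError:
        failed, scores = "Timeout", default_scores
    except Exception:
        failed, scores = True, default_scores
    return failed, scores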

ml_grid/pipeline/main.py

Lines changed: 89 additions & 10 deletions
@@ -1,6 +1,9 @@
 import logging
+import signal
+import time
 import traceback
 from typing import Any, Dict, List, Tuple
+from contextlib import contextmanager

 import numpy as np
 from sklearn.model_selection import ParameterGrid
@@ -12,6 +15,56 @@
 from ml_grid.util.project_score_save import project_score_save_class # Import the class


+@contextmanager
+def time_limit(seconds):
+if seconds is None:
+yield
+return
+
+try:
+seconds_int = int(seconds)
+except (ValueError, TypeError):
+logging.getLogger("ml_grid").warning(f"Invalid timeout value: {seconds}. Timeout disabled.")
+yield
+return
+
+if seconds_int <= 0:
+yield
+return
+
+if not hasattr(signal, "SIGALRM"):
+logging.getLogger("ml_grid").warning("Timeout not supported on this platform (SIGALRM missing).")
+yield
+return
+def signal_handler(signum, frame):
+raise TimeoutError(f"Timeout of {seconds}s reached")
+
+# Check for existing alarm (nesting support)
+previous_remaining = signal.alarm(0)
+start_time = time.time()
+
+# Determine effective timeout (min of new and remaining outer)
+if previous_remaining > 0:
+effective_seconds = min(seconds_int, previous_remaining)
+else:
+effective_seconds = seconds_int
+
+# Save the old handler
+original_handler = signal.signal(signal.SIGALRM, signal_handler)
+signal.alarm(effective_seconds)
+try:
+yield
+finally:
+signal.alarm(0)
+signal.signal(signal.SIGALRM, original_handler)
+
+# Restore previous alarm if it existed, adjusting for elapsed time
+if previous_remaining > 0:
+elapsed = time.time() - start_time
+# Ensure we don't set 0 or negative; if expired, set 1s to trigger immediately
+remaining_outer = max(1, int(previous_remaining - elapsed))
+signal.alarm(remaining_outer)
+
 class run:
 """Orchestrates the hyperparameter search for a list of models."""

@@ -241,11 +294,24 @@ def execute_single_model(self, args: Tuple) -> float:
 """
 try:
 self.logger.info(f"Starting grid search for {args[2]}...")
-gscv_instance = grid_search_cross_validate.grid_search_crossvalidate(*args)
-score = gscv_instance.grid_search_cross_validate_score_result
+
+# Retrieve timeout from local_param_dict via ml_grid_object (args[3])
+timeout = args[3].local_param_dict.get("model_eval_time_limit")
+if timeout is None:
+timeout = args[3].global_params.model_eval_time_limit
+
+with time_limit(timeout):
+gscv_instance = grid_search_cross_validate.grid_search_crossvalidate(*args)
+score = gscv_instance.grid_search_cross_validate_score_result
+
 self.logger.info(f"Score for {args[2]}: {score:.4f}")
 return score

+except TimeoutError as e:
+self.logger.warning(f"Timeout occurred for {args[2]}: {e}")
+self.model_error_list.append([args[0], e, traceback.format_exc()])
+return 0.0
+
 except Exception as e:
 self.logger.error(
 f"An exception occurred during grid search for {args[2]}: {e}",
@@ -298,18 +364,31 @@ def multi_run_wrapper(args: Tuple) -> Any:
 self.logger.info(
 f"Starting grid search for {self.arg_list[k][2]}..."
 )
-gscv_instance = (
-grid_search_cross_validate.grid_search_crossvalidate(
-*self.arg_list[k] # Unpack all arguments
+
+timeout = self.local_param_dict.get("model_eval_time_limit")
+if timeout is None:
+timeout = self.global_params.model_eval_time_limit
+
+with time_limit(timeout):
+gscv_instance = (
+grid_search_cross_validate.grid_search_crossvalidate(
+*self.arg_list[k] # Unpack all arguments
+)
 )
-)

-self.highest_score = max(
-self.highest_score,
-gscv_instance.grid_search_cross_validate_score_result,
-)
+self.highest_score = max(
+self.highest_score,
+gscv_instance.grid_search_cross_validate_score_result,
+)
 self.logger.info(f"Current highest score: {self.highest_score:.4f}")

+except TimeoutError as e:
+self.logger.warning(f"Timeout occurred for {self.arg_list[k][2]}: {e}")
+self.model_error_list.append(
+[self.arg_list[k][0], e, traceback.format_exc()]
+)
+continue
+
 except (
 Exception
 ) as e: # Catches any exception from grid_search_crossvalidate

ml_grid/util/global_params.py

Lines changed: 3 additions & 0 deletions
@@ -96,6 +96,8 @@ class GlobalParameters:
 """Verbosity level for the search object (GridSearchCV, etc.). Defaults to 0."""
 force_second_cv: bool
 """If True, forces a second cross-validation run even if cached results are available. Defaults to False."""
+model_eval_time_limit: int
+"""The time limit in seconds for a single model evaluation. Defaults to None (no limit)."""

 def __new__(cls, *args: Any, **kwargs: Any) -> "GlobalParameters":
 """Creates a new instance if one does not already exist (Singleton pattern)."""
@@ -141,6 +143,7 @@ def __init__(self, debug_level: int = 0, knn_n_jobs: int = -1) -> None:
 self.h2o_show_progress = False
 self.search_verbose = 0
 self.force_second_cv = False
+self.model_eval_time_limit = None

 custom_scorer = make_scorer(custom_roc_auc_score)
 self.metric_list = {
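
A brief configuration sketch (an assumption about intended usage, not code from this commit): the singleton default is None, meaning no limit, and callers can override it before launching a run.

from ml_grid.util.global_params import GlobalParameters

params = GlobalParameters()
params.model_eval_time_limit = 1800  # cap each model evaluation at 30 minutes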
