Handle TimeoutError propagation and refactor y optimization

SamoraHunter · SamoraHunter · commit 7d22e79ec2f3 · 2026-01-19T21:46:26.000Z
- Propagate TimeoutError unchanged from the H2O classifier predict() and predict_proba() methods instead of wrapping it in RuntimeError or entering the reload-and-retry path
- Extract y optimization logic into reusable _optimize_y() helper method
- Add contiguous array conversion for faster numpy operations
- Ensure tf.function calls are not run eagerly (tf.config.run_functions_eagerly(False)) for performance
diff --git a/ml_grid/model_classes/H2OBaseClassifier.py b/ml_grid/model_classes/H2OBaseClassifier.py
@@ -642,20 +642,26 @@ def predict(self, X: pd.DataFrame) -> np.ndarray:
             test_h2o = tmp_frame
 
         except Exception as e:
+            if isinstance(e, TimeoutError):
+                raise
             raise RuntimeError(f"Failed to create H2O frame for prediction: {e}")
 
         # Make prediction
         try:
             predictions = self.model_.predict(test_h2o)
         except Exception as e:
             # If prediction failed, it might be because the model was unloaded/GC'd on server.
+            if isinstance(e, TimeoutError):
+                raise
             # Try reloading and predicting again.
             self.logger.debug(f"Prediction failed ({e}), attempting to reload model...")
             try:
                 self._ensure_h2o_is_running()
                 self._ensure_model_is_loaded()
                 predictions = self.model_.predict(test_h2o)
             except Exception as e2:
+                if isinstance(e2, TimeoutError):
+                    raise
                 # --- FIX: Catch H2O backend crashes (NPE) during prediction and fallback ---
                 if "java.lang.NullPointerException" in str(e):
                     self.logger.warning(
@@ -765,20 +771,26 @@ def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
                 destination_frame=f"prob_{uuid.uuid4().hex}",
             )
         except Exception as e:
+            if isinstance(e, TimeoutError):
+                raise
             raise RuntimeError(f"Failed to create H2O frame for prediction: {e}")
 
         # Make prediction
         try:
             predictions = self.model_.predict(test_h2o)
         except Exception as e:
             # Retry logic for unloaded models
+            if isinstance(e, TimeoutError):
+                raise
             self.logger.debug(f"Prediction failed ({e}), attempting to reload model...")
             try:
                 self._ensure_h2o_is_running()
                 self._ensure_model_is_loaded()
                 predictions = self.model_.predict(test_h2o)
             except Exception as e2:
                 # --- FIX: Catch H2O backend crashes (NPE) during prediction and fallback ---
+                if isinstance(e2, TimeoutError):
+                    raise
                 if "java.lang.NullPointerException" in str(e):
                     self.logger.warning(
                         f"H2O backend crashed with NPE during predict_proba(). Returning dummy probabilities. Details: {e}"
diff --git a/ml_grid/pipeline/grid_search_cross_validate.py b/ml_grid/pipeline/grid_search_cross_validate.py
@@ -178,6 +178,8 @@ def __init__(
                     else:
                         # Explicitly set CPU as the visible device for TensorFlow to avoid CUDA init errors
                         tf.config.set_visible_devices([], "GPU")
+                    
+                    tf.config.run_functions_eagerly(False)
                 except Exception as e:
                     self.logger.warning(f"Could not configure GPU for TensorFlow: {e}")
                 finally:
@@ -537,22 +539,7 @@ def __init__(
                 # --- OPTIMIZATION: Convert y to numpy for ALL models ---
                 # This avoids expensive sklearn type_of_target checks on Pandas Series (overhead seen in profiling)
                 # Most sklearn models handle numpy arrays efficiently.
-                if isinstance(y_train_reset.dtype, pd.CategoricalDtype):
-                    y_train_search = y_train_reset.cat.codes.values
-                elif hasattr(y_train_reset, "values"):
-                    y_train_search = y_train_reset.values
-                else:
-                    y_train_search = y_train_reset
-
-                # --- OPTIMIZATION: Force integer encoding for y ---
-                # This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~221s)
-                # AND speeds up checks on float arrays (common in H2O/Pandas)
-                if not pd.api.types.is_integer_dtype(y_train_search):
-                    try:
-                        y_train_search = y_train_search.astype(int)
-                    except (ValueError, TypeError):
-                        y_train_search, _ = pd.factorize(y_train_search, sort=True)
-                        y_train_search = y_train_search.astype(int)
+                y_train_search = self._optimize_y(y_train_reset)
 
                 # --- OPTIMIZATION: Skip parameter validation overhead ---
                 # Use set_config to ensure it propagates to all internal calls
@@ -683,21 +670,7 @@ def __init__(
             else:
                 X_train_final = self.X_train.values  # Use NumPy array for other models
                 # Optimization: Pass numpy array for y to avoid pandas overhead in sklearn
-                # If it was converted to categorical (unlikely for sklearn now), get codes
-                if isinstance(self.y_train.dtype, pd.CategoricalDtype):
-                    y_train_final = self.y_train.cat.codes.values
-                else:
-                    y_train_final = self.y_train.values
-
-            # --- OPTIMIZATION: Force integer encoding for y ---
-            # This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~173s)
-            # AND speeds up checks on float arrays (common in H2O/Pandas)
-            if not pd.api.types.is_integer_dtype(y_train_final):
-                try:
-                    y_train_final = y_train_final.astype(int)
-                except (ValueError, TypeError):
-                    y_train_final, _ = pd.factorize(y_train_final, sort=True)
-                    y_train_final = y_train_final.astype(int)
+                y_train_final = self._optimize_y(self.y_train)
 
             scores = None
 
@@ -1022,6 +995,27 @@ def __init__(
 
         self._shutdown_h2o_if_needed(current_algorithm)
 
+    def _optimize_y(self, y):
+        """Return y as a contiguous integer ndarray (cuts sklearn type_of_target cost).
+
+        Categorical input yields its codes; values that cast cleanly to int are
+        cast; the rest (e.g. text labels, non-integral or NaN floats) are factorized
+        in sorted order, with missing values coded -1. Plain sequences are accepted.
+        """
+        is_cat = isinstance(getattr(y, "dtype", None), pd.CategoricalDtype)
+        # Series expose codes via .cat, Categorical directly; lists lack .values
+        raw = getattr(y, "cat", y).codes if is_cat else getattr(y, "values", y)
+        y_opt = np.asarray(raw)
+        if not pd.api.types.is_integer_dtype(y_opt):
+            try:
+                # astype(int) would merge 0.2/0.7 into one class and mangle NaN
+                if y_opt.dtype.kind == "f" and not (y_opt == np.trunc(y_opt)).all():
+                    raise ValueError("non-integral float labels")
+                y_opt = y_opt.astype(int)
+            except (ValueError, TypeError):
+                y_opt = pd.factorize(y_opt, sort=True)[0].astype(int)
+        return np.ascontiguousarray(y_opt)
+
     def _adjust_knn_parameters(self, parameter_space: Union[Dict, List[Dict]]):
         """
         Dynamically adjusts the 'n_neighbors' parameter for KNN-based models