Skip to content

Commit 7d22e79

Browse files
author
SamoraHunter
committed
Handle TimeoutError propagation and refactor y optimization
- Propagate TimeoutError in H2O classifier predict methods instead of wrapping it
- Extract y optimization logic into a reusable _optimize_y() helper method
- Add contiguous array conversion for faster NumPy operations
- Disable TensorFlow eager execution for performance
1 parent 15563bf commit 7d22e79

2 files changed

Lines changed: 37 additions & 31 deletions

File tree

ml_grid/model_classes/H2OBaseClassifier.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,20 +642,26 @@ def predict(self, X: pd.DataFrame) -> np.ndarray:
642642
test_h2o = tmp_frame
643643

644644
except Exception as e:
645+
if isinstance(e, TimeoutError):
646+
raise
645647
raise RuntimeError(f"Failed to create H2O frame for prediction: {e}")
646648

647649
# Make prediction
648650
try:
649651
predictions = self.model_.predict(test_h2o)
650652
except Exception as e:
651653
# If prediction failed, it might be because the model was unloaded/GC'd on server.
654+
if isinstance(e, TimeoutError):
655+
raise
652656
# Try reloading and predicting again.
653657
self.logger.debug(f"Prediction failed ({e}), attempting to reload model...")
654658
try:
655659
self._ensure_h2o_is_running()
656660
self._ensure_model_is_loaded()
657661
predictions = self.model_.predict(test_h2o)
658662
except Exception as e2:
663+
if isinstance(e2, TimeoutError):
664+
raise
659665
# --- FIX: Catch H2O backend crashes (NPE) during prediction and fallback ---
660666
if "java.lang.NullPointerException" in str(e):
661667
self.logger.warning(
@@ -765,20 +771,26 @@ def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
765771
destination_frame=f"prob_{uuid.uuid4().hex}",
766772
)
767773
except Exception as e:
774+
if isinstance(e, TimeoutError):
775+
raise
768776
raise RuntimeError(f"Failed to create H2O frame for prediction: {e}")
769777

770778
# Make prediction
771779
try:
772780
predictions = self.model_.predict(test_h2o)
773781
except Exception as e:
774782
# Retry logic for unloaded models
783+
if isinstance(e, TimeoutError):
784+
raise
775785
self.logger.debug(f"Prediction failed ({e}), attempting to reload model...")
776786
try:
777787
self._ensure_h2o_is_running()
778788
self._ensure_model_is_loaded()
779789
predictions = self.model_.predict(test_h2o)
780790
except Exception as e2:
781791
# --- FIX: Catch H2O backend crashes (NPE) during prediction and fallback ---
792+
if isinstance(e2, TimeoutError):
793+
raise
782794
if "java.lang.NullPointerException" in str(e):
783795
self.logger.warning(
784796
f"H2O backend crashed with NPE during predict_proba(). Returning dummy probabilities. Details: {e}"

ml_grid/pipeline/grid_search_cross_validate.py

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ def __init__(
178178
else:
179179
# Explicitly set CPU as the visible device for TensorFlow to avoid CUDA init errors
180180
tf.config.set_visible_devices([], "GPU")
181+
182+
tf.config.run_functions_eagerly(False)
181183
except Exception as e:
182184
self.logger.warning(f"Could not configure GPU for TensorFlow: {e}")
183185
finally:
@@ -537,22 +539,7 @@ def __init__(
537539
# --- OPTIMIZATION: Convert y to numpy for ALL models ---
538540
# This avoids expensive sklearn type_of_target checks on Pandas Series (overhead seen in profiling)
539541
# Most sklearn models handle numpy arrays efficiently.
540-
if isinstance(y_train_reset.dtype, pd.CategoricalDtype):
541-
y_train_search = y_train_reset.cat.codes.values
542-
elif hasattr(y_train_reset, "values"):
543-
y_train_search = y_train_reset.values
544-
else:
545-
y_train_search = y_train_reset
546-
547-
# --- OPTIMIZATION: Force integer encoding for y ---
548-
# This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~221s)
549-
# AND speeds up checks on float arrays (common in H2O/Pandas)
550-
if not pd.api.types.is_integer_dtype(y_train_search):
551-
try:
552-
y_train_search = y_train_search.astype(int)
553-
except (ValueError, TypeError):
554-
y_train_search, _ = pd.factorize(y_train_search, sort=True)
555-
y_train_search = y_train_search.astype(int)
542+
y_train_search = self._optimize_y(y_train_reset)
556543

557544
# --- OPTIMIZATION: Skip parameter validation overhead ---
558545
# Use set_config to ensure it propagates to all internal calls
@@ -683,21 +670,7 @@ def __init__(
683670
else:
684671
X_train_final = self.X_train.values # Use NumPy array for other models
685672
# Optimization: Pass numpy array for y to avoid pandas overhead in sklearn
686-
# If it was converted to categorical (unlikely for sklearn now), get codes
687-
if isinstance(self.y_train.dtype, pd.CategoricalDtype):
688-
y_train_final = self.y_train.cat.codes.values
689-
else:
690-
y_train_final = self.y_train.values
691-
692-
# --- OPTIMIZATION: Force integer encoding for y ---
693-
# This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~173s)
694-
# AND speeds up checks on float arrays (common in H2O/Pandas)
695-
if not pd.api.types.is_integer_dtype(y_train_final):
696-
try:
697-
y_train_final = y_train_final.astype(int)
698-
except (ValueError, TypeError):
699-
y_train_final, _ = pd.factorize(y_train_final, sort=True)
700-
y_train_final = y_train_final.astype(int)
673+
y_train_final = self._optimize_y(self.y_train)
701674

702675
scores = None
703676

@@ -1022,6 +995,27 @@ def __init__(
1022995

1023996
self._shutdown_h2o_if_needed(current_algorithm)
1024997

998+
def _optimize_y(self, y):
    """Return ``y`` as a contiguous integer NumPy array.

    Integer-encoding the target up front reduces the ``type_of_target`` /
    ``np.unique`` overhead that sklearn incurs on pandas, string/object and
    float targets.

    Args:
        y: Target labels. May be a pandas Series (categorical or not), a
            raw ``pd.Categorical``, a NumPy array, or a plain sequence such
            as a list or tuple.

    Returns:
        np.ndarray: C-contiguous integer array with one entry per label.
        Categorical input yields its category codes; values that cannot be
        cast to ``int`` are replaced by sorted ``pd.factorize`` codes.
    """
    if isinstance(getattr(y, "dtype", None), pd.CategoricalDtype):
        # A Series exposes its codes through the ``.cat`` accessor, while a
        # raw pd.Categorical has no ``.cat`` and exposes ``.codes`` directly.
        codes = y.cat.codes if hasattr(y, "cat") else y.codes
        y_opt = np.asarray(codes)
    elif hasattr(y, "values"):
        y_opt = y.values
    else:
        # Lists/tuples have no ``astype``; normalise to an ndarray so the
        # integer-encoding step below works for any array-like input.
        y_opt = np.asarray(y)

    # Force integer encoding
    if not pd.api.types.is_integer_dtype(y_opt):
        try:
            # NOTE: float labels are truncated by this cast (e.g. 1.0 -> 1).
            y_opt = y_opt.astype(int)
        except (ValueError, TypeError):
            # Non-numeric labels: map each distinct value to a sorted code.
            y_opt, _ = pd.factorize(y_opt, sort=True)
            y_opt = y_opt.astype(int)

    # Ensure contiguous array for speed in np.unique and other ops
    return np.ascontiguousarray(y_opt)
1018+
10251019
def _adjust_knn_parameters(self, parameter_space: Union[Dict, List[Dict]]):
10261020
"""
10271021
Dynamically adjusts the 'n_neighbors' parameter for KNN-based models

0 commit comments

Comments
 (0)