@@ -178,6 +178,8 @@ def __init__(
178178 else :
179179 # Explicitly set CPU as the visible device for TensorFlow to avoid CUDA init errors
180180 tf .config .set_visible_devices ([], "GPU" )
181+
182+ tf .config .run_functions_eagerly (False )
181183 except Exception as e :
182184 self .logger .warning (f"Could not configure GPU for TensorFlow: { e } " )
183185 finally :
@@ -537,22 +539,7 @@ def __init__(
537539 # --- OPTIMIZATION: Convert y to numpy for ALL models ---
538540 # This avoids expensive sklearn type_of_target checks on Pandas Series (overhead seen in profiling)
539541 # Most sklearn models handle numpy arrays efficiently.
540- if isinstance (y_train_reset .dtype , pd .CategoricalDtype ):
541- y_train_search = y_train_reset .cat .codes .values
542- elif hasattr (y_train_reset , "values" ):
543- y_train_search = y_train_reset .values
544- else :
545- y_train_search = y_train_reset
546-
547- # --- OPTIMIZATION: Force integer encoding for y ---
548- # This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~221s)
549- # AND speeds up checks on float arrays (common in H2O/Pandas)
550- if not pd .api .types .is_integer_dtype (y_train_search ):
551- try :
552- y_train_search = y_train_search .astype (int )
553- except (ValueError , TypeError ):
554- y_train_search , _ = pd .factorize (y_train_search , sort = True )
555- y_train_search = y_train_search .astype (int )
542+ y_train_search = self ._optimize_y (y_train_reset )
556543
557544 # --- OPTIMIZATION: Skip parameter validation overhead ---
558545 # Use set_config to ensure it propagates to all internal calls
@@ -683,21 +670,7 @@ def __init__(
683670 else :
684671 X_train_final = self .X_train .values # Use NumPy array for other models
685672 # Optimization: Pass numpy array for y to avoid pandas overhead in sklearn
686- # If it was converted to categorical (unlikely for sklearn now), get codes
687- if isinstance (self .y_train .dtype , pd .CategoricalDtype ):
688- y_train_final = self .y_train .cat .codes .values
689- else :
690- y_train_final = self .y_train .values
691-
692- # --- OPTIMIZATION: Force integer encoding for y ---
693- # This avoids expensive np.unique checks on string/object arrays in sklearn (arraysetops.py:unique ~173s)
694- # AND speeds up checks on float arrays (common in H2O/Pandas)
695- if not pd .api .types .is_integer_dtype (y_train_final ):
696- try :
697- y_train_final = y_train_final .astype (int )
698- except (ValueError , TypeError ):
699- y_train_final , _ = pd .factorize (y_train_final , sort = True )
700- y_train_final = y_train_final .astype (int )
673+ y_train_final = self ._optimize_y (self .y_train )
701674
702675 scores = None
703676
@@ -1022,6 +995,27 @@ def __init__(
1022995
1023996 self ._shutdown_h2o_if_needed (current_algorithm )
1024997
998+ def _optimize_y (self , y ):
999+ """Helper to optimize y for sklearn/H2O to reduce type_of_target overhead."""
1000+ # Convert to numpy if it's a Series or Categorical
1001+ if hasattr (y , "dtype" ) and isinstance (y .dtype , pd .CategoricalDtype ):
1002+ y_opt = y .cat .codes .values
1003+ elif hasattr (y , "values" ):
1004+ y_opt = y .values
1005+ else :
1006+ y_opt = y
1007+
1008+ # Force integer encoding
1009+ if not pd .api .types .is_integer_dtype (y_opt ):
1010+ try :
1011+ y_opt = y_opt .astype (int )
1012+ except (ValueError , TypeError ):
1013+ y_opt , _ = pd .factorize (y_opt , sort = True )
1014+ y_opt = y_opt .astype (int )
1015+
1016+ # Ensure contiguous array for speed in np.unique and other ops
1017+ return np .ascontiguousarray (y_opt )
1018+
10251019 def _adjust_knn_parameters (self , parameter_space : Union [Dict , List [Dict ]]):
10261020 """
10271021 Dynamically adjusts the 'n_neighbors' parameter for KNN-based models
0 commit comments