1212from scikeras .wrappers import KerasClassifier
1313from sklearn import metrics
1414from IPython .display import display
15+ from pandas .testing import assert_index_equal
1516from xgboost .core import XGBoostError
1617
1718# from sklearn.utils.testing import ignore_warnings
@@ -71,13 +72,10 @@ def __init__(
7172 sub_sample_parameter_val (int, optional): A value used to limit
7273 the number of iterations in a randomized search. Defaults to 100.
7374 """
74- warnings .filterwarnings ("ignore" )
75-
76- warnings .filterwarnings ("ignore" , category = FutureWarning )
77-
78- warnings .filterwarnings ("ignore" , category = ConvergenceWarning )
79-
75+ # Set each warning filter individually for robustness
8076 warnings .filterwarnings ("ignore" , category = UserWarning )
77+ warnings .filterwarnings ("ignore" , category = ConvergenceWarning )
78+ warnings .filterwarnings ("ignore" , category = FutureWarning )
8179
8280 self .global_params = global_parameters
8381
@@ -143,6 +141,11 @@ def __init__(
143141 start = time .time ()
144142
145143 current_algorithm = algorithm_implementation
144+
145+ # Silence CatBoost's verbose output to keep logs clean (only method names containing "catboost" are matched)
146+ if "catboost" in method_name .lower () and hasattr (current_algorithm , 'set_params' ):
147+ ml_grid_object .logger .info ("Silencing CatBoost verbose output." )
148+ current_algorithm .set_params (verbose = 0 )
146149
147150 if self .verbose >= 1 :
148151 print (f"algorithm_implementation: { algorithm_implementation } " )
@@ -257,8 +260,29 @@ def __init__(
257260 print ("Running hyperparameter search" )
258261
259262 try :
263+ # Verify initial index alignment
264+ try :
265+ assert_index_equal (self .X_train .index , self .y_train .index )
266+ ml_grid_object .logger .debug ("Index alignment PASSED before search.run_search" )
267+ except AssertionError :
268+ ml_grid_object .logger .error ("Index alignment FAILED before search.run_search" )
269+ raise
270+
271+ # Ensure y_train is a Series for consistency
272+ if not isinstance (self .y_train , pd .Series ):
273+ ml_grid_object .logger .error (f"y_train is not a pandas Series, but { type (self .y_train )} . Converting to Series." )
274+ self .y_train = pd .Series (self .y_train , index = self .X_train .index )
275+
276+ # CRITICAL FIX: Reset indices to ensure integer-based indexing for sklearn
277+ # This prevents "String indexing is not supported with 'axis=0'" errors
278+ X_train_reset = self .X_train .reset_index (drop = True )
279+ y_train_reset = self .y_train .reset_index (drop = True )
260280
261- current_algorithm = search .run_search (self .X_train , self .y_train )
281+ ml_grid_object .logger .debug (f"X_train index after reset: { X_train_reset .index [:5 ]} " )
282+ ml_grid_object .logger .debug (f"y_train index after reset: { y_train_reset .index [:5 ]} " )
283+
284+ # Pass reset data to search
285+ current_algorithm = search .run_search (X_train_reset , y_train_reset )
262286
263287 except XGBoostError as e :
264288 if 'cuda' in str (e ).lower () or 'memory' in str (e ).lower ():
@@ -281,22 +305,36 @@ def __init__(
281305 max_iter = n_iter_v ,
282306 ml_grid_object = ml_grid_object
283307 )
284- # Try again with non-gpu method.
285- current_algorithm = search .run_search (self .X_train , self .y_train )
308+ # Try again with CPU method and reset indices
309+ X_train_reset = self .X_train .reset_index (drop = True )
310+ y_train_reset = self .y_train .reset_index (drop = True )
311+ current_algorithm = search .run_search (X_train_reset , y_train_reset )
286312 else :
287313 print ("unknown xgb error" )
288314 print (e )
315+ raise
289316
290317 except Exception as e :
291- print (e )
292- print ("Failed to run search in gridsearch cross validate" )
293-
318+ if "String indexing is not supported with 'axis=0'" in str (e ):
319+ raise TypeError (
320+ "Pandas indexing error: 'String indexing is not supported with 'axis=0''. "
321+ "This typically happens when a pandas Series with a non-standard index is passed to a scikit-learn function. "
322+ "Ensure that target variables (y_train) are converted to numpy arrays using `.values` before fitting or cross-validation."
323+ ) from e
324+ else :
325+ ml_grid_object .logger .error (f"Failed to run search in gridsearch cross validate: { e } " , exc_info = True )
326+ # Re-raise the original exception to allow for higher-level handling if needed
327+ raise e
294328
295329
296330 if self .global_parameters .verbose >= 3 :
297331 print ("Fitting final model" )
298332 #current_algorithm = grid.best_estimator_
299- current_algorithm .fit (self .X_train , self .y_train )
333+ # Use numpy arrays for fitting the final model and for cross-validation.
334+ X_train_final_np = self .X_train .values
335+ y_train_values = self .y_train .values
336+
337+ current_algorithm .fit (X_train_final_np , y_train_values )
300338
301339 metric_list = self .metric_list
302340
@@ -336,8 +374,8 @@ def __init__(
336374 # Perform the cross-validation
337375 scores = cross_validate (
338376 current_algorithm ,
339- self . X_train ,
340- self . y_train ,
377+ X_train_final_np ,
378+ y_train_values , # This is already a numpy array
341379 scoring = self .metric_list ,
342380 cv = self .cv ,
343381 n_jobs = grid_n_jobs , # Full CV on final best model
@@ -354,8 +392,8 @@ def __init__(
354392 try :
355393 scores = cross_validate (
356394 current_algorithm ,
357- self . X_train ,
358- self . y_train ,
395+ X_train_final_np ,
396+ y_train_values ,
359397 scoring = self .metric_list ,
360398 cv = self .cv ,
361399 n_jobs = grid_n_jobs , # Full CV on final best model
@@ -483,4 +521,4 @@ def scale_data(X_train: pd.DataFrame) -> pd.DataFrame:
483521 return X_train_scaled
484522 else :
485523 # If data is already scaled, return it as is
486- return X_train
524+ return X_train
0 commit comments