Skip to content

Commit 5a6bde9

Browse files
author
SamoraHunter
committed
implemented fallback for feature selection/reduction
1 parent 3249996 commit 5a6bde9

1 file changed

Lines changed: 52 additions & 30 deletions

File tree

ml_grid/pipeline/data.py

Lines changed: 52 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -636,8 +636,7 @@ def _select_features_by_importance(self):
636636
target_n_features_eval = int(
637637
(target_n_features / 100) * self.X_train.shape[1]
638638
)
639-
# Ensure at least one feature is selected. The previous logic here
640-
# was incorrect and disabled feature selection entirely.
639+
# Ensure at least one feature is requested
641640
target_n_features_eval = max(1, target_n_features_eval)
642641

643642
if (
@@ -651,19 +650,27 @@ def _select_features_by_importance(self):
651650
self.logger.info(
652651
f"Shape of X_train before feature importance selection: {self.X_train.shape}"
653652
)
654-
655653
self.logger.info(
656654
f"Pre target_n_features {target_n_features}% reduction {target_n_features_eval}/{self.X_train.shape[1]}"
657655
)
658-
try:
659656

657+
# --- STEP 1: Snapshot Valid Data State ---
658+
# We save references to the current valid dataframes so we can revert
659+
# if the selection process returns garbage (empty data).
660+
X_train_backup = self.X_train
661+
y_train_backup = self.y_train
662+
X_test_backup = self.X_test
663+
y_test_backup = self.y_test
664+
X_test_orig_backup = self.X_test_orig
665+
666+
try:
660667
fim = feature_importance_methods()
661668
(
662-
self.X_train,
663-
self.y_train,
664-
self.X_test,
665-
self.y_test,
666-
self.X_test_orig,
669+
X_train_new,
670+
y_train_new,
671+
X_test_new,
672+
y_test_new,
673+
X_test_orig_new,
667674
) = fim.handle_feature_importance_methods(
668675
target_n_features_eval,
669676
X_train=self.X_train,
@@ -673,37 +680,52 @@ def _select_features_by_importance(self):
673680
X_test_orig=self.X_test_orig,
674681
ml_grid_object=self,
675682
)
676-
self._log_feature_transformation(
677-
"Feature Importance",
678-
features_before,
679-
self.X_train.shape[1],
680-
f"Selected top {target_n_features}% features using {fim.feature_method}.",
681-
)
683+
684+
# --- STEP 2: Validate the Result ---
685+
if X_train_new.shape[1] == 0:
686+
# If selection wiped everything out, trigger the fallback
687+
self.logger.warning(
688+
"Feature importance selection removed ALL features. Reverting to original feature set."
689+
)
690+
# Implicitly keeps the backup data (by NOT overwriting self.X_train)
691+
else:
692+
# Success: Apply the new data
693+
self.X_train = X_train_new
694+
self.y_train = y_train_new
695+
self.X_test = X_test_new
696+
self.y_test = y_test_new
697+
self.X_test_orig = X_test_orig_new
698+
699+
self._log_feature_transformation(
700+
"Feature Importance",
701+
features_before,
702+
self.X_train.shape[1],
703+
f"Selected top {target_n_features}% features using {fim.feature_method}.",
704+
)
705+
self.logger.info(
706+
f"Shape of X_train after feature importance selection: {self.X_train.shape}"
707+
)
708+
682709
self._assert_index_alignment(
683710
self.X_train,
684711
self.y_train,
685712
"After feature selection and y_train reset",
686713
)
687714

688-
self.logger.info(
689-
f"Shape of X_train after feature importance selection: {self.X_train.shape}"
690-
)
691-
692-
if self.X_train.shape[1] == 0:
693-
raise ValueError(
694-
"Feature importance selection removed all features."
695-
)
696-
697-
# Safeguard: Ensure X_train is not empty after feature selection
698-
if self.X_train.shape[1] == 0:
699-
raise ValueError(
700-
"All features were removed by the feature importance selection method. X_train is empty."
701-
)
702-
703715
except Exception as e:
716+
# --- STEP 3: Restore State on Error ---
704717
self.logger.error(
705718
f"Feature importance selection failed: {e}", exc_info=True
706719
)
720+
self.logger.warning("Reverting to pre-selection features due to error.")
721+
722+
# Explicitly ensure we are pointing to the valid backups
723+
self.X_train = X_train_backup
724+
self.y_train = y_train_backup
725+
self.X_test = X_test_backup
726+
self.y_test = y_test_backup
727+
self.X_test_orig = X_test_orig_backup
728+
707729
self._assert_index_alignment(
708730
self.X_train, self.y_train, "After feature selection block"
709731
)

0 commit comments

Comments
 (0)