Skip to content

Commit 780e228

Browse files
author
SamoraHunter
committed
linting
1 parent a371302 commit 780e228

23 files changed

Lines changed: 547 additions & 385 deletions

ml_grid/model_classes/H2ODeepLearningClassifier.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
import pandas as pd
32
from h2o.estimators import H2ODeepLearningEstimator
43
from skopt.space import Categorical, Integer, Real

ml_grid/model_classes/H2OGLMClassifier.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import numpy as np
21
import pandas as pd
32
from h2o.estimators import H2OGeneralizedLinearEstimator
4-
from skopt.space import Real, Categorical, Integer
3+
from skopt.space import Real, Categorical
54

65
from .H2OBaseClassifier import H2OBaseClassifier
76

ml_grid/model_classes/NeuralNetworkClassifier_class.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def __init__(
5454

5555
self.parameter_space: Union[List[Dict[str, Any]], Dict[str, Any]]
5656

57-
5857
from ml_grid.util.global_params import global_parameters
5958

6059
if global_parameters.bayessearch:

ml_grid/model_classes/knn_gpu_classifier_class.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class for the KNNWrapper (GPU-accelerated KNN). It provides parameter spaces for
1111
import pandas as pd
1212
from skopt.space import Categorical, Integer
1313

14-
#from ml_grid.model_classes.knn_wrapper_class import KNNWrapper
14+
# from ml_grid.model_classes.knn_wrapper_class import KNNWrapper
1515
from ml_grid.util import param_space
1616
from ml_grid.util.global_params import global_parameters
1717

@@ -45,7 +45,7 @@ def __init__(
4545
self.y: Optional[pd.Series] = y
4646

4747
# Initialize KNNWrapper for GPU support
48-
#self.algorithm_implementation: KNNWrapper = KNNWrapper()
48+
# self.algorithm_implementation: KNNWrapper = KNNWrapper()
4949
self.method_name: str = "knn__gpu"
5050

5151
# Define the parameter vector space

ml_grid/pipeline/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ def _select_features_by_importance(self):
718718
f"Feature importance selection failed: {e}", exc_info=True
719719
)
720720
self.logger.warning("Reverting to pre-selection features due to error.")
721-
721+
722722
# Explicitly ensure we are pointing to the valid backups
723723
self.X_train = X_train_backup
724724
self.y_train = y_train_backup

ml_grid/pipeline/data_correlation_matrix.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def handle_correlation_matrix(
4545
use_gpu = False
4646
try:
4747
import cupy as cp
48-
48+
4949
# Check if CUDA is available first (before trying to access device)
5050
if not cp.cuda.is_available():
5151
logger.info("No CUDA-capable GPU detected. Using CPU.")
@@ -72,7 +72,7 @@ def handle_correlation_matrix(
7272
logger.info(f"CUDA runtime error (using CPU): {cuda_err}")
7373
except Exception as device_err:
7474
logger.info(f"Could not access GPU device (using CPU): {device_err}")
75-
75+
7676
except ImportError:
7777
logger.info("CuPy not installed. Using CPU-only mode.")
7878
except Exception as e:
@@ -213,4 +213,4 @@ def _process_on_cpu(
213213
final_drop_set = existing_drops.union(newly_identified_drops)
214214

215215
logger.info(f"CPU complete. Total columns to drop: {len(final_drop_set)}")
216-
return sorted(list(final_drop_set))
216+
return sorted(list(final_drop_set))

ml_grid/pipeline/data_feature_importance_methods.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from ml_grid.pipeline.data_feature_methods import feature_methods
77

8+
89
class feature_importance_methods:
910
"""A class to handle feature selection using different importance methods."""
1011

@@ -47,7 +48,7 @@ def handle_feature_importance_methods(
4748
"""
4849

4950
logger = logging.getLogger("ml_grid")
50-
51+
5152
# Work with copies to avoid modifying the original DataFrames in the calling scope
5253
X_train_copy = X_train.copy()
5354
X_test_copy = X_test.copy()
@@ -56,7 +57,7 @@ def handle_feature_importance_methods(
5657
self.feature_method = ml_grid_object.local_param_dict.get(
5758
"feature_selection_method"
5859
)
59-
60+
6061
# Default to all features initially
6162
features = list(X_train_copy.columns)
6263

@@ -75,14 +76,14 @@ def handle_feature_importance_methods(
7576
)
7677

7778
logger.info(f"target_n_features: {target_n_features}")
78-
79+
7980
# --- Column Validation ---
8081
# Filter the requested 'features' to ensure they actually exist in the DataFrame.
81-
# This handles cases where selectors return indices, 'ColumnX' names, or
82+
# This handles cases where selectors return indices, 'ColumnX' names, or
8283
# names that were dropped/renamed in previous pipeline steps.
83-
84+
8485
valid_features = [f for f in features if f in X_train_copy.columns]
85-
86+
8687
if len(valid_features) == 0:
8788
logger.warning(
8889
f"Feature selection ({self.feature_method}) returned 0 valid features. "
@@ -91,16 +92,18 @@ def handle_feature_importance_methods(
9192
)
9293
valid_features = list(X_train_copy.columns)
9394
elif len(valid_features) < len(features):
94-
logger.warning(
95-
f"{len(features) - len(valid_features)} selected features were not found in X_train columns. Dropped invalid keys."
96-
)
95+
logger.warning(
96+
f"{len(features) - len(valid_features)} selected features were not found in X_train columns. Dropped invalid keys."
97+
)
9798

98-
logger.info(f"Final selected features ({len(valid_features)}): {valid_features}")
99+
logger.info(
100+
f"Final selected features ({len(valid_features)}): {valid_features}"
101+
)
99102

100-
# Apply the validated selection
103+
# Apply the validated selection
101104
X_train_out = X_train_copy[valid_features]
102105
X_test_out = X_test_copy[valid_features]
103106
X_test_orig_out = X_test_orig_copy[valid_features]
104107

105108
# The y series do not need to be modified, as they are already aligned.
106-
return X_train_out, y_train, X_test_out, y_test, X_test_orig_out
109+
return X_train_out, y_train, X_test_out, y_test, X_test_orig_out

ml_grid/pipeline/data_train_test_split.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,17 @@ def get_data_split(
5252
class_counts = y.value_counts()
5353
min_class_count = class_counts.min()
5454
use_stratify = min_class_count >= 2
55-
55+
5656
if not use_stratify:
5757
logger.warning(
5858
f"Cannot use stratified split: smallest class has only {min_class_count} sample(s). "
5959
f"Class distribution: {class_counts.to_dict()}. Using random split instead."
6060
)
6161
# Also disable resampling since we can't properly balance with so few samples
6262
if local_param_dict.get("resample") is not None:
63-
logger.warning("Disabling resampling due to insufficient samples in minority class.")
63+
logger.warning(
64+
"Disabling resampling due to insufficient samples in minority class."
65+
)
6466
local_param_dict["resample"] = None
6567

6668
# First, split into a preliminary training set and a final hold-out test set.
@@ -116,7 +118,7 @@ def get_data_split(
116118
train_class_counts = y_train_processed.value_counts()
117119
min_train_class_count = train_class_counts.min()
118120
use_stratify_second = min_train_class_count >= 2
119-
121+
120122
if not use_stratify_second:
121123
logger.warning(
122124
f"Cannot use stratified split for train/validation: smallest class has only "
@@ -161,4 +163,4 @@ def is_valid_shape(input_data: Union[np.ndarray, pd.DataFrame]) -> bool:
161163

162164
else:
163165
# Input data is neither a numpy array nor a pandas DataFrame
164-
return False
166+
return False

ml_grid/pipeline/grid_search_cross_validate.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,15 @@
11
import time
2-
import traceback
32
import logging
43
import warnings
54
from typing import Any, Dict, List, Optional, Union
65

7-
import keras
86
import numpy as np
97
import pandas as pd
108
import tensorflow as tf
119
import torch
1210
from IPython.display import clear_output
13-
from numpy import absolute, mean, std
1411
from scikeras.wrappers import KerasClassifier
1512
from sklearn import metrics
16-
from IPython.display import display
17-
from catboost import CatBoostError
1813
from pandas.testing import assert_index_equal
1914
from xgboost.core import XGBoostError
2015
from ml_grid.model_classes.H2OAutoMLClassifier import H2OAutoMLClassifier
@@ -34,17 +29,8 @@
3429
# from sklearn.utils.testing import ignore_warnings
3530
from sklearn.exceptions import ConvergenceWarning
3631
from sklearn.metrics import *
37-
from sklearn.metrics import (
38-
classification_report,
39-
f1_score,
40-
make_scorer,
41-
matthews_corrcoef,
42-
roc_auc_score,
43-
)
4432
from sklearn.model_selection import (
45-
GridSearchCV,
4633
ParameterGrid,
47-
RandomizedSearchCV,
4834
RepeatedKFold,
4935
KFold,
5036
cross_validate,
@@ -57,7 +43,7 @@
5743
from ml_grid.util.project_score_save import project_score_save_class
5844
from ml_grid.util.validate_parameters import validate_parameters_helper
5945
from sklearn.preprocessing import MinMaxScaler
60-
from ml_grid.util.bayes_utils import calculate_combinations, is_skopt_space
46+
from ml_grid.util.bayes_utils import is_skopt_space
6147
from skopt.space import Categorical
6248

6349

@@ -447,9 +433,7 @@ def __init__(
447433
# Define default scores (e.g., mean score of 0.5 for binary classification)
448434
# Default scores if cross-validation fails
449435
default_scores = {
450-
"test_accuracy": [
451-
0.5 # Default to random classifier performance
452-
],
436+
"test_accuracy": [0.5], # Default to random classifier performance
453437
"test_f1": [0.5], # Default F1 score (again, 0.5 for random classification)
454438
"test_auc": [0.5], # Default ROC AUC score (0.5 for random classifier)
455439
"fit_time": [0], # No fitting time if the model fails

ml_grid/pipeline/hyperparameter_search.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
)
2323
from ml_grid.model_classes.H2OXGBoostClassifier import H2OXGBoostClassifier
2424
from ml_grid.model_classes.keras_classifier_class import KerasClassifierClass
25-
#from ml_grid.model_classes.knn_wrapper_class import KNNWrapper
25+
26+
# from ml_grid.model_classes.knn_wrapper_class import KNNWrapper
2627
from ml_grid.model_classes.NeuralNetworkKerasClassifier import NeuralNetworkClassifier
2728
from ml_grid.util.global_params import global_parameters
2829
from ml_grid.util.validate_parameters import validate_parameters_helper
@@ -99,7 +100,7 @@ def __init__(
99100

100101
# Custom wrappers that might not be recognized by is_classifier
101102
custom_classifier_types = (
102-
#KNNWrapper,
103+
# KNNWrapper,
103104
H2OAutoMLClassifier,
104105
H2OGBMClassifier,
105106
H2ODRFClassifier,
@@ -192,7 +193,8 @@ def run_search(self, X_train: pd.DataFrame, y_train: pd.Series) -> BaseEstimator
192193

193194
# Also limit n_jobs for Bayesian search and other specific wrappers to avoid issues.
194195
is_single_threaded_search = isinstance(
195-
self.algorithm, ( KerasClassifierClass, NeuralNetworkClassifier) #KNNWrapper,
196+
self.algorithm,
197+
(KerasClassifierClass, NeuralNetworkClassifier), # KNNWrapper,
196198
)
197199

198200
if is_h2o_model or is_single_threaded_search or bayessearch:

0 commit comments

Comments
 (0)