Skip to content

Commit 9a6a9b4

Browse files
committed
Formatting, formatted docstrings for Sphinx, minor fixes, added typing,
1 parent f21256c commit 9a6a9b4

94 files changed

Lines changed: 4458 additions & 2373 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
Lines changed: 82 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,52 @@
1+
from typing import Any, Dict, Optional
2+
13
import h2o
24
from h2o.automl import H2OAutoML
35
import numpy as np
46
import pandas as pd
57
from sklearn.base import BaseEstimator, ClassifierMixin
6-
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
8+
from sklearn.utils.validation import check_is_fitted
79

810

911
class H2OAutoMLClassifier(BaseEstimator, ClassifierMixin):
10-
def __init__(self, max_runtime_secs=360, nfolds=2, seed=1):
12+
"""A scikit-learn compatible wrapper for H2O's AutoML.
13+
14+
This class allows H2O's AutoML to be used as a standard scikit-learn
15+
classifier, making it compatible with tools like GridSearchCV and
16+
BayesSearchCV.
17+
"""
18+
19+
def __init__(
20+
self, max_runtime_secs: int = 360, nfolds: int = 2, seed: int = 1
21+
):
22+
"""Initializes the H2OAutoMLClassifier.
23+
24+
Args:
25+
max_runtime_secs (int): Maximum time in seconds to run the AutoML process.
26+
nfolds (int): Number of folds for cross-validation.
27+
seed (int): Random seed for reproducibility.
28+
"""
1129
self.max_runtime_secs = max_runtime_secs
1230
self.nfolds = nfolds
1331
self.seed = seed
14-
self.automl = None
32+
self.automl: Optional[H2OAutoML] = None
33+
self.classes_: Optional[np.ndarray] = None
34+
35+
def fit(self, X: pd.DataFrame, y: pd.Series) -> "H2OAutoMLClassifier":
36+
"""Fits the H2O AutoML model.
1537
16-
def fit(self, X, y):
17-
# X, y = check_X_y(X, y)
38+
This method initializes an H2O cluster, converts the pandas DataFrame
39+
and Series to H2O Frames, and then trains the AutoML model.
40+
41+
Args:
42+
X (pd.DataFrame): The training input samples.
43+
y (pd.Series): The target values.
44+
45+
Returns:
46+
H2OAutoMLClassifier: The fitted estimator.
47+
"""
1848
self.classes_ = np.unique(y)
19-
# outcome_var = y.columns[0]
49+
2050
try:
2151
outcome_var = y.columns[0]
2252
except:
@@ -31,10 +61,7 @@ def fit(self, X, y):
3161
pass
3262

3363
h2o.init()
34-
# train_df = pd.concat([pd.DataFrame(X), pd.Series(y)], axis=1)
35-
3664
train_df = pd.concat([X, y], axis=1)
37-
3865
train_h2o = h2o.H2OFrame(train_df)
3966

4067
train_h2o[y_n] = train_h2o[y_n].asfactor()
@@ -49,33 +76,69 @@ def fit(self, X, y):
4976
self.automl.train(y=y_n, x=x, training_frame=train_h2o)
5077
return self
5178

52-
def predict(self, X):
79+
def predict(self, X: pd.DataFrame) -> np.ndarray:
80+
"""Predicts class labels for samples in X.
81+
82+
Args:
83+
X (pd.DataFrame): The input samples to predict.
84+
85+
Returns:
86+
np.ndarray: The predicted class labels.
87+
"""
5388
check_is_fitted(self)
54-
# X = check_array(X)
55-
# test_h2o = h2o.H2OFrame(pd.DataFrame(X))
5689
test_h2o = h2o.H2OFrame(X)
5790
predictions = self.automl.leader.predict(test_h2o)
5891

59-
# return predictions[:,0]
6092
return predictions["predict"].as_data_frame().values
6193

62-
def predict_proba(self, X):
94+
def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
95+
"""Predicts class probabilities for samples in X.
96+
97+
Note:
98+
This method is not implemented for H2O AutoML.
99+
100+
Args:
101+
X (pd.DataFrame): The input samples.
102+
103+
Raises:
104+
NotImplementedError: H2O AutoML does not support predict_proba.
105+
"""
63106
raise NotImplementedError("H2O AutoML does not support predict_proba.")
64107

65-
def get_params(self, deep=True):
108+
def get_params(self, deep: bool = True) -> Dict[str, Any]:
109+
"""Gets parameters for this estimator.
110+
111+
Args:
112+
deep (bool): If True, will return the parameters for this estimator and
113+
contained subobjects that are estimators.
66114
115+
Returns:
116+
Dict[str, Any]: Parameter names mapped to their values.
117+
"""
67118
return {
68119
"max_runtime_secs": self.max_runtime_secs,
69120
"nfolds": self.nfolds,
70121
"seed": self.seed,
71122
}
72123

73-
def set_params(self, **params):
124+
def set_params(self, **params: Any) -> "H2OAutoMLClassifier":
125+
"""Sets the parameters of this estimator.
126+
127+
Args:
128+
**params (Any): Estimator parameters.
129+
130+
Returns:
131+
H2OAutoMLClassifier: The instance with updated parameters.
132+
"""
74133
for param, value in params.items():
75134
setattr(self, param, value)
76135
return self
77136

78-
def get_leader_params(
79-
self,
80-
):
137+
def get_leader_params(self) -> Dict[str, Any]:
138+
"""Gets the parameters of the best model found by AutoML.
139+
140+
Returns:
141+
Dict[str, Any]: A dictionary of the leader model's parameters.
142+
"""
143+
check_is_fitted(self)
81144
return self.automl.leader.params

ml_grid/model_classes/NeuralNetworkClassifier_class.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
"""Define NeuralNetworkClassifier class"""
1+
"""Defines the NeuralNetworkClassifier model class."""
22

3+
from typing import Optional
4+
5+
import pandas as pd
36
from ml_grid.util import param_space
47

58
# from ml_grid.model_classes.nni_sklearn_wrapper import *
@@ -9,14 +12,24 @@
912

1013

1114
class NeuralNetworkClassifier_class:
12-
"""NeuralNetworkClassifier."""
15+
"""NeuralNetworkClassifier with a predefined parameter space."""
1316

14-
def __init__(self, X=None, y=None, parameter_space_size=None, global_parameters=None):
15-
"""_summary_
17+
def __init__(
18+
self,
19+
X: Optional[pd.DataFrame] = None,
20+
y: Optional[pd.Series] = None,
21+
parameter_space_size: Optional[str] = None,
22+
):
23+
"""Initializes the NeuralNetworkClassifier_class.
1624
1725
Args:
18-
X_train (_type_): _description_
19-
y_train (_type_): _description_
26+
X (Optional[pd.DataFrame]): Feature matrix for training.
27+
Defaults to None.
28+
y (Optional[pd.Series]): Target vector for training.
29+
Defaults to None.
30+
parameter_space_size (Optional[str]): Size of the parameter space for
31+
optimization. This is not used in the current implementation
32+
as the parameter space is hardcoded. Defaults to None.
2033
"""
2134
self.X = X
2235
self.y = y

ml_grid/model_classes/adaboost_classifier_class.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from typing import Optional
2+
3+
import pandas as pd
14
from sklearn.ensemble import AdaBoostClassifier
25
from ml_grid.util import param_space
36
from ml_grid.util.global_params import global_parameters
@@ -8,14 +11,21 @@
811
class adaboost_class:
912
"""AdaBoostClassifier with support for both Bayesian and non-Bayesian parameter spaces."""
1013

11-
def __init__(self, X=None, y=None, parameter_space_size=None):
12-
"""
13-
Initialize the adaboost_class.
14+
def __init__(
15+
self,
16+
X: Optional[pd.DataFrame] = None,
17+
y: Optional[pd.Series] = None,
18+
parameter_space_size: Optional[str] = None,
19+
):
20+
"""Initializes the adaboost_class.
1421
1522
Args:
16-
X (_type_): Feature matrix for training (optional).
17-
y (_type_): Target vector for training (optional).
18-
parameter_space_size (_type_): Size of the parameter space for optimization.
23+
X (Optional[pd.DataFrame]): Feature matrix for training.
24+
Defaults to None.
25+
y (Optional[pd.Series]): Target vector for training.
26+
Defaults to None.
27+
parameter_space_size (Optional[str]): Size of the parameter space for
28+
optimization. Defaults to None.
1929
"""
2030
global_params = global_parameters
2131
self.X = X
Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,56 @@
1+
from typing import Any, Union
2+
3+
import numpy as np
4+
import pandas as pd
15
from catboost import CatBoostClassifier
26
from sklearn.base import BaseEstimator, ClassifierMixin
37

48

59
class CatBoostSKLearnWrapper(BaseEstimator, ClassifierMixin):
6-
def __init__(self, **kwargs):
10+
"""A scikit-learn compatible wrapper for the CatBoostClassifier."""
11+
12+
def __init__(self, **kwargs: Any):
13+
"""Initializes the CatBoostSKLearnWrapper.
14+
15+
Args:
16+
**kwargs (Any): Keyword arguments passed directly to the
17+
`catboost.CatBoostClassifier`.
18+
"""
719
self.model = CatBoostClassifier(**kwargs)
820

9-
def fit(self, X, y):
21+
def fit(
22+
self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]
23+
) -> "CatBoostSKLearnWrapper":
24+
"""Fits the CatBoost model.
25+
26+
Args:
27+
X (Union[pd.DataFrame, np.ndarray]): The training input samples.
28+
y (Union[pd.Series, np.ndarray]): The target values.
29+
30+
Returns:
31+
CatBoostSKLearnWrapper: The fitted estimator.
32+
"""
1033
self.model.fit(X, y)
34+
return self
1135

12-
def predict(self, X):
36+
def predict(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
37+
"""Predicts class labels for samples in X.
38+
39+
Args:
40+
X (Union[pd.DataFrame, np.ndarray]): The input samples to predict.
41+
42+
Returns:
43+
np.ndarray: The predicted class labels.
44+
"""
1345
return self.model.predict(X)
1446

15-
def predict_proba(self, X):
47+
def predict_proba(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
48+
"""Predicts class probabilities for samples in X.
49+
50+
Args:
51+
X (Union[pd.DataFrame, np.ndarray]): The input samples.
52+
53+
Returns:
54+
np.ndarray: The class probabilities of the input samples.
55+
"""
1656
return self.model.predict_proba(X)

ml_grid/model_classes/catboost_classifier_class.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
from typing import Optional
2+
13
import numpy as np
4+
import pandas as pd
25
from catboost import CatBoostClassifier
36
from skopt.space import Categorical, Real, Integer
47
from ml_grid.util import param_space
@@ -7,14 +10,21 @@
710
class CatBoost_class:
811
"""CatBoost Classifier with hyperparameter tuning."""
912

10-
def __init__(self, X=None, y=None, parameter_space_size=None):
11-
"""
12-
Initialize the CatBoost_class.
13+
def __init__(
14+
self,
15+
X: Optional[pd.DataFrame] = None,
16+
y: Optional[pd.Series] = None,
17+
parameter_space_size: Optional[str] = None,
18+
):
19+
"""Initializes the CatBoost_class.
1320
1421
Args:
15-
X (_type_): Feature matrix for training (optional).
16-
y (_type_): Target vector for training (optional).
17-
parameter_space_size (_type_): Size of the parameter space for optimization.
22+
X (Optional[pd.DataFrame]): Feature matrix for training.
23+
Defaults to None.
24+
y (Optional[pd.Series]): Target vector for training.
25+
Defaults to None.
26+
parameter_space_size (Optional[str]): Size of the parameter space for
27+
optimization. Defaults to None.
1828
"""
1929
global_params = global_parameters # Fetch global parameters
2030
self.X = X

0 commit comments

Comments
 (0)