from hpsklearn.components._base import validate
from hyperopt.pyll import scope, Apply
from hyperopt import hp
from sklearn import ensemble
import numpy as np
import numpy.typing as npt
import typing


@scope.define
def sklearn_HistGradientBoostingClassifier(*args, **kwargs):
    return ensemble.HistGradientBoostingClassifier(*args, **kwargs)


@scope.define
def sklearn_HistGradientBoostingRegressor(*args, **kwargs):
return ensemble.HistGradientBoostingRegressor(*args, **kwargs)
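

# These scope symbols build hyperopt pyll graph nodes rather than fitted
# estimators; evaluating the graph instantiates the sklearn model. A minimal
# sketch (illustrative, not part of the module API):
#
#   from hyperopt.pyll import rec_eval
#   node = scope.sklearn_HistGradientBoostingClassifier(max_iter=50)
#   clf = rec_eval(node)  # -> an unfitted HistGradientBoostingClassifier

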
def _hist_gradient_boosting_reg_loss(name: str):
"""
Declaration of search space 'criterion' parameter for
hist gradient boosting regressor
Parameter 'poisson' is also available. Not implemented since
'poisson' is only available for non-zero, non-negative y data
Parameter 'gamma' is also available. Not implemented since
'gamma' is only available for non-negative y data
"""
return hp.choice(name, ["squared_error", "absolute_error"])
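
# Note: hp.choice samples uniformly over the listed options, so each of the
# two losses above is tried with probability 0.5.
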
def _hist_gradient_boosting_learning_rate(name: str):
"""
Declaration search space 'learning_rate' parameter
"""
return hp.lognormal(name, np.log(0.01), np.log(10.0))
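
# Note: hp.lognormal(mu, sigma) returns exp(normal(mu, sigma)), so the median
# draw here is 0.01 and one standard deviation spans a factor of 10, i.e.
# roughly 68% of sampled learning rates fall within [0.001, 0.1].
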
def _hist_gradient_boosting_max_leaf_nodes(name: str):
"""
Declaration search space 'max_leaf_nodes' parameter
"""
return scope.int(hp.qnormal(name, 31, 5, 1))
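
# Note: hp.qnormal(mu, sigma, q) rounds a normal draw to a multiple of q, so
# this samples integers centred on sklearn's default of 31 with standard
# deviation 5; scope.int casts the sampled float to int at evaluation time.
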
def _hist_gradient_boosting_max_depth(name: str):
"""
Declaration search space 'max_depth' parameter
"""
return hp.pchoice(name, [
(0.15, 2),
(0.7, 3), # most common choice.
(0.15, 4),
])
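
# Note: hp.pchoice draws from explicitly weighted options; the
# (probability, value) pairs above must sum to 1, so depth 3 is sampled 70%
# of the time and depths 2 and 4 each 15% of the time.
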
def _hist_gradient_boosting_min_samples_leaf(name: str):
"""
Declaration search space 'min_samples_leaf' parameter
"""
return scope.int(hp.qnormal(name, 20, 2, 1))
def _hist_gradient_boosting_random_state(name: str):
"""
Declaration search space 'random_state' parameter
"""
return hp.randint(name, 5)
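
# Note: hp.randint(label, upper) draws an integer uniformly from [0, upper),
# so the random_state search is limited to the five seeds 0 through 4.
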
def _hist_gradient_boosting_max_features(name: str):
"""
Declaration search space 'max_features' parameter
"""
return hp.uniform(name + ".frac", 0.5, 1.)
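
# Note: the fraction is sampled uniformly from [0.5, 1.0]; sklearn interprets
# a float max_features as the fraction of features considered at each split
# (assumption: a scikit-learn release recent enough to support max_features
# on the hist gradient boosting estimators).

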
@validate(params=["max_bins"],
validation_test=lambda param: not isinstance(param, int) or 0 < param <= 255,
msg="Invalid parameter '%s' with value '%s'. "
"Parameter value must be Parameter value must be within (0, 255].")
@validate(params=["max_leaf_nodes"],
validation_test=lambda param: not isinstance(param, int) or param > 1,
msg="Invalid parameter '%s' with value '%s'. "
"Parameter value must be strictly higher than 1.")
def _hist_gradient_boosting_hp_space(
name_func,
learning_rate: typing.Union[float, Apply] = None,
max_iter: typing.Union[int, Apply] = 100,
max_leaf_nodes: typing.Union[int, Apply] = "Undefined",
max_depth: typing.Union[int, Apply] = "Undefined",
min_samples_leaf: typing.Union[int, Apply] = None,
l2_regularization: float = 0,
max_features: typing.Union[float, Apply] = None,
max_bins: int = 255,
categorical_features: npt.ArrayLike = None,
monotonic_cst: npt.ArrayLike = None,
warm_start: bool = False,
early_stopping: typing.Union[str, bool] = "auto",
        scoring: typing.Union[str, typing.Callable] = "loss",
        validation_fraction: float = 0.1,
        n_iter_no_change: int = 10,
        tol: float = 1e-7,
        verbose: int = 0,
random_state=None,
**kwargs
):
"""
Hyper parameter search space for
hist gradient boosting classifier
hist gradient boosting regressor
"""
    if not early_stopping and ((isinstance(scoring, str) or callable(scoring)) or
                               isinstance(validation_fraction, float) or
                               isinstance(n_iter_no_change, int)):
        raise ValueError("Invalid declaration of parameters. "
                         "Parameters 'scoring', 'validation_fraction' and 'n_iter_no_change' "
                         "can only be specified in addition to 'early_stopping'.")

    hp_space = dict(
learning_rate=_hist_gradient_boosting_learning_rate(name_func("learning_rate"))
if learning_rate is None else learning_rate,
max_iter=max_iter,
max_leaf_nodes=_hist_gradient_boosting_max_leaf_nodes(name_func("max_leaf_nodes"))
if max_leaf_nodes == "Undefined" else max_leaf_nodes,
max_depth=_hist_gradient_boosting_max_depth(name_func("max_depth"))
if max_depth == "Undefined" else max_depth,
min_samples_leaf=_hist_gradient_boosting_min_samples_leaf(name_func("min_samples_leaf"))
if min_samples_leaf is None else min_samples_leaf,
l2_regularization=l2_regularization,
max_features=_hist_gradient_boosting_max_features(name_func("max_features"))
if max_features is None else max_features,
max_bins=max_bins,
categorical_features=categorical_features,
monotonic_cst=monotonic_cst,
warm_start=warm_start,
early_stopping=early_stopping,
scoring=scoring,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change,
tol=tol,
verbose=verbose,
random_state=_hist_gradient_boosting_random_state(name_func("random_state"))
if random_state is None else random_state,
**kwargs
)
return hp_space
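

# Sketch of the returned space (the lambda label function below is a
# hypothetical example):
#
#   space = _hist_gradient_boosting_hp_space(lambda msg: f"demo.{msg}")
#   space["max_iter"]       # -> 100: fixed arguments pass through unchanged
#   space["learning_rate"]  # -> a hyperopt Apply node, sampled anew per trial

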
@validate(params=["loss"],
          validation_test=lambda param: not isinstance(param, str) or param in ("log_loss", "auto",
                                                                                "binary_crossentropy",
                                                                                "categorical_crossentropy"),
          msg="Invalid parameter '%s' with value '%s'. "
              "Choose 'log_loss', 'auto', 'binary_crossentropy' or 'categorical_crossentropy'")
def hist_gradient_boosting_classifier(name: str, loss: typing.Union[str, Apply] = "log_loss", **kwargs):
"""
Return a pyll graph with hyperparameters that will construct
a sklearn.ensemble.HistGradientBoostingClassifier model.
Args:
name: name | str
loss: 'log_loss' | str

    See help(hpsklearn.components._hist_gradient_boosting._hist_gradient_boosting_hp_space) for info on
    additional available HistGradientBoosting arguments.
"""
def _name(msg):
return f"{name}.gbc_{msg}"
hp_space = _hist_gradient_boosting_hp_space(_name, **kwargs)
hp_space["loss"] = loss
return scope.sklearn_HistGradientBoostingClassifier(**hp_space)
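

# Quick inspection sketch (illustrative only):
#
#   from hyperopt.pyll.stochastic import sample
#   sample(hist_gradient_boosting_classifier("hgb"))
#   # -> an unfitted HistGradientBoostingClassifier with sampled hyperparameters

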
@validate(params=["loss"],
validation_test=lambda param: not isinstance(param, str) or param in ("squared_error", "absolute_error",
"poisson", "quantile", "gamma"),
msg="Invalid parameter '%s' with value '%s'. "
"Choose 'squared_error', 'absolute_error', 'poisson'")
def hist_gradient_boosting_regressor(name: str, loss: typing.Union[str, Apply] = None, **kwargs):
"""
Return a pyll graph with hyperparameters that will construct
a sklearn.ensemble.HistGradientBoostingRegressor model.
Args:
name: name | str
        loss: choose 'squared_error', 'absolute_error', 'poisson', 'quantile' or 'gamma' | str

    See help(hpsklearn.components._hist_gradient_boosting._hist_gradient_boosting_hp_space) for info on
    additional available HistGradientBoosting arguments.
"""
    def _name(msg):
        return f"{name}.gbr_{msg}"
hp_space = _hist_gradient_boosting_hp_space(_name, **kwargs)
hp_space["loss"] = _hist_gradient_boosting_reg_loss(_name("loss")) if loss is None else loss
return scope.sklearn_HistGradientBoostingRegressor(**hp_space)
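

if __name__ == "__main__":
    # Smoke-test sketch, assuming hpsklearn and scikit-learn are installed;
    # the dataset and the search-space label "demo_hgb" are illustrative.
    from hyperopt import tpe
    from hpsklearn import HyperoptEstimator
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Run a short TPE search over the classifier space defined above.
    estimator = HyperoptEstimator(
        classifier=hist_gradient_boosting_classifier("demo_hgb"),
        algo=tpe.suggest,
        max_evals=5,
        trial_timeout=60,
    )
    estimator.fit(X_train, y_train)
    print("test accuracy:", estimator.score(X_test, y_test))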