
Commit ae0dc03

Refactor: Add test_mode and bayessearch to TS models
This commit introduces a major refactoring of the time-series classifier modules to support distinct parameter spaces for different execution modes: test_mode, bayessearch (Bayesian optimization), and the default grid search.

Key changes include:

Conditional Parameter Spaces
- Each time-series model class now dynamically defines its parameter_space based on global_params.test_mode and global_params.bayessearch.
- test_mode: uses a minimal, hardcoded parameter set for fast integration testing.
- bayessearch: uses skopt.space objects (Categorical, Integer, Real) for hyperparameter optimization.
- Default: uses standard lists for GridSearchCV.

Deep Learning Model Enhancements
- Deep learning models (FCN, Encoder, TapNet, ResNet, InceptionTime, etc.) are now wrapped in an sklearn.pipeline.Pipeline that includes a TimeSeriesStandardScaler for robust data preprocessing.
- Custom wrapper classes (e.g., FCNClassifierWrapper) are introduced to handle common aeon/keras issues, such as cloning optimizer states, managing metrics attributes, and preventing NaN probability outputs.
- The EncoderClassifier now dynamically validates kernel_size against the time-series length to prevent runtime errors.

New and Updated Classifiers
- Added the missing TimeSeriesForestClassifier_module.py.
- Refactored the TSFreshClassifier and SummaryClassifier parameter spaces for better compatibility and clarity.
- Implemented a _DummyClassifier fallback for TapNetClassifier and ShapeDTW to handle potential import errors across aeon versions.

Dependency and Type Imports
- Added skopt.space imports where necessary to support Bayesian search configurations.

This refactoring standardizes the behavior of time-series models across the library, improves the robustness of deep-learning classifiers, and enables more flexible and efficient hyperparameter tuning strategies.
1 parent a210ad3 commit ae0dc03
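The Pipeline change described in the message is not visible in the hunks excerpted below, so here is a minimal sketch of the idea only: an aeon deep-learning classifier placed behind a per-series standardisation step in an sklearn Pipeline. `SimpleSeriesScaler` is a hypothetical stand-in for the TimeSeriesStandardScaler named above, and `FCNClassifier` is used merely as a representative deep-learning model; neither reflects the commit's actual code.

```python
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from aeon.classification.deep_learning import FCNClassifier


class SimpleSeriesScaler(BaseEstimator, TransformerMixin):
    """Hypothetical stand-in for the TimeSeriesStandardScaler named in the message."""

    def fit(self, X, y=None):
        # Stateless: each series is scaled independently at transform time.
        return self

    def transform(self, X):
        # X: (n_cases, n_channels, n_timepoints) -> z-normalise along the time axis.
        mean = X.mean(axis=-1, keepdims=True)
        std = X.std(axis=-1, keepdims=True)
        return (X - mean) / np.where(std == 0, 1.0, std)


# Deep-learning classifier behind a scaling step, as the commit message describes.
model = Pipeline(
    steps=[
        ("scaler", SimpleSeriesScaler()),
        ("clf", FCNClassifier(n_epochs=2, batch_size=16, random_state=0)),
    ]
)
# Hyperparameters would then be addressed through the step name, e.g. "clf__n_epochs".
```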

26 files changed (1,901 additions & 699 deletions in total)

Lines changed: 85 additions & 38 deletions
@@ -1,6 +1,7 @@
 from typing import Any, Dict, List
 
 from aeon.classification.convolution_based import Arsenal
+from skopt.space import Categorical, Integer, Real
 
 from ml_grid.pipeline.data import pipe
 
@@ -38,41 +39,87 @@ def __init__(self, ml_grid_object: pipe):
 
         self.method_name = "Arsenal"
 
-        self.parameter_space: Dict[str, List[Any]] = {
-            "num_kernels": [
-                1000,
-                2000,
-                3000,
-            ],  # Number of kernels for each ROCKET transform.
-            "n_estimators": [
-                3,
-                5,
-                6,
-            ],  # Number of estimators to build for the ensemble.
-            "rocket_transform": [
-                "rocket",
-                "minirocket",
-            ],  # The type of Rocket transformer to use. #, "multirocket" # broken
-            # Valid inputs = ["rocket", "minirocket", "multirocket"].
-            "max_dilations_per_kernel": [
-                16,
-                32,
-                64,
-            ],  # MiniRocket and MultiRocket only. The maximum number of dilations per kernel.
-            "n_features_per_kernel": [
-                3,
-                4,
-                5,
-            ],  # MultiRocket only. The number of features per kernel.
-            "time_limit_in_minutes": time_limit_param,  # Time contract to limit build time in minutes, overriding n_estimators. Default of 0 means n_estimators is used.
-            "contract_max_n_estimators": [
-                50,
-                100,
-                150,
-            ],  # Max number of estimators when time_limit_in_minutes is set.
-            #'save_transformed_data': [True, False],  # Save the data transformed in fit for use in _get_train_probs.
-            "n_jobs": [
-                n_jobs_model_val
-            ],  # The number of jobs to run in parallel for both fit and predict. -1 means using all processors.
-            "random_state": [random_state_val],  # Seed for random number generation.
-        }
+        gp = ml_grid_object.global_params
+        test_mode = getattr(gp, "test_mode", False)
+        if not test_mode and hasattr(gp, "__dict__"):
+            test_mode = gp.__dict__.get("test_mode", False)
+
+        if test_mode:
+            self.parameter_space = {
+                "n_kernels": [100],
+                "n_estimators": [2],
+                "rocket_transform": ["rocket"],
+                "max_dilations_per_kernel": [16],
+                "n_features_per_kernel": [3],
+                "time_limit_in_minutes": [0.05],
+                "contract_max_n_estimators": [5],
+                "n_jobs": [1],
+                "random_state": [random_state_val],
+            }
+        elif ml_grid_object.global_params.bayessearch:
+            tl_param = time_limit_param[0]
+            if not isinstance(tl_param, (Categorical, Integer, Real)):
+                tl_param = Categorical([tl_param])
+
+            self.parameter_space: Dict[str, List[Any]] = {
+                "n_kernels": Categorical(
+                    [1000, 2000, 3000]
+                ),  # Number of kernels for each ROCKET transform.
+                "n_estimators": Categorical(
+                    [3, 5, 6]
+                ),  # Number of estimators to build for the ensemble.
+                "rocket_transform": Categorical(
+                    ["rocket", "minirocket"]
+                ),  # The type of Rocket transformer to use.
+                "max_dilations_per_kernel": Categorical(
+                    [16, 32, 64]
+                ),  # MiniRocket and MultiRocket only.
+                "n_features_per_kernel": Categorical(
+                    [3, 4, 5]
+                ),  # MultiRocket only. The number of features per kernel.
+                "time_limit_in_minutes": tl_param,  # Time contract to limit build time in minutes
+                "contract_max_n_estimators": Categorical(
+                    [50, 100, 150]
+                ),  # Max number of estimators when time_limit_in_minutes is set.
+                "n_jobs": [
+                    n_jobs_model_val
+                ],  # The number of jobs to run in parallel for both fit and predict.
+                "random_state": [
+                    random_state_val
+                ],  # Seed for random number generation.
+            }
+        else:
+            self.parameter_space: Dict[str, List[Any]] = {
+                "n_kernels": [
+                    1000,
+                    2000,
+                    3000,
+                ],  # Number of kernels for each ROCKET transform.
+                "n_estimators": [
+                    3,
+                    5,
+                    6,
+                ],  # Number of estimators to build for the ensemble.
+                "rocket_transform": [
+                    "rocket",
+                    "minirocket",
+                ],  # The type of Rocket transformer to use.
+                "max_dilations_per_kernel": [
+                    16,
+                    32,
+                    64,
+                ],  # MiniRocket and MultiRocket only.
+                "n_features_per_kernel": [
+                    3,
+                    4,
+                    5,
+                ],  # MultiRocket only. The number of features per kernel.
+                "time_limit_in_minutes": time_limit_param,
+                "contract_max_n_estimators": [
+                    50,
+                    100,
+                    150,
+                ],  # Max number of estimators when time_limit_in_minutes is set.
+                "n_jobs": [n_jobs_model_val],
+                "random_state": [random_state_val],
+            }

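A rough usage sketch, not part of the commit, of how the two non-test branches above are typically consumed: the plain-list space feeds sklearn's GridSearchCV, while the skopt.space version feeds skopt's BayesSearchCV. The truncated spaces and search settings here are illustrative only.

```python
from aeon.classification.convolution_based import Arsenal
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV
from skopt.space import Categorical

# Grid-search flavour: plain lists, exhaustively enumerated.
grid_space = {"n_kernels": [1000, 2000], "n_estimators": [3, 5]}
grid_search = GridSearchCV(Arsenal(random_state=0), grid_space, cv=3)

# Bayesian flavour: skopt dimensions, sampled for n_iter trials.
bayes_space = {
    "n_kernels": Categorical([1000, 2000, 3000]),
    "n_estimators": Categorical([3, 5, 6]),
}
bayes_search = BayesSearchCV(
    Arsenal(random_state=0), bayes_space, n_iter=8, cv=3, random_state=0
)

# Either search object is then fitted the same way: search.fit(X_train, y_train)
```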
ml_grid/model_classes_time_series/CNNClassifier_module.py

Lines changed: 59 additions & 21 deletions
@@ -6,6 +6,7 @@
 if not hasattr(sklearn.utils.validation, "validate_data"):
     sklearn.utils.validation.validate_data = sklearn.utils.validation.check_X_y
 
+from skopt.space import Categorical, Integer, Real
 from aeon.classification.deep_learning import TimeCNNClassifier
 
 from ml_grid.pipeline.data import pipe
@@ -49,24 +50,61 @@ def __init__(self, ml_grid_object: pipe):
         self.algorithm_implementation = TimeCNNClassifier()
         self.method_name = "CNNClassifier"
 
-        self.parameter_space = {
-            #'n_layers': [2, 3, 4],
-            #'kernel_size': [3, 5, 7],
-            #'n_filters': [[6, 12], [8, 16], [10, 20]],
-            #'avg_pool_size': [2, 3, 4],
-            "activation": ["sigmoid", "relu"],
-            "padding": ["valid"],
-            #'strides': [1, 2],
-            "dilation_rate": [1, 2],
-            "use_bias": [True],
-            "random_state": [random_state_val],
-            "n_epochs": [log_epoch],
-            "batch_size": [16, 32, 64],
-            "verbose": [verbose_param],
-            "loss": ["binary_crossentropy"],
-            "metrics": ["accuracy"],
-            #'save_best_model': [True, False],
-            #'save_last_model': [True, False],
-            #'best_file_name': ['best_model', 'top_model'],
-            #'last_file_name': ['last_model', 'final_model'],
-        }
+        gp = ml_grid_object.global_params
+        test_mode = getattr(gp, "test_mode", False)
+        if not test_mode and hasattr(gp, "__dict__"):
+            test_mode = gp.__dict__.get("test_mode", False)
+
+        if test_mode:
+            self.parameter_space = {
+                "activation": ["relu"],
+                "padding": ["valid"],
+                "dilation_rate": [1],
+                "use_bias": [True],
+                "random_state": [random_state_val],
+                "n_epochs": [2],
+                "batch_size": [16],
+                "verbose": [0],
+                "loss": ["binary_crossentropy"],
+                "metrics": [("accuracy",)],
+                "save_best_model": [False],
+                "save_last_model": [False],
+            }
+        elif ml_grid_object.global_params.bayessearch:
+            n_epochs_param = log_epoch
+            if (
+                isinstance(n_epochs_param, list)
+                and len(n_epochs_param) >= 1
+                and isinstance(n_epochs_param[0], (Categorical, Integer, Real))
+            ):
+                n_epochs_param = n_epochs_param[0]
+
+            self.parameter_space = {
+                "activation": Categorical(["sigmoid", "relu"]),
+                "padding": Categorical(["valid"]),
+                "dilation_rate": Categorical([1, 2]),
+                "use_bias": [True],
+                "random_state": [random_state_val],
+                "n_epochs": n_epochs_param,
+                "batch_size": Categorical([16, 32, 64]),
+                "verbose": [verbose_param],
+                "loss": Categorical(["binary_crossentropy"]),
+                "metrics": Categorical(["accuracy"]),
+                "save_best_model": [True],
+                "save_last_model": [False],
+            }
+        else:
+            self.parameter_space = {
+                "activation": ["sigmoid", "relu"],
+                "padding": ["valid"],
+                "dilation_rate": [1, 2],
+                "use_bias": [True],
+                "random_state": [random_state_val],
+                "n_epochs": log_epoch,
+                "batch_size": [16, 32, 64],
+                "verbose": [verbose_param],
+                "loss": ["binary_crossentropy"],
+                "metrics": ["accuracy"],
+                "save_best_model": [True],
+                "save_last_model": [False],
+            }

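The commit message also mentions wrapper classes (e.g. FCNClassifierWrapper) that guard the keras-based models against NaN probability outputs. The wrapper code itself is not in the hunks shown here; the following is only a hedged sketch of that safeguard applied to TimeCNNClassifier, with the class name and fallback value chosen for illustration.

```python
import numpy as np
from aeon.classification.deep_learning import TimeCNNClassifier


class TimeCNNClassifierWrapper(TimeCNNClassifier):
    """Hypothetical wrapper: guard against NaN probabilities from unstable runs."""

    def predict_proba(self, X):
        proba = super().predict_proba(X)
        nan_rows = np.isnan(proba).any(axis=1)
        if nan_rows.any():
            # Fall back to a uniform distribution over classes for degenerate rows.
            proba[nan_rows] = 1.0 / proba.shape[1]
        return proba
```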
ml_grid/model_classes_time_series/Catch22Classifer_module.py

Lines changed: 41 additions & 13 deletions
@@ -2,6 +2,7 @@
 
 from aeon.classification.feature_based import Catch22Classifier
 from sklearn.ensemble import RandomForestClassifier
+from skopt.space import Categorical
 from sklearn.tree import DecisionTreeClassifier
 
 from ml_grid.pipeline.data import pipe
@@ -38,16 +39,43 @@ def __init__(self, ml_grid_object: pipe):
         self.method_name: str = "Catch22Classifier"
         self.parameter_space: Dict[str, List[Any]]
 
-        self.parameter_space = {
-            "features": ["all", ["DN_HistogramMode_5", "DN_HistogramMode_10"]],
-            "catch24": [True, False],
-            "outlier_norm": [True, False],
-            "replace_nans": [True, False],
-            "use_pycatch22": [True, False],
-            "estimator": [
-                RandomForestClassifier(n_estimators=200),
-                DecisionTreeClassifier(),
-            ],
-            "random_state": [random_state_val],
-            "n_jobs": [n_jobs_model_val],
-        }
+        if getattr(ml_grid_object.global_params, "test_mode", False):
+            self.parameter_space = {
+                "features": ["all"],
+                "estimator": [DecisionTreeClassifier(max_depth=2)],
+                "n_jobs": [1],
+            }
+            return
+
+        if ml_grid_object.global_params.bayessearch:
+            self.parameter_space = {
+                "features": Categorical(
+                    ["all", ("DN_HistogramMode_5", "DN_HistogramMode_10")]
+                ),
+                "catch24": Categorical([True, False]),
+                "outlier_norm": Categorical([True, False]),
+                "replace_nans": Categorical([True, False]),
+                "use_pycatch22": Categorical([True, False]),
+                "estimator": Categorical(
+                    [
+                        RandomForestClassifier(n_estimators=200),
+                        DecisionTreeClassifier(),
+                    ]
+                ),
+                "random_state": [random_state_val],
+                "n_jobs": [n_jobs_model_val],
+            }
+        else:
+            self.parameter_space = {
+                "features": ["all", ("DN_HistogramMode_5", "DN_HistogramMode_10")],
+                "catch24": [True, False],
+                "outlier_norm": [True, False],
+                "replace_nans": [True, False],
+                "use_pycatch22": [True, False],
+                "estimator": [
+                    RandomForestClassifier(n_estimators=200),
+                    DecisionTreeClassifier(),
+                ],
+                "random_state": [random_state_val],
+                "n_jobs": [n_jobs_model_val],
+            }

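To make the point of the test_mode branch concrete, here is a back-of-the-envelope count (not from the commit) of the GridSearchCV candidates implied by the two Catch22Classifier spaces above: the default grid enumerates every combination, whereas the test_mode space collapses to a single fit per CV fold.

```python
from math import prod

# Number of values per parameter in the default Catch22 grid shown above.
default_sizes = {
    "features": 2, "catch24": 2, "outlier_norm": 2, "replace_nans": 2,
    "use_pycatch22": 2, "estimator": 2, "random_state": 1, "n_jobs": 1,
}
# And in the minimal test_mode space.
test_sizes = {"features": 1, "estimator": 1, "n_jobs": 1}

print(prod(default_sizes.values()))  # 64 candidate configurations
print(prod(test_sizes.values()))     # 1 candidate configuration
```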
0 commit comments
