-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig_hyperopt.yml
More file actions
147 lines (137 loc) · 4.88 KB
/
config_hyperopt.yml
File metadata and controls
147 lines (137 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# config_hyperopt.yml
# Configuration for a full hyperparameter optimization search using Hyperopt.
# Global settings shared by every trial of the hyperopt run.
global_params:
  # Verbosity level; kept low because the search performs multiple runs.
  verbose: 1
  # NOTE(review): the previous comment on this key said "Don't stop the search
  # on a single trial failure", which reads like a description of `false`, yet
  # the value is `true`. The value is left unchanged; confirm which is intended.
  error_raise: true
  # Set to true to see H2O's progress bars for parsing and model fitting.
  h2o_show_progress: false
  # Number of iterations for RandomizedSearchCV and BayesSearchCV.
  n_iter: 2
  max_param_space_iter_value: 10
  sub_sample_param_space_pct: 0.05
  # If true, forces a second cross-validation run even if cached results are
  # available. Defaults to false.
  force_second_cv: false
  model_eval_time_limit: 3600
  # If true, uses minimal parameter spaces and reduced cross-validation for
  # fast testing.
  test_mode: true
# Where the hyperopt run stores its output and how the batch is labelled.
experiment:
  # Base directory for storing all experiment results.
  experiments_base_dir: 'HFE_ML_experiments'
  # A descriptive name for this experiment batch (currently an empty string).
  additional_naming: ''
# Input dataset and column handling for the hyperopt run.
data:
  # Path to the dataset.
  file_path: 'synthetic_data_generated.csv'
  # Substrings used to identify columns to drop.
  drop_term_list:
    - "chrom"
    - "hfe"
    - "phlebo"
  # Set to true to iterate over all 'outcome_var_' columns in the dataset.
  multiple_outcomes: true
# Per-model switches: `true` includes the model in the hyperopt search,
# `false` leaves it out.
models:
  LogisticRegressionClass: true
  KNeighborsClassifierClass: true
  QuadraticDiscriminantAnalysisClass: true
  SVCClass: true
  XGBClassifierClass: true
  MLPClassifierClass: true
  RandomForestClassifierClass: true
  GradientBoostingClassifierClass: true
  CatBoostClassifierClass: true
  GaussianNBClassifierClass: true

  # H2O AutoML
  H2O_class: false
  H2O_GBM_class: true
  H2O_DRF_class: true
  H2O_DeepLearning_class: true
  H2O_GLM_class: true
  H2O_NaiveBayes_class: true
  H2O_RuleFit_class: true
  H2O_XGBoost_class: true
  H2O_StackedEnsemble_class: false
  H2O_GAM_class: true

  LightGBMClassifierWrapper: true
  AdaBoostClassifierClass: true
  NeuralNetworkClassifier_class: true
  TabTransformerClass: false

  # Set the following to true if a GPU is available and configured.
  kerasClassifier_class: false
  knn__gpu_wrapper_class: false

  # Additional models.
  # TabPFN requires an hf token and agreement.
  TabPFNClassifierClass: false
  # The remaining entries are disabled by default.
  AutoGluonClassifierClass: false
  TPOTClassifierClass: false
  FLAMLClassifierClass: false
  AutoKerasClassifierClass: false
# Time-series models available to the hyperopt search.
# These are only loaded if time_series_mode is enabled in the pipeline.
ts_models:
  # Enabled by default (individual exceptions are explained inline).
  RocketClassifier: true
  TimeSeriesForestClassifier: true
  KNeighborsTimeSeriesClassifier: true
  Catch22Classifier: true
  # Disabled due to internal numba typing error in aeon library.
  HIVECOTEV2: false
  TSFreshClassifier: true
  Arsenal: true
  CNNClassifier: true
  ElasticEnsemble: true
  EncoderClassifier: true
  FCNClassifier: true
  FreshPRINCEClassifier: true
  # Both Inception variants are disabled due to a Keras input shape mismatch
  # error.
  InceptionTimeClassifier: false
  IndividualInceptionClassifier: false
  IndividualTDE: true
  MLPClassifier: true
  MUSE: true
  OrdinalTDE: true
  ResNetClassifier: true
  SignatureClassifier: true
  SummaryClassifier: true
  TemporalDictionaryEnsemble: true

  # Univariate models, disabled by default.
  ContractableBOSS: false
  HIVECOTEV1: false
# This section defines the parameter search space for Hyperopt.
# Every entry is a list of options, which will be parsed into hp.choice.
hyperopt_search_space:
  # NOTE(review): 'null' is a quoted string here, not a YAML null; presumably
  # the consumer matches it by name. Confirm before changing.
  resample: ['undersample', 'oversample', 'null']
  scale: [true, false]
  feature_n: [100, 95, 75, 50, 25, 5]
  param_space_size: ['medium', 'xsmall']
  percent_missing: [99, 95, 80]
  correlation_threshold: [0.98, 0.85, 0.5, 0.25]
  feature_selection_method: ['anova', 'markov_blanket']
  # Default; will be overridden if multiple_outcomes is true.
  outcome_var_n: ['1']

  # Embedding hyperparameters
  use_embedding: [true, false]
  embedding_method: ['pca', 'svd']
  embedding_dim: [32, 64, 128]
  scale_features_before_embedding: [true, false]

  # Feature group toggles. This mapping is nested inside the search space and
  # is distinct from the top-level `data` section. A single-element list pins
  # the toggle to that one value.
  data:
    age: [true, false]
    sex: [true, false]
    bmi: [true]
    ethnicity: [true, false]
    bloods: [true, false]
    diagnostic_order: [true, false]
    drug_order: [true, false]
    annotation_n: [true, false]
    meta_sp_annotation_n: [true, false]
    annotation_mrc_n: [true, false]
    meta_sp_annotation_mrc_n: [true, false]
    core_02: [false]
    bed: [false]
    vte_status: [true]
    hosp_site: [true]
    core_resus: [false]
    news: [false]
    date_time_stamp: [false]
    appointments: [false]
# Settings that control the Hyperopt driver itself.
hyperopt_settings:
  # Number of iterations per outcome variable.
  max_evals: 2
  # Timeout in seconds for a full trial (data prep + all models).
  # NOTE(review): this is lower than global_params.model_eval_time_limit
  # (3600); if that limit is also in seconds, the trial timeout would be
  # reached first. Confirm the two values are meant to relate this way.
  trial_timeout: 1120