-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig_single_run.yml
More file actions
97 lines (91 loc) · 2.73 KB
/
config_single_run.yml
File metadata and controls
97 lines (91 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# config_single_run.yml
# Configuration for a single, deterministic pipeline run for debugging and testing.
# Logging and error-handling switches for the run.
global_params:
  verbose: 2
  # Verbosity for the scikit-learn search object (GridSearchCV, etc.);
  # 0 keeps it silent.
  search_verbose: 0
  error_raise: true
  # Enable to see H2O's progress bars while parsing and fitting models.
  h2o_show_progress: false
# Where experiment results are stored and how the directory is named.
experiment:
  # Base directory (relative to the project root) that holds all experiment
  # results.
  experiments_base_dir: 'experiments'
  # Optional suffix for the experiment directory name.
  additional_naming: 'SingleRunTest'
# Input dataset and column filtering for the data pipeline.
data:
  # Dataset path, relative to the notebook's directory.
  file_path: 'synthetic_data_generated.csv'
  # Substrings used to identify columns to drop.
  drop_term_list:
    - 'chrom'
    - 'hfe'
    - 'phlebo'
  # Overrides the outcome variable. Set to null (or remove the key) to use
  # 'outcome_var_n' from run_params instead.
  outcome_var_override: 'outcome_var_1'
# Model selection for the single run.
models:
  # Each entry toggles whether the named model class is loaded
  # (true = load, false = skip).
  LogisticRegression: true
  RandomForestClassifier: true
  XGB_class: false
  H2O_class: false  # H2O AutoML
  H2O_GBM_class: true
  H2O_DRF_class: true
  H2O_DeepLearning_class: true
  H2O_GLM_class: true
  H2O_NaiveBayes_class: true
  H2O_RuleFit_class: true
  H2O_XGBoost_class: true
  H2O_StackedEnsemble_class: false
  H2O_GAM_class: true
  knn__gpu_wrapper_class: false
  TabPFNClassifierClass: false  # requires hf token and agreement
  AutoGluonClassifierClass: false
  TPOTClassifierClass: false
  FLAMLClassifierClass: false
  AutoKerasClassifierClass: false
# Time-series model selection for the single run. These classes are only
# loaded when time_series_mode is enabled in the pipeline.
ts_models:
  RocketClassifier: true
  TimeSeriesForestClassifier: false
  KNeighborsTimeSeriesClassifier: false
# A single set of parameters for one standalone run.
run_params:
  outcome_var_n: 1
  param_space_size: 'xsmall'
  scale: true
  feature_n: 70
  # NOTE(review): the embedding_* keys below are presumably ignored while
  # use_embedding is false - confirm against the pipeline.
  use_embedding: false
  embedding_method: 'pca'
  embedding_dim: 10
  scale_features_before_embedding: true
  percent_missing: 80
  corr: 0.9
  feature_selection_method: 'anova'
  test_size: 0.2
  random_state: 42
  # The 'data' sub-dictionary toggles feature groups on and off. It must stay
  # nested under run_params: at the top level it would be a duplicate of the
  # dataset 'data' section, and most parsers silently keep only the last one.
  data:
    age: true
    sex: true
    bmi: true
    ethnicity: true
    bloods: true
    diagnostic_order: true
    drug_order: true
    annotation_n: true
    meta_sp_annotation_n: true
    annotation_mrc_n: true
    meta_sp_annotation_mrc_n: true
    core_02: true
    bed: true
    vte_status: true
    hosp_site: true
    core_resus: true
    news: true
    date_time_stamp: false
    appointments: false