Skip to content

Commit 5aa6414

Browse files
committed
Improved API documentation. More specifically, we are adding structured docstrings to the class attributes. This is a standard Python practice that allows documentation generators like Sphinx (which this project uses) to automatically parse these descriptions and build a rich, detailed, and user-friendly reference for the code's public interface.
1 parent 5a84ec5 commit 5aa6414

2 files changed

Lines changed: 114 additions & 4 deletions

File tree

ml_grid/pipeline/data.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,78 @@ class pipe:
3535
can be passed to classifier methods.
3636
"""
3737

38+
base_project_dir: str
39+
"""The root directory for the project, used for saving logs and models."""
40+
41+
additional_naming: Optional[str]
42+
"""An optional string to append to log folder names for better identification."""
43+
44+
local_param_dict: Dict[str, Any]
45+
"""A dictionary of parameters for this specific pipeline run."""
46+
47+
global_params: global_parameters
48+
"""A reference to the global parameters singleton instance."""
49+
50+
verbose: int
51+
"""The verbosity level for logging, inherited from global parameters."""
52+
53+
param_space_index: int
54+
"""The index of the current parameter space permutation being run."""
55+
56+
time_series_mode: bool
57+
"""A flag indicating if the pipeline is running in time-series mode."""
58+
59+
model_class_dict: Optional[Dict[str, bool]]
60+
"""A dictionary specifying which model classes to include in the run."""
61+
62+
df: pd.DataFrame
63+
"""The raw input DataFrame after being read from the source file."""
64+
65+
all_df_columns: List[str]
66+
"""A list of all column names from the original raw DataFrame."""
67+
68+
orignal_feature_names: List[str]
69+
"""A copy of the original feature names before any processing."""
70+
71+
pertubation_columns: List[str]
72+
"""A list of columns selected for inclusion based on `local_param_dict`."""
73+
74+
drop_list: List[str]
75+
"""A list of columns identified to be dropped due to various cleaning steps."""
76+
77+
outcome_variable: str
78+
"""The name of the target variable for the current pipeline run."""
79+
80+
final_column_list: List[str]
81+
"""The final list of feature columns to be used after all filtering."""
82+
83+
X: pd.DataFrame
84+
"""The feature matrix (DataFrame) after all cleaning and selection steps."""
85+
86+
y: pd.Series
87+
"""The target variable (Series) corresponding to the feature matrix `X`."""
88+
89+
X_train: pd.DataFrame
90+
"""The training feature set."""
91+
92+
X_test: pd.DataFrame
93+
"""The validation/testing feature set."""
94+
95+
y_train: pd.Series
96+
"""The training target set."""
97+
98+
y_test: pd.Series
99+
"""The validation/testing target set."""
100+
101+
X_test_orig: pd.DataFrame
102+
"""The original, held-out test set for final validation."""
103+
104+
y_test_orig: pd.Series
105+
"""The target variable for the original, held-out test set."""
106+
107+
model_class_list: List[Any]
108+
"""A list of instantiated model class objects to be evaluated in this run."""
109+
38110
def __init__(
39111
self,
40112
file_name: str,

ml_grid/util/grid_param_space_ga.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,48 @@
1010
class Grid:
1111
"""Generates and manages a grid of hyperparameter settings for GA experiments."""
1212

13+
global_params: global_parameters
14+
"""A reference to the global parameters singleton instance."""
15+
16+
verbose: int
17+
"""The verbosity level, inherited from global parameters."""
18+
19+
sample_n: int
20+
"""The number of random settings to sample from the full grid."""
21+
22+
grid: Dict[str, Union[List, Dict]]
23+
"""
24+
The dictionary defining the hyperparameter search space for the GA.
25+
26+
Keys represent different aspects of the experiment:
27+
- **weighted**: The weighting strategy for the ensemble.
28+
- **use_stored_base_learners**: Whether to use pre-trained base learners.
29+
- **store_base_learners**: Whether to save the base learners after training.
30+
- **resample**: The resampling strategy to handle class imbalance.
31+
- **scale**: Whether to apply standard scaling to features.
32+
- **n_features**: The number of features to use (currently 'all').
33+
- **param_space_size**: The size of the hyperparameter space for base learners.
34+
- **n_unique_out**: A parameter for future use.
35+
- **outcome_var_n**: The index of the outcome variable to use.
36+
- **div_p**: A parameter for future use.
37+
- **percent_missing**: The threshold for dropping columns with missing values.
38+
- **corr**: The threshold for dropping highly correlated features.
39+
- **cxpb**: The crossover probability for the genetic algorithm.
40+
- **mutpb**: The mutation probability for the genetic algorithm.
41+
- **indpb**: The independent probability for each attribute to be mutated.
42+
- **t_size**: The tournament size for selection in the genetic algorithm.
43+
- **data**: A nested dictionary specifying which feature categories to include.
44+
"""
45+
46+
settings_list: List[Dict]
47+
"""
48+
A list of hyperparameter combinations sampled from the `grid`. Each element
49+
is a dictionary representing one complete experimental configuration.
50+
"""
51+
52+
settings_list_iterator: it.chain
53+
"""An iterator over the `settings_list`."""
54+
1355
def __init__(self, sample_n: Optional[int] = 1000):
1456
"""Initializes the Grid object for Genetic Algorithms.
1557
@@ -110,7 +152,3 @@ def c_prod(d: Union[Dict, List]) -> Generator[Dict, None, None]:
110152
self.settings_list = random.sample(self.settings_list, sample_size)
111153

112154
self.settings_list_iterator = iter(self.settings_list)
113-
114-
# This is likely not properly functioning. Does not return iteration, instead reinitiates.
115-
# Don't need to subsample, can just generate n number of random choices from grid space.
116-
# function can just return random choice from grid space, terminate at the other end once limit reached.

0 commit comments

Comments
 (0)