Skip to content

Commit 5aa6414

Browse files
committed
Improved API documentation. More specifically, we are adding structured docstrings to the class attributes. This is a standard Python practice that allows documentation generators like Sphinx (which this project uses) to automatically parse these descriptions and build a rich, detailed, and user-friendly reference for the code's public interface.
1 parent 5a84ec5 commit 5aa6414

2 files changed

Lines changed: 114 additions & 4 deletions

File tree

ml_grid/pipeline/data.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,78 @@ class pipe:
3535
can be passed to classifier methods.
3636
"""
3737

38+
base_project_dir: str
39+
"""The root directory for the project, used for saving logs and models."""
40+
41+
additional_naming: Optional[str]
42+
"""An optional string to append to log folder names for better identification."""
43+
44+
local_param_dict: Dict[str, Any]
45+
"""A dictionary of parameters for this specific pipeline run."""
46+
47+
global_params: global_parameters
48+
"""A reference to the global parameters singleton instance."""
49+
50+
verbose: int
51+
"""The verbosity level for logging, inherited from global parameters."""
52+
53+
param_space_index: int
54+
"""The index of the current parameter space permutation being run."""
55+
56+
time_series_mode: bool
57+
"""A flag indicating if the pipeline is running in time-series mode."""
58+
59+
model_class_dict: Optional[Dict[str, bool]]
60+
"""A dictionary specifying which model classes to include in the run."""
61+
62+
df: pd.DataFrame
63+
"""The raw input DataFrame after being read from the source file."""
64+
65+
all_df_columns: List[str]
66+
"""A list of all column names from the original raw DataFrame."""
67+
68+
orignal_feature_names: List[str]
69+
"""A copy of the original feature names before any processing."""
70+
71+
pertubation_columns: List[str]
72+
"""A list of columns selected for inclusion based on `local_param_dict`."""
73+
74+
drop_list: List[str]
75+
"""A list of columns identified to be dropped due to various cleaning steps."""
76+
77+
outcome_variable: str
78+
"""The name of the target variable for the current pipeline run."""
79+
80+
final_column_list: List[str]
81+
"""The final list of feature columns to be used after all filtering."""
82+
83+
X: pd.DataFrame
84+
"""The feature matrix (DataFrame) after all cleaning and selection steps."""
85+
86+
y: pd.Series
87+
"""The target variable (Series) corresponding to the feature matrix `X`."""
88+
89+
X_train: pd.DataFrame
90+
"""The training feature set."""
91+
92+
X_test: pd.DataFrame
93+
"""The validation/testing feature set."""
94+
95+
y_train: pd.Series
96+
"""The training target set."""
97+
98+
y_test: pd.Series
99+
"""The validation/testing target set."""
100+
101+
X_test_orig: pd.DataFrame
102+
"""The original, held-out test set for final validation."""
103+
104+
y_test_orig: pd.Series
105+
"""The target variable for the original, held-out test set."""
106+
107+
model_class_list: List[Any]
108+
"""A list of instantiated model class objects to be evaluated in this run."""
109+
38110
def __init__(
39111
self,
40112
file_name: str,

ml_grid/util/grid_param_space_ga.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,48 @@
1010
class Grid:
1111
"""Generates and manages a grid of hyperparameter settings for GA experiments."""
1212

13+
global_params: global_parameters
14+
"""A reference to the global parameters singleton instance."""
15+
16+
verbose: int
17+
"""The verbosity level, inherited from global parameters."""
18+
19+
sample_n: int
20+
"""The number of random settings to sample from the full grid."""
21+
22+
grid: Dict[str, Union[List, Dict]]
23+
"""
24+
The dictionary defining the hyperparameter search space for the GA.
25+
26+
Keys represent different aspects of the experiment:
27+
- **weighted**: The weighting strategy for the ensemble.
28+
- **use_stored_base_learners**: Whether to use pre-trained base learners.
29+
- **store_base_learners**: Whether to save the base learners after training.
30+
- **resample**: The resampling strategy to handle class imbalance.
31+
- **scale**: Whether to apply standard scaling to features.
32+
- **n_features**: The number of features to use (currently 'all').
33+
- **param_space_size**: The size of the hyperparameter space for base learners.
34+
- **n_unique_out**: A parameter for future use.
35+
- **outcome_var_n**: The index of the outcome variable to use.
36+
- **div_p**: A parameter for future use.
37+
- **percent_missing**: The threshold for dropping columns with missing values.
38+
- **corr**: The threshold for dropping highly correlated features.
39+
- **cxpb**: The crossover probability for the genetic algorithm.
40+
- **mutpb**: The mutation probability for the genetic algorithm.
41+
- **indpb**: The independent probability for each attribute to be mutated.
42+
- **t_size**: The tournament size for selection in the genetic algorithm.
43+
- **data**: A nested dictionary specifying which feature categories to include.
44+
"""
45+
46+
settings_list: List[Dict]
47+
"""
48+
A list of hyperparameter combinations sampled from the `grid`. Each element
49+
is a dictionary representing one complete experimental configuration.
50+
"""
51+
52+
settings_list_iterator: it.chain
53+
"""An iterator over the `settings_list`."""
54+
1355
def __init__(self, sample_n: Optional[int] = 1000):
1456
"""Initializes the Grid object for Genetic Algorithms.
1557
@@ -110,7 +152,3 @@ def c_prod(d: Union[Dict, List]) -> Generator[Dict, None, None]:
110152
self.settings_list = random.sample(self.settings_list, sample_size)
111153

112154
self.settings_list_iterator = iter(self.settings_list)
113-
114-
# This is likely not properly functioning. Does not return iteration, instead reinitiates.
115-
# Don't need to subsample, can just generate n number of random choices from grid space.
116-
# function can just return random choice from grid space, terminate at the other end once limit reached.

0 commit comments

Comments
 (0)