Skip to content

Commit ecc889f

Browse files
Commit message: "performance modifications"
1 parent c2cf581 · commit ecc889f

2 files changed

Lines changed: 29 additions & 6 deletions

File tree

ml_grid/results_processing/plot_algorithms.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
from scipy.stats import ttest_ind
1212
from typing import List, Dict, Optional, Union, Tuple
1313
from ml_grid.results_processing.core import get_clean_data
14+
import warnings
15+
16+
# Maximum number of outcomes to display in stratified plots to avoid clutter.
17+
MAX_OUTCOMES_FOR_STRATIFIED_PLOT = 20
1418

1519

1620
class AlgorithmComparisonPlotter:
@@ -99,6 +103,15 @@ def _plot_stratified_algorithm_boxplots(self, metric: str, algorithms_to_plot: L
99103
raise ValueError("outcome_variable column not found for stratification")
100104

101105
outcomes = outcomes_to_plot or sorted(self.clean_data['outcome_variable'].unique())
106+
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT:
107+
warnings.warn(
108+
f"Found {len(outcomes)} outcomes, which is more than the display limit of {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
109+
f"Displaying the first {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
110+
"Use the 'outcomes_to_plot' parameter to select specific outcomes.",
111+
stacklevel=2
112+
)
113+
outcomes = outcomes[:MAX_OUTCOMES_FOR_STRATIFIED_PLOT]
114+
102115
n_outcomes = len(outcomes)
103116

104117
# Calculate subplot layout
@@ -130,8 +143,9 @@ def _plot_stratified_algorithm_boxplots(self, metric: str, algorithms_to_plot: L
130143
if len(algo_data) > 0:
131144
mean_val = algo_data.mean()
132145
ax.scatter(j, mean_val, color='red', s=60, marker='D', zorder=10)
133-
134-
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
146+
147+
ax.tick_params(axis='x', rotation=45)
148+
plt.setp(ax.get_xticklabels(), ha='right')
135149
ax.set_title(f'{outcome}\n{metric.upper()}', fontsize=11, fontweight='bold')
136150
ax.set_xlabel('Algorithm' if i >= len(outcomes) - cols else '')
137151
ax.set_ylabel(metric.upper() if i % cols == 0 else '')
@@ -277,6 +291,15 @@ def _plot_stratified_ranking(self, metric: str, algorithms_to_plot: List[str],
277291
outcomes_to_plot: List[str], top_n: int, figsize: Tuple[int, int]):
278292
"""Plot stratified ranking bar charts by outcome."""
279293
outcomes = outcomes_to_plot or sorted(self.clean_data['outcome_variable'].unique())
294+
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT:
295+
warnings.warn(
296+
f"Found {len(outcomes)} outcomes, which is more than the display limit of {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
297+
f"Displaying the first {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
298+
"Use the 'outcomes_to_plot' parameter to select specific outcomes.",
299+
stacklevel=2
300+
)
301+
outcomes = outcomes[:MAX_OUTCOMES_FOR_STRATIFIED_PLOT]
302+
280303
n_outcomes = len(outcomes)
281304

282305
cols = min(2, n_outcomes)

ml_grid/results_processing/plot_distributions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ml_grid.results_processing.core import get_clean_data, stratify_by_outcome
1414

1515
# Maximum number of outcomes to display in stratified plots to avoid clutter.
16-
MAX_OUTCOMES_FOR_STRATIFIED_PLOT = 10
16+
MAX_OUTCOMES_FOR_STRATIFIED_PLOT = 20
1717
MAX_OUTCOMES_FOR_HEATMAP = 25
1818

1919
class DistributionPlotter:
@@ -112,7 +112,7 @@ def _plot_stratified_distributions(self, metrics: List[str], figsize: Tuple[int,
112112
raise ValueError("outcome_variable column not found for stratification")
113113

114114
outcomes = outcomes_to_plot or sorted(self.clean_data['outcome_variable'].unique())
115-
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT and outcomes_to_plot is None:
115+
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT:
116116
warnings.warn(
117117
f"Found {len(outcomes)} outcomes, which is more than the display limit of {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
118118
f"Displaying the first {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
@@ -194,7 +194,7 @@ def plot_comparative_distributions(self, metric: str = 'auc',
194194

195195
outcomes = outcomes_to_compare or sorted(self.clean_data['outcome_variable'].unique())
196196

197-
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT and outcomes_to_compare is None:
197+
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT:
198198
warnings.warn(
199199
f"Found {len(outcomes)} outcomes, which is more than the display limit of {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
200200
f"Displaying the first {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
@@ -453,7 +453,7 @@ def plot_metric_correlation_by_outcome(data: pd.DataFrame,
453453
available_metrics = [col for col in metrics if col in clean_data.columns]
454454

455455
outcomes = outcomes_to_plot or sorted(clean_data['outcome_variable'].unique())
456-
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT and outcomes_to_plot is None:
456+
if len(outcomes) > MAX_OUTCOMES_FOR_STRATIFIED_PLOT:
457457
warnings.warn(
458458
f"Found {len(outcomes)} outcomes, which is more than the display limit of {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "
459459
f"Displaying the first {MAX_OUTCOMES_FOR_STRATIFIED_PLOT}. "

0 commit comments

Comments (0)