Skip to content

Commit c8a81d8

Browse files
author
SamoraHunter
committed
Fix plotting failures and robustify data cleaning
Addressed issues causing empty plots and crashes during result visualization:

- `ml_grid/results_processing/core.py`: Updated `get_clean_data` and `validate_data_structure` to robustly handle mixed data types in the 'failed' column (e.g., "Timeout" strings vs. 0/False). Updated `get_outcome_summary` to use this robust cleaning logic.
- `ml_grid/results_processing/plot_algorithms.py`:
  - Removed an erroneous `.reset_index()` call on a Seaborn heatmap object (which returns an Axes, not a DataFrame).
  - Added a check for empty stability data to prevent crashes when calculating standard deviation for algorithms with only a single run.
- `ml_grid/results_processing/filters.py`: Replaced brittle `data["failed"] == 0` checks with `get_clean_data()` to ensure failed runs are correctly filtered across all analysis methods.
1 parent 05266b1 commit c8a81d8

4 files changed

Lines changed: 29 additions & 10 deletions

File tree

ml_grid/results_processing/core.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ def get_outcome_summary(self, data: Optional[pd.DataFrame] = None) -> pd.DataFra
416416
available_metrics = [col for col in metrics if col in data.columns]
417417

418418
# Clean data (remove failed runs)
419-
clean_data = data[data["failed"] == 0] if "failed" in data.columns else data
419+
clean_data = get_clean_data(data)
420420

421421
# Group by outcome variable and calculate summary stats
422422
outcome_summary = (
@@ -479,7 +479,10 @@ def validate_data_structure(df: pd.DataFrame) -> Dict[str, Any]:
479479

480480
# Check for data quality issues
481481
if "failed" in df.columns:
482-
failed_count = (df["failed"] == 1).sum()
482+
# Robust check for failures handling mixed types (str/bool/int)
483+
failed_as_str = df["failed"].astype(str).str.lower()
484+
success_values = ["false", "0", "0.0"]
485+
failed_count = (~failed_as_str.isin(success_values)).sum()
483486
if failed_count > 0:
484487
validation_report["data_quality_issues"].append(
485488
f"{failed_count} failed runs detected"
@@ -558,7 +561,11 @@ def get_clean_data(df: pd.DataFrame, remove_failed: bool = True) -> pd.DataFrame
558561
pd.DataFrame: The cleaned DataFrame.
559562
"""
560563
if remove_failed and "failed" in df.columns:
561-
return df[df["failed"] == 0].copy()
564+
# Robustly identify failed rows handling mixed types (bool, int, str)
565+
# We consider the row failed if it is NOT explicitly False, 0, "False", or "0"
566+
failed_as_str = df["failed"].astype(str).str.lower()
567+
success_values = ["false", "0", "0.0"]
568+
return df[failed_as_str.isin(success_values)].copy()
562569
return df.copy()
563570

564571

ml_grid/results_processing/filters.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -504,9 +504,8 @@ def get_outcome_characteristics(self) -> pd.DataFrame:
504504

505505
# Basic counts
506506
char_dict["total_experiments"] = len(outcome_data)
507-
char_dict["successful_experiments"] = len(
508-
outcome_data[outcome_data["failed"] == 0]
509-
)
507+
successful_data = get_clean_data(outcome_data)
508+
char_dict["successful_experiments"] = len(successful_data)
510509
char_dict["success_rate"] = (
511510
char_dict["successful_experiments"] / char_dict["total_experiments"]
512511
)
@@ -518,7 +517,6 @@ def get_outcome_characteristics(self) -> pd.DataFrame:
518517
char_dict[char] = outcome_data[char].median()
519518

520519
# Performance characteristics
521-
successful_data = outcome_data[outcome_data["failed"] == 0]
522520
if len(successful_data) > 0:
523521
for metric in ["auc", "f1", "precision", "recall", "accuracy"]:
524522
if metric in successful_data.columns:
@@ -551,7 +549,7 @@ def find_similar_outcomes(
551549
similarity_metrics = ["auc", "f1"]
552550

553551
# Get successful runs only
554-
successful_data = self.data[self.data["failed"] == 0]
552+
successful_data = get_clean_data(self.data)
555553

556554
# Get reference outcome performance
557555
ref_data = successful_data[

ml_grid/results_processing/plot_algorithms.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,9 +535,18 @@ def plot_algorithm_stability(
535535
stability = (
536536
self.clean_data.groupby("method_name")[metric]
537537
.std()
538+
.dropna()
538539
.sort_values(ascending=True)
539540
)
540541

542+
if stability.empty:
543+
warnings.warn(
544+
f"No stability data available for metric '{metric}'. "
545+
"This usually happens if each algorithm was run only once (std is undefined).",
546+
stacklevel=2,
547+
)
548+
return
549+
541550
# Select top N most stable
542551
stability = stability.head(top_n)
543552

@@ -560,7 +569,8 @@ def plot_algorithm_stability(
560569
)
561570
ax.set_ylabel("Algorithm", fontsize=12)
562571

563-
ax.bar_label(ax.containers[0], fmt="%.4f", padding=3)
572+
if ax.containers:
573+
ax.bar_label(ax.containers[0], fmt="%.4f", padding=3)
564574
plt.tight_layout()
565575
plt.show()
566576

@@ -813,6 +823,6 @@ def plot_statistical_significance_heatmap(
813823
cmap="coolwarm_r",
814824
center=0.05,
815825
cbar_kws={"label": "P-value"},
816-
).reset_index()
826+
)
817827
plt.title(title, fontsize=14, fontweight="bold")
818828
plt.show()

ml_grid/util/project_score_save.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ def update_score_log(
192192
y_test_np = y_test.values if hasattr(y_test, "values") else y_test
193193
best_pred_np = best_pred_orig.values if hasattr(best_pred_orig, "values") else best_pred_orig
194194

195+
# Ensure 1D arrays to prevent shape mismatch errors
196+
y_test_np = np.ravel(y_test_np)
197+
best_pred_np = np.ravel(best_pred_np)
198+
195199
# Attempt to convert to integers (e.g. "0"/"1" strings from H2O) for faster np.unique
196200
try:
197201
y_test_np = y_test_np.astype(int)

0 commit comments

Comments
 (0)