
Commit ef1ba78

add gini shapley values
1 parent 6ff86c2 commit ef1ba78

Showing 24 changed files with 1,484 additions and 1,999 deletions.
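Note on the commit title: it refers to Gini-based Shapley values. For orientation only, here is a minimal, self-contained sketch of one way such an attribution can be computed: exact Shapley values over feature subsets, with test-set Gini (2 * AUC - 1) as the value function. The helper names and the choice of value function are assumptions for illustration, not FastWoe's implementation.

# Hypothetical sketch: exact Shapley attribution with Gini as the value
# function. Not FastWoe's code; retraining per subset is only feasible for a
# handful of features.
from itertools import combinations
from math import factorial

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split


def gini_of_subset(cols, X_tr, X_te, y_tr, y_te):
    """Value function v(S): test-set Gini of a model trained on feature subset S."""
    if not cols:
        return 0.0
    model = LogisticRegression(max_iter=1000).fit(X_tr[:, cols], y_tr)
    auc = roc_auc_score(y_te, model.predict_proba(X_te[:, cols])[:, 1])
    return 2 * auc - 1  # Gini = 2 * AUC - 1


def gini_shapley(X_tr, X_te, y_tr, y_te):
    """phi_i = sum over S of |S|! (n - |S| - 1)! / n! * (v(S + {i}) - v(S))."""
    n = X_tr.shape[1]
    phi = np.zeros(n)
    for i in range(n):
        rest = [j for j in range(n) if j != i]
        for k in range(len(rest) + 1):
            for S in combinations(rest, k):
                weight = factorial(k) * factorial(n - k - 1) / factorial(n)
                gain = (gini_of_subset(list(S) + [i], X_tr, X_te, y_tr, y_te)
                        - gini_of_subset(list(S), X_tr, X_te, y_tr, y_te))
                phi[i] += weight * gain
    return phi


X, y = make_classification(n_samples=2000, n_features=4, random_state=42)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=42)
print(gini_shapley(X_tr, X_te, y_tr, y_te))  # one contribution per feature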

examples/fastwoe_cap_curve.ipynb

Lines changed: 29 additions & 232 deletions
Large diffs are not rendered by default.

examples/fastwoe_example.py

Lines changed: 4 additions & 12 deletions
@@ -485,13 +485,9 @@ def visualize_woe_analysis(df, woe_encoder):
     # Plot 4: WOE vs Event Rate correlation
     ax4 = axes[1, 0]
     all_woe = np.concatenate([region_mapping["woe"], job_mapping["woe"]])
-    all_rates = np.concatenate(
-        [region_mapping["event_rate"], job_mapping["event_rate"]]
-    )
+    all_rates = np.concatenate([region_mapping["event_rate"], job_mapping["event_rate"]])
 
-    ax4.scatter(
-        all_rates, all_woe, alpha=0.7, s=100, color="purple", edgecolors="black"
-    )
+    ax4.scatter(all_rates, all_woe, alpha=0.7, s=100, color="purple", edgecolors="black")
     ax4.set_xlabel("Event Rate")
     ax4.set_ylabel("WOE Value")
     ax4.set_title("WOE vs Event Rate", fontweight="bold", fontsize=12)

@@ -522,9 +518,7 @@ def visualize_woe_analysis(df, woe_encoder):
     theoretical_x = np.linspace(min(all_counts), max(all_counts), 100)
     # Approximate theoretical SE (assuming balanced good/bad split)
     theoretical_y = np.sqrt(2 / theoretical_x)
-    ax5.plot(
-        theoretical_x, theoretical_y, "r--", alpha=0.7, label="Theoretical (balanced)"
-    )
+    ax5.plot(theoretical_x, theoretical_y, "r--", alpha=0.7, label="Theoretical (balanced)")
     ax5.legend()
 
     # Plot 6: Z-scores (statistical significance)

@@ -543,9 +537,7 @@ def visualize_woe_analysis(df, woe_encoder):
         alpha=0.7,
         edgecolor="black",
     )
-    ax6.axhline(
-        y=1.96, color="red", linestyle="--", alpha=0.7, label="95% significance"
-    )
+    ax6.axhline(y=1.96, color="red", linestyle="--", alpha=0.7, label="95% significance")
     ax6.axhline(y=-1.96, color="red", linestyle="--", alpha=0.7)
     ax6.set_title("Statistical Significance (Z-scores)", fontweight="bold", fontsize=12)
     ax6.set_ylabel("Z-score")
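Side note on plots 5 and 6 above: the quantities drawn are per-category WOE values, their standard errors, and the resulting z-scores. A minimal sketch of those quantities follows, assuming the usual delta-method approximation SE(WOE) ~ sqrt(1/n_events + 1/n_non_events); FastWoe's own computation may differ.

# Hedged sketch: WOE, an approximate standard error, and z-scores for one
# categorical feature. The SE formula is the common approximation, not
# necessarily what FastWoe uses internally.
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
df = pd.DataFrame({
    "region": rng.choice(["North", "South", "East", "West"], size=5000),
    "target": rng.binomial(1, 0.2, size=5000),
})

grp = df.groupby("region")["target"].agg(events="sum", count="size")
grp["non_events"] = grp["count"] - grp["events"]
total_events, total_non_events = grp["events"].sum(), grp["non_events"].sum()

grp["woe"] = np.log((grp["events"] / total_events) / (grp["non_events"] / total_non_events))
grp["woe_se"] = np.sqrt(1 / grp["events"] + 1 / grp["non_events"])
grp["z"] = grp["woe"] / grp["woe_se"]  # |z| > 1.96 -> significant at the 95% level
print(grp[["woe", "woe_se", "z"]])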

examples/fastwoe_explanation.ipynb

Lines changed: 1 addition & 3 deletions
@@ -902,9 +902,7 @@
 "explanation = explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test)\n",
 "\n",
 "# With custom confidence level\n",
-"explanation = explainer.explain_ci(\n",
-"    X_test, sample_idx=idx, true_labels=y_test, alpha=0.05\n",
-") # 99% CI\n",
+"explanation = explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test, alpha=0.05) # 99% CI\n",
 "\n",
 "# Pretty print format\n",
 "explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test, return_dict=False)"

examples/fastwoe_faiss_kmeans.py

Lines changed: 3 additions & 9 deletions
@@ -72,9 +72,7 @@ def demonstrate_faiss_kmeans():
     print(f"Target distribution: {y.value_counts().to_dict()}")
 
     # Split data
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     print(f"\nTraining data shape: {X_train.shape}")
     print(f"Test data shape: {X_test.shape}")

@@ -126,9 +124,7 @@ def compare_with_kbins():
 
     # Create sample data
     X, y = create_sample_data(n_samples=2000, n_features=4)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     woe_kbins = FastWoe(
         binning_method="kbins",

@@ -165,9 +161,7 @@ def compare_performance():
 
     # Create sample data
     X, y = create_sample_data(n_samples=5000, n_features=6)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     methods = {
         "FAISS KMeans": FastWoe(

examples/fastwoe_monotonic.py

Lines changed: 6 additions & 22 deletions
@@ -23,9 +23,7 @@ def create_credit_scoring_data(n_samples=2000, random_state=42):
     np.random.seed(random_state)
 
     # Generate base features
-    income = np.random.lognormal(
-        mean=10, sigma=0.5, size=n_samples
-    )  # Income in thousands
+    income = np.random.lognormal(mean=10, sigma=0.5, size=n_samples)  # Income in thousands
     age = np.random.normal(35, 12, n_samples)
     age = np.clip(age, 18, 80)  # Reasonable age range
     credit_score = np.random.normal(650, 100, n_samples)

@@ -116,9 +114,7 @@ def compare_binning_methods(X, y):
         print("-" * len(method_name))
 
         try:
-            woe = FastWoe(
-                monotonic_cst=monotonic_cst, numerical_threshold=10, **method_config
-            )
+            woe = FastWoe(monotonic_cst=monotonic_cst, numerical_threshold=10, **method_config)
 
             woe.fit(X, y)
 

@@ -143,9 +139,7 @@ def compare_binning_methods(X, y):
             print("📋 Constraints applied:")
             for _, row in summary.iterrows():
                 constraint_map = {-1: "Decreasing", 1: "Increasing", 0: "None"}
-                print(
-                    f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}"
-                )
+                print(f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}")
 
         except ImportError as e:
             if "faiss" not in str(e).lower():

@@ -223,9 +217,7 @@ def compare_kbins_strategies(X, y):
             print("📋 Constraints applied:")
             for _, row in summary.iterrows():
                 constraint_map = {-1: "Decreasing", 1: "Increasing", 0: "None"}
-                print(
-                    f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}"
-                )
+                print(f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}")
 
         except (ValueError, RuntimeError, AttributeError) as e:
             print(f"❌ Error with {strategy_name}: {e}")

@@ -414,16 +406,8 @@ def print_detailed_woe_analysis(results, X, y):
         sorted_woe = np.array(woe_values)[sorted_indices]
         sorted_rates = np.array(event_rates)[sorted_indices]
 
-        for center, woe_val, rate in zip(
-            sorted_centers, sorted_woe, sorted_rates
-        ):
-            print(
-                f"{center:8.1f}"
-                + " " * 20
-                + f"{woe_val:8.3f}"
-                + " " * 8
-                + f"{rate:.3f}"
-            )
+        for center, woe_val, rate in zip(sorted_centers, sorted_woe, sorted_rates):
+            print(f"{center:8.1f}" + " " * 20 + f"{woe_val:8.3f}" + " " * 8 + f"{rate:.3f}")
 
 
 def main():
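The constraint map printed in these hunks ({-1: Decreasing, 1: Increasing, 0: None}) mirrors the monotonic_cst convention. One standard way to enforce such a constraint on bin-level WOE values is isotonic regression weighted by bin counts; the sketch below is a generic illustration with made-up numbers, not FastWoe's internal procedure.

# Generic sketch: project noisy bin-level WOE estimates onto a monotone
# sequence with isotonic regression (weights = bin counts). Illustration only.
import numpy as np
from sklearn.isotonic import IsotonicRegression

bin_centers = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
raw_woe = np.array([-0.9, -0.4, -0.5, 0.3, 0.8])   # violates monotonicity at bin 3
counts = np.array([120, 340, 410, 290, 140])

iso = IsotonicRegression(increasing=True)            # +1 constraint; use False for -1
monotone_woe = iso.fit_transform(bin_centers, raw_woe, sample_weight=counts)
print(monotone_woe)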

examples/fastwoe_multiclass.py

Lines changed: 3 additions & 11 deletions
@@ -133,11 +133,7 @@ def main():  # sourcery skip: extract-duplicate-method
 
     print("\n5. Model performance:")
     print("\nClassification Report:")
-    print(
-        classification_report(
-            y_test, y_pred, target_names=[f"Class {i}" for i in range(3)]
-        )
-    )
+    print(classification_report(y_test, y_pred, target_names=[f"Class {i}" for i in range(3)]))
 
     print("\nConfusion Matrix:")
     print(confusion_matrix(y_test, y_pred))

@@ -151,9 +147,7 @@ def main():  # sourcery skip: extract-duplicate-method
     print(feature_importance.head(10).to_string(index=False))
 
     # Show WOE mappings for most important feature
-    print(
-        f"\n7. WOE mapping for most important feature: {feature_importance.iloc[0]['feature']}"
-    )
+    print(f"\n7. WOE mapping for most important feature: {feature_importance.iloc[0]['feature']}")
     most_important_feature = feature_importance.iloc[0]["feature"]
 
     # Extract original feature name and class

@@ -189,9 +183,7 @@ def main():  # sourcery skip: extract-duplicate-method
     # High-confidence predictions for Class 2
     class_2_ci = woe_encoder.predict_ci_class(X_test, class_label=2)
     high_confidence_mask = class_2_ci[:, 0] > 0.3  # Lower bound > 0.3
-    print(
-        f"Samples with high confidence of being Class 2: {high_confidence_mask.sum()}"
-    )
+    print(f"Samples with high confidence of being Class 2: {high_confidence_mask.sum()}")
 
     # Uncertain predictions (wide CI)
     ci_widths = class_0_ci_method[:, 1] - class_0_ci_method[:, 0]
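The per-class calls here (predict_ci_class(..., class_label=2), per-class feature names) suggest a one-vs-rest treatment: each class gets its own binary target and its own encodings. Below is a generic sketch of that reduction, not FastWoe's internal code.

# Generic one-vs-rest reduction: one binary target per class, each of which
# could then be WOE-encoded and modelled on its own. Illustration only.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
y = pd.Series(rng.choice([0, 1, 2], size=1000, p=[0.5, 0.3, 0.2]))

binary_targets = {c: (y == c).astype(int) for c in sorted(y.unique())}
for c, y_bin in binary_targets.items():
    print(f"class {c}: positives = {y_bin.sum()}, prevalence = {y_bin.mean():.3f}")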

examples/fastwoe_styled_display.ipynb

Lines changed: 1 addition & 3 deletions
@@ -1335,9 +1335,7 @@
 ")\n",
 "def show_comparison():\n",
 "    results = []\n",
-"    results.extend(\n",
-"        {\"Model\": name, \"Test Gini\": gini} for name, gini in zip(model_names, ginis)\n",
-"    )\n",
+"    results.extend({\"Model\": name, \"Test Gini\": gini} for name, gini in zip(model_names, ginis))\n",
 "    return pd.DataFrame(results)\n",
 "\n",
 "\n",

examples/fastwoe_tree.py

Lines changed: 3 additions & 11 deletions
@@ -36,9 +36,7 @@ def create_sample_data(n_samples=1000, random_state=42):
     # Create categorical feature
     X_cat = np.random.choice(["A", "B", "C", "D"], n_samples, p=[0.4, 0.3, 0.2, 0.1])
 
-    return pd.DataFrame(
-        {"numerical_feature": X_num, "categorical_feature": X_cat, "target": y}
-    )
+    return pd.DataFrame({"numerical_feature": X_num, "categorical_feature": X_cat, "target": y})
 
 
 def compare_binning_methods(data):  # sourcery skip: extract-duplicate-method

@@ -61,9 +59,7 @@ def compare_binning_methods(data):  # sourcery skip: extract-duplicate-method
         warn_on_numerical=False,
     )
 
-    fw_traditional.fit(
-        data[["numerical_feature", "categorical_feature"]], data["target"]
-    )
+    fw_traditional.fit(data[["numerical_feature", "categorical_feature"]], data["target"])
 
     # Get binning summary
     summary_traditional = fw_traditional.get_binning_summary()

@@ -113,11 +109,7 @@ def analyze_woe_mappings(fw_traditional, fw_tree):
     mapping_tree = fw_tree.get_mapping("numerical_feature")
 
     print("\nTraditional Binning WOE Mapping:")
-    print(
-        mapping_traditional[["category", "count", "event_rate", "woe", "woe_se"]].round(
-            4
-        )
-    )
+    print(mapping_traditional[["category", "count", "event_rate", "woe", "woe_se"]].round(4))
 
     print("\nTree Binning WOE Mapping:")
     print(mapping_tree[["category", "count", "event_rate", "woe", "woe_se"]].round(4))
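These hunks are line-wrapping only, but the file contrasts traditional binning with tree-based binning. The core idea of the latter, sketched generically below (FastWoe may choose its splits differently): fit a shallow decision tree on a single feature against the target and reuse its split thresholds as bin edges.

# Generic tree-binning sketch: a shallow decision tree's thresholds become bin
# edges for one numeric feature. Not FastWoe's implementation.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(42)
x = rng.normal(size=2000).reshape(-1, 1)
y = (x.ravel() + rng.normal(scale=0.5, size=2000) > 0.3).astype(int)

tree = DecisionTreeClassifier(max_leaf_nodes=4, min_samples_leaf=100).fit(x, y)
edges = np.sort(tree.tree_.threshold[tree.tree_.threshold != -2])  # -2 marks leaves
print(edges)                                   # learned cut points

bin_ids = np.digitize(x.ravel(), edges)        # assign each value to a bin
print(np.bincount(bin_ids))                    # observations per bin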

examples/fastwoe_visualize_woe.ipynb

Lines changed: 40 additions & 40 deletions
Large diffs are not rendered by default.
