
Commit ef1ba78

add gini shapley values
1 parent 6ff86c2 commit ef1ba78

Showing 24 changed files with 1,484 additions and 1,999 deletions.
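Note on the commit title: it refers to Gini-based Shapley values. For orientation only, here is a minimal, self-contained sketch of one way such an attribution can be computed: exact Shapley values over feature subsets, with test-set Gini (2 * AUC - 1) as the value function. The helper names and the choice of value function are assumptions for illustration, not FastWoe's implementation.

# Hypothetical sketch: exact Shapley attribution with Gini as the value
# function. Not FastWoe's code; retraining per subset is only feasible for a
# handful of features.
from itertools import combinations
from math import factorial

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split


def gini_of_subset(cols, X_tr, X_te, y_tr, y_te):
    """Value function v(S): test-set Gini of a model trained on feature subset S."""
    if not cols:
        return 0.0
    model = LogisticRegression(max_iter=1000).fit(X_tr[:, cols], y_tr)
    auc = roc_auc_score(y_te, model.predict_proba(X_te[:, cols])[:, 1])
    return 2 * auc - 1  # Gini = 2 * AUC - 1


def gini_shapley(X_tr, X_te, y_tr, y_te):
    """phi_i = sum over S of |S|! (n - |S| - 1)! / n! * (v(S + {i}) - v(S))."""
    n = X_tr.shape[1]
    phi = np.zeros(n)
    for i in range(n):
        rest = [j for j in range(n) if j != i]
        for k in range(len(rest) + 1):
            for S in combinations(rest, k):
                weight = factorial(k) * factorial(n - k - 1) / factorial(n)
                gain = (gini_of_subset(list(S) + [i], X_tr, X_te, y_tr, y_te)
                        - gini_of_subset(list(S), X_tr, X_te, y_tr, y_te))
                phi[i] += weight * gain
    return phi


X, y = make_classification(n_samples=2000, n_features=4, random_state=42)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=42)
print(gini_shapley(X_tr, X_te, y_tr, y_te))  # one contribution per feature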

examples/fastwoe_cap_curve.ipynb

Lines changed: 29 additions & 232 deletions
Large diffs are not rendered by default.

examples/fastwoe_example.py

Lines changed: 4 additions & 12 deletions
@@ -485,13 +485,9 @@ def visualize_woe_analysis(df, woe_encoder):
     # Plot 4: WOE vs Event Rate correlation
     ax4 = axes[1, 0]
     all_woe = np.concatenate([region_mapping["woe"], job_mapping["woe"]])
-    all_rates = np.concatenate(
-        [region_mapping["event_rate"], job_mapping["event_rate"]]
-    )
+    all_rates = np.concatenate([region_mapping["event_rate"], job_mapping["event_rate"]])
 
-    ax4.scatter(
-        all_rates, all_woe, alpha=0.7, s=100, color="purple", edgecolors="black"
-    )
+    ax4.scatter(all_rates, all_woe, alpha=0.7, s=100, color="purple", edgecolors="black")
     ax4.set_xlabel("Event Rate")
     ax4.set_ylabel("WOE Value")
     ax4.set_title("WOE vs Event Rate", fontweight="bold", fontsize=12)

@@ -522,9 +518,7 @@ def visualize_woe_analysis(df, woe_encoder):
     theoretical_x = np.linspace(min(all_counts), max(all_counts), 100)
     # Approximate theoretical SE (assuming balanced good/bad split)
     theoretical_y = np.sqrt(2 / theoretical_x)
-    ax5.plot(
-        theoretical_x, theoretical_y, "r--", alpha=0.7, label="Theoretical (balanced)"
-    )
+    ax5.plot(theoretical_x, theoretical_y, "r--", alpha=0.7, label="Theoretical (balanced)")
     ax5.legend()
 
     # Plot 6: Z-scores (statistical significance)

@@ -543,9 +537,7 @@ def visualize_woe_analysis(df, woe_encoder):
         alpha=0.7,
         edgecolor="black",
     )
-    ax6.axhline(
-        y=1.96, color="red", linestyle="--", alpha=0.7, label="95% significance"
-    )
+    ax6.axhline(y=1.96, color="red", linestyle="--", alpha=0.7, label="95% significance")
     ax6.axhline(y=-1.96, color="red", linestyle="--", alpha=0.7)
     ax6.set_title("Statistical Significance (Z-scores)", fontweight="bold", fontsize=12)
     ax6.set_ylabel("Z-score")
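Side note on plots 5 and 6 above: the quantities drawn are per-category WOE values, their standard errors, and the resulting z-scores. A minimal sketch of those quantities follows, assuming the usual delta-method approximation SE(WOE) ~ sqrt(1/n_events + 1/n_non_events); FastWoe's own computation may differ.

# Hedged sketch: WOE, an approximate standard error, and z-scores for one
# categorical feature. The SE formula is the common approximation, not
# necessarily what FastWoe uses internally.
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
df = pd.DataFrame({
    "region": rng.choice(["North", "South", "East", "West"], size=5000),
    "target": rng.binomial(1, 0.2, size=5000),
})

grp = df.groupby("region")["target"].agg(events="sum", count="size")
grp["non_events"] = grp["count"] - grp["events"]
total_events, total_non_events = grp["events"].sum(), grp["non_events"].sum()

grp["woe"] = np.log((grp["events"] / total_events) / (grp["non_events"] / total_non_events))
grp["woe_se"] = np.sqrt(1 / grp["events"] + 1 / grp["non_events"])
grp["z"] = grp["woe"] / grp["woe_se"]  # |z| > 1.96 -> significant at the 95% level
print(grp[["woe", "woe_se", "z"]])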

examples/fastwoe_explanation.ipynb

Lines changed: 1 addition & 3 deletions
@@ -902,9 +902,7 @@
 "explanation = explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test)\n",
 "\n",
 "# With custom confidence level\n",
-"explanation = explainer.explain_ci(\n",
-"    X_test, sample_idx=idx, true_labels=y_test, alpha=0.05\n",
-") # 99% CI\n",
+"explanation = explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test, alpha=0.05) # 99% CI\n",
 "\n",
 "# Pretty print format\n",
 "explainer.explain_ci(X_test, sample_idx=idx, true_labels=y_test, return_dict=False)"

examples/fastwoe_faiss_kmeans.py

Lines changed: 3 additions & 9 deletions
@@ -72,9 +72,7 @@ def demonstrate_faiss_kmeans():
     print(f"Target distribution: {y.value_counts().to_dict()}")
 
     # Split data
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     print(f"\nTraining data shape: {X_train.shape}")
     print(f"Test data shape: {X_test.shape}")

@@ -126,9 +124,7 @@ def compare_with_kbins():
 
     # Create sample data
     X, y = create_sample_data(n_samples=2000, n_features=4)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     woe_kbins = FastWoe(
         binning_method="kbins",

@@ -165,9 +161,7 @@ def compare_performance():
 
     # Create sample data
     X, y = create_sample_data(n_samples=5000, n_features=6)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.3, random_state=42
-    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
     methods = {
         "FAISS KMeans": FastWoe(

examples/fastwoe_monotonic.py

Lines changed: 6 additions & 22 deletions
@@ -23,9 +23,7 @@ def create_credit_scoring_data(n_samples=2000, random_state=42):
     np.random.seed(random_state)
 
     # Generate base features
-    income = np.random.lognormal(
-        mean=10, sigma=0.5, size=n_samples
-    )  # Income in thousands
+    income = np.random.lognormal(mean=10, sigma=0.5, size=n_samples)  # Income in thousands
     age = np.random.normal(35, 12, n_samples)
     age = np.clip(age, 18, 80)  # Reasonable age range
     credit_score = np.random.normal(650, 100, n_samples)

@@ -116,9 +114,7 @@ def compare_binning_methods(X, y):
         print("-" * len(method_name))
 
         try:
-            woe = FastWoe(
-                monotonic_cst=monotonic_cst, numerical_threshold=10, **method_config
-            )
+            woe = FastWoe(monotonic_cst=monotonic_cst, numerical_threshold=10, **method_config)
 
             woe.fit(X, y)
 

@@ -143,9 +139,7 @@ def compare_binning_methods(X, y):
             print("📋 Constraints applied:")
             for _, row in summary.iterrows():
                 constraint_map = {-1: "Decreasing", 1: "Increasing", 0: "None"}
-                print(
-                    f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}"
-                )
+                print(f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}")
 
         except ImportError as e:
             if "faiss" not in str(e).lower():

@@ -223,9 +217,7 @@ def compare_kbins_strategies(X, y):
             print("📋 Constraints applied:")
             for _, row in summary.iterrows():
                 constraint_map = {-1: "Decreasing", 1: "Increasing", 0: "None"}
-                print(
-                    f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}"
-                )
+                print(f"  {row['feature']}: {constraint_map[row['monotonic_constraint']]}")
 
         except (ValueError, RuntimeError, AttributeError) as e:
             print(f"❌ Error with {strategy_name}: {e}")

@@ -414,16 +406,8 @@ def print_detailed_woe_analysis(results, X, y):
         sorted_woe = np.array(woe_values)[sorted_indices]
         sorted_rates = np.array(event_rates)[sorted_indices]
 
-        for center, woe_val, rate in zip(
-            sorted_centers, sorted_woe, sorted_rates
-        ):
-            print(
-                f"{center:8.1f}"
-                + " " * 20
-                + f"{woe_val:8.3f}"
-                + " " * 8
-                + f"{rate:.3f}"
-            )
+        for center, woe_val, rate in zip(sorted_centers, sorted_woe, sorted_rates):
+            print(f"{center:8.1f}" + " " * 20 + f"{woe_val:8.3f}" + " " * 8 + f"{rate:.3f}")
 
 
 def main():
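The constraint map printed in these hunks ({-1: Decreasing, 1: Increasing, 0: None}) mirrors the monotonic_cst convention. One standard way to enforce such a constraint on bin-level WOE values is isotonic regression weighted by bin counts; the sketch below is a generic illustration with made-up numbers, not FastWoe's internal procedure.

# Generic sketch: project noisy bin-level WOE estimates onto a monotone
# sequence with isotonic regression (weights = bin counts). Illustration only.
import numpy as np
from sklearn.isotonic import IsotonicRegression

bin_centers = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
raw_woe = np.array([-0.9, -0.4, -0.5, 0.3, 0.8])   # violates monotonicity at bin 3
counts = np.array([120, 340, 410, 290, 140])

iso = IsotonicRegression(increasing=True)            # +1 constraint; use False for -1
monotone_woe = iso.fit_transform(bin_centers, raw_woe, sample_weight=counts)
print(monotone_woe)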

examples/fastwoe_multiclass.py

Lines changed: 3 additions & 11 deletions
@@ -133,11 +133,7 @@ def main():  # sourcery skip: extract-duplicate-method
 
     print("\n5. Model performance:")
     print("\nClassification Report:")
-    print(
-        classification_report(
-            y_test, y_pred, target_names=[f"Class {i}" for i in range(3)]
-        )
-    )
+    print(classification_report(y_test, y_pred, target_names=[f"Class {i}" for i in range(3)]))
 
     print("\nConfusion Matrix:")
     print(confusion_matrix(y_test, y_pred))

@@ -151,9 +147,7 @@ def main():  # sourcery skip: extract-duplicate-method
     print(feature_importance.head(10).to_string(index=False))
 
     # Show WOE mappings for most important feature
-    print(
-        f"\n7. WOE mapping for most important feature: {feature_importance.iloc[0]['feature']}"
-    )
+    print(f"\n7. WOE mapping for most important feature: {feature_importance.iloc[0]['feature']}")
     most_important_feature = feature_importance.iloc[0]["feature"]
 
     # Extract original feature name and class

@@ -189,9 +183,7 @@ def main():  # sourcery skip: extract-duplicate-method
     # High-confidence predictions for Class 2
     class_2_ci = woe_encoder.predict_ci_class(X_test, class_label=2)
     high_confidence_mask = class_2_ci[:, 0] > 0.3  # Lower bound > 0.3
-    print(
-        f"Samples with high confidence of being Class 2: {high_confidence_mask.sum()}"
-    )
+    print(f"Samples with high confidence of being Class 2: {high_confidence_mask.sum()}")
 
     # Uncertain predictions (wide CI)
     ci_widths = class_0_ci_method[:, 1] - class_0_ci_method[:, 0]
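The per-class calls here (predict_ci_class(..., class_label=2), per-class feature names) suggest a one-vs-rest treatment: each class gets its own binary target and its own encodings. Below is a generic sketch of that reduction, not FastWoe's internal code.

# Generic one-vs-rest reduction: one binary target per class, each of which
# could then be WOE-encoded and modelled on its own. Illustration only.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
y = pd.Series(rng.choice([0, 1, 2], size=1000, p=[0.5, 0.3, 0.2]))

binary_targets = {c: (y == c).astype(int) for c in sorted(y.unique())}
for c, y_bin in binary_targets.items():
    print(f"class {c}: positives = {y_bin.sum()}, prevalence = {y_bin.mean():.3f}")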

examples/fastwoe_styled_display.ipynb

Lines changed: 1 addition & 3 deletions
@@ -1335,9 +1335,7 @@
 ")\n",
 "def show_comparison():\n",
 "    results = []\n",
-"    results.extend(\n",
-"        {\"Model\": name, \"Test Gini\": gini} for name, gini in zip(model_names, ginis)\n",
-"    )\n",
+"    results.extend({\"Model\": name, \"Test Gini\": gini} for name, gini in zip(model_names, ginis))\n",
 "    return pd.DataFrame(results)\n",
 "\n",
 "\n",

examples/fastwoe_tree.py

Lines changed: 3 additions & 11 deletions
@@ -36,9 +36,7 @@ def create_sample_data(n_samples=1000, random_state=42):
     # Create categorical feature
     X_cat = np.random.choice(["A", "B", "C", "D"], n_samples, p=[0.4, 0.3, 0.2, 0.1])
 
-    return pd.DataFrame(
-        {"numerical_feature": X_num, "categorical_feature": X_cat, "target": y}
-    )
+    return pd.DataFrame({"numerical_feature": X_num, "categorical_feature": X_cat, "target": y})
 
 
 def compare_binning_methods(data):  # sourcery skip: extract-duplicate-method

@@ -61,9 +59,7 @@ def compare_binning_methods(data):  # sourcery skip: extract-duplicate-method
         warn_on_numerical=False,
     )
 
-    fw_traditional.fit(
-        data[["numerical_feature", "categorical_feature"]], data["target"]
-    )
+    fw_traditional.fit(data[["numerical_feature", "categorical_feature"]], data["target"])
 
     # Get binning summary
     summary_traditional = fw_traditional.get_binning_summary()

@@ -113,11 +109,7 @@ def analyze_woe_mappings(fw_traditional, fw_tree):
     mapping_tree = fw_tree.get_mapping("numerical_feature")
 
     print("\nTraditional Binning WOE Mapping:")
-    print(
-        mapping_traditional[["category", "count", "event_rate", "woe", "woe_se"]].round(
-            4
-        )
-    )
+    print(mapping_traditional[["category", "count", "event_rate", "woe", "woe_se"]].round(4))
 
     print("\nTree Binning WOE Mapping:")
     print(mapping_tree[["category", "count", "event_rate", "woe", "woe_se"]].round(4))
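These hunks are line-wrapping only, but the file contrasts traditional binning with tree-based binning. The core idea of the latter, sketched generically below (FastWoe may choose its splits differently): fit a shallow decision tree on a single feature against the target and reuse its split thresholds as bin edges.

# Generic tree-binning sketch: a shallow decision tree's thresholds become bin
# edges for one numeric feature. Not FastWoe's implementation.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(42)
x = rng.normal(size=2000).reshape(-1, 1)
y = (x.ravel() + rng.normal(scale=0.5, size=2000) > 0.3).astype(int)

tree = DecisionTreeClassifier(max_leaf_nodes=4, min_samples_leaf=100).fit(x, y)
edges = np.sort(tree.tree_.threshold[tree.tree_.threshold != -2])  # -2 marks leaves
print(edges)                                   # learned cut points

bin_ids = np.digitize(x.ravel(), edges)        # assign each value to a bin
print(np.bincount(bin_ids))                    # observations per bin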

examples/fastwoe_visualize_woe.ipynb

Lines changed: 40 additions & 40 deletions
Large diffs are not rendered by default.
