Skip to content

Commit bc82da4

Browse files
committed
Additional code and documentation formatting.
1 parent 6a0a25c commit bc82da4

19 files changed

Lines changed: 262 additions & 40 deletions

athena/active.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ActiveSubspaces(Subspaces):
3131
Hristache, et al.
3232
:param int n_boot: number of bootstrap samples. Default is 100.
3333
"""
34+
3435
def __init__(self, dim, method='exact', n_boot=100):
3536
super().__init__(dim, method, n_boot)
3637

@@ -313,13 +314,13 @@ def _hit_and_run_inactive(self, reduced_input, n_points):
313314
f, g = b - np.dot(A, z0), np.dot(A, d)
314315

315316
# find an upper bound on the step
316-
min_ind = np.logical_and(g <= 0,
317-
f < -np.sqrt(np.finfo(np.float64).eps))
317+
min_ind = np.logical_and(g <= 0, f
318+
< -np.sqrt(np.finfo(np.float64).eps))
318319
eps_max = np.amin(f[min_ind] / g[min_ind])
319320

320321
# find a lower bound on the step
321-
max_ind = np.logical_and(g > 0,
322-
f < -np.sqrt(np.finfo(np.float64).eps))
322+
max_ind = np.logical_and(g > 0, f
323+
< -np.sqrt(np.finfo(np.float64).eps))
323324
eps_min = np.amax(f[max_ind] / g[max_ind])
324325

325326
# randomly sample eps

athena/compatibility.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
"""
Compatibility layer for handling different package versions.

This module provides uniform interfaces for functionality that might
depend on specific versions of packages or alternative implementations.
"""
import warnings

import numpy as np
from packaging import version

# Flag telling whether scikit-learn-extra's KMedoids is importable.
# Whether it is actually *used* also depends on the NumPy version
# (see the export logic at the bottom of this module).
SKLEARN_EXTRA_AVAILABLE = False
try:
    from sklearn_extra.cluster import KMedoids as SklearnExtraKMedoids
    SKLEARN_EXTRA_AVAILABLE = True

    # Warn eagerly under NumPy >= 2.0: scikit-learn-extra may be
    # incompatible with it, and this hint is easier to act on than a
    # later low-level import/ABI error.
    if version.parse(np.__version__) >= version.parse('2.0.0'):
        warnings.warn(
            "You are using NumPy >= 2.0.0 with scikit-learn-extra which may "
            "cause compatibility issues. If you encounter errors, consider "
            "using the built-in KMedoids implementation in ATHENA.")
except ImportError:
    # Keep the name defined so the export logic below can reference it.
    SklearnExtraKMedoids = None
26+
27+
28+
# Implementation based on scikit-learn's KMeans but adapted for KMedoids
class KMedoids:
    """
    K-Medoids clustering.

    A custom implementation that doesn't rely on scikit-learn-extra, thus
    ensuring compatibility with NumPy 2.0+.

    Parameters
    ----------
    n_clusters : int, default=8
        The number of clusters to form as well as the number of medoids to generate.

    init : {'k-medoids++', 'random'} or array of shape (n_clusters, n_features), default='k-medoids++'
        Method for initialization.

    max_iter : int, default=300
        Maximum number of iterations of the k-medoids algorithm for a single run.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for centroid initialization.

    Attributes
    ----------
    cluster_centers_ : numpy.ndarray of shape (n_clusters, n_features)
        Medoids found by :meth:`fit`; each row is an actual data point.
    labels_ : numpy.ndarray of shape (n_samples,)
        Medoid index assigned to each training sample; always consistent
        with :meth:`predict` on the training data.
    inertia_ : float
        Sum of squared distances of samples to their closest medoid.
    n_iter_ : int
        Number of iterations run by :meth:`fit`.
    """

    def __init__(self,
                 n_clusters=8,
                 init='k-medoids++',
                 max_iter=300,
                 random_state=None):
        self.n_clusters = n_clusters
        self.init = init
        self.max_iter = max_iter
        self.random_state = random_state
        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0

    def _init_medoids(self, X):
        """Initialize the medoids according to ``self.init``."""
        rng = np.random.RandomState(self.random_state)
        n_samples = X.shape[0]

        if isinstance(self.init, str) and self.init == 'random':
            # Random selection without replacement
            indices = rng.permutation(n_samples)[:self.n_clusters]
            self.cluster_centers_ = X[indices].copy()
        elif isinstance(self.init, str) and self.init == 'k-medoids++':
            # Implementation of k-medoids++ initialization:
            # choose the first medoid randomly
            indices = np.zeros(self.n_clusters, dtype=int)
            indices[0] = rng.randint(n_samples)

            # Squared distances to the closest medoid chosen so far
            distances = np.sum((X - X[indices[0]])**2, axis=1)

            # Choose remaining medoids with probability proportional to
            # squared distance from the nearest already-chosen medoid
            for i in range(1, self.n_clusters):
                total = np.sum(distances)
                if total > 0:
                    indices[i] = rng.choice(n_samples, p=distances / total)
                else:
                    # All remaining points coincide with chosen medoids
                    # (duplicates); fall back to a uniform draw instead of
                    # producing NaN probabilities.
                    indices[i] = rng.randint(n_samples)

                new_dist = np.sum((X - X[indices[i]])**2, axis=1)
                distances = np.minimum(distances, new_dist)

            self.cluster_centers_ = X[indices].copy()
        else:
            # Use provided initial medoids
            self.cluster_centers_ = np.asarray(self.init, dtype=X.dtype)

    def _assign(self, X):
        """Return (labels, distances) of X w.r.t. the current medoids.

        ``distances`` has shape (n_samples, n_clusters) with squared
        Euclidean distances; ``labels`` is the argmin over clusters.
        """
        distances = np.zeros((X.shape[0], self.n_clusters))
        for j in range(self.n_clusters):
            distances[:, j] = np.sum((X - self.cluster_centers_[j])**2,
                                     axis=1)
        return np.argmin(distances, axis=1), distances

    def fit(self, X):
        """Compute k-medoids clustering and return ``self``.

        Guarantees that after fitting, ``self.labels_`` matches
        ``self.predict(X)`` (labels are always recomputed against the
        medoids that are actually stored).
        """
        X = np.asarray(X)
        self._init_medoids(X)

        best_labels = None
        best_inertia = float('inf')
        best_centers = self.cluster_centers_.copy()
        n_iter = 0  # stays 0 if max_iter == 0

        for n_iter in range(1, self.max_iter + 1):
            # Assign each point to its closest medoid
            labels, _ = self._assign(X)

            old_centers = self.cluster_centers_.copy()

            # For each cluster, update the medoid to the member point that
            # minimizes the within-cluster sum of squared distances
            for j in range(self.n_clusters):
                cluster_points = X[labels == j]
                if len(cluster_points) > 0:
                    inertias = np.array([
                        np.sum((cluster_points - point)**2)
                        for point in cluster_points
                    ])
                    min_idx = np.argmin(inertias)
                    self.cluster_centers_[j] = cluster_points[min_idx].copy()

            # Re-assign against the *updated* medoids so the stored labels
            # and centers are consistent with each other
            labels, distances = self._assign(X)
            inertia = np.sum(distances[np.arange(X.shape[0]), labels])

            # Store best result seen so far
            if inertia < best_inertia:
                best_inertia = inertia
                best_labels = labels
                best_centers = self.cluster_centers_.copy()

            # Check for convergence: medoids stopped moving
            center_shift = np.sum(
                np.sqrt(np.sum((old_centers - self.cluster_centers_)**2,
                               axis=1)))
            if center_shift < 1e-4:
                break

        self.cluster_centers_ = best_centers
        if best_labels is None:
            # max_iter == 0: label against the initial medoids instead of
            # leaving labels_/inertia_ unset
            best_labels, distances = self._assign(X)
            best_inertia = np.sum(
                distances[np.arange(X.shape[0]), best_labels])
        self.labels_ = best_labels
        self.inertia_ = best_inertia
        self.n_iter_ = n_iter

        return self

    def predict(self, X):
        """Predict the closest cluster for each sample in X."""
        labels, _ = self._assign(np.asarray(X))
        return labels
170+
171+
172+
# Export the appropriate KMedoids implementation for this environment.
_use_sklearn_extra = (SKLEARN_EXTRA_AVAILABLE
                      and version.parse(np.__version__)
                      < version.parse('2.0.0'))
if _use_sklearn_extra:
    # sklearn-extra is installed and NumPy is pre-2.0: prefer its class.
    KMedoids = SklearnExtraKMedoids
# Otherwise the pure-NumPy KMedoids defined above (compatible with
# NumPy 2.0+) remains the exported implementation.

athena/feature_map.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class FeatureMap():
3939
4040
:raises TypeError
4141
"""
42+
4243
def __init__(self, distr, bias, input_dim, n_features, params, sigma_f):
4344
if callable(distr):
4445
self.distr = distr
@@ -202,9 +203,11 @@ def tune_pr_matrix(self,
202203
# Reformat bounds for BayesianOptimization package format
203204
# BayesianOptimization uses a dictionary of parameter names and their range tuples
204205
# Unlike GPyOpt which used a list of dictionaries with 'name', 'type', and 'domain' keys
205-
bounds_dict = {f'var_{i}': (bound.start, bound.stop)
206-
for i, bound in enumerate(bounds)}
207-
206+
bounds_dict = {
207+
f'var_{i}': (bound.start, bound.stop)
208+
for i, bound in enumerate(bounds)
209+
}
210+
208211
# Create wrapper for the objective function to handle the format difference
209212
# BayesianOptimization passes parameters as keyword arguments, not as an array
210213
def bayes_wrapper(**kwargs):
@@ -213,25 +216,24 @@ def bayes_wrapper(**kwargs):
213216
# BayesianOptimization maximizes functions by default, but we want to minimize
214217
# So we negate the score (lower scores are better in our original function)
215218
return -func(x, best, **fn_args)
216-
219+
217220
# Initialize optimizer with our wrapper function and parameter bounds
218221
optimizer = BayesianOptimization(
219222
f=bayes_wrapper,
220223
pbounds=bounds_dict,
221224
random_state=42 # For reproducible results
222225
)
223-
226+
224227
# Run optimization
225228
# init_points: how many steps of random exploration to perform
226229
# n_iter: how many steps of bayesian optimization to perform
227-
optimizer.maximize(
228-
init_points=2,
229-
n_iter=maxiter
230-
)
231-
230+
optimizer.maximize(init_points=2, n_iter=maxiter)
231+
232232
# Extract the best parameters found and transform back
233233
# optimizer.max contains the best score and parameters found
234-
best_params = [optimizer.max['params'][f'var_{i}'] for i in range(len(bounds))]
234+
best_params = [
235+
optimizer.max['params'][f'var_{i}'] for i in range(len(bounds))
236+
]
235237
# Apply 10^ transformation as done in the original implementation
236238
self.params = 10**np.array(best_params)
237239
else:

athena/kas.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class KernelActiveSubspaces(Subspaces):
5555
:cvar numpy.ndarray metric: metric matrix for vectorial active
5656
subspaces.
5757
"""
58+
5859
def __init__(self,
5960
dim,
6061
feature_map=None,

athena/local.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class MaximumASDimensionReached(Exception):
3636
class ClusterBase():
3737
"""Local Active Subspaces clustering Base class.
3838
"""
39+
3940
def __init__(self):
4041

4142
self.inputs = None
@@ -210,6 +211,7 @@ def plot_clusters(self, save=False, title='2d_clusters', plot=True):
210211

211212
class KMeansAS(ClusterBase):
212213
"""Clustering with k-means"""
214+
213215
def __init__(self):
214216
super().__init__()
215217
self.centers = None
@@ -231,6 +233,7 @@ def _fit_clustering(self):
231233

232234
class KMedoidsAS(ClusterBase):
233235
"""Clustering with k-medoids"""
236+
234237
def __init__(self):
235238
super().__init__()
236239
self.centers = None
@@ -257,6 +260,7 @@ def as_metric(self, X, Y):
257260

258261

259262
class TopDownHierarchicalAS(ClusterBase):
263+
260264
def __init__(self):
261265
"""TODO check states logic.
262266
1. 2 and 4 are exclusives
@@ -402,7 +406,9 @@ def _fit_clustering(self, print_states=False, plot=False):
402406

403407
def refine_one_step(self):
404408
"""Increase the dimension of the Active Subspace once, when possible."""
409+
405410
class LeafUpdate(object):
411+
406412
def __init__(self):
407413
self.score = 0
408414
self.leaves_list = []
@@ -437,6 +443,7 @@ def refine_further(self, minimum_score, plot=False):
437443
print("Start refining: increasing the as dimension when possible.")
438444

439445
class CallRefine(object):
446+
440447
def __init__(self, minimum_score):
441448
self.min = minimum_score
442449

@@ -474,7 +481,9 @@ def _print_state_debug(self):
474481

475482
def _print_leaves_score(self):
476483
"""Print the information of every leaf."""
484+
477485
class ComputeScore(object):
486+
478487
def __init__(self):
479488
self.n_leaves = 0
480489
self.leaves_dim = []
@@ -495,7 +504,9 @@ def __call__(self, node):
495504

496505
def assign_leaf_labels(self):
497506
"""Assign integer labels to the leaves."""
507+
498508
class LeafLabels(object):
509+
499510
def __init__(self):
500511
self.labels_counter = 0
501512

@@ -508,6 +519,7 @@ def __call__(self, node):
508519

509520
def reset_gprs(self):
510521
"""Reset the GPRs of every leaf and root."""
522+
511523
def reset_gpr(node):
512524
node.gpr = None
513525
node.ss = None
@@ -519,7 +531,9 @@ def plot_clusters(self,
519531
save_data=True,
520532
plot=True,
521533
save=True):
534+
522535
class SaveLeafInfo(object):
536+
523537
def __init__(self):
524538
self.n_leaves = 0
525539
self.n_elems = []
@@ -691,6 +705,7 @@ def void_func(*args, **kwargs):
691705

692706

693707
class TopDownNode():
708+
694709
def __init__(self, parent, node_indexes, val_indexes, tree_obj):
695710
"""A TopDownNode is defined by the indexes of the triplets (inputs,
696711
outputs, gradients) of the training data and the parent node. The root
@@ -822,6 +837,7 @@ def refine_further(self, minimum_score):
822837
class NormalizeDivisive():
823838
"""Inner class for normalization of inputs, gradients w.r.t. local
824839
clusters"""
840+
825841
def __init__(self, norm_type, ind, inputs):
826842
self.type = norm_type
827843

@@ -978,7 +994,8 @@ def refine_cluster(self):
978994
return state, self.children
979995

980996
# check if clustering is possible
981-
if self.ind.shape[0] < self.hierarchical.total_clusters + n_clusters:
997+
if self.ind.shape[
998+
0] < self.hierarchical.total_clusters + n_clusters:
982999
state.add(5)
9831000
_log.debug("Refine returns 5 : " + str(state) +
9841001
" and list length " + str(len(self.children)))

athena/local_classification.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
class SpectralClassification(metaclass=abc.ABCMeta):
3434
"""Evaluate the connected components from X, n_neighbours, features and custom
3535
distance that must be defined in concrete class."""
36+
3637
def __init__(self):
3738
self.X = None
3839
self.features = None
@@ -158,6 +159,7 @@ class ClassifyAS(SpectralClassification):
158159
the AS dimension of the n_neighbours neighbouring samples with a resampling
159160
of neighbour_resampling. The local_as_criterion can be 'min' or 'average'
160161
over the batches of neighbouring samples."""
162+
161163
def __init__(self):
162164
super().__init__()
163165

0 commit comments

Comments
 (0)