Skip to content

Commit e40d63a

Browse files
committed
add parallel flowSOM notebook
1 parent 1477e24 commit e40d63a

10 files changed

Lines changed: 404 additions & 21 deletions

File tree

docs/api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ For more background information, see the paper for this software package {cite:p
6363
:toctree: generated
6464
6565
models.FlowSOMEstimator
66+
models.BatchFlowSOMEstimator
6667
models.SOMEstimator
68+
models.BatchSOMEstimator
6769
models.ConsensusCluster
6870
models.BaseClusterEstimator
6971
models.BaseFlowSOMEstimator

docs/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
:maxdepth: 1
88
99
notebooks/example
10+
notebooks/parallel
1011
api.md
1112
changelog.md
1213
contributing.md

docs/notebooks/parallel.ipynb

Lines changed: 369 additions & 0 deletions
Large diffs are not rendered by default.

src/flowsom/models/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
from .base_cluster_estimator import BaseClusterEstimator # isort:skip
33
from .som_estimator import SOMEstimator # isort:skip
44
from .base_flowsom_estimator import BaseFlowSOMEstimator # isort:skip
5-
from .consensus_cluster import ConsensusCluster
6-
from .flowsom_estimator import FlowSOMEstimator
5+
from .consensus_cluster import ConsensusCluster # isort:skip
6+
from .flowsom_estimator import FlowSOMEstimator # isort:skip
7+
from .batch_flowsom_estimator import BatchFlowSOMEstimator # isort:skip
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
from ._som import SOM_Batch, map_data_to_codes # isort:skip
2-
from .som_estimator import SOMEstimator_batch_init # isort:skip
2+
from .som_estimator import BatchSOMEstimator # isort:skip

src/flowsom/models/batch/_som.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def SOM_Batch(
4343
radii: tuple,
4444
ncodes: int,
4545
rlen: int,
46-
nr_batches: int = 10,
46+
num_batches: int = 10,
4747
distf: Callable[[np.ndarray, np.ndarray], float] = eucl_without_sqrt,
4848
seed=None,
4949
):
@@ -57,7 +57,7 @@ def SOM_Batch(
5757
radii (tuple): The radii.
5858
ncodes (int): The number of codes.
5959
rlen (int): The number of iterations.
60-
nr_batches (int): The number of batches.
60+
num_batches (int): The number of batches.
6161
distf (function): The distance function.
6262
seed (int): The seed for the random number generator.
6363
@@ -82,7 +82,7 @@ def SOM_Batch(
8282
treshold_step = (radii[0] - radii[1]) / niter
8383

8484
# Keep the temporary codes, using the given codes as the initial codes, for every batch
85-
tmp_codes_all = np.empty((nr_batches, ncodes, px), dtype=np.float64)
85+
tmp_codes_all = np.empty((num_batches, ncodes, px), dtype=np.float64)
8686

8787
# Copy the codes as a float64, because the codes are updated in the algorithm
8888
copy_codes = codes.copy().astype(np.float64)
@@ -117,14 +117,14 @@ def SOM_Batch(
117117
init_threshold -= treshold_step
118118

119119
# Choose random data points, for the different batches, and the rlen iterations
120-
data_points_random = np.random.choice(n, nr_batches * rlen * n, replace=True)
120+
data_points_random = np.random.choice(n, num_batches * rlen * n, replace=True)
121121

122122
# Decrease the number of iterations, because the first iterations are already done
123123
rlen = int(rlen / 2)
124124

125125
for iteration in range(rlen):
126126
# Execute the batches in parallel
127-
for batch_nr in prange(nr_batches):
127+
for batch_nr in prange(num_batches):
128128
# Keep the temporary codes, using the given codes as the initial codes
129129
tmp_codes = copy_codes.copy()
130130

src/flowsom/models/batch/som_estimator.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from . import SOM_Batch, map_data_to_codes
99

1010

11-
class SOMEstimator_batch_init(BaseClusterEstimator):
11+
class BatchSOMEstimator(BaseClusterEstimator):
1212
"""Estimate a Self-Organizing Map (SOM) clustering model."""
1313

1414
def __init__(
@@ -23,6 +23,7 @@ def __init__(
2323
map=True,
2424
codes=None,
2525
importance=None,
26+
num_batches=10,
2627
seed=None,
2728
):
2829
super().__init__()
@@ -36,6 +37,7 @@ def __init__(
3637
self.map = map
3738
self.codes = codes
3839
self.importance = importance
40+
self.num_batches = num_batches
3941
self.seed = seed
4042

4143
# Core of the algorithm, where the SOM is executed
@@ -109,15 +111,15 @@ def fit(
109111
alpha = [tuple(alpha[i : i + 2]) for i in range(mst)]
110112

111113
# Define the number of batches
112-
nr_batches = 10
114+
num_batches = self.num_batches
113115

114116
# Split the data into interleaved batches: batch i contains data points i, i + num_batches, i + 2 * num_batches, ...
115117
data = []
116-
for i in range(nr_batches):
117-
data.append(X[i::nr_batches, :])
118+
for i in range(num_batches):
119+
data.append(X[i::num_batches, :])
118120

119121
# Make sure all the batches have the same number of rows: every batch that is shorter than the first batch gets the last data point of X appended
120-
for i in range(nr_batches):
122+
for i in range(num_batches):
121123
if data[i].shape[0] < data[0].shape[0]:
122124
data[i] = np.vstack([data[i], X[-1, :]])
123125

@@ -132,7 +134,7 @@ def fit(
132134
ncodes=n_codes,
133135
rlen=self.rlen,
134136
seed=self.seed,
135-
nr_batches=nr_batches,
137+
num_batches=num_batches,
136138
)
137139
if mst != 1:
138140
nhbrdist: list[list[int]] = _dist_mst(codes)
@@ -159,7 +161,7 @@ def fit_predict(self, X, y=None):
159161
return self.labels_
160162

161163

162-
def _dist_mst(codes):
164+
def _dist_mst(codes) -> list[list[int]]:
163165
adjacency = cdist(
164166
codes,
165167
codes,

src/flowsom/models/batch_flowsom_estimator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
from . import BaseFlowSOMEstimator, ConsensusCluster # isort:skip
2-
from .batch import SOMEstimator_batch_init # isort:skip
1+
from . import BaseFlowSOMEstimator, ConsensusCluster # isort:skip
2+
from .batch import BatchSOMEstimator # isort:skip
33

44

55
class BatchFlowSOMEstimator(BaseFlowSOMEstimator):
66
"""A class that implements the FlowSOM model."""
77

88
def __init__(
99
self,
10-
cluster_model=SOMEstimator_batch_init,
10+
cluster_model=BatchSOMEstimator,
1111
metacluster_model=ConsensusCluster,
1212
**kwargs,
1313
):

src/flowsom/models/pyflowsom_estimator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from . import PyFlowSOM_SOMEstimator
21
from . import BaseFlowSOMEstimator, ConsensusCluster
2+
from .pyFlowSOM_som_estimator import PyFlowSOM_SOMEstimator
33

44

55
class PyFlowSOMEstimator(BaseFlowSOMEstimator):
@@ -11,7 +11,6 @@ def __init__(
1111
metacluster_model=ConsensusCluster,
1212
**kwargs,
1313
):
14-
"""Initialize the FlowSOMEstimator object."""
1514
super().__init__(
1615
cluster_model=cluster_model,
1716
metacluster_model=metacluster_model,

tests/models/test_pyFlowSOM.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
1+
import pytest
12
from sklearn.metrics import v_measure_score
23

3-
from flowsom.models import PyFlowSOMEstimator
4+
# Import PyFlowSOMEstimator when pyFlowSOM is installed; otherwise fall back to the regular FlowSOMEstimator so the name still resolves
5+
try:
6+
from flowsom.models.pyflowsom_estimator import PyFlowSOMEstimator
7+
except ImportError:
8+
from flowsom.models import FlowSOMEstimator as PyFlowSOMEstimator
49

510

11+
@pytest.importorskip("flowsom.models.pyflowsom_estimator")
612
def test_clustering(X):
713
fsom = PyFlowSOMEstimator(n_clusters=10)
814
y_pred = fsom.fit_predict(X)
915
assert y_pred.shape == (100,)
1016

1117

18+
@pytest.importorskip("flowsom.models.pyflowsom_estimator")
1219
def test_clustering_v_measure(X_and_y):
1320
som = PyFlowSOMEstimator(n_clusters=10)
1421
X, y_true = X_and_y
@@ -17,6 +24,7 @@ def test_clustering_v_measure(X_and_y):
1724
assert score > 0.7
1825

1926

27+
@pytest.importorskip("flowsom.models.pyflowsom_estimator")
2028
def test_reproducibility_no_seed(X):
2129
fsom_1 = PyFlowSOMEstimator(n_clusters=10)
2230
fsom_2 = PyFlowSOMEstimator(n_clusters=10)
@@ -26,6 +34,7 @@ def test_reproducibility_no_seed(X):
2634
assert not all(y_pred_1 == y_pred_2)
2735

2836

37+
@pytest.importorskip("flowsom.models.pyflowsom_estimator")
2938
def test_reproducibility_seed(X):
3039
fsom_1 = PyFlowSOMEstimator(n_clusters=10, seed=0)
3140
fsom_2 = PyFlowSOMEstimator(n_clusters=10, seed=0)

0 commit comments

Comments
 (0)