Skip to content

Commit 3885f85

Browse files
rmse -> distance correlation (#811)
* rmse -> distance correlation * import stats Former-commit-id: 8dd58b6
1 parent 091bfb8 commit 3885f85

3 files changed

Lines changed: 21 additions & 22 deletions

File tree

openproblems/tasks/dimensionality_reduction/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ data for visualization and interpretation.
2323

2424
## The metrics
2525

26-
* **Root mean square error**: the square root of the mean squared difference between
26+
* **Distance correlation**: the Spearman correlation between
2727
ground truth distances in the high-dimensional data and Euclidean distances in the
28-
dimension-reduced data, invariant to scalar multiplication. *RMSE* computes
29-
high-dimensional distances in Euclidean space, while *RMSE (spectral)* computes
30-
[diffusion distances](http://dx.doi.org/10.1016/j.acha.2006.04.006) (i.e. Euclidean
31-
distances on the [Laplacian Eigenmap](http://dx.doi.org/10.1162/089976603321780317)).
28+
dimension-reduced data, invariant to scalar multiplication. *Distance correlation*
29+
computes high-dimensional distances in Euclidean space, while *Distance correlation
30+
(spectral)* computes [diffusion distances](http://dx.doi.org/10.1016/j.acha.2006.04.006)
31+
(i.e. Euclidean distances on the [Laplacian Eigenmap](http://dx.doi.org/10.1162/089976603321780317)).
3232
* **Trustworthiness**: a measurement of similarity between the rank of each point's
3333
nearest neighbors in the high-dimensional data and the reduced data ([Venna & Kaski,
3434
2001](https://openproblems.bio/bibliography#venna2001neighborhood)).
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from .density import density_preservation
2+
from .distance_correlation import distance_correlation
3+
from .distance_correlation import distance_correlation_spectral
24
from .nn_ranking import continuity
35
from .nn_ranking import lcmc
46
from .nn_ranking import qglobal
57
from .nn_ranking import qlocal
68
from .nn_ranking import qnn
79
from .nn_ranking import qnn_auc
8-
from .root_mean_square_error import rmse
9-
from .root_mean_square_error import rmse_spectral
1010
from .trustworthiness import trustworthiness

openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py renamed to openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,23 @@
22
from ....tools.normalize import log_cpm
33

44

5-
def _rmse(X, X_emb):
6-
import scipy.optimize
5+
def _distance_correlation(X, X_emb):
76
import scipy.spatial
7+
import scipy.stats
88

99
high_dimensional_distance_vector = scipy.spatial.distance.pdist(X)
1010
low_dimensional_distance_vector = scipy.spatial.distance.pdist(X_emb)
11-
_, rmse = scipy.optimize.nnls(
12-
low_dimensional_distance_vector[:, None], high_dimensional_distance_vector
13-
)
14-
return rmse
11+
return scipy.stats.spearmanr(
12+
low_dimensional_distance_vector, high_dimensional_distance_vector
13+
)[0]
1514

1615

1716
@metric(
18-
metric_name="RMSE",
19-
maximize=False,
20-
paper_reference="kruskal1964mds",
17+
metric_name="Distance correlation",
18+
maximize=True,
19+
paper_reference="schober2018correlation",
2120
)
22-
def rmse(adata, n_svd=200):
21+
def distance_correlation(adata, n_svd=200):
2322
"""Calculate the distance correlation.
2423
2524
Computes the Spearman distance correlation between the full (or processed) data matrix and the
@@ -30,15 +29,15 @@ def rmse(adata, n_svd=200):
3029
adata = log_cpm(adata)
3130

3231
X = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X)
33-
return _rmse(X, adata.obsm["X_emb"])
32+
return _distance_correlation(X, adata.obsm["X_emb"])
3433

3534

3635
@metric(
37-
metric_name="RMSE (spectral)",
38-
maximize=False,
36+
metric_name="Distance correlation (spectral)",
37+
maximize=True,
3938
paper_reference="coifman2006diffusion",
4039
)
41-
def rmse_spectral(adata, n_comps=200):
40+
def distance_correlation_spectral(adata, n_comps=200):
4241
"""Calculate the spectral distance correlation.
4342
4443
Computes the Spearman distance correlation between high-dimensional Laplacian eigenmaps on the full (or
@@ -57,4 +56,4 @@ def rmse_spectral(adata, n_comps=200):
5756
X = umap.spectral.spectral_layout(
5857
adata.X, graph, n_comps, random_state=np.random.default_rng()
5958
)
60-
return _rmse(X, adata.obsm["X_emb"])
59+
return _distance_correlation(X, adata.obsm["X_emb"])

0 commit comments

Comments
 (0)