Skip to content

Commit fb6c921

Browse files
CPM -> CP10k (#812)
* CPM -> CP10k * fix in docs * bad diff Former-commit-id: 0a0e902
1 parent 5b2633b commit fb6c921

31 files changed

Lines changed: 200 additions & 200 deletions

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ docker run \
334334
openproblems-cli test \
335335
--task label_projection \
336336
--dataset zebrafish_labs \
337-
--method logistic_regression_log_cpm \
337+
--method logistic_regression_log_cp10k \
338338
--metric f1
339339
```
340340

openproblems/api/README.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ For example:
3939
# Download a task-specific dataset and save it to `dataset.h5ad`
4040
openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
4141
# Run a method on a dataset and save output to `method.h5ad`
42-
openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
42+
openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
4343
# Evaluate the performance of a previously run method using the `accuracy` metric
4444
openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
4545
```
@@ -67,11 +67,11 @@ zebrafish_labs
6767
zebrafish_random
6868

6969
> openproblems-cli list --methods --task label_projection
70-
knn_classifier_log_cpm
70+
knn_classifier_log_cp10k
7171
knn_classifier_scran
72-
logistic_regression_log_cpm
72+
logistic_regression_log_cp10k
7373
logistic_regression_scran
74-
mlp_log_cpm
74+
mlp_log_cp10k
7575
mlp_scran
7676

7777
> openproblems-cli list --metrics --task label_projection
@@ -100,11 +100,11 @@ zebrafish_labs
100100
zebrafish_random
101101
$ openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
102102
$ openproblems-cli list --methods --task label_projection
103-
logistic_regression_log_cpm
103+
logistic_regression_log_cp10k
104104
logistic_regression_scran
105-
mlp_log_cpm
105+
mlp_log_cp10k
106106
mlp_scran
107-
$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
107+
$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
108108
$ openproblems-cli list --metrics --task label_projection
109109
$ openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
110110
0.9521233432512848
@@ -121,7 +121,7 @@ openproblems-cli image --datasets --task label_projection pancreas_batch
121121
docker run -dt openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
122122
openproblems-cli list --methods --task label_projection
123123
openproblems-cli image --methods --task label_projection logistic_regression_scran
124-
openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
124+
openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
125125
openproblems-cli list --metrics --task label_projection
126126
openproblems-cli image --metrics --task label_projection accuracy
127127
openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
@@ -147,13 +147,13 @@ $ openproblems-cli image --datasets --task label_projection pancreas_batch
147147
openproblems
148148
$ docker run -dt singlecellopenproblems/openproblems openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
149149
$ openproblems-cli list --methods --task label_projection
150-
logistic_regression_log_cpm
150+
logistic_regression_log_cp10k
151151
logistic_regression_scran
152-
mlp_log_cpm
152+
mlp_log_cp10k
153153
mlp_scran
154154
$ openproblems-cli image --methods --task label_projection logistic_regression_scran
155155
openproblems-r-base
156-
$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
156+
$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
157157
$ openproblems-cli list --metrics --task label_projection
158158
accuracy
159159
f1

openproblems/tasks/_cell_cell_communication/_common/methods/liana.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .....tools.conversion import r_function
22
from .....tools.decorators import method
3-
from .....tools.normalize import log_cpm
3+
from .....tools.normalize import log_cp10k
44
from .....tools.utils import check_r_version
55
from ..utils import aggregate_method_scores
66
from ..utils import ligand_receptor_resource
@@ -41,9 +41,9 @@ def _liana(
4141
**kwargs,
4242
):
4343
# log-normalize
44-
adata = log_cpm(adata)
45-
adata.layers["logcounts"] = adata.layers["log_cpm"]
46-
del adata.layers["log_cpm"]
44+
adata = log_cp10k(adata)
45+
adata.layers["logcounts"] = adata.layers["log_cp10k"]
46+
del adata.layers["log_cp10k"]
4747

4848
# Run LIANA
4949
liana_res = _r_liana(

openproblems/tasks/dimensionality_reduction/README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ data for visualization and interpretation.
4242

4343
## API
4444

45-
WARNING: other than most tasks, `adata.X` should contain log CPM-normalized data,
45+
WARNING: unlike most tasks, `adata.X` should contain log CP10k-normalized data.
4646
This is the case as we are computing ground truth metrics on normalized data,
4747
which means methods which use this same normalization are likely to score more
4848
highly on these metrics.
4949

50-
**Datasets** should provide *log CPM normalized counts* in `adata.X` and store the
50+
**Datasets** should provide *log CP10k normalized counts* in `adata.X` and store the
5151
original number of genes (i.e., `adata.shape[1]`) in `adata.uns["n_genes"]`.
5252

5353
**Methods** should assign dimensionally-reduced 2D embedding coordinates to
@@ -66,11 +66,11 @@ pre-processing functions are available as part of the `tools` module. Where poss
6666
each **method** should first call one of these functions and use the processed `adata.X`
6767
slot as the input to the method. Raw counts are also stored in `adata.layers["counts"]`
6868
by the standard pre-processing functions, if a method performs its own pre-processing.
69-
For most methods a standard pre-processing from `log_cpm()`, which normalizes the
70-
expression matrix to counts per million (CPM), can be used directly from `adata.X`.
69+
For most methods a standard pre-processing from `log_cp10k()`, which normalizes the
70+
expression matrix to counts per 10,000 (CP10k), can be used directly from `adata.X`.
7171
Variants of methods can be created by applying different pre-processing prior to the
7272
method itself (see `phate.py` for an example). *Note that using a normalization method
73-
different from that used for the metrics (log CPM) may lead to artificially poor method
73+
different from that used for the metrics (log CP10k) may lead to artificially poor method
7474
performance.*
7575

7676
## The methods
@@ -138,7 +138,7 @@ from [umap-learn](https://umap-learn.readthedocs.io/en/latest/densmap_demo.html)
138138

139139
**Variants:**
140140

141-
* The (logCPM-normalized, 1000 HVG) expression matrix
141+
* The (logCP10k-normalized, 1000 HVG) expression matrix
142142
* 50 principal components
143143

144144
### Potential of heat-diffusion for affinity-based transition embedding (PHATE)
@@ -157,8 +157,8 @@ This implementation is from the [phate package](https://phate.readthedocs.io/en/
157157

158158
**Variants:**
159159

160-
* The square-root CPM transformed expression matrix
161-
* 50 principal components of the logCPM-normalised, 1000 HVG expression matrix
160+
* The square-root CP10k transformed expression matrix
161+
* 50 principal components of the logCP10k-normalised, 1000 HVG expression matrix
162162

163163
### ivis
164164

@@ -177,7 +177,7 @@ package](https://neuralee.readthedocs.io/en/latest/).
177177
**Variants:**
178178

179179
* Scaled 500 HVGs from a logged expression matrix (no library size normalization)
180-
* LogCPM-normalised, 1000 HVG expression matrix
180+
* LogCP10k-normalised, 1000 HVG expression matrix
181181

182182
### scvis
183183

openproblems/tasks/dimensionality_reduction/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"Reduction of high-dimensional datasets to 2D for visualization & interpretation"
1010
)
1111

12-
DEFAULT_LAYER = "log_cpm"
12+
DEFAULT_LAYER = "log_cp10k"
1313

1414
DATASETS = utils.get_callable_members(datasets)
1515
METHODS = utils.get_callable_members(methods)

openproblems/tasks/dimensionality_reduction/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ...data.sample import load_sample_data
22
from ...tools.decorators import dataset
3-
from ...tools.normalize import log_cpm
3+
from ...tools.normalize import log_cp10k
44

55
import numpy as np
66

@@ -16,7 +16,7 @@ def check_method(adata, is_baseline=False):
1616
"""Check that method output fits expected API."""
1717
# check adata.X has not changed
1818
assert adata.uns["n_genes"] == adata.shape[1]
19-
assert adata.X is adata.layers["log_cpm"]
19+
assert adata.X is adata.layers["log_cp10k"]
2020
# check output
2121
assert "X_emb" in adata.obsm
2222
if not is_baseline:
@@ -29,7 +29,7 @@ def check_method(adata, is_baseline=False):
2929
def sample_dataset():
3030
"""Create a simple dataset to use for testing methods in this task."""
3131
adata = load_sample_data()
32-
adata = log_cpm(adata)
32+
adata = log_cp10k(adata)
3333
adata.uns["n_genes"] = adata.shape[1]
3434
return adata
3535

openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ....data.mouse_blood_olsson_labelled import load_olsson_2016_mouse_blood
22
from ....tools.decorators import dataset
3-
from ....tools.normalize import log_cpm
3+
from ....tools.normalize import log_cp10k
44

55

66
@dataset(
@@ -14,4 +14,4 @@
1414
def olsson_2016_mouse_blood(test=False):
1515
adata = load_olsson_2016_mouse_blood(test=test)
1616
adata.uns["n_genes"] = adata.shape[1]
17-
return log_cpm(adata)
17+
return log_cp10k(adata)

openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ....data.mouse_hspc_nestorowa2016 import load_mouse_hspc_nestorowa2016
22
from ....tools.decorators import dataset
3-
from ....tools.normalize import log_cpm
3+
from ....tools.normalize import log_cp10k
44

55

66
@dataset(
@@ -14,4 +14,4 @@
1414
def mouse_hspc_nestorowa2016(test=False):
1515
adata = load_mouse_hspc_nestorowa2016(test=test)
1616
adata.uns["n_genes"] = adata.shape[1]
17-
return log_cpm(adata)
17+
return log_cp10k(adata)

openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ....data.tenx import load_tenx_5k_pbmc
22
from ....tools.decorators import dataset
3-
from ....tools.normalize import log_cpm
3+
from ....tools.normalize import log_cp10k
44

55

66
@dataset(
@@ -16,4 +16,4 @@
1616
def tenx_5k_pbmc(test=False):
1717
adata = load_tenx_5k_pbmc(test=test)
1818
adata.uns["n_genes"] = adata.shape[1]
19-
return log_cpm(adata)
19+
return log_cp10k(adata)
Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
from .baseline import random_features
22
from .baseline import true_features
3-
from .baseline import true_features_log_cpm
4-
from .baseline import true_features_log_cpm_hvg
3+
from .baseline import true_features_log_cp10k
4+
from .baseline import true_features_log_cp10k_hvg
55
from .neuralee import neuralee_default
6-
from .neuralee import neuralee_logCPM_1kHVG
7-
from .pca import pca_logCPM
8-
from .pca import pca_logCPM_1kHVG
6+
from .neuralee import neuralee_logCP10k_1kHVG
7+
from .pca import pca_logCP10k
8+
from .pca import pca_logCP10k_1kHVG
99
from .phate import phate_default
10-
from .phate import phate_logCPM
11-
from .phate import phate_logCPM_1kHVG
10+
from .phate import phate_logCP10k
11+
from .phate import phate_logCP10k_1kHVG
1212
from .phate import phate_sqrt
13-
from .pymde import pymde_distances_log_cpm
14-
from .pymde import pymde_distances_log_cpm_hvg
15-
from .pymde import pymde_neighbors_log_cpm
16-
from .pymde import pymde_neighbors_log_cpm_hvg
17-
from .tsne import tsne_logCPM
18-
from .tsne import tsne_logCPM_1kHVG
19-
from .umap import densmap_logCPM
20-
from .umap import densmap_logCPM_1kHVG
21-
from .umap import densmap_pca_logCPM
22-
from .umap import densmap_pca_logCPM_1kHVG
23-
from .umap import umap_logCPM
24-
from .umap import umap_logCPM_1kHVG
25-
from .umap import umap_pca_logCPM
26-
from .umap import umap_pca_logCPM_1kHVG
13+
from .pymde import pymde_distances_log_cp10k
14+
from .pymde import pymde_distances_log_cp10k_hvg
15+
from .pymde import pymde_neighbors_log_cp10k
16+
from .pymde import pymde_neighbors_log_cp10k_hvg
17+
from .tsne import tsne_logCP10k
18+
from .tsne import tsne_logCP10k_1kHVG
19+
from .umap import densmap_logCP10k
20+
from .umap import densmap_logCP10k_1kHVG
21+
from .umap import densmap_pca_logCP10k
22+
from .umap import densmap_pca_logCP10k_1kHVG
23+
from .umap import umap_logCP10k
24+
from .umap import umap_logCP10k_1kHVG
25+
from .umap import umap_pca_logCP10k
26+
from .umap import umap_pca_logCP10k_1kHVG

0 commit comments

Comments
 (0)