openproblems-bio
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 1 addition & 1 deletion
diff --git a/openproblems/api/README.md b/openproblems/api/README.md
Lines changed: 11 additions & 11 deletions
diff --git a/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py b/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py
Lines changed: 4 additions & 4 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/README.md b/openproblems/tasks/dimensionality_reduction/README.md
Lines changed: 9 additions & 9 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/__init__.py b/openproblems/tasks/dimensionality_reduction/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/openproblems/tasks/dimensionality_reduction/api.py b/openproblems/tasks/dimensionality_reduction/api.py
Lines changed: 3 additions & 3 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py b/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py
Lines changed: 2 additions & 2 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py b/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py
Lines changed: 2 additions & 2 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py b/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py
Lines changed: 2 additions & 2 deletions
diff --git a/openproblems/tasks/dimensionality_reduction/methods/__init__.py b/openproblems/tasks/dimensionality_reduction/methods/__init__.py
Lines changed: 21 additions & 21 deletions
@@ -334,7 +334,7 @@ docker run \
 openproblems-cli test \
   --task label_projection \
   --dataset zebrafish_labs \
-  --method logistic_regression_log_cpm \
+  --method logistic_regression_log_cp10k \
   --metric f1
 ```
 
 
@@ -39,7 +39,7 @@ For example:
 # Download a task-specific dataset and save it to `dataset.h5ad`
 openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
 # Run a method on a dataset and save output to `method.h5ad`
-openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
+openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
 # Evaluate the performance of a previously run method using the `accuracy` metric
 openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
 ```
@@ -67,11 +67,11 @@ zebrafish_labs
 zebrafish_random
 
 > openproblems-cli list --methods --task label_projection
-knn_classifier_log_cpm
+knn_classifier_log_cp10k
 knn_classifier_scran
-logistic_regression_log_cpm
+logistic_regression_log_cp10k
 logistic_regression_scran
-mlp_log_cpm
+mlp_log_cp10k
 mlp_scran
 
 > openproblems-cli list --metrics --task label_projection
@@ -100,11 +100,11 @@ zebrafish_labs
 zebrafish_random
 $ openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
 $ openproblems-cli list --methods --task label_projection
-logistic_regression_log_cpm
+logistic_regression_log_cp10k
 logistic_regression_scran
-mlp_log_cpm
+mlp_log_cp10k
 mlp_scran
-$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
+$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
 $ openproblems-cli list --metrics --task label_projection
 $ openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
 0.9521233432512848
@@ -121,7 +121,7 @@ openproblems-cli image --datasets --task label_projection pancreas_batch
 docker run -dt openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
 openproblems-cli list --methods --task label_projection
 openproblems-cli image --methods --task label_projection logistic_regression_scran
-openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
+openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
 openproblems-cli list --metrics --task label_projection
 openproblems-cli image --metrics --task label_projection accuracy
 openproblems-cli evaluate --task label_projection --input method.h5ad accuracy
@@ -147,13 +147,13 @@ $ openproblems-cli image --datasets --task label_projection pancreas_batch
 openproblems
 $ docker run -dt singlecellopenproblems/openproblems openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch
 $ openproblems-cli list --methods --task label_projection
-logistic_regression_log_cpm
+logistic_regression_log_cp10k
 logistic_regression_scran
-mlp_log_cpm
+mlp_log_cp10k
 mlp_scran
 $ openproblems-cli image --methods --task label_projection logistic_regression_scran
 openproblems-r-base
-$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm
+$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k
 $ openproblems-cli list --metrics --task label_projection
 accuracy
 f1
 
@@ -1,6 +1,6 @@
 from .....tools.conversion import r_function
 from .....tools.decorators import method
-from .....tools.normalize import log_cpm
+from .....tools.normalize import log_cp10k
 from .....tools.utils import check_r_version
 from ..utils import aggregate_method_scores
 from ..utils import ligand_receptor_resource
@@ -41,9 +41,9 @@ def _liana(
     **kwargs,
 ):
     # log-normalize
-    adata = log_cpm(adata)
-    adata.layers["logcounts"] = adata.layers["log_cpm"]
-    del adata.layers["log_cpm"]
+    adata = log_cp10k(adata)
+    adata.layers["logcounts"] = adata.layers["log_cp10k"]
+    del adata.layers["log_cp10k"]
 
     # Run LIANA
     liana_res = _r_liana(
 
@@ -42,12 +42,12 @@ data for visualization and interpretation.
 
 ## API
 
-WARNING: other than most tasks, `adata.X` should contain log CPM-normalized data,
+WARNING: unlike most tasks, `adata.X` should contain log CP10k-normalized data.
    This is the case as we are computing ground truth metrics on normalized data,
    which means methods which use this same normalization are likely to score more
    highly on these metrics.
 
-**Datasets** should provide *log CPM normalized counts* in `adata.X` and store the
+**Datasets** should provide *log CP10k normalized counts* in `adata.X` and store the
 original number of genes (i.e., `adata.shape[1]`) in `adata.uns["n_genes"]`.
 
 **Methods** should assign dimensionally-reduced 2D embedding coordinates to
@@ -66,11 +66,11 @@ pre-processing functions are available as part of the `tools` module. Where poss
 each **method** should first call one of these functions and use the processed `adata.X`
 slot as the input to the method. Raw counts are also stored in `adata.layers["counts"]`
 by the standard pre-processing functions, if a method performs its own pre-processing.
-For most methods a standard pre-processing from `log_cpm()`, which normalizes the
-expression matrix to counts per million (CPM), can be used directly from `adata.X`.
+For most methods a standard pre-processing from `log_cp10k()`, which normalizes the
+expression matrix to counts per 10,000 (CP10k), can be used directly from `adata.X`.
 Variants of methods can be created by applying different pre-processing prior to the
 method itself (see `phate.py` for an example). *Note that using a normalization method
-different from that used for the metrics (log CPM) may lead to artificially poor method
+different from that used for the metrics (log CP10k) may lead to artificially poor method
 performance.*
 
 ## The methods
@@ -138,7 +138,7 @@ from [umap-learn](https://umap-learn.readthedocs.io/en/latest/densmap_demo.html)
 
 **Variants:**
 
-* The (logCPM-normalized, 1000 HVG) expression matrix
+* The (logCP10k-normalized, 1000 HVG) expression matrix
 * 50 principal components
 
 ### Potential of heat-diffusion for affinity-based transition embedding (PHATE)
@@ -157,8 +157,8 @@ This implementation is from the [phate package](https://phate.readthedocs.io/en/
 
 **Variants:**
 
-* The square-root CPM transformed expression matrix
-* 50 principal components of the logCPM-normalised, 1000 HVG expression matrix
+* The square-root CP10k transformed expression matrix
+* 50 principal components of the logCP10k-normalised, 1000 HVG expression matrix
 
 ### ivis
 
@@ -177,7 +177,7 @@ package](https://neuralee.readthedocs.io/en/latest/).
 **Variants:**
 
 * Scaled 500 HVGs from a logged expression matrix (no library size normalization)
-* LogCPM-normalised, 1000 HVG expression matrix
+* LogCP10k-normalised, 1000 HVG expression matrix
 
 ### scvis
 
 
@@ -9,7 +9,7 @@
     "Reduction of high-dimensional datasets to 2D for visualization & interpretation"
 )
 
-DEFAULT_LAYER = "log_cpm"
+DEFAULT_LAYER = "log_cp10k"
 
 DATASETS = utils.get_callable_members(datasets)
 METHODS = utils.get_callable_members(methods)
 
@@ -1,6 +1,6 @@
 from ...data.sample import load_sample_data
 from ...tools.decorators import dataset
-from ...tools.normalize import log_cpm
+from ...tools.normalize import log_cp10k
 
 import numpy as np
 
@@ -16,7 +16,7 @@ def check_method(adata, is_baseline=False):
     """Check that method output fits expected API."""
     # check adata.X has not changed
     assert adata.uns["n_genes"] == adata.shape[1]
-    assert adata.X is adata.layers["log_cpm"]
+    assert adata.X is adata.layers["log_cp10k"]
     # check output
     assert "X_emb" in adata.obsm
     if not is_baseline:
@@ -29,7 +29,7 @@ def check_method(adata, is_baseline=False):
 def sample_dataset():
     """Create a simple dataset to use for testing methods in this task."""
     adata = load_sample_data()
-    adata = log_cpm(adata)
+    adata = log_cp10k(adata)
     adata.uns["n_genes"] = adata.shape[1]
     return adata
 
 
@@ -1,6 +1,6 @@
 from ....data.mouse_blood_olsson_labelled import load_olsson_2016_mouse_blood
 from ....tools.decorators import dataset
-from ....tools.normalize import log_cpm
+from ....tools.normalize import log_cp10k
 
 
 @dataset(
@@ -14,4 +14,4 @@
 def olsson_2016_mouse_blood(test=False):
     adata = load_olsson_2016_mouse_blood(test=test)
     adata.uns["n_genes"] = adata.shape[1]
-    return log_cpm(adata)
+    return log_cp10k(adata)
@@ -1,6 +1,6 @@
 from ....data.mouse_hspc_nestorowa2016 import load_mouse_hspc_nestorowa2016
 from ....tools.decorators import dataset
-from ....tools.normalize import log_cpm
+from ....tools.normalize import log_cp10k
 
 
 @dataset(
@@ -14,4 +14,4 @@
 def mouse_hspc_nestorowa2016(test=False):
     adata = load_mouse_hspc_nestorowa2016(test=test)
     adata.uns["n_genes"] = adata.shape[1]
-    return log_cpm(adata)
+    return log_cp10k(adata)
@@ -1,6 +1,6 @@
 from ....data.tenx import load_tenx_5k_pbmc
 from ....tools.decorators import dataset
-from ....tools.normalize import log_cpm
+from ....tools.normalize import log_cp10k
 
 
 @dataset(
@@ -16,4 +16,4 @@
 def tenx_5k_pbmc(test=False):
     adata = load_tenx_5k_pbmc(test=test)
     adata.uns["n_genes"] = adata.shape[1]
-    return log_cpm(adata)
+    return log_cp10k(adata)
@@ -1,26 +1,26 @@
 from .baseline import random_features
 from .baseline import true_features
-from .baseline import true_features_log_cpm
-from .baseline import true_features_log_cpm_hvg
+from .baseline import true_features_log_cp10k
+from .baseline import true_features_log_cp10k_hvg
 from .neuralee import neuralee_default
-from .neuralee import neuralee_logCPM_1kHVG
-from .pca import pca_logCPM
-from .pca import pca_logCPM_1kHVG
+from .neuralee import neuralee_logCP10k_1kHVG
+from .pca import pca_logCP10k
+from .pca import pca_logCP10k_1kHVG
 from .phate import phate_default
-from .phate import phate_logCPM
-from .phate import phate_logCPM_1kHVG
+from .phate import phate_logCP10k
+from .phate import phate_logCP10k_1kHVG
 from .phate import phate_sqrt
-from .pymde import pymde_distances_log_cpm
-from .pymde import pymde_distances_log_cpm_hvg
-from .pymde import pymde_neighbors_log_cpm
-from .pymde import pymde_neighbors_log_cpm_hvg
-from .tsne import tsne_logCPM
-from .tsne import tsne_logCPM_1kHVG
-from .umap import densmap_logCPM
-from .umap import densmap_logCPM_1kHVG
-from .umap import densmap_pca_logCPM
-from .umap import densmap_pca_logCPM_1kHVG
-from .umap import umap_logCPM
-from .umap import umap_logCPM_1kHVG
-from .umap import umap_pca_logCPM
-from .umap import umap_pca_logCPM_1kHVG
+from .pymde import pymde_distances_log_cp10k
+from .pymde import pymde_distances_log_cp10k_hvg
+from .pymde import pymde_neighbors_log_cp10k
+from .pymde import pymde_neighbors_log_cp10k_hvg
+from .tsne import tsne_logCP10k
+from .tsne import tsne_logCP10k_1kHVG
+from .umap import densmap_logCP10k
+from .umap import densmap_logCP10k_1kHVG
+from .umap import densmap_pca_logCP10k
+from .umap import densmap_pca_logCP10k_1kHVG
+from .umap import umap_logCP10k
+from .umap import umap_logCP10k_1kHVG
+from .umap import umap_pca_logCP10k
+from .umap import umap_pca_logCP10k_1kHVG
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@`
`9`	`9`	`"Reduction of high-dimensional datasets to 2D for visualization & interpretation"`
`10`	`10`	`)`
`11`	`11`
`12`		`-DEFAULT_LAYER = "log_cpm"`
	`12`	`+DEFAULT_LAYER = "log_cp10k"`
`13`	`13`
`14`	`14`	`DATASETS = utils.get_callable_members(datasets)`
`15`	`15`	`METHODS = utils.get_callable_members(methods)`