openproblems-bio
diff --git a/‎main.bib‎
Lines changed: 13 additions & 0 deletions b/‎main.bib‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/_common/methods/baseline.py‎
Lines changed: 21 additions & 21 deletions b/‎openproblems/tasks/_batch_integration/_common/methods/baseline.py‎
Lines changed: 21 additions & 21 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/README.md‎
Lines changed: 0 additions & 31 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/README.md‎
Lines changed: 0 additions & 31 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py‎
Lines changed: 10 additions & 14 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py‎
Lines changed: 10 additions & 14 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py‎
Lines changed: 4 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py‎
Lines changed: 5 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py‎
Lines changed: 6 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py‎
Lines changed: 6 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py‎
Lines changed: 5 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py‎
Lines changed: 4 additions & 0 deletions b/‎openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py‎
Lines changed: 4 additions & 0 deletions
@@ -22,6 +22,19 @@ @article{agrawal2021mde
   doi           = {10.1561/2200000090},
   url           = {https://doi.org/10.1561/2200000090},
 }
+@article{aliee2021autogenes,
+  title         = {{AutoGeneS}: Automatic gene selection using multi-objective optimization for {RNA}-seq deconvolution},
+  author        = {Hananeh Aliee and Fabian J. Theis},
+  year          = {2021},
+  month         = jul,
+  journal       = {Cell Systems},
+  publisher     = {Elsevier {BV}},
+  volume        = {12},
+  number        = {7},
+  pages         = {706--715.e4},
+  doi           = {10.1016/j.cels.2021.05.006},
+  url           = {https://doi.org/10.1016/j.cels.2021.05.006},
+}
 @article{andersson2020single,
   title         = {Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography},
   author        = {Alma Andersson and Joseph Bergenstr{\aa}hle and Michaela Asp and Ludvig Bergenstr{\aa}hle and Aleksandra Jurek and Jos{\'{e}} Fern{\'{a}}ndez Navarro and Joakim Lundeberg},
 
@@ -1,7 +1,6 @@
-from .....tools.decorators import method
+from .....tools.decorators import baseline_method
 from .....tools.utils import check_version
 
-import functools
 import numpy as np
 
 
@@ -47,18 +46,12 @@ def _random_embedding(partition, jitter=0.01):
     return embedding
 
 
-_baseline_method = functools.partial(
-    method,
-    paper_name="Open Problems for Single Cell Analysis",
-    paper_reference="openproblems",
-    paper_year=2022,
-    code_url="https://github.com/openproblems-bio/openproblems",
-    is_baseline=True,
-)
-
-
-@_baseline_method(
+@baseline_method(
     method_name="No Integration",
+    method_summary=(
+        "Cells are embedded by PCA on the unintegrated data. A graph is built on this"
+        " PCA embedding."
+    ),
 )
 def no_integration(adata, test=False):
     adata.obsp["connectivities"] = adata.obsp["uni_connectivities"]
@@ -69,8 +62,12 @@ def no_integration(adata, test=False):
     return adata
 
 
-@_baseline_method(
+@baseline_method(
     method_name="Random Integration",
+    method_summary=(
+        "Feature values, embedding coordinates, and graph connectivity are all randomly"
+        " permuted"
+    ),
 )
 def random_integration(adata, test=False):
     adata.X = _randomize_features(adata.X)
@@ -80,13 +77,12 @@ def random_integration(adata, test=False):
     return adata
 
 
-@_baseline_method(
+@baseline_method(
     method_name="Random Integration by Celltype",
-    paper_name="Random Integration by Celltype (baseline)",
-    paper_reference="openproblems",
-    paper_year=2022,
-    code_url="https://github.com/openproblems-bio/openproblems",
-    is_baseline=True,
+    method_summary=(
+        "Feature values, embedding coordinates, and graph connectivity are all randomly"
+        " permuted within each celltype label"
+    ),
 )
 def celltype_random_integration(adata, test=False):
     adata.obsm["X_emb"] = _randomize_features(
@@ -101,8 +97,12 @@ def celltype_random_integration(adata, test=False):
     return adata
 
 
-@_baseline_method(
+@baseline_method(
     method_name="Random Integration by Batch",
+    method_summary=(
+        "Feature values, embedding coordinates, and graph connectivity are all randomly"
+        " permuted within each batch label"
+    ),
 )
 def batch_random_integration(adata, test=False):
     adata.obsm["X_emb"] = _randomize_features(
 
@@ -20,37 +20,6 @@ This sub-task was taken from a
 [benchmarking study of data integration
 methods](https://openproblems.bio/bibliography#luecken2022benchmarking).
 
-## The metrics
-
-Metrics for batch integration (embed) measure how well batches are mixed while
-biological signals are preserved. They are divided into batch correction and biological
-variance conservation metrics.
-
-### Batch correction
-
-* **kBET**: kBET determines whether the label composition of a k nearest neighborhood of
-a cell is similar to the expected (global) label composition
-([Buettner et al., Nat Meth 2019](https://openproblems.bio/bibliography#bttner2018test)).
-  The test is repeated for a random subset of cells,
-and the results are summarized as a rejection rate over all tested neighborhoods.
-* **Silhouette batch score**: The absolute silhouette width is computed over batch
-labels per cell. As 0 then indicates that batches are well mixed and any deviation from
-0 indicates a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1].
-* **Principal component regression (PC regression)**: This compare the explained
-variance by batch before and after integration. It returns a score between 0 and 1
-(scaled=True) with 0 if the variance contribution hasn’t changed. The larger the score,
-the more different the variance contributions are before and after integration.
-
-### Biological variance conservation
-
-* **Cell cycle score**: The cell-cycle conservation score evaluates how well the
-cell-cycle effect can be captured before and after integration.
-* **Isolated label silhouette**: This score evaluates the compactness for the label(s)
-that is(are) shared by fewest batches. It indicates how well rare cell types can be
-preserved after integration.
-* **Cell type ASW**: The absolute silhouette with is computed on cell identity labels,
-measuring their compactness.
-
 ## API
 
 WARNING: other than most tasks, `adata.X` should contain log-normalized data.
 
@@ -1,41 +1,37 @@
-from .....tools.decorators import method
+from .....tools.decorators import baseline_method
 from .....tools.utils import check_version
 from ..._common.methods.baseline import _random_embedding
 
-import functools
 import numpy as np
 import scanpy as sc
 
-_baseline_method = functools.partial(
-    method,
-    paper_name="Open Problems for Single Cell Analysis",
-    paper_reference="openproblems",
-    paper_year=2022,
-    code_url="https://github.com/openproblems-bio/openproblems",
-    is_baseline=True,
-)
-
 
-@_baseline_method(
+@baseline_method(
     method_name="Random Embedding by Celltype (with jitter)",
+    method_summary=(
+        "Cells are embedded as a one-hot encoding of celltype labels, with a small"
+        " amount of random noise added to the embedding"
+    ),
 )
 def celltype_random_embedding_jitter(adata, test=False):
     adata.obsm["X_emb"] = _random_embedding(partition=adata.obs["labels"], jitter=0.01)
     adata.uns["method_code_version"] = check_version("openproblems")
     return adata
 
 
-@_baseline_method(
+@baseline_method(
     method_name="Random Embedding by Celltype",
+    method_summary="Cells are embedded as a one-hot encoding of celltype labels",
 )
 def celltype_random_embedding(adata, test=False):
     adata.obsm["X_emb"] = _random_embedding(partition=adata.obs["labels"], jitter=None)
     adata.uns["method_code_version"] = check_version("openproblems")
     return adata
 
 
-@_baseline_method(
+@baseline_method(
     method_name="No Integration by Batch",
+    method_summary="Cells are embedded by computing PCA independently on each batch",
 )
 def no_integration_batch(adata, test=False):
     """Compute PCA independently on each batch
 
@@ -21,6 +21,10 @@
 
 @metric(
     metric_name="Cell Cycle Score",
+    metric_summary=(
+        "The cell-cycle conservation score evaluates how well the cell-cycle effect can"
+        " be captured before and after integration."
+    ),
     paper_reference="luecken2022benchmarking",
     maximize=True,
     image="openproblems-r-pytorch",
 
@@ -14,6 +14,11 @@
 
 @metric(
     metric_name="Isolated label Silhouette",
+    metric_summary=(
+        "This score evaluates the compactness for the label(s) that is(are) shared by"
+        " fewest batches. It indicates how well rare cell types can be preserved after"
+        " integration."
+    ),
     paper_reference="luecken2022benchmarking",
     maximize=True,
     image="openproblems-r-pytorch",
 
@@ -26,6 +26,12 @@
 
 @metric(
     metric_name="kBET",
+    metric_summary=(
+        "kBET determines whether the label composition of a k nearest neighborhood of a"
+        " cell is similar to the expected (global) label composition. The test is"
+        " repeated for a random subset of cells, and the results are summarized as a"
+        " rejection rate over all tested neighborhoods."
+    ),
     paper_reference="bttner2018test",
     maximize=True,
     image="openproblems-r-extras",
 
@@ -18,6 +18,12 @@
 
 @metric(
     metric_name="PC Regression",
+    metric_summary=(
+        "This compares the explained variance by batch before and after integration. It"
+        " returns a score between 0 and 1 (scaled=True) with 0 if the variance"
+        " contribution hasn’t changed. The larger the score, the more different the"
+        " variance contributions are before and after integration."
+    ),
     paper_reference="luecken2022benchmarking",
     maximize=True,
     image="openproblems-r-pytorch",
 
@@ -23,6 +23,11 @@
 
 @metric(
     metric_name="Batch ASW",
+    metric_summary=(
+        "The absolute silhouette width is computed over batch labels per cell. As 0"
+        " then indicates that batches are well mixed and any deviation from 0 indicates"
+        " a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1]."
+    ),
     paper_reference="luecken2022benchmarking",
     maximize=True,
     image="openproblems-r-pytorch",
 
@@ -11,6 +11,10 @@
 
 @metric(
     metric_name="Silhouette",
+    metric_summary=(
+        "The absolute silhouette width is computed on cell identity labels, measuring"
+        " their compactness."
+    ),
     paper_reference="luecken2022benchmarking",
     maximize=True,
     image="openproblems-r-pytorch",