Skip to content

Commit 053663a

Browse files
LouisK92 and rcannood
authored
Add processed spatial data aggregation and quality metrics (#89)
* Add component for aggregating processed spatial data * Add quality metrics * Add aggregation and quality metrics prototypical to workflow * Add new steps to create test resources script * Update quality metrics flow * Fix quality metric and aggregation step naming * Add optional saving of spatial data and generalise assigned transcripts metrics --------- Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
1 parent de16242 commit 053663a

12 files changed

Lines changed: 588 additions & 9 deletions

File tree

scripts/create_test_resources/test_pipeline.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,14 @@ viash run src/methods_qc_filter/basic_qc_filter/config.vsh.yaml -- \
7272
--input $OUT_DIR/spatial_corrected_counts.h5ad \
7373
--output $OUT_DIR/spatial_qc_col.h5ad
7474

75+
# run aggregation of spatial data files
76+
viash run src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml -- \
77+
--input_raw_sp $OUT_DIR/raw_ist.zarr \
78+
--input_transcript_assignments $OUT_DIR/transcript_assignments.zarr \
79+
--input_qc_col $OUT_DIR/spatial_qc_col.h5ad \
80+
--input_spatial_corrected_counts $OUT_DIR/spatial_corrected_counts.h5ad \
81+
--output $OUT_DIR/spatial_processed_complete.zarr
82+
7583
# run a metric
7684
viash run src/metrics/similarity/config.vsh.yaml -- \
7785
--input $OUT_DIR/spatial_corrected_counts.h5ad \
@@ -93,6 +101,7 @@ output_spatial_normalized_counts: !file spatial_normalized_counts.h5ad
93101
output_spatial_with_cell_types: !file spatial_with_cell_types.h5ad
94102
output_spatial_corrected_counts: !file spatial_corrected_counts.h5ad
95103
output_spatial_qc_col: !file spatial_qc_col.h5ad
104+
output_spatial_processed_complete: !file spatial_processed_complete.zarr
96105
output_score: !file score.h5ad
97106
EOL
98107

scripts/run_benchmark/run_test_local.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ cat > /tmp/params.yaml << HERE
2020
id: mouse_brain_combined
2121
input_sc: resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad
2222
input_sp: resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr
23+
save_spatial_data: false
2324
default_methods:
2425
- custom_segmentation
2526
- basic_transcript_assignment

src/api/comp_quality_metric.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
namespace: metrics
2+
info:
3+
type: metric
4+
type_info:
5+
label: Quality Metric
6+
summary: A quality metric for evaluating iST preprocessing methods
7+
description: |
8+
This metric evaluates the quality of the iST preprocessing without using a reference.
9+
arguments:
10+
- name: --input
11+
__merge__: file_spatial_processed_complete.yaml
12+
required: true
13+
direction: input
14+
- name: --output
15+
required: true
16+
direction: output
17+
__merge__: file_score.yaml
18+
19+
test_resources:
20+
- path: /resources_test/task_ist_preprocessing/mouse_brain_combined
21+
dest: resources_test/task_ist_preprocessing/mouse_brain_combined
22+
- type: python_script
23+
path: /common/component_tests/run_and_check_output.py
24+
- type: python_script
25+
path: /common/component_tests/check_config.py
26+
27+
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
info:
2+
type: method
3+
subtype: method_spatial_data_aggregation
4+
type_info:
5+
label: Spatial data aggregation
6+
summary: Aggregate different files of the raw and processed spatial data to one zarr file.
7+
description: |
8+
This component aggregates different files of the raw and processed spatial data to one zarr file.
9+
10+
argument_groups:
11+
- name: Inputs
12+
arguments:
13+
- name: "--input_raw_sp"
14+
__merge__: file_raw_ist.yaml
15+
required: true
16+
direction: input
17+
- name: "--input_transcript_assignments"
18+
__merge__: file_transcript_assignments.yaml
19+
required: true
20+
direction: input
21+
- name: "--input_qc_col"
22+
__merge__: file_spatial_qc_col.yaml
23+
required: true
24+
direction: input
25+
- name: "--input_spatial_corrected_counts"
26+
__merge__: file_spatial_corrected_counts.yaml
27+
required: true
28+
direction: input
29+
- name: Outputs
30+
arguments:
31+
- name: "--output"
32+
__merge__: file_spatial_processed_complete.yaml
33+
required: true
34+
direction: output
35+
default: "$id/output.zarr"
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
type: file
2+
example: "resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_processed_complete.zarr"
3+
label: "Spatial Processed Complete"
4+
summary: A spatial transcriptomics dataset that includes all raw and processed data.
5+
description: |
6+
This dataset contains images, points with cell ids, and an anndata table with processed counts and cell type annotations.
7+
8+
info:
9+
format:
10+
type: spatialdata_zarr
11+
images:
12+
- type: object
13+
name: image
14+
description: The raw image data
15+
required: true
16+
- type: object
17+
name: image_3D
18+
description: The raw 3D image data
19+
required: false
20+
- type: object
21+
name: he_image
22+
description: H&E image data
23+
required: false
24+
points:
25+
- type: dataframe
26+
name: transcripts
27+
description: Point cloud data of transcripts
28+
required: true
29+
columns:
30+
- type: float
31+
name: "x"
32+
required: true
33+
description: x-coordinate of the point
34+
- type: float
35+
name: "y"
36+
required: true
37+
description: y-coordinate of the point
38+
- type: float
39+
name: "z"
40+
required: false
41+
description: z-coordinate of the point
42+
- type: categorical
43+
name: feature_name
44+
required: true
45+
description: Name of the feature
46+
- type: integer
47+
name: "cell_id"
48+
required: true
49+
description: Unique identifier of the cell
50+
- type: long
51+
name: transcript_id
52+
required: true
53+
description: Unique identifier of the transcript
54+
shapes:
55+
- type: dataframe
56+
name: "cell_boundaries"
57+
description: Cell boundaries
58+
required: false
59+
columns:
60+
- type: object
61+
name: "geometry"
62+
required: true
63+
description: Geometry of the cell boundary
64+
tables:
65+
- type: anndata
66+
name: "counts"
67+
description: Counts of the transcripts
68+
required: true
69+
layers:
70+
- type: integer
71+
name: counts
72+
description: Raw counts
73+
required: true
74+
- type: integer
75+
name: normalized
76+
description: Normalized counts
77+
required: true
78+
- type: double
79+
name: normalized_uncorrected
80+
description: Uncorrected normalized expression
81+
required: false
82+
obs:
83+
- type: string
84+
name: cell_id
85+
description: Unique identifier for the cell (from assignment step)
86+
required: true
87+
- type: string
88+
name: centroid_x
89+
description: X coordinate of the cell
90+
required: true
91+
- type: string
92+
name: centroid_y
93+
description: Y coordinate of the cell
94+
required: true
95+
- type: string
96+
name: centroid_z
97+
description: Z coordinate of the cell
98+
required: false
99+
- type: string
100+
name: n_counts
101+
description: Number of counts in the cell
102+
required: true
103+
- type: string
104+
name: n_genes
105+
description: Number of genes in the cell
106+
required: true
107+
- type: string
108+
name: volume
109+
description: Volume of the cell
110+
required: true
111+
- type: string
112+
name: cell_type
113+
description: Cell type of the cell
114+
required: true
115+
var:
116+
- type: string
117+
name: gene_name
118+
description: Name of the gene
119+
required: true
120+
- type: string
121+
name: n_counts
122+
description: Number of counts of the gene
123+
required: true
124+
- type: string
125+
name: n_cells
126+
description: Number of cells expressing the gene
127+
required: true
128+
- type: anndata
129+
name: "metadata"
130+
description: Metadata of spatial dataset
131+
required: true
132+
uns:
133+
- type: string
134+
name: dataset_id
135+
required: true
136+
description: A unique identifier for the dataset
137+
- type: string
138+
name: dataset_name
139+
required: true
140+
description: A human-readable name for the dataset
141+
- type: string
142+
name: dataset_url
143+
required: true
144+
description: Link to the original source of the dataset
145+
- type: string
146+
name: dataset_reference
147+
required: true
148+
description: Bibtex reference of the paper in which the dataset was published
149+
- type: string
150+
name: dataset_summary
151+
required: true
152+
description: Short description of the dataset
153+
- type: string
154+
name: dataset_description
155+
required: true
156+
description: Long description of the dataset
157+
- type: string
158+
name: dataset_organism
159+
required: true
160+
description: The organism of the sample in the dataset
161+
- type: string
162+
name: segmentation_id
163+
required: true
164+
multiple: true
165+
description: A unique identifier for the segmentation
166+
obs:
167+
- type: string
168+
name: cell_id
169+
required: true
170+
description: A unique identifier for the cell
171+
var:
172+
- type: string
173+
name: gene_ids
174+
required: true
175+
description: Unique identifier for the gene
176+
- type: string
177+
name: feature_types
178+
required: true
179+
description: Type of the feature
180+
obsm:
181+
- type: double
182+
name: spatial
183+
required: true
184+
description: Spatial coordinates of the cell
185+
coordinate_systems:
186+
- type: object
187+
name: global
188+
description: Coordinate system of the replicate
189+
required: true
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
__merge__: /src/api/comp_spatial_data_aggregation.yaml
2+
3+
name: aggregate_spatial_data
4+
namespace: methods_data_aggregation
5+
label: "Spatial data aggregation"
6+
summary: "Aggregate different files of the raw and processed spatial data to one zarr file."
7+
description: |
8+
This component aggregates different files of the raw and processed spatial data to one zarr file.
9+
10+
links:
11+
documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
12+
repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
13+
references:
14+
doi: "10.1101/2023.02.13.528102"
15+
16+
resources:
17+
- type: python_script
18+
path: script.py
19+
20+
engines:
21+
- type: docker
22+
image: openproblems/base_python:1
23+
__merge__:
24+
- /src/base/setup_spatialdata_partial.yaml
25+
- type: native
26+
27+
runners:
28+
- type: executable
29+
- type: nextflow
30+
directives:
31+
label: [ midtime, lowcpu, lowmem ]
32+
33+
test_resources:
34+
- path: /resources_test/task_ist_preprocessing/mouse_brain_combined
35+
dest: resources_test/task_ist_preprocessing/mouse_brain_combined
36+
- type: python_script
37+
path: /common/component_tests/run_and_check_output.py
38+
- type: python_script
39+
path: /common/component_tests/check_config.py

0 commit comments

Comments
 (0)