openproblems-bio
diff --git a/‎scripts/create_test_resources/test_pipeline.sh‎
Lines changed: 9 additions & 0 deletions b/‎scripts/create_test_resources/test_pipeline.sh‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎scripts/run_benchmark/run_test_local.sh‎
Lines changed: 1 addition & 0 deletions b/‎scripts/run_benchmark/run_test_local.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/api/comp_quality_metric.yaml‎
Lines changed: 27 additions & 0 deletions b/‎src/api/comp_quality_metric.yaml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎src/api/comp_spatial_data_aggregation.yaml‎
Lines changed: 35 additions & 0 deletions b/‎src/api/comp_spatial_data_aggregation.yaml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎src/api/file_spatial_processed_complete.yaml‎
Lines changed: 189 additions & 0 deletions b/‎src/api/file_spatial_processed_complete.yaml‎
Lines changed: 189 additions & 0 deletions
diff --git a/‎src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml‎
Lines changed: 39 additions & 0 deletions b/‎src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml‎
Lines changed: 39 additions & 0 deletions
@@ -72,6 +72,14 @@ viash run src/methods_qc_filter/basic_qc_filter/config.vsh.yaml -- \
   --input $OUT_DIR/spatial_corrected_counts.h5ad \
   --output $OUT_DIR/spatial_qc_col.h5ad
 
+# Aggregate raw iST data, transcript assignments, QC-filter output and corrected counts into one zarr
+viash run src/methods_data_aggregation/aggregate_spatial_data/config.vsh.yaml -- \
+  --input_raw_sp $OUT_DIR/raw_ist.zarr \
+  --input_transcript_assignments $OUT_DIR/transcript_assignments.zarr \
+  --input_qc_col $OUT_DIR/spatial_qc_col.h5ad \
+  --input_spatial_corrected_counts $OUT_DIR/spatial_corrected_counts.h5ad \
+  --output $OUT_DIR/spatial_processed_complete.zarr
+
 # run a metric
 viash run src/metrics/similarity/config.vsh.yaml -- \
   --input $OUT_DIR/spatial_corrected_counts.h5ad \
@@ -93,6 +101,7 @@ output_spatial_normalized_counts: !file spatial_normalized_counts.h5ad
 output_spatial_with_cell_types: !file spatial_with_cell_types.h5ad
 output_spatial_corrected_counts: !file spatial_corrected_counts.h5ad
 output_spatial_qc_col: !file spatial_qc_col.h5ad
+output_spatial_processed_complete: !file spatial_processed_complete.zarr
 output_score: !file score.h5ad
 EOL
 
 
@@ -20,6 +20,7 @@ cat > /tmp/params.yaml << HERE
 id: mouse_brain_combined
 input_sc: resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad
 input_sp: resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr
+save_spatial_data: false
 default_methods:
   - custom_segmentation
   - basic_transcript_assignment
 
@@ -0,0 +1,27 @@
+namespace: metrics  # interface spec for reference-free quality metrics
+info:
+  type: metric
+  type_info:
+    label: Quality Metric
+    summary: A quality metric for evaluating iST preprocessing methods
+    description: |
+      This metric evaluates the quality of the iST preprocessing without using a reference.
+arguments:
+  - name: '--input'  # schema merged in from file_spatial_processed_complete.yaml
+    __merge__: file_spatial_processed_complete.yaml
+    direction: input
+    required: true
+  - name: '--output'  # schema merged in from file_score.yaml
+    __merge__: file_score.yaml
+    direction: output
+    required: true
+
+test_resources:  # test dataset plus the two shared component-test scripts
+  - path: /resources_test/task_ist_preprocessing/mouse_brain_combined
+    dest: resources_test/task_ist_preprocessing/mouse_brain_combined
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - type: python_script
+    path: /common/component_tests/check_config.py
+
+
@@ -0,0 +1,35 @@
+info:  # interface spec shared by spatial data aggregation methods
+  type: method
+  subtype: method_spatial_data_aggregation
+  type_info:
+    label: Spatial data aggregation
+    summary: Aggregate different files of the raw and processed spatial data to one zarr file.
+    description: |
+      This component aggregates different files of the raw and processed spatial data to one zarr file.
+
+argument_groups:
+  - name: Inputs  # four required inputs, each with its schema merged from a file_*.yaml spec
+    arguments:
+      - name: '--input_raw_sp'
+        __merge__: file_raw_ist.yaml
+        direction: input
+        required: true
+      - name: '--input_transcript_assignments'
+        __merge__: file_transcript_assignments.yaml
+        direction: input
+        required: true
+      - name: '--input_qc_col'
+        __merge__: file_spatial_qc_col.yaml
+        direction: input
+        required: true
+      - name: '--input_spatial_corrected_counts'
+        __merge__: file_spatial_corrected_counts.yaml
+        direction: input
+        required: true
+  - name: Outputs  # single aggregated output following file_spatial_processed_complete.yaml
+    arguments:
+      - name: '--output'
+        __merge__: file_spatial_processed_complete.yaml
+        direction: output
+        required: true
+        default: '$id/output.zarr'
@@ -0,0 +1,189 @@
+type: file  # format spec for the aggregated raw + processed spatial dataset
+example: "resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_processed_complete.zarr"
+label: "Spatial Processed Complete"
+summary: A spatial transcriptomics dataset that includes all raw and processed data.
+description: |
+  This dataset contains images, points with cell ids, and an anndata table with processed counts and cell type annotations.
+
+info:
+  format:
+    type: spatialdata_zarr
+    images:  # image elements; only "image" is mandatory
+      - type: object
+        name: image
+        description: The raw image data
+        required: true
+      - type: object
+        name: image_3D
+        description: The raw 3D image data
+        required: false
+      - type: object
+        name: he_image
+        description: H&E image data
+        required: false
+    points:  # transcript point cloud, each point carrying the id of its assigned cell
+      - type: dataframe
+        name: transcripts
+        description: Point cloud data of transcripts
+        required: true
+        columns:
+          - type: float
+            name: "x"
+            required: true
+            description: x-coordinate of the point
+          - type: float
+            name: "y"  # keep quoted: a bare y is a boolean under YAML 1.1
+            required: true
+            description: y-coordinate of the point
+          - type: float
+            name: "z"
+            required: false
+            description: z-coordinate of the point
+          - type: categorical
+            name: feature_name
+            required: true
+            description: Name of the feature
+          - type: integer
+            name: "cell_id"
+            required: true
+            description: Unique identifier of the cell
+          - type: long
+            name: transcript_id
+            required: true
+            description: Unique identifier of the transcript
+    shapes:  # optional cell boundary geometries
+      - type: dataframe
+        name: "cell_boundaries"
+        description: Cell boundaries
+        required: false
+        columns:
+          - type: object
+            name: "geometry"
+            required: true
+            description: Geometry of the cell boundary
+    tables:  # two AnnData tables: per-cell counts and dataset metadata
+      - type: anndata
+        name: "counts"
+        description: Counts of the transcripts
+        required: true
+        layers:
+          - type: integer
+            name: counts
+            description: Raw counts
+            required: true
+          - type: double  # normalized values are not integral; matches normalized_uncorrected
+            name: normalized
+            description: Normalized counts
+            required: true
+          - type: double
+            name: normalized_uncorrected
+            description: Uncorrected normalized expression
+            required: false
+        obs:
+          - type: string  # NOTE(review): points.cell_id above is integer, string here; confirm
+            name: cell_id
+            description: Unique identifier for the cell (from assignment step)
+            required: true
+          - type: double  # coordinate: numeric, like the float x/y/z of the transcript points
+            name: centroid_x
+            description: X coordinate of the cell
+            required: true
+          - type: double
+            name: centroid_y
+            description: Y coordinate of the cell
+            required: true
+          - type: double
+            name: centroid_z
+            description: Z coordinate of the cell
+            required: false
+          - type: integer  # a count, not free text
+            name: n_counts
+            description: Number of counts in the cell
+            required: true
+          - type: integer
+            name: n_genes
+            description: Number of genes in the cell
+            required: true
+          - type: double  # a measured quantity, not free text
+            name: volume
+            description: Volume of the cell
+            required: true
+          - type: string
+            name: cell_type
+            description: Cell type of the cell
+            required: true
+        var:
+          - type: string
+            name: gene_name
+            description: Name of the gene
+            required: true
+          - type: integer  # a count, not free text
+            name: n_counts
+            description: Number of counts of the gene
+            required: true
+          - type: integer
+            name: n_cells
+            description: Number of cells expressing the gene
+            required: true
+      - type: anndata
+        name: "metadata"
+        description: Metadata of spatial dataset
+        required: true
+        uns:
+          - type: string
+            name: dataset_id
+            required: true
+            description: A unique identifier for the dataset
+          - type: string
+            name: dataset_name
+            required: true
+            description: A human-readable name for the dataset
+          - type: string
+            name: dataset_url
+            required: true
+            description: Link to the original source of the dataset
+          - type: string
+            name: dataset_reference
+            required: true
+            description: Bibtex reference of the paper in which the dataset was published
+          - type: string
+            name: dataset_summary
+            required: true
+            description: Short description of the dataset
+          - type: string
+            name: dataset_description
+            required: true
+            description: Long description of the dataset
+          - type: string
+            name: dataset_organism
+            required: true
+            description: The organism of the sample in the dataset
+          - type: string
+            name: segmentation_id
+            required: true
+            multiple: true
+            description: A unique identifier for the segmentation
+        obs:
+          - type: string
+            name: cell_id
+            required: true
+            description: A unique identifier for the cell
+        var:
+          - type: string
+            name: gene_ids
+            required: true
+            description: Unique identifier for the gene
+          - type: string
+            name: feature_types
+            required: true
+            description: Type of the feature
+        obsm:
+          - type: double
+            name: spatial
+            required: true
+            description: Spatial coordinates of the cell
+    coordinate_systems:  # the one coordinate system every dataset must define
+      - type: object
+        name: global
+        description: Coordinate system of the replicate
+        required: true
@@ -0,0 +1,39 @@
+__merge__: /src/api/comp_spatial_data_aggregation.yaml  # pulls in the argument interface
+
+name: aggregate_spatial_data
+namespace: methods_data_aggregation
+label: Spatial data aggregation
+summary: Aggregate different files of the raw and processed spatial data to one zarr file.
+description: |
+  This component aggregates different files of the raw and processed spatial data to one zarr file.
+
+links:
+  repository: 'https://github.com/openproblems-bio/task_ist_preprocessing'
+  documentation: 'https://github.com/openproblems-bio/task_ist_preprocessing'
+references:
+  doi: '10.1101/2023.02.13.528102'
+
+resources:  # the component's implementation lives in script.py next to this config
+  - type: python_script
+    path: script.py
+
+engines:  # docker image extended by the merged spatialdata setup partial, or native
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, lowcpu, lowmem]
+
+test_resources:  # test dataset plus the two shared component-test scripts
+  - path: /resources_test/task_ist_preprocessing/mouse_brain_combined
+    dest: resources_test/task_ist_preprocessing/mouse_brain_combined
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - type: python_script
+    path: /common/component_tests/check_config.py