Update the way we provide the benchmark config (#99)

LouisK92 · web-flow · commit bc7cc8c93134 · 2025-10-29T15:51:53.000+01:00
diff --git a/scripts/create_resources/combine/process_datasets.sh b/scripts/create_resources/combine/process_datasets.sh
@@ -326,8 +326,6 @@ param_list:
     dataset_description: "Spatial data: Bruker CosMx Human Lung Cancer Lung13; Single cell data: 2024Zuani_human_nsclc_sc"
     dataset_organism: "homo_sapiens"
 
-
-
 output_sc: "\$id/output_sc.h5ad"
 output_sp: "\$id/output_sp.zarr"
 output_state: "\$id/state.yaml"
diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
@@ -20,14 +20,69 @@ echo "  Make sure to run 'scripts/project/build_all_docker_containers.sh'!"
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 publish_dir="resources/results/${RUN_ID}"
 
+cat > /tmp/params_settings.yaml << HERE
+default_methods:
+  - custom_segmentation
+  - basic_transcript_assignment
+  - basic_count_aggregation
+  - basic_qc_filter
+  - alpha_shapes
+  - normalize_by_volume
+  - ssam
+  - no_correction
+segmentation_methods:
+  - custom_segmentation
+  # - cellpose
+  - binning
+  # - stardist
+  # - watershed
+transcript_assignment_methods:
+  - basic_transcript_assignment
+  #- baysor
+  # - clustermap
+  # - pciseq
+  # - comseg
+  # - proseg
+count_aggregation_methods:
+  - basic_count_aggregation
+qc_filtering_methods:
+  - basic_qc_filter
+volume_calculation_methods:
+  - alpha_shapes
+normalization_methods:
+  - normalize_by_volume
+  # - normalize_by_counts
+  # - spanorm
+celltype_annotation_methods:
+  - ssam
+  # - tacco
+  # - moscot
+expression_correction_methods:
+  - no_correction
+  # - gene_efficiency_correction
+  # - resolvi_correction
+method_parameters_yaml: /tmp/method_params.yaml
+HERE
+
 # write the parameters to file
 cat > /tmp/params.yaml << HERE
 input_states: resources/datasets/**/state.yaml
 rename_keys: 'input_sc:output_sc;input_sp:output_sp'
+save_spatial_data: false
+settings: '$(yq -o json /tmp/params_settings.yaml | jq -c .)'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
+cat > /tmp/method_params.yaml << HERE
+parameters:
+  binning:
+    default:
+      bin_size: 30
+    sweep:
+      bin_size: [20, 30, 40]
+HERE
+
 # run the benchmark
 nextflow run . \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -12,25 +12,7 @@ set -e
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
 publish_dir="s3://openproblems-data/resources/task_ist_preprocessing/results/${RUN_ID}"
 
-# input_dir="s3://openproblems-data/resources/task_ist_preprocessing/datasets"
-# cat > /tmp/params.yaml << HERE
-# param_list:
-
-#   - id: "mouse_brain_combined/rep1"
-#     input_sp: "$input_dir/mouse_brain_combined/rep1/output_sp.zarr"
-#     input_sc: "$input_dir/mouse_brain_combined/rep1/output_sc.h5ad"
-
-# output_sc: "\$id/output_sc.h5ad"
-# output_sp: "\$id/output_sp.zarr"
-# output_state: "\$id/state.yaml"
-# publish_dir: "$publish_dir"
-# HERE
-
-# write the parameters to file
-cat > /tmp/params.yaml << HERE
-input_states: s3://openproblems-data/resources/task_ist_preprocessing/datasets/**/state.yaml
-rename_keys: 'input_sc:output_sc;input_sp:output_sp'
-save_spatial_data: false
+cat > /tmp/params_settings.yaml << HERE
 default_methods:
   - custom_segmentation
   - basic_transcript_assignment
@@ -49,7 +31,7 @@ segmentation_methods:
 transcript_assignment_methods:
   - basic_transcript_assignment
   - baysor
-  # - clustermap
+  - clustermap
   - pciseq
   - comseg
   - proseg
@@ -71,10 +53,30 @@ expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
   - resolvi_correction
+method_parameters_yaml: /tmp/method_params.yaml
+HERE
+
+# write the parameters to file
+cat > /tmp/params.yaml << HERE
+input_states: s3://openproblems-data/resources/task_ist_preprocessing/datasets/**/state.yaml
+rename_keys: 'input_sc:output_sc;input_sp:output_sp'
+save_spatial_data: false
+settings: '$(yq -o json /tmp/params_settings.yaml | jq -c .)'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
+# NOTE: this file is only written to the local /tmp; it needs to be made available to
+#       the seqera cloud workspace and that path set above (method_parameters_yaml)
+cat > /tmp/method_params.yaml << HERE
+parameters:
+  binning:
+    default:
+      bin_size: 30
+    sweep:
+      bin_size: [20, 30, 40]
+HERE
+
 tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
@@ -15,12 +15,7 @@ echo "  Make sure to run 'scripts/project/build_all_docker_containers.sh'!"
 RUN_ID="testrun_$(date +%Y-%m-%d_%H-%M-%S)"
 publish_dir="temp/results/${RUN_ID}"
 
-# Write the parameters to file
-cat > /tmp/params.yaml << HERE
-id: mouse_brain_combined
-input_sc: resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad
-input_sp: resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr
-save_spatial_data: false
+cat > /tmp/params_settings.yaml << HERE
 default_methods:
   - custom_segmentation
   - basic_transcript_assignment
@@ -62,10 +57,30 @@ expression_correction_methods:
   # - gene_efficiency_correction
   # - resolvi_correction
 method_parameters_yaml: /tmp/method_params.yaml
+HERE
+
+# Write the parameters to file (input_states version, NOTE: enable `-entry auto` for this)
+cat > /tmp/params.yaml << HERE
+input_states: resources_test/task_ist_preprocessing/**/state.yaml
+rename_keys: 'input_sc:output_sc;input_sp:output_sp'
+save_spatial_data: false
+settings: '$(yq -o json /tmp/params_settings.yaml | jq -c .)'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
+# # Write the parameters to file (specific id version, NOTE: disable `-entry auto` for this)
+# cat > /tmp/params.yaml << HERE
+# id: mouse_brain_combined
+# input_sc: resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad
+# input_sp: resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr
+# save_spatial_data: true
+# $(cat /tmp/params_settings.yaml)
+# output_state: "state.yaml"
+# publish_dir: "$publish_dir"
+# HERE
+
+
 cat > /tmp/method_params.yaml << HERE
 parameters:
   binning:
@@ -79,5 +94,6 @@ nextflow run . \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
   -profile docker \
   -resume \
+  -entry auto \
   -c common/nextflow_helpers/labels_ci.config \
   -params-file /tmp/params.yaml
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -11,26 +11,95 @@ set -e
 resources_test_s3=s3://openproblems-data/resources_test/task_ist_preprocessing
 publish_dir_s3="s3://openproblems-nextflow/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
 
-# write the parameters to file
+cat > /tmp/params_settings.yaml << HERE
+default_methods:
+  - custom_segmentation
+  - basic_transcript_assignment
+  - basic_count_aggregation
+  - basic_qc_filter
+  - alpha_shapes
+  - normalize_by_volume
+  - ssam
+  - no_correction
+segmentation_methods:
+  - custom_segmentation
+  - cellpose
+  - binning
+  - stardist
+  - watershed
+transcript_assignment_methods:
+  - basic_transcript_assignment
+  - baysor
+  - clustermap
+  - pciseq
+  - comseg
+  - proseg
+count_aggregation_methods:
+  - basic_count_aggregation
+qc_filtering_methods:
+  - basic_qc_filter
+volume_calculation_methods:
+  - alpha_shapes
+normalization_methods:
+  - normalize_by_volume
+  - normalize_by_counts
+  - spanorm
+celltype_annotation_methods:
+  - ssam
+  - tacco
+  - moscot
+expression_correction_methods:
+  - no_correction
+  - gene_efficiency_correction
+  - resolvi_correction
+#method_parameters_yaml: /tmp/method_params.yaml
+HERE
+
+# Write the parameters to file (input_states version, NOTE: enable `--entry-name auto` for this)
 cat > /tmp/params.yaml << HERE
-id: mouse_brain_combined
-input_sc: $resources_test_s3/mouse_brain_combined/scrnaseq_reference.h5ad
-input_sp: $resources_test_s3/mouse_brain_combined/raw_ist.zarr
+input_states: $resources_test_s3/**/state.yaml
+rename_keys: 'input_sc:output_sc;input_sp:output_sp'
+save_spatial_data: false
+settings: '$(yq -o json /tmp/params_settings.yaml | jq -c .)'
 output_state: "state.yaml"
-publish_dir: $publish_dir_s3
+publish_dir: "$publish_dir_s3"
+HERE
+
+# # Write the parameters to file (specific id version, NOTE: disable `--entry-name auto` for this)
+# cat > /tmp/params.yaml << HERE
+# id: mouse_brain_combined
+# input_sc: $resources_test_s3/mouse_brain_combined/scrnaseq_reference.h5ad
+# input_sp: $resources_test_s3/mouse_brain_combined/raw_ist.zarr
+# save_spatial_data: false
+# settings: '$(yq -o json /tmp/params_settings.yaml | jq -c .)'
+# output_state: "state.yaml"
+# publish_dir: $publish_dir_s3
+# HERE
+
+# NOTE: this file is only written to the local /tmp; to use it, make it available to the seqera
+#       cloud workspace, then uncomment method_parameters_yaml above and point it at that path
+cat > /tmp/method_params.yaml << HERE
+parameters:
+  binning:
+    default:
+      bin_size: 30
+    sweep:
+      bin_size: [20, 30, 40]
 HERE
 
+
 tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
   --workspace 53907369739130 \
   --params-file /tmp/params.yaml \
+  --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
   --labels task_ist_preprocessing,test
 
-aws s3 sync \
-  s3://openproblems-nextflow/temp/results \
-  temp_results \
-  --profile op \
-  --dryrun
+# aws s3 sync \
+#   s3://openproblems-nextflow/temp/results \
+#   temp_results \
+#   --profile op \
+#   --dryrun