From 37b757d635ac5421b2a100af25be908db2725ae6 Mon Sep 17 00:00:00 2001
From: lyh970817
Date: Sat, 23 May 2026 00:09:19 +0800
Subject: [PATCH 1/2] Add gcta/makebksparse module

---
Reviewer notes (free text between the `---` marker and the first
`diff --git` line; it is not part of any hunk and is not applied):

* main.nf adds the GCTA_MAKEBKSPARSE process. Its script runs
  `gcta --grm <meta.id> --make-bK-sparse <cutoff> --out <prefix>_sp
  --thread-num <task.cpus> <task.ext.args>`, where <prefix> is
  `task.ext.prefix` when set and `meta.id` otherwise.
* `--grm` is always given the incoming `meta.id`, never `task.ext.prefix`.
  meta.yml documents this as the "dense GRM basename contract", and the
  "fail when meta id does not match dense GRM basename" test expects a
  non-zero exit status when the id and the staged file names disagree.
* Both the script and the stub reassign `meta` to
  `meta + [id: "<prefix>_sp"]`, so the emitted `sparse_grm_files` tuple
  carries an id ending in `_sp` (the tests assert `contract_dense_sp` and
  `stub_dense_sp`).
* The stub only touches `<prefix>_sp.grm.id` and `<prefix>_sp.grm.sp`;
  the stub snapshot therefore records the md5 of empty files.
* The tests obtain their dense GRM input by running GCTA_MAKEGRM from
  `../../makegrm/main.nf` in the `setup` block, hence the extra
  `gcta/makegrm` tag.

 .../nf-core/gcta/makebksparse/environment.yml |   7 +
 modules/nf-core/gcta/makebksparse/main.nf     |  41 ++++++
 modules/nf-core/gcta/makebksparse/meta.yml    |  76 +++++++++++
 .../gcta/makebksparse/tests/main.nf.test      | 124 ++++++++++++++++++
 .../gcta/makebksparse/tests/main.nf.test.snap |  78 +++++++++++
 5 files changed, 326 insertions(+)
 create mode 100644 modules/nf-core/gcta/makebksparse/environment.yml
 create mode 100644 modules/nf-core/gcta/makebksparse/main.nf
 create mode 100644 modules/nf-core/gcta/makebksparse/meta.yml
 create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test
 create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap

diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf
new file mode 100644
index 000000000000..6e266b148d4f
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/main.nf
@@ -0,0 +1,41 @@
+process GCTA_MAKEBKSPARSE {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(grm_files)
+    val cutoff
+
+    output:
+    tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def dense_prefix = meta.id
+    meta = meta + [id: "${prefix}_sp"]
+    """
+    gcta \\
+        --grm ${dense_prefix} \\
+        --make-bK-sparse ${cutoff} \\
+        --out ${prefix}_sp \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    meta = meta + [id: "${prefix}_sp"]
+    """
+    touch ${prefix}_sp.grm.id
+    touch ${prefix}_sp.grm.sp
+    """
+}
diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml
new file mode 100644
index 000000000000..7a8f8e206a51
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/meta.yml
@@ -0,0 +1,76 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_makebksparse"
+description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses
+keywords:
+  - gcta
+  - genome-wide complex trait analysis
+  - grm
+  - genetic relationship matrix
+  - sparse
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+      licence: ["GPL-3.0-only"]
+      identifier: "biotools:gcta"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g. `[ id:'plink_simulated' ]`
+          `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`.
+          Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`.
+    - grm_files:
+        type: file
+        description: Dense GRM sidecar files
+        pattern: "*.grm.{id,bin,N.bin}"
+        ontologies: []
+  - cutoff:
+      type: float
+      description: Sparse GRM cutoff passed to `--make-bK-sparse`
+
+output:
+  sparse_grm_files:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing sparse GRM metadata
+            e.g. `[ id:'plink_simulated_sp' ]`
+            `meta.id` is set to the sparse GRM basename emitted by this module and can be used downstream as `--grm-sparse`.
+      - "*_sp.grm.*":
+          type: file
+          description: Sparse GRM sidecar files
+          pattern: "*_sp.grm.{id,sp}"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
new file mode 100644
index 000000000000..672dd9f4ce86
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
@@ -0,0 +1,124 @@
+nextflow_process {
+
+    name "Test Process GCTA_MAKEBKSPARSE"
+    script "../main.nf"
+    process "GCTA_MAKEBKSPARSE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/makebksparse"
+    tag "gcta/makegrm"
+
+    setup {
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('contract_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'contract_dense' ],
+                    file('contract_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('stub_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'stub_dense' ],
+                    file('stub_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM") {
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense_sp" },
+                { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") {
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
+                    [[ id:'contract_dense_mismatch' ], grm_files]
+                }
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.exitStatus != 0 }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM - stub") {
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_STUB.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense_sp" },
+                { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
new file mode 100644
index 000000000000..964f0b13108b
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "stub_sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "stub_dense_sp"
+                    },
+                    [
+                        "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:12:21.136559698"
+    },
+    "versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:38.045354436"
+    },
+    "sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "contract_dense_sp"
+                    },
+                    [
+                        "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
+                        "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:09:25.501833656"
+    },
+    "stub_versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:48.775770627"
+    }
+}

From 65986b92b7fd2aaa2f2ed70c2441d18172021cef Mon Sep 17 00:00:00 2001
From: lyh970817
Date: Tue, 26 May 2026 22:18:44 +0800
Subject: [PATCH 2/2] Use sanitized snapshots for gcta/makebksparse tests

---
Reviewer notes (free text between the `---` marker and the first
`diff --git` line; it is not part of any hunk and is not applied):

* In the real-run test and the stub test, the two named snapshot
  assertions (`sparse_grm_files`/`versions` and
  `stub_sparse_grm_files`/`stub_versions`) are replaced by a single
  `snapshot(sanitizeOutput(process.out)).match()`.
* The snapshot file goes from four named entries to two entries keyed by
  the test names; each entry now holds both `sparse_grm_files` and
  `versions_gcta`.
* The recorded md5 sums and the `1.94.1` version string are carried over
  unchanged; the explicit `size()` and `id` assertions are kept.
* This patch depends on patch 1/2: its hunks use lines 73-80 and 116-123
  of the test file, and the snapshot file, exactly as created there.

 .../gcta/makebksparse/tests/main.nf.test      |  6 +-
 .../gcta/makebksparse/tests/main.nf.test.snap | 66 +++++++------------
 2 files changed, 26 insertions(+), 46 deletions(-)

diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
index 672dd9f4ce86..6e6eaf5bf303 100644
--- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test
+++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
@@ -73,8 +73,7 @@ nextflow_process {
                 { assert process.success },
                 { assert process.out.sparse_grm_files.size() == 1 },
                 { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense_sp" },
-                { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") },
-                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
+                { assert snapshot(sanitizeOutput(process.out)).match() }
             )
         }
     }
@@ -116,8 +115,7 @@ nextflow_process {
                 { assert process.success },
                 { assert process.out.sparse_grm_files.size() == 1 },
                 { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense_sp" },
-                { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") },
-                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
+                { assert snapshot(sanitizeOutput(process.out)).match() }
             )
         }
     }
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
index 964f0b13108b..7bee50c5cfe5 100644
--- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
+++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
@@ -1,27 +1,18 @@
 {
-    "stub_sparse_grm_files": {
+    "homo_sapiens popgen - create sparse GRM": {
         "content": [
-            [
-                [
-                    {
-                        "id": "stub_dense_sp"
-                    },
+            {
+                "sparse_grm_files": [
                     [
-                        "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
-                        "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        {
+                            "id": "contract_dense_sp"
+                        },
+                        [
+                            "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
+                            "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
+                        ]
                     ]
-                ]
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.4"
-        },
-        "timestamp": "2026-05-15T21:12:21.136559698"
-    },
-    "versions": {
-        "content": [
-            {
+                ],
                 "versions_gcta": [
                     [
                         "GCTA_MAKEBKSPARSE",
@@ -35,31 +26,22 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-21T00:30:38.045354436"
-    },
-    "sparse_grm_files": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "contract_dense_sp"
-                    },
-                    [
-                        "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
-                        "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
-                    ]
-                ]
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.9.3",
-            "nextflow": "25.10.4"
-        },
         "timestamp": "2026-05-15T21:09:25.501833656"
     },
-    "stub_versions": {
+    "homo_sapiens popgen - create sparse GRM - stub": {
         "content": [
             {
+                "sparse_grm_files": [
+                    [
+                        {
+                            "id": "stub_dense_sp"
+                        },
+                        [
+                            "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
                 "versions_gcta": [
                     [
                         "GCTA_MAKEBKSPARSE",
@@ -73,6 +55,6 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-21T00:30:48.775770627"
+        "timestamp": "2026-05-15T21:12:21.136559698"
     }
 }