diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 00000000000..3e22ea7b9f2 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf new file mode 100644 index 00000000000..6e266b148d4 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -0,0 +1,41 @@ +process GCTA_MAKEBKSPARSE { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(grm_files) + val cutoff + + output: + tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dense_prefix = meta.id + meta = meta + [id: "${prefix}_sp"] + """ + gcta \\ + --grm ${dense_prefix} \\ + --make-bK-sparse ${cutoff} \\ + --out ${prefix}_sp \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + meta = meta + [id: "${prefix}_sp"] + """ + touch ${prefix}_sp.grm.id + touch ${prefix}_sp.grm.sp + """ +} diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 00000000000..7a8f8e206a5 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,76 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. + Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`. + - grm_files: + type: file + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing sparse GRM metadata + e.g. `[ id:'plink_simulated_sp' ]` + `meta.id` is set to the sparse GRM basename emitted by this module and can be used downstream as `--grm-sparse`. + - "*_sp.grm.*": + type: file + description: Sparse GRM sidecar files + pattern: "*_sp.grm.{id,sp}" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 00000000000..6e6eaf5bf30 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process GCTA_MAKEBKSPARSE" + script "../main.nf" + process "GCTA_MAKEBKSPARSE" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makebksparse" + tag "gcta/makegrm" + + setup { + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") { + script "../../makegrm/main.nf" + process { + """ + file('contract_dense.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'contract_dense' ], + file('contract_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") { + script "../../makegrm/main.nf" + process { + """ + file('stub_dense.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'stub_dense' ], + file('stub_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + } + + test("homo_sapiens popgen - create sparse GRM") { + when { + process { + """ + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense_sp" }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } + + test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") { + when { + process { + """ + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files -> + [[ id:'contract_dense_mismatch' ], grm_files] + } + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.exitStatus != 0 } + ) + } + } + + test("homo_sapiens popgen - create sparse GRM - stub") { + options "-stub" + + when { + process { + """ + input[0] = GCTA_MAKEGRM_STUB.out.grm_files + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense_sp" }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap new file mode 100644 index 00000000000..7bee50c5cfe --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "homo_sapiens popgen - create sparse GRM": { + "content": [ + { + "sparse_grm_files": [ + [ + { + "id": "contract_dense_sp" + }, + [ + "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:09:25.501833656" + }, + "homo_sapiens popgen - create sparse GRM - stub": { + "content": [ + { + "sparse_grm_files": [ + [ + { + "id": "stub_dense_sp" + }, + [ + "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:12:21.136559698" + } +}