From 38c559b2a8b9a8cda62fa215521f581eb979c773 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 9 Aug 2023 13:15:52 +0000
Subject: [PATCH] Add GATK's CreateReadCountPanelOfNormals and DenoiseReadCounts (#3709)

* pon

* denoisereadcounts

* add stub

* update version string

* review suggestions

* Update modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/denoisereadcounts/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/denoisereadcounts/main.nf

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

---------

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>
---
 .../createreadcountpanelofnormals/main.nf     | 54 +++++++++++++++++
 .../createreadcountpanelofnormals/meta.yml    | 46 +++++++++++++++
 .../nf-core/gatk4/denoisereadcounts/main.nf   | 59 +++++++++++++++++++
 .../nf-core/gatk4/denoisereadcounts/meta.yml  | 59 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  8 +++
 .../createreadcountpanelofnormals/main.nf     | 37 ++++++++++++
 .../nextflow.config                           | 13 ++++
 .../createreadcountpanelofnormals/test.yml    | 17 ++++++
 .../nf-core/gatk4/denoisereadcounts/main.nf   | 40 +++++++++++++
 .../gatk4/denoisereadcounts/nextflow.config   | 13 ++++
 .../nf-core/gatk4/denoisereadcounts/test.yml  | 19 ++++++
 11 files changed, 365 insertions(+)
 create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
 create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml
 create mode 100644 modules/nf-core/gatk4/denoisereadcounts/main.nf
 create mode 100644 modules/nf-core/gatk4/denoisereadcounts/meta.yml
 create mode 100644 tests/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
 create mode 100644 tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
 create mode 100644 tests/modules/nf-core/gatk4/createreadcountpanelofnormals/test.yml
 create mode 100644 tests/modules/nf-core/gatk4/denoisereadcounts/main.nf
 create mode 100644 tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
 create mode 100644 tests/modules/nf-core/gatk4/denoisereadcounts/test.yml

diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
new file mode 100644
index 00000000000..1f30dceee39
--- /dev/null
+++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
@@ -0,0 +1,54 @@
+process GATK4_CREATEREADCOUNTPANELOFNORMALS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::gatk4=4.4.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
+        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(counts)
+
+    output:
+    tuple val(meta), path("*.hdf5"), emit: pon
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def input_list = counts.collect(){"--input $it"}.join(" ")
+
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}M" CreateReadCountPanelOfNormals \\
+        ${args} \\
+        ${input_list} \\
+        --output ${prefix}.hdf5
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.hdf5
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml
new file mode 100644
index 00000000000..f86ac768952
--- /dev/null
+++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml
@@ -0,0 +1,46 @@
+name: "gatk4_createreadcountpanelofnormals"
+description: Creates a panel of normals (PoN) for read-count denoising given the read counts for samples in the panel.
+keywords:
+  - gatk4
+  - createreadcountpanelofnormals
+  - panelofnormals
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
+      doi: 10.1158/1538-7445.AM2017-3590
+      tool_dev_url: "https://github.com/broadinstitute/gatk"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - counts:
+      type: file
+      description: Read counts in hdf5 or tsv format.
+      pattern: "*.{hdf5,tsv}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - pon:
+      type: file
+      description: Panel-of-normals file.
+      pattern: "*.hdf5"
+
+authors:
+  - "@ramprasadn"
diff --git a/modules/nf-core/gatk4/denoisereadcounts/main.nf b/modules/nf-core/gatk4/denoisereadcounts/main.nf
new file mode 100644
index 00000000000..e60aeda1925
--- /dev/null
+++ b/modules/nf-core/gatk4/denoisereadcounts/main.nf
@@ -0,0 +1,59 @@
+process GATK4_DENOISEREADCOUNTS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::gatk4=4.4.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
+        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(counts)
+    tuple val(meta2), path(pon)
+
+    output:
+    tuple val(meta), path("*_standardizedCR.tsv"), emit: standardized
+    tuple val(meta), path("*_denoisedCR.tsv")    , emit: denoised
+    path "versions.yml"                          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[GATK DenoiseReadCounts] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}M" DenoiseReadCounts \\
+        ${args} \\
+        --tmp-dir . \\
+        --input ${counts} \\
+        --count-panel-of-normals ${pon} \\
+        --standardized-copy-ratios ${prefix}_standardizedCR.tsv \\
+        --denoised-copy-ratios ${prefix}_denoisedCR.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_standardizedCR.tsv
+    touch ${prefix}_denoisedCR.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gatk4/denoisereadcounts/meta.yml b/modules/nf-core/gatk4/denoisereadcounts/meta.yml
new file mode 100644
index 00000000000..0bb2f389d00
--- /dev/null
+++ b/modules/nf-core/gatk4/denoisereadcounts/meta.yml
@@ -0,0 +1,59 @@
+name: "gatk4_denoisereadcounts"
+description: Denoises read counts to produce denoised copy ratios
+keywords:
+  - gatk4
+  - denoisereadcounts
+  - copyratios
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
+      doi: 10.1158/1538-7445.AM2017-3590
+      tool_dev_url: "https://github.com/broadinstitute/gatk"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing panel-of-normals information
+        e.g. `[ id:'test' ]`
+  - counts:
+      type: file
+      description: Read counts in hdf5 or tsv format.
+      pattern: "*.{hdf5,tsv}"
+  - pon:
+      type: file
+      description: Panel of normals file in hdf5 format.
+      pattern: "*.hdf5"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - standardized:
+      type: file
+      description: Standardized copy ratios file.
+      pattern: "*_standardizedCR.tsv"
+  - denoised:
+      type: file
+      description: Denoised copy ratios file.
+      pattern: "*_denoisedCR.tsv"
+
+authors:
+  - "@ramprasadn"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 0144bc892ef..b7cf7e17e6f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1363,6 +1363,10 @@ gatk4/condensedepthevidence:
   - modules/nf-core/gatk4/condensedepthevidence/**
   - tests/modules/nf-core/gatk4/condensedepthevidence/**
 
+gatk4/createreadcountpanelofnormals:
+  - modules/nf-core/gatk4/createreadcountpanelofnormals/**
+  - tests/modules/nf-core/gatk4/createreadcountpanelofnormals/**
+
 gatk4/createsequencedictionary:
   - modules/nf-core/gatk4/createsequencedictionary/**
   - tests/modules/nf-core/gatk4/createsequencedictionary/**
@@ -1371,6 +1375,10 @@ gatk4/createsomaticpanelofnormals:
   - modules/nf-core/gatk4/createsomaticpanelofnormals/**
   - tests/modules/nf-core/gatk4/createsomaticpanelofnormals/**
 
+gatk4/denoisereadcounts:
+  - modules/nf-core/gatk4/denoisereadcounts/**
+  - tests/modules/nf-core/gatk4/denoisereadcounts/**
+
 gatk4/determinegermlinecontigploidy:
   - modules/nf-core/gatk4/determinegermlinecontigploidy/**
   - tests/modules/nf-core/gatk4/determinegermlinecontigploidy/**
diff --git a/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
new file mode 100644
index 00000000000..f45144db259
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
@@ -0,0 +1,37 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf'
+include { GATK4_COLLECTREADCOUNTS } from '../../../../../modules/nf-core/gatk4/collectreadcounts/main.nf'
+include { GATK4_PREPROCESSINTERVALS } from '../../../../../modules/nf-core/gatk4/preprocessintervals/main.nf'
+
+workflow test_gatk4_createreadcountpanelofnormals {
+
+    fasta = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]).collect()
+    fai = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)]).collect()
+    dict = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)]).collect()
+
+    GATK4_PREPROCESSINTERVALS ( fasta, fai, dict, [[],[]], [[],[]]).interval_list
+        .map {meta,list -> list}
+        .set {ch_intervals}
+
+    input = Channel.of([
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ],
+    [
+        [ id:'test2', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ]).combine(ch_intervals)
+
+    GATK4_COLLECTREADCOUNTS ( input, fasta, fai, dict )
+
+    GATK4_CREATEREADCOUNTPANELOFNORMALS (
+        GATK4_COLLECTREADCOUNTS.out.tsv
+            .map({ meta, tsv -> [ [id:'test'], tsv ] })
+            .groupTuple()
+    )
+}
diff --git a/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
new file mode 100644
index 00000000000..a8c5250b024
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
@@ -0,0 +1,13 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: GATK4_COLLECTREADCOUNTS {
+        ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY"
+    }
+
+    withName: GATK4_CREATEREADCOUNTPANELOFNORMALS {
+        ext.args = "--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2"
+    }
+
+}
diff --git a/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/test.yml b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/test.yml
new file mode 100644
index 00000000000..829079feaa5
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/createreadcountpanelofnormals/test.yml
@@ -0,0 +1,17 @@
+- name: "gatk4 createreadcountpanelofnormals"
+  command: nextflow run ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals -entry test_gatk4_createreadcountpanelofnormals -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
+  tags:
+    - "gatk4"
+    - "gatk4/createreadcountpanelofnormals"
+  files:
+    - path: "output/gatk4/test.hdf5"
+    - path: "output/gatk4/versions.yml"
+
+- name: "gatk4 createreadcountpanelofnormals stub"
+  command: nextflow run ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals -entry test_gatk4_createreadcountpanelofnormals -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config -stub
+  tags:
+    - "gatk4"
+    - "gatk4/createreadcountpanelofnormals"
+  files:
+    - path: "output/gatk4/test.hdf5"
+    - path: "output/gatk4/versions.yml"
diff --git a/tests/modules/nf-core/gatk4/denoisereadcounts/main.nf b/tests/modules/nf-core/gatk4/denoisereadcounts/main.nf
new file mode 100644
index 00000000000..e2bd1b69401
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/denoisereadcounts/main.nf
@@ -0,0 +1,40 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf'
+include { GATK4_COLLECTREADCOUNTS } from '../../../../../modules/nf-core/gatk4/collectreadcounts/main.nf'
+include { GATK4_PREPROCESSINTERVALS } from '../../../../../modules/nf-core/gatk4/preprocessintervals/main.nf'
+include { GATK4_DENOISEREADCOUNTS } from '../../../../../modules/nf-core/gatk4/denoisereadcounts/main.nf'
+
+workflow test_gatk4_denoisereadcounts {
+
+    fasta = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]).collect()
+    fai = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)]).collect()
+    dict = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)]).collect()
+
+    GATK4_PREPROCESSINTERVALS ( fasta, fai, dict, [[],[]], [[],[]]).interval_list
+        .map {meta,list -> list}
+        .set {ch_intervals}
+
+    input = Channel.of([
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ],
+    [
+        [ id:'test2', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ])
+
+    GATK4_COLLECTREADCOUNTS ( input.combine(ch_intervals), fasta, fai, dict )
+
+    GATK4_CREATEREADCOUNTPANELOFNORMALS (
+        GATK4_COLLECTREADCOUNTS.out.tsv
+            .map({ meta, tsv -> [ [id:'test'], tsv ] })
+            .groupTuple()
+    )
+
+    GATK4_DENOISEREADCOUNTS ( GATK4_COLLECTREADCOUNTS.out.tsv.first(), GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon )
+}
diff --git a/tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config b/tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
new file mode 100644
index 00000000000..a8c5250b024
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
@@ -0,0 +1,13 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: GATK4_COLLECTREADCOUNTS {
+        ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY"
+    }
+
+    withName: GATK4_CREATEREADCOUNTPANELOFNORMALS {
+        ext.args = "--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2"
+    }
+
+}
diff --git a/tests/modules/nf-core/gatk4/denoisereadcounts/test.yml b/tests/modules/nf-core/gatk4/denoisereadcounts/test.yml
new file mode 100644
index 00000000000..c0fd4a85b7e
--- /dev/null
+++ b/tests/modules/nf-core/gatk4/denoisereadcounts/test.yml
@@ -0,0 +1,19 @@
+- name: "gatk4 denoisereadcounts"
+  command: nextflow run ./tests/modules/nf-core/gatk4/denoisereadcounts -entry test_gatk4_denoisereadcounts -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
+  tags:
+    - "gatk4"
+    - "gatk4/denoisereadcounts"
+  files:
+    - path: "output/gatk4/test_standardizedCR.tsv"
+    - path: "output/gatk4/test_denoisedCR.tsv"
+    - path: "output/gatk4/versions.yml"
+
+- name: "gatk4 denoisereadcounts stub"
+  command: nextflow run ./tests/modules/nf-core/gatk4/denoisereadcounts -entry test_gatk4_denoisereadcounts -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config -stub
+  tags:
+    - "gatk4"
+    - "gatk4/denoisereadcounts"
+  files:
+    - path: "output/gatk4/test_standardizedCR.tsv"
+    - path: "output/gatk4/test_denoisedCR.tsv"
+    - path: "output/gatk4/versions.yml"