Skip to content

Commit

Permalink
Add GATK's CreateReadCountPanelOfNormals and DenoiseReadCounts (nf-co…
Browse files Browse the repository at this point in the history
…re#3709)

* pon

* denoisereadcounts

* add stub

* update version string

* review suggestions

* Update modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/denoisereadcounts/main.nf [skip ci]

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

* Update modules/nf-core/gatk4/denoisereadcounts/main.nf

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

---------

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>
  • Loading branch information
ramprasadn and nvnieuwk authored Aug 9, 2023
1 parent 2fe9363 commit 38c559b
Show file tree
Hide file tree
Showing 11 changed files with 365 additions and 0 deletions.
54 changes: 54 additions & 0 deletions modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Runs GATK CreateReadCountPanelOfNormals on the staged read-count file(s) and
// emits the resulting panel of normals as `<prefix>.hdf5`.
process GATK4_CREATEREADCOUNTPANELOFNORMALS {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::gatk4=4.4.0.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"

    input:
    // meta  : map whose `id` is used for the task tag and the default output prefix
    // counts: read-count file(s); each one becomes its own `--input` argument
    tuple val(meta), path(counts)

    output:
    tuple val(meta), path("*.hdf5"), emit: pon
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Build one `--input <file>` argument per staged count file
    def input_list = counts.collect(){"--input $it"}.join(" ")

    // JVM heap size in MB: 80% of the task memory when it is known, otherwise 3072
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    """
    gatk --java-options "-Xmx${avail_mem}M" CreateReadCountPanelOfNormals \\
        ${args} \\
        ${input_list} \\
        --output ${prefix}.hdf5

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // The stub only creates an empty output file, so `task.ext.args` is not read here
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.hdf5

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}
46 changes: 46 additions & 0 deletions modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Module metadata for GATK4_CREATEREADCOUNTPANELOFNORMALS
name: "gatk4_createreadcountpanelofnormals"
description: Creates a panel of normals (PoN) for read-count denoising given the read counts for samples in the panel.
keywords:
  - gatk4
  - createreadcountpanelofnormals
  - panelofnormals
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
      doi: 10.1158/1538-7445.AM2017-3590
      tool_dev_url: "https://github.com/broadinstitute/gatk"
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - counts:
      type: file
      description: Read counts in hdf5 or tsv format.
      pattern: "*.{hdf5,tsv}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - pon:
      type: file
      description: Panel-of-normals file.
      pattern: "*.hdf5"

authors:
  - "@ramprasadn"
59 changes: 59 additions & 0 deletions modules/nf-core/gatk4/denoisereadcounts/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Runs GATK DenoiseReadCounts on one read-count file and writes the
// standardized and denoised copy ratios as `<prefix>_standardizedCR.tsv`
// and `<prefix>_denoisedCR.tsv`.
process GATK4_DENOISEREADCOUNTS {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::gatk4=4.4.0.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"

    input:
    // meta / counts: sample map and the read-count file to denoise
    // meta2 / pon  : map and file for the panel of normals; `pon` may be empty (`[]`)
    tuple val(meta), path(counts)
    tuple val(meta2), path(pon)

    output:
    tuple val(meta), path("*_standardizedCR.tsv"), emit: standardized
    tuple val(meta), path("*_denoisedCR.tsv")    , emit: denoised
    path "versions.yml"                          , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only emit the flag when a panel of normals was supplied, so an empty `pon`
    // input does not leave a dangling `--count-panel-of-normals` on the command line
    def pon_arg = pon ? "--count-panel-of-normals ${pon}" : ''

    // JVM heap size in MB: 80% of the task memory when it is known, otherwise 3072
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[GATK DenoiseReadCounts] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    """
    gatk --java-options "-Xmx${avail_mem}M" DenoiseReadCounts \\
        ${args} \\
        --tmp-dir . \\
        --input ${counts} \\
        ${pon_arg} \\
        --standardized-copy-ratios ${prefix}_standardizedCR.tsv \\
        --denoised-copy-ratios ${prefix}_denoisedCR.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // The stub only creates empty output files, so `task.ext.args` is not read here
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_standardizedCR.tsv
    touch ${prefix}_denoisedCR.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}
59 changes: 59 additions & 0 deletions modules/nf-core/gatk4/denoisereadcounts/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Module metadata for GATK4_DENOISEREADCOUNTS
name: "gatk4_denoisereadcounts"
description: Denoises read counts to produce denoised copy ratios.
keywords:
  - gatk4
  - denoisereadcounts
  - copyratios
tools:
  - gatk4:
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
      doi: 10.1158/1538-7445.AM2017-3590
      tool_dev_url: "https://github.com/broadinstitute/gatk"
      licence: ["Apache-2.0"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - meta2:
      type: map
      description: |
        Groovy Map containing panel of normals information
        e.g. `[ id:'test' ]`
  - counts:
      type: file
      description: Read counts in hdf5 or tsv format.
      pattern: "*.{hdf5,tsv}"
  - pon:
      type: file
      description: Panel of normals file in hdf5 format.
      pattern: "*.hdf5"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - standardized:
      type: file
      description: Standardized copy ratios file.
      pattern: "*_standardizedCR.tsv"
  - denoised:
      type: file
      description: Denoised copy ratios file.
      pattern: "*_denoisedCR.tsv"

authors:
  - "@ramprasadn"
8 changes: 8 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1363,6 +1363,10 @@ gatk4/condensedepthevidence:
- modules/nf-core/gatk4/condensedepthevidence/**
- tests/modules/nf-core/gatk4/condensedepthevidence/**

gatk4/createreadcountpanelofnormals:
  - modules/nf-core/gatk4/createreadcountpanelofnormals/**
  - tests/modules/nf-core/gatk4/createreadcountpanelofnormals/**

gatk4/createsequencedictionary:
- modules/nf-core/gatk4/createsequencedictionary/**
- tests/modules/nf-core/gatk4/createsequencedictionary/**
Expand All @@ -1371,6 +1375,10 @@ gatk4/createsomaticpanelofnormals:
- modules/nf-core/gatk4/createsomaticpanelofnormals/**
- tests/modules/nf-core/gatk4/createsomaticpanelofnormals/**

gatk4/denoisereadcounts:
  - modules/nf-core/gatk4/denoisereadcounts/**
  - tests/modules/nf-core/gatk4/denoisereadcounts/**

gatk4/determinegermlinecontigploidy:
- modules/nf-core/gatk4/determinegermlinecontigploidy/**
- tests/modules/nf-core/gatk4/determinegermlinecontigploidy/**
Expand Down
37 changes: 37 additions & 0 deletions tests/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf'
include { GATK4_COLLECTREADCOUNTS } from '../../../../../modules/nf-core/gatk4/collectreadcounts/main.nf'
include { GATK4_PREPROCESSINTERVALS } from '../../../../../modules/nf-core/gatk4/preprocessintervals/main.nf'

// Test workflow: preprocess intervals, collect read counts for two BAMs, then
// group both count files under one key and build a panel of normals from them.
workflow test_gatk4_createreadcountpanelofnormals {

    def genome   = params.test_data['homo_sapiens']['genome']
    def illumina = params.test_data['homo_sapiens']['illumina']

    // Reference files, each paired with a meta map
    fasta = Channel.of([ [ id:'test' ], file(genome['genome_fasta'], checkIfExists: true) ]).collect()
    fai   = Channel.of([ [ id:'test' ], file(genome['genome_fasta_fai'], checkIfExists: true) ]).collect()
    dict  = Channel.of([ [ id:'test' ], file(genome['genome_dict'], checkIfExists: true) ]).collect()

    // Keep only the interval list file; its meta map is dropped
    GATK4_PREPROCESSINTERVALS ( fasta, fai, dict, [[],[]], [[],[]] )
    ch_intervals = GATK4_PREPROCESSINTERVALS.out.interval_list.map { meta, list -> list }

    // Two samples as [ meta, bam, bai ]
    ch_bams = Channel.of(
        [
            [ id:'test', single_end:false ], // meta map
            file(illumina['test_paired_end_sorted_bam'], checkIfExists: true),
            file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        ],
        [
            [ id:'test2', single_end:false ], // meta map
            file(illumina['test2_paired_end_sorted_bam'], checkIfExists: true),
            file(illumina['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
        ]
    )

    // Append the interval list to every sample tuple
    input = ch_bams.combine(ch_intervals)

    GATK4_COLLECTREADCOUNTS ( input, fasta, fai, dict )

    // Re-key every count file to the same meta map so groupTuple gathers them together
    ch_panel_counts = GATK4_COLLECTREADCOUNTS.out.tsv
        .map { meta, tsv -> [ [id:'test'], tsv ] }
        .groupTuple()

    GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_panel_counts )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Process settings for the createreadcountpanelofnormals module test
process {

    // Publish under <outdir>/<x>, where <x> is the lower-cased text before the first
    // underscore of the last ':'-separated part of the process name
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // The test workflow reads the `tsv` output of this process
    withName: 'GATK4_COLLECTREADCOUNTS' {
        ext.args = '--format TSV --interval-merging-rule OVERLAPPING_ONLY'
    }

    // Extra command-line options picked up by the module through task.ext.args
    withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' {
        ext.args = '--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2'
    }

}
17 changes: 17 additions & 0 deletions tests/modules/nf-core/gatk4/createreadcountpanelofnormals/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Regular run: expects the panel of normals and the versions file to be published
- name: "gatk4 createreadcountpanelofnormals"
  command: >-
    nextflow run ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals
    -entry test_gatk4_createreadcountpanelofnormals
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
  tags:
    - "gatk4"
    - "gatk4/createreadcountpanelofnormals"
  files:
    - path: "output/gatk4/test.hdf5"
    - path: "output/gatk4/versions.yml"

# Same workflow with -stub: the same files are expected
- name: "gatk4 createreadcountpanelofnormals stub"
  command: >-
    nextflow run ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals
    -entry test_gatk4_createreadcountpanelofnormals
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/gatk4/createreadcountpanelofnormals/nextflow.config
    -stub
  tags:
    - "gatk4"
    - "gatk4/createreadcountpanelofnormals"
  files:
    - path: "output/gatk4/test.hdf5"
    - path: "output/gatk4/versions.yml"
40 changes: 40 additions & 0 deletions tests/modules/nf-core/gatk4/denoisereadcounts/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf'
include { GATK4_COLLECTREADCOUNTS } from '../../../../../modules/nf-core/gatk4/collectreadcounts/main.nf'
include { GATK4_PREPROCESSINTERVALS } from '../../../../../modules/nf-core/gatk4/preprocessintervals/main.nf'
include { GATK4_DENOISEREADCOUNTS } from '../../../../../modules/nf-core/gatk4/denoisereadcounts/main.nf'

// Test workflow: collect read counts for two BAMs, build a panel of normals from
// both count files, then denoise the counts of sample `test` against that panel.
workflow test_gatk4_denoisereadcounts {

    // Reference files, each paired with a meta map
    fasta = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]).collect()
    fai   = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)]).collect()
    dict  = Channel.of([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)]).collect()

    // Keep only the interval list file; its meta map is dropped
    GATK4_PREPROCESSINTERVALS ( fasta, fai, dict, [[],[]], [[],[]]).interval_list
        .map { meta, list -> list }
        .set { ch_intervals }

    // Two samples as [ meta, bam, bai ]
    input = Channel.of([
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
    ],
    [
        [ id:'test2', single_end:false ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
    ])

    GATK4_COLLECTREADCOUNTS ( input.combine(ch_intervals), fasta, fai, dict )

    // Re-key every count file to the same meta map so groupTuple gathers them together
    GATK4_CREATEREADCOUNTPANELOFNORMALS (
        GATK4_COLLECTREADCOUNTS.out.tsv
            .map({ meta, tsv -> [ [id:'test'], tsv ] })
            .groupTuple()
    )

    // Select the sample by id rather than by arrival order: the two COLLECTREADCOUNTS
    // tasks can finish in either order, and the output files are named after meta.id
    // (test.yml expects `test_standardizedCR.tsv` / `test_denoisedCR.tsv`).
    ch_counts_to_denoise = GATK4_COLLECTREADCOUNTS.out.tsv
        .filter { meta, tsv -> meta.id == 'test' }

    GATK4_DENOISEREADCOUNTS ( ch_counts_to_denoise, GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon )
}
13 changes: 13 additions & 0 deletions tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Process settings for the denoisereadcounts module test
process {

    // Publish under <outdir>/<x>, where <x> is the lower-cased text before the first
    // underscore of the last ':'-separated part of the process name
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // The test workflow reads the `tsv` output of this process
    withName: 'GATK4_COLLECTREADCOUNTS' {
        ext.args = '--format TSV --interval-merging-rule OVERLAPPING_ONLY'
    }

    // Extra command-line options picked up by the module through task.ext.args
    withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' {
        ext.args = '--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2'
    }

}
19 changes: 19 additions & 0 deletions tests/modules/nf-core/gatk4/denoisereadcounts/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Regular run: expects both copy-ratio tables and the versions file to be published
- name: "gatk4 denoisereadcounts"
  command: >-
    nextflow run ./tests/modules/nf-core/gatk4/denoisereadcounts
    -entry test_gatk4_denoisereadcounts
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
  tags:
    - "gatk4"
    - "gatk4/denoisereadcounts"
  files:
    - path: "output/gatk4/test_standardizedCR.tsv"
    - path: "output/gatk4/test_denoisedCR.tsv"
    - path: "output/gatk4/versions.yml"

# Same workflow with -stub: the same files are expected
- name: "gatk4 denoisereadcounts stub"
  command: >-
    nextflow run ./tests/modules/nf-core/gatk4/denoisereadcounts
    -entry test_gatk4_denoisereadcounts
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/gatk4/denoisereadcounts/nextflow.config
    -stub
  tags:
    - "gatk4"
    - "gatk4/denoisereadcounts"
  files:
    - path: "output/gatk4/test_standardizedCR.tsv"
    - path: "output/gatk4/test_denoisedCR.tsv"
    - path: "output/gatk4/versions.yml"

0 comments on commit 38c559b

Please sign in to comment.