Skip to content

Commit

Permalink
Update chopper module to take optional fasta file (#6913)
Browse files Browse the repository at this point in the history
* Update chopper module to take optional fasta file

* linting issues

* Upgrade chopper biocontainer, add stub, and use only data in test-data for testing

* bump chopper version in conda
  • Loading branch information
muabnezor authored Nov 1, 2024
1 parent 6c32d37 commit 2273783
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 15 deletions.
2 changes: 1 addition & 1 deletion modules/nf-core/chopper/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::chopper=0.3.0
- bioconda::chopper=0.9.0
18 changes: 16 additions & 2 deletions modules/nf-core/chopper/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ process CHOPPER {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0':
'biocontainers/chopper:0.3.0--hd03093a_0' }"
'https://depot.galaxyproject.org/singularity/chopper:0.9.0--hdcf5f25_0':
'biocontainers/chopper:0.9.0--hdcf5f25_0' }"

input:
tuple val(meta), path(fastq)
path fasta

output:
tuple val(meta), path("*.fastq.gz") , emit: fastq
Expand All @@ -22,6 +23,7 @@ process CHOPPER {
def args2 = task.ext.args2 ?: ''
def args3 = task.ext.args3 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def fasta_filtering = fasta ? "--contam ${fasta}" : ""

if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
"""
Expand All @@ -30,6 +32,7 @@ process CHOPPER {
$fastq | \\
chopper \\
--threads $task.cpus \\
$fasta_filtering \\
$args2 | \\
gzip \\
$args3 > ${prefix}.fastq.gz
Expand All @@ -39,4 +42,15 @@ process CHOPPER {
chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2)
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
echo | gzip > ${prefix}.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2)
END_VERSIONS
"""
}
4 changes: 4 additions & 0 deletions modules/nf-core/chopper/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ input:
type: file
description: FastQ with reads from long read sequencing e.g. PacBio or ONT
pattern: "*.{fastq.gz}"
- - fasta:
type: file
description: Optional reference FASTA file; reads that align to it are removed as contaminants (passed to chopper via --contam).
pattern: "*.{fasta,fasta.gz}"
output:
- fastq:
- meta:
Expand Down
74 changes: 68 additions & 6 deletions modules/nf-core/chopper/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,43 @@ nextflow_process {
tag "modules"
tag "modules_nfcore"

test("Should run without failures") {
test("test with lambda reference") {
// Runs the process with a FASTA supplied as the second input (input[1]) and
// checks the output file name, that one specific read header is absent from
// the output, and a snapshot of the output line count plus versions.
// NOTE(review): the test name says "lambda", but the reference passed below is
// the sarscov2 genome; renaming the test would also require renaming the
// matching key in main.nf.test.snap.

when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = [
[id:'test_out' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test_2.fastq.gz', checkIfExists: true)
]
input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
"""
}
}

then {

// All lines of the gzipped output FASTQ, read once and reused by the assertions below
def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip

assertAll(
{ assert process.success },
// Checks carried over from the original pytest suite:
// the output is named after the meta id, and the read header below must NOT be present in the output
{ assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" },
{ assert !fastq_content.contains("@a52a642e-88d0-4584-babd-414ea84db484 runid=71c83ae0021f873e29b130c6562a4c27185f93b8 read=2768 ch=489 start_time=2021-08-11T12:07:39Z flow_cell_id=FAQ57606 protocol_group_id=210811_47CoV_SA sample_id=CS5 barcode=barcode04 barcode_alias=barcode04")},
// Additional nf-test checks
// Order of reads is not deterministic, so the file content itself is not snapshotted;
// only the number of lines in the output (fastq_content.size()) and the versions file are.
{ assert snapshot(
fastq_content.size(),
process.out.versions
).match() }
)
}
}

test("test without lambda reference") {

when {
params {
Expand All @@ -19,6 +55,7 @@ nextflow_process {
[id:'test_out' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true)
]
input[1] = []
"""
}
}
Expand All @@ -34,12 +71,37 @@ nextflow_process {
{ assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")},
// additional nf-test checks
// Order of reads is not deterministic, so only assess whether the number of reads is correct
{ assert snapshot(fastq_content.size()).match("number_of_lines") },
{ assert snapshot(process.out.versions).match("versions") }

{ assert snapshot(
fastq_content.size(),
process.out.versions
).match() }
)
}

}

}
test("test-chopper-stub") {
// Stub-mode run: the '-stub' option is passed, and an empty list is supplied
// as the second input (input[1]), i.e. no FASTA file is given.
options '-stub'

when {
process {
"""
input[0] = [
[id:'test_out' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true)
]
input[1] = []
"""
}
}

then {
assertAll(
{ assert process.success },
// Snapshot every output channel plus the versions channel.
// NOTE(review): process.out already contains the versions channel, so versions
// ends up in the snapshot twice; dropping the second argument would require
// regenerating main.nf.test.snap.
{ assert snapshot(
process.out,
process.out.versions
).match() }
)
}
}
}
60 changes: 54 additions & 6 deletions modules/nf-core/chopper/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,16 +1,64 @@
{
"versions": {
"test without lambda reference": {
"content": [
400,
[
"versions.yml:md5,5fe28ea455482c9fe88603ddcc461881"
"versions.yml:md5,74a27493c09d0c481f6e52b517e12023"
]
],
"timestamp": "2023-10-20T08:27:24.592662298"
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-31T15:14:36.37897815"
},
"number_of_lines": {
"test with lambda reference": {
"content": [
400
15984,
[
"versions.yml:md5,74a27493c09d0c481f6e52b517e12023"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-31T15:14:31.324993049"
},
"test-chopper-stub": {
"content": [
{
"0": [
[
{
"id": "test_out"
},
"test_out.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
"versions.yml:md5,74a27493c09d0c481f6e52b517e12023"
],
"fastq": [
[
{
"id": "test_out"
},
"test_out.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,74a27493c09d0c481f6e52b517e12023"
]
},
[
"versions.yml:md5,74a27493c09d0c481f6e52b517e12023"
]
],
"timestamp": "2023-10-20T08:27:24.581289647"
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-31T15:29:08.715579423"
}
}

0 comments on commit 2273783

Please sign in to comment.