From cc0ae3a303465788305c771796a1a6e6732aaf9f Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 29 Aug 2024 14:12:20 +0200 Subject: [PATCH 1/6] added module cat_fastq --- assets/schema_input.json | 2 +- modules.json | 5 + modules/nf-core/cat/fastq/environment.yml | 7 + modules/nf-core/cat/fastq/main.nf | 79 ++++ modules/nf-core/cat/fastq/meta.yml | 42 ++ modules/nf-core/cat/fastq/tests/main.nf.test | 248 ++++++++++++ .../nf-core/cat/fastq/tests/main.nf.test.snap | 376 ++++++++++++++++++ modules/nf-core/cat/fastq/tests/tags.yml | 2 + .../utils_nfcore_bacass_pipeline/main.nf | 2 +- workflows/bacass.nf | 38 +- 10 files changed, 789 insertions(+), 12 deletions(-) create mode 100644 modules/nf-core/cat/fastq/environment.yml create mode 100644 modules/nf-core/cat/fastq/main.nf create mode 100644 modules/nf-core/cat/fastq/meta.yml create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test create mode 100644 modules/nf-core/cat/fastq/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/fastq/tests/tags.yml diff --git a/assets/schema_input.json b/assets/schema_input.json index 3cf372d..a2681a4 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,7 +10,7 @@ "ID": { "type": "string", "pattern": "^\\S+$", - "unique": true, + "unique": false, "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, diff --git a/modules.json b/modules.json index ba06f0a..065534e 100644 --- a/modules.json +++ b/modules.json @@ -20,6 +20,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "cat/fastq": { + "branch": "master", + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "installed_by": ["modules"] + }, "dragonflye": { "branch": "master", "git_sha": "a3ae45e99f68e513b0feb8d5f27e2fdd0d0f083d", diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 0000000..8c69b12 --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 0000000..b68e5f9 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,79 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + echo '' | gzip > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + """ + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 0000000..db4ac3c --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,42 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 0000000..f88a78b --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,248 @@ +// NOTE The version snaps may not be consistant +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 0000000..aec119a --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,376 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-17T17:30:39.816981" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-17T17:32:35.229332" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-17T17:34:00.058829" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-17T17:33:33.031555" + }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:28.244999" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:57.070911" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:46.796254" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-01-17T17:32:02.270935" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:37.807553" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:14:51.861264" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 0000000..6ac4361 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/subworkflows/local/utils_nfcore_bacass_pipeline/main.nf b/subworkflows/local/utils_nfcore_bacass_pipeline/main.nf index 3cdbca3..c1febf3 100644 --- a/subworkflows/local/utils_nfcore_bacass_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_bacass_pipeline/main.nf @@ -88,7 +88,7 @@ workflow PIPELINE_INITIALISATION { } .map { meta, fastqs, longread, fast5 -> - return [ meta, fastqs.flatten(), longread, fast5 ] + return [ meta, fastqs, longread[0], fast5[0] ] } .set { ch_samplesheet } diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 01d17f9..c6d07aa 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -20,7 +20,7 @@ include { MULTIQC_CUSTOM } from '../modules/local/multiqc_custom' // MODULE: Installed directly from nf-core/modules // include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq' include { NANOPLOT } from '../modules/nf-core/nanoplot/main' include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' include { CANU } from '../modules/nf-core/canu/main' @@ -77,21 +77,18 @@ workflow BACASS { main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // def criteria = multiMapCriteria { - meta, fastq_1, fastq_2, long_fastq, fast5 -> - shortreads: fastq_1 != 'NA' ? tuple(meta, [file(fastq_1, checkIfExists: true), file(fastq_2, checkIfExists: true)]) : null - longreads: long_fastq != 'NA' ? tuple(meta, file(long_fastq, checkIfExists: true)) : null - fast5: fast5 != 'NA' ? tuple(meta, file(fast5, checkIfExists: true)) : null + meta, fastqs, long_fastq, fast5 -> + shortreads: meta.single_end != 'NA' ? tuple(meta, fastqs) : null + longreads: long_fastq != 'NA' ? tuple(meta,long_fastq) : null + fast5: fast5 != 'NA' ? tuple(meta, fast5) : null } // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ ch_samplesheet - .map { - meta, fastqs, long_fastq, fast5 -> - return [meta, fastqs[0], fastqs[1], long_fastq[0], fast5[0]] - } .multiMap (criteria) .set { ch_input } @@ -99,7 +96,7 @@ workflow BACASS { ch_input .shortreads .filter{ it != null } - .set { ch_shortreads } + .set { ch_shortreads_cat } ch_input .longreads .filter{ it != null } @@ -109,6 +106,27 @@ workflow BACASS { .filter{ it != null } .set { ch_fast5 } + // + // MODULE: Concatenate FastQ files from same sample if required (shortreads) + // + ch_shortreads + .branch{ + meta, fastqs -> + single: fastqs.size() == 1 + return [meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [meta, fastqs.flatten() ] + } + .set { ch_shortreads_cat } + + CAT_FASTQ ( + ch_shortreads_cat.multiple + ) + .reads + .mix( ch_shortreads_cat.single ) + .set { ch_shortreads } + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + // // SUBWORKFLOW: Short reads QC and trim adapters // From b2ab59ce83378cd03b19fb1e7e49af54bb6ae0ea Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 29 Aug 2024 15:56:02 +0200 Subject: [PATCH 2/6] fix wrong variable naming --- workflows/bacass.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/bacass.nf b/workflows/bacass.nf index c6d07aa..8b23ab9 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -96,7 +96,7 @@ workflow BACASS { ch_input .shortreads .filter{ it != null } - .set { ch_shortreads_cat } + .set { ch_shortreads } ch_input .longreads .filter{ it != null } From 1f2b944a73e41eeb3de6c088a45e2c9a84e5a0af Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 29 Aug 2024 16:06:30 +0200 Subject: [PATCH 3/6] update changelog #158 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e314323..f4ceb20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#176](https://github.com/nf-core/bacass/pull/176) Update nf-core/bacass to nf-core-tools v3.0.2 `TEMPLATE`. - [#166](https://github.com/nf-core/bacass/pull/166) Added FastQC after-trimming section to MultiQC report. +- [#158](https://github.com/nf-core/bacass/pull/158) Support automatic concatenation of FastQ files for the same sample. ### `Fixed` From 2df4fca9a6da4f8e408789163af1eca2bafddb02 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 29 Aug 2024 16:38:31 +0200 Subject: [PATCH 4/6] fixed channel versions and rename variables --- workflows/bacass.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 8b23ab9..85f400a 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -113,19 +113,19 @@ workflow BACASS { .branch{ meta, fastqs -> single: fastqs.size() == 1 - return [meta, fastqs.flatten() ] + return [ meta, fastqs.flatten() ] multiple: fastqs.size() > 1 - return [meta, fastqs.flatten() ] + return [ meta, fastqs.flatten() ] } - .set { ch_shortreads_cat } + .set { ch_shortreads_fastqs } - CAT_FASTQ ( - ch_shortreads_cat.multiple - ) - .reads - .mix( ch_shortreads_cat.single ) - .set { ch_shortreads } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + CAT_FASTQ ( + ch_shortreads_fastqs.multiple + ) + .reads + .mix( ch_shortreads_fastqs.single ) + .set { ch_shortreads_concat } + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) // // SUBWORKFLOW: Short reads QC and trim adapters @@ -135,7 +135,7 @@ workflow BACASS { ch_fastp_json_multiqc = Channel.empty() if (params.assembly_type != 'long'){ FASTQ_TRIM_FASTP_FASTQC ( - ch_shortreads, + ch_shortreads_concat, [], params.save_trimmed_fail, [], From cb0c18d6707c9ff57309ddbe287930ba831fd41b Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Tue, 15 Oct 2024 17:11:34 +0200 Subject: [PATCH 5/6] align spaces --- workflows/bacass.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 85f400a..293056c 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -19,7 +19,7 @@ include { MULTIQC_CUSTOM } from '../modules/local/multiqc_custom' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq' include { NANOPLOT } from '../modules/nf-core/nanoplot/main' include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' From 10a7df89e922cee689feaf04c57d075b34352f27 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 24 Oct 2024 15:42:19 +0200 Subject: [PATCH 6/6] include resequenced samples in test profile --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 166c9b0..ea960ea 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = params.pipelines_testdata_base_path + 'bacass/bacass_short.tsv' + input = params.pipelines_testdata_base_path + 'bacass/bacass_short_reseq.tsv' // some extra args to speed tests up prokka_args = " --fast"