diff --git a/CHANGELOG.md b/CHANGELOG.md index e90969fa..40475721 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#439]](https://github.com/nf-core/smrnaseq/pull/439) - Fix [Fix paired end samples processing](https://github.com/nf-core/smrnaseq/issues/415) - Fix paired end sample handling and add test profile. - [[#441]](https://github.com/nf-core/smrnaseq/pull/441) - Migrate [local contaminant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE2_ALIGN`. - [[#443]](https://github.com/nf-core/smrnaseq/pull/443) - Migrate [mirna and genome_quant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE_ALIGN`. +- [[#447]](https://github.com/nf-core/smrnaseq/pull/447) - Fix [Minor fixes and general pipeline cleanup](https://github.com/nf-core/smrnaseq/issues/400) - Update variable and processes names, update channel comments, remove unused modules and params. - [[#448]](https://github.com/nf-core/smrnaseq/pull/448) - Migrate local mirdeep to [nf-core mirdeep2 modules and subworkflow](https://github.com/nf-core/smrnaseq/issues/443) and generate [test profile for mirdeep2](https://github.com/nf-core/smrnaseq/issues/399). 
## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch diff --git a/conf/modules.config b/conf/modules.config index 5867e9c3..d092448c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -479,7 +479,7 @@ process { } - withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:TABLE_MERGE' { + withName: 'NFCORE_SMRNASEQ:MIRNA_QUANT:DATATABLE_MERGE' { publishDir = [ path: { "${params.outdir}/mirna_quant/mirtop" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index 9673566a..a5c0ca38 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,6 @@ params { mirtrace_species = 'hsa' skip_mirdeep = true save_merged = false - save_aligned_mirna_quant = false } diff --git a/conf/test_contamination.config b/conf/test_contamination.config index 70e94fe2..5e089656 100644 --- a/conf/test_contamination.config +++ b/conf/test_contamination.config @@ -27,7 +27,6 @@ params { mirtrace_species = 'hsa' skip_mirdeep = true save_merged = false - save_aligned_mirna_quant = false filter_contamination = true diff --git a/main.nf b/main.nf index 35bc98d0..a45bf433 100644 --- a/main.nf +++ b/main.nf @@ -78,7 +78,9 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.input, + params.three_prime_adapter, + params.phred_offset ) // @@ -102,6 +104,8 @@ workflow { PREPARE_GENOME.out.other_contamination, ch_versions, PIPELINE_INITIALISATION.out.samplesheet, + PIPELINE_INITIALISATION.out.three_prime_adapter, + PIPELINE_INITIALISATION.out.phred_offset ) // diff --git a/modules.json b/modules.json index 52abe170..5cde8272 100644 --- a/modules.json +++ b/modules.json @@ -36,11 +36,6 @@ "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, - "cat/cat": { - "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] - }, "cat/fastq": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", @@ -106,11 +101,6 @@ "git_sha": 
"7c316cae26baf55e0add993bed2b0c9f7105c653", "installed_by": ["modules"] }, - "pigz/uncompress": { - "branch": "master", - "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", - "installed_by": ["modules"] - }, "samtools/flagstat": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", @@ -169,7 +159,7 @@ }, "untarfiles": { "branch": "master", - "git_sha": "958e4a6031deefa327f339f11d9baf1ab5a32d5f", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] } } diff --git a/modules/local/bowtie_map_mirna.nf b/modules/local/bowtie_map_mirna.nf deleted file mode 100644 index 252dc407..00000000 --- a/modules/local/bowtie_map_mirna.nf +++ /dev/null @@ -1,54 +0,0 @@ -process BOWTIE_MAP_SEQ { - tag "$meta.id" - label 'process_medium' - - conda 'bowtie=1.3.0 bioconda::samtools=1.20' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' : - 'biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }" - - input: - tuple val(meta), path(reads) - path index - - output: - tuple val(meta), path("*bam") , emit: bam - tuple val(meta), path('unmapped/*fq.gz'), emit: unmapped - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` - bowtie \\ - -x \$INDEX \\ - -q <(zcat $reads) \\ - -p ${task.cpus} \\ - -t \\ - -k 50 \\ - --best \\ - --strata \\ - -e 99999 \\ - --chunkmbs 2048 \\ - --un ${meta.id}_unmapped.fq -S > ${meta.id}.sam - - samtools view -bS ${meta.id}.sam > ${meta.id}.bam - - if [ ! -f "${meta.id}_unmapped.fq" ] - then - touch ${meta.id}_unmapped.fq - fi - gzip ${meta.id}_unmapped.fq - mkdir unmapped - mv ${meta.id}_unmapped.fq.gz unmapped/. 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie: \$(echo \$(bowtie --version 2>&1) | sed 's/^.*bowtie-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - -} diff --git a/modules/local/datatable_merge/datatable_merge.nf b/modules/local/datatable_merge/main.nf similarity index 96% rename from modules/local/datatable_merge/datatable_merge.nf rename to modules/local/datatable_merge/main.nf index ef1afc05..e231a738 100644 --- a/modules/local/datatable_merge/datatable_merge.nf +++ b/modules/local/datatable_merge/main.nf @@ -1,4 +1,4 @@ -process TABLE_MERGE { +process DATATABLE_MERGE { label 'process_medium' conda 'conda-forge::r-data.table=1.12.2' diff --git a/modules/local/datatable_merge/tests/datatable_merge.nf.test b/modules/local/datatable_merge/tests/datatable_merge.nf.test index 78ed4491..c7485af8 100644 --- a/modules/local/datatable_merge/tests/datatable_merge.nf.test +++ b/modules/local/datatable_merge/tests/datatable_merge.nf.test @@ -1,11 +1,11 @@ nextflow_process { - name "Test Process TABLE_MERGE" - script "../datatable_merge.nf" - process "TABLE_MERGE" + name "Test Process DATATABLE_MERGE" + script "../main.nf" + process "DATATABLE_MERGE" tag "modules" tag "modules_local" - tag "table_merge" + tag "datatable_merge" test("Contains hsa-miR-365b-3p, hsa-miR-7-5p, hsa-miR-103a-3p") { @@ -15,7 +15,7 @@ nextflow_process { } process { """ - input[0] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/nf-test_data/datatable_merge/small_mirtop_dataset.txt", checkIfExists: true)] + input[0] = [[],file("https://github.com/nf-core/test-datasets/raw/smrnaseq/nf-test_data/datatable_merge/small_mirtop_dataset.txt", checkIfExists: true)] """ } } @@ -46,7 +46,7 @@ nextflow_process { } process { """ - input[0] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/nf-test_data/datatable_merge/small_mirtop_dataset.txt", checkIfExists: true)] + 
input[0] = [[],file("https://github.com/nf-core/test-datasets/raw/smrnaseq/nf-test_data/datatable_merge/small_mirtop_dataset.txt", checkIfExists: true)] """ } } diff --git a/modules/local/datatable_merge/tests/datatable_merge.nf.test.snap b/modules/local/datatable_merge/tests/datatable_merge.nf.test.snap index dc37576f..7fce7ed9 100644 --- a/modules/local/datatable_merge/tests/datatable_merge.nf.test.snap +++ b/modules/local/datatable_merge/tests/datatable_merge.nf.test.snap @@ -6,13 +6,13 @@ "mirna.tsv:md5,f59a6aeb15588c43c2977950a1b0a080" ], "1": [ - "versions.yml:md5,3a8ba7faff9d6dadf80e1a1b026acbe1" + "versions.yml:md5,13bf3c8bbf1285dfc0ef547dcbb692b2" ], "mirna_tsv": [ "mirna.tsv:md5,f59a6aeb15588c43c2977950a1b0a080" ], "versions": [ - "versions.yml:md5,3a8ba7faff9d6dadf80e1a1b026acbe1" + "versions.yml:md5,13bf3c8bbf1285dfc0ef547dcbb692b2" ] } ], @@ -20,7 +20,7 @@ "nf-test": "0.8.4", "nextflow": "24.04.4" }, - "timestamp": "2024-08-21T14:27:11.151441241" + "timestamp": "2024-09-30T12:57:47.129770995" }, "Does not contain hsa-miR-107, hsa-miR-365a-3p": { "content": [ @@ -29,13 +29,13 @@ "mirna.tsv:md5,f59a6aeb15588c43c2977950a1b0a080" ], "1": [ - "versions.yml:md5,3a8ba7faff9d6dadf80e1a1b026acbe1" + "versions.yml:md5,13bf3c8bbf1285dfc0ef547dcbb692b2" ], "mirna_tsv": [ "mirna.tsv:md5,f59a6aeb15588c43c2977950a1b0a080" ], "versions": [ - "versions.yml:md5,3a8ba7faff9d6dadf80e1a1b026acbe1" + "versions.yml:md5,13bf3c8bbf1285dfc0ef547dcbb692b2" ] } ], @@ -43,6 +43,6 @@ "nf-test": "0.8.4", "nextflow": "24.04.4" }, - "timestamp": "2024-08-21T14:27:39.584509005" + "timestamp": "2024-09-30T12:57:56.990602055" } } \ No newline at end of file diff --git a/modules/local/edger_qc/edger_qc.nf b/modules/local/edger_qc/main.nf similarity index 100% rename from modules/local/edger_qc/edger_qc.nf rename to modules/local/edger_qc/main.nf diff --git a/modules/local/edger_qc/tests/edger_qc.nf.test b/modules/local/edger_qc/tests/edger_qc.nf.test index 53d3148c..f78de707 100644 --- 
a/modules/local/edger_qc/tests/edger_qc.nf.test +++ b/modules/local/edger_qc/tests/edger_qc.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process EDGER_QC" - script "../edger_qc.nf" + script "../main.nf" process "EDGER_QC" test("Should not produce MDS plot") { diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml deleted file mode 100644 index 9b01c865..00000000 --- a/modules/nf-core/cat/cat/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf deleted file mode 100644 index 2862c64c..00000000 --- a/modules/nf-core/cat/cat/main.nf +++ /dev/null @@ -1,78 +0,0 @@ -process CAT_CAT { - tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'biocontainers/pigz:2.3.4' }" - - input: - tuple val(meta), path(files_in) - - output: - tuple val(meta), path("${prefix}"), emit: file_out - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def file_list = files_in.collect { it.toString() } - - // choose appropriate concatenation tool depending on input and output format - - // | input | output | command1 | command2 | - // |-----------|------------|----------|----------| - // | gzipped | gzipped | cat | | - // | ungzipped | ungzipped | cat | | - // | gzipped | ungzipped | zcat | | - // | ungzipped | gzipped | cat | pigz | - - // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" - out_zip = prefix.endsWith('.gz') - in_zip = file_list[0].endsWith('.gz') - command1 = (in_zip && !out_zip) ? 
'zcat' : 'cat' - command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' - if(file_list.contains(prefix.trim())) { - error "The name of the input file can't be the same as for the output prefix in the " + - "module CAT_CAT (currently `$prefix`). Please choose a different one." - } - """ - $command1 \\ - $args \\ - ${file_list.join(' ')} \\ - $command2 \\ - > ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - - stub: - def file_list = files_in.collect { it.toString() } - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" - if(file_list.contains(prefix.trim())) { - error "The name of the input file can't be the same as for the output prefix in the " + - "module CAT_CAT (currently `$prefix`). Please choose a different one." - } - """ - touch $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ -} - -// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz -def getFileSuffix(filename) { - def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ - return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) -} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml deleted file mode 100644 index 00a8db0b..00000000 --- a/modules/nf-core/cat/cat/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: cat_cat -description: A module for concatenation of gzipped or uncompressed files -keywords: - - concatenate - - gzip - - cat -tools: - - cat: - description: Just concatenation - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" -authors: - - "@erikrikarddaniel" - - "@FriederikeHanssen" -maintainers: - - "@erikrikarddaniel" - - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test deleted file mode 100644 index 9cb16178..00000000 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ /dev/null @@ -1,191 +0,0 @@ -nextflow_process { - - name "Test Process CAT_CAT" - script "../main.nf" - process "CAT_CAT" - tag "modules" - tag "modules_nfcore" - tag "cat" - tag "cat/cat" - - test("test_cat_name_conflict") { - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'genome', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) - ] - ] - """ - } - } - then { - assertAll( - { assert !process.success }, - { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("test_cat_unzipped_unzipped") { - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'test', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) - ] - ] - """ - } - } - then { - assertAll( - { assert process.success }, - { assert 
snapshot(process.out).match() } - ) - } - } - - - test("test_cat_zipped_zipped") { - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'test', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) - ] - ] - """ - } - } - then { - def lines = path(process.out.file_out.get(0).get(1)).linesGzip - assertAll( - { assert process.success }, - { assert snapshot( - lines[0..5], - lines.size(), - process.out.versions - ).match() - } - ) - } - } - - test("test_cat_zipped_unzipped") { - config './nextflow_zipped_unzipped.config' - - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'test', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) - ] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("test_cat_unzipped_zipped") { - config './nextflow_unzipped_zipped.config' - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'test', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) - ] - ] - """ - } - } - then { - def lines = path(process.out.file_out.get(0).get(1)).linesGzip - assertAll( - { assert process.success }, - { assert snapshot( - lines[0..5], - lines.size(), - process.out.versions - ).match() - } - ) - } - } - - test("test_cat_one_file_unzipped_zipped") { - config 
'./nextflow_unzipped_zipped.config' - when { - params { - outdir = "${outputDir}" - } - process { - """ - input[0] = - [ - [ id:'test', single_end:true ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - ] - """ - } - } - then { - def lines = path(process.out.file_out.get(0).get(1)).linesGzip - assertAll( - { assert process.success }, - { assert snapshot( - lines[0..5], - lines.size(), - process.out.versions - ).match() - } - ) - } - } -} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap deleted file mode 100644 index b7623ee6..00000000 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ /dev/null @@ -1,147 +0,0 @@ -{ - "test_cat_unzipped_unzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2023-10-16T14:32:18.500464399" - }, - "test_cat_zipped_unzipped": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2023-10-16T14:32:49.642741302" - }, - "test_cat_zipped_zipped": { - "content": [ - [ - 
"MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", - "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", - "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", - "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" - ], - 78, - [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:51:46.802978" - }, - "test_cat_name_conflict": { - "content": [ - [ - - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:51:29.45394" - }, - "test_cat_one_file_unzipped_zipped": { - "content": [ - [ - ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", - "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", - "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", - "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", - "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", - "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" - ], - 374, - [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - ], - 
"meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:52:02.774016" - }, - "test_cat_unzipped_zipped": { - "content": [ - [ - ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", - "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", - "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", - "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", - "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", - "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" - ], - 375, - [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:51:57.581523" - } -} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config deleted file mode 100644 index ec26b0fd..00000000 --- a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config +++ /dev/null @@ -1,6 +0,0 @@ - -process { - withName: CAT_CAT { - ext.prefix = 'cat.txt.gz' - } -} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config deleted file mode 100644 index fbc79783..00000000 --- a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config +++ /dev/null @@ -1,8 +0,0 @@ - -process { - - withName: CAT_CAT { - ext.prefix = 'cat.txt' - } - -} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml deleted file mode 100644 index 37b578f5..00000000 --- a/modules/nf-core/cat/cat/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -cat/cat: - - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/pigz/uncompress/main.nf 
b/modules/nf-core/pigz/uncompress/main.nf deleted file mode 100644 index 11e43dff..00000000 --- a/modules/nf-core/pigz/uncompress/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process PIGZ_UNCOMPRESS { - label 'process_low' - //stageInMode 'copy' // this directive can be set in case the original input should be kept - - conda "conda-forge::pigz" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pigz:2.8': - 'biocontainers/pigz:2.8' }" - - input: - tuple val(meta), path(zip) - - output: - tuple val(meta), path("${uncompressed_filename}") , emit: file - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - uncompressed_filename = zip.toString() - '.gz' - // calling pigz -f to make it follow symlinks - """ - unpigz \\ - -p $task.cpus \\ - -fk \\ - $args \\ - ${zip} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - uncompressed_filename = zip.toString() - '.gz' - """ - touch ${zip.dropRight(3)} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml deleted file mode 100644 index c2d16cd4..00000000 --- a/modules/nf-core/pigz/uncompress/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "pigz_uncompress" -description: write your description here -keywords: - - uncompress - - gzip - - parallelized -tools: - - "pigz": - description: "Parallel implementation of the gzip algorithm." 
- homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - zip: - type: file - description: Gzipped file - pattern: "*.{gzip}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - file: - type: file - description: File to compress - pattern: "*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test deleted file mode 100644 index 62ab27e2..00000000 --- a/modules/nf-core/pigz/uncompress/tests/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process PIGZ_UNCOMPRESS" - script "modules/nf-core/pigz/uncompress/main.nf" - process "PIGZ_UNCOMPRESS" - tag "modules" - tag "modules_nfcore" - tag "pigz" - tag "pigz/uncompress" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = [ [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - - } - -} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap deleted file mode 100644 index 126dd7d6..00000000 --- a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap +++ /dev/null @@ -1,35 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" - ], - "file": [ - [ - { - "id": "test" - }, - 
"test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.03.0" - }, - "timestamp": "2024-05-15T16:43:21.55056643" - } -} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml deleted file mode 100644 index 6719a90a..00000000 --- a/modules/nf-core/pigz/uncompress/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -pigz/uncompress: - - modules/nf-core/pigz/uncompress/** diff --git a/modules/nf-core/untarfiles/meta.yml b/modules/nf-core/untarfiles/meta.yml index 38108826..1d23eb25 100644 --- a/modules/nf-core/untarfiles/meta.yml +++ b/modules/nf-core/untarfiles/meta.yml @@ -10,30 +10,33 @@ tools: Extract tar.gz files. documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - files: - type: string - description: A list containing references to individual archive files - pattern: "*/**" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/**: + type: string + description: A list containing references to individual archive files + pattern: "*/**" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/subworkflows/local/contaminant_filter/main.nf b/subworkflows/local/contaminant_filter/main.nf index b9cc3596..456ffc20 100644 --- a/subworkflows/local/contaminant_filter/main.nf +++ b/subworkflows/local/contaminant_filter/main.nf @@ -318,7 +318,7 @@ workflow CONTAMINANT_FILTER { FILTER_STATS.out.stats.dump(tag:"FILTER_STATS.out.stats") emit: - filtered_reads = other_cont_reads // channel: [ val(meta), path(fastq) ] - filter_stats = FILTER_STATS.out.stats // channel: [ path(stats) ] - versions = ch_versions.mix(FILTER_STATS.out.versions) + filtered_reads = other_cont_reads // channel: [ val(meta), path(fastq) ] + filter_stats = FILTER_STATS.out.stats // channel: [ path(stats) ] + versions = ch_versions.mix(FILTER_STATS.out.versions) // channel: [ versions.yml ] } diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index abc73893..93666a2d 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -7,7 +7,7 @@ include { BOWTIE_ALIGN as BOWTIE_MAP_GENOME } from '../../modules/nf-core/bowtie workflow GENOME_QUANT { take: - ch_bowtie_index // channel: [genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt] + ch_bowtie_index // channel: [ val(meta), [ path(directory_index) ] ] ch_fasta // channel: [ val(meta), path(fasta) ] ch_reads // channel: [ val(meta), [ reads ] ] @@ -21,9 +21,6 @@ workflow GENOME_QUANT { ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: - fasta = ch_fasta //TODO: This fasta is the same one that was used as 
input, ask the original developer, if they meant to have something else here - index = ch_bowtie_index //TODO: Same here, are we outputting the right files? We can remove these channels if we are. - stats = BAM_SORT_STATS_SAMTOOLS.out.stats - - versions = ch_versions + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index c8d75941..436b6253 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -18,9 +18,9 @@ include { BOWTIE_ALIGN as BOWTIE_MAP_MATURE include { BAM_SORT_STATS_SAMTOOLS as BAM_STATS_MATURE BAM_SORT_STATS_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_stats_samtools' -include { SEQCLUSTER_COLLAPSE } from '../../modules/nf-core/seqcluster/collapse/main' -include { TABLE_MERGE } from '../../modules/local/datatable_merge/datatable_merge.nf' -include { EDGER_QC } from '../../modules/local/edger_qc/edger_qc.nf' +include { SEQCLUSTER_COLLAPSE } from '../../modules/nf-core/seqcluster/collapse/main' +include { DATATABLE_MERGE } from '../../modules/local/datatable_merge/main' +include { EDGER_QC } from '../../modules/local/edger_qc/main' include { BAM_STATS_MIRNA_MIRTOP } from '../../subworkflows/nf-core/bam_stats_mirna_mirtop/main' include { CSVTK_JOIN } from '../../modules/nf-core/csvtk/join/main' @@ -28,7 +28,7 @@ workflow MIRNA_QUANT { take: ch_reference_mature // channel: [ val(meta), fasta file] ch_reference_hairpin // channel: [ val(meta), fasta file] - ch_mirna_gtf // channel: [ path(GTF) ] + ch_mirna_gtf // channel: [ val(meta), path(gtf) ] ch_reads_for_mirna // channel: [ val(meta), [ reads ] ] ch_mirtrace_species // channel: [ val(string) ] @@ -96,7 +96,7 @@ workflow MIRNA_QUANT { // nf-core/mirtop - ch_mirna_gtf_species = ch_mirna_gtf + ch_mirna_gtf_species = ch_mirna_gtf.map{ meta,gtf -> gtf } .combine(ch_mirtrace_species) .map{ gtf, 
species -> [ [id:species.toString()], gtf, species ] } .collect() @@ -115,20 +115,20 @@ workflow MIRNA_QUANT { CSVTK_JOIN ( ch_tsvs ) - TABLE_MERGE ( CSVTK_JOIN.out.csv ) - ch_versions = ch_versions.mix(TABLE_MERGE.out.versions) + DATATABLE_MERGE ( CSVTK_JOIN.out.csv ) + ch_versions = ch_versions.mix(DATATABLE_MERGE.out.versions) ch_reads_genome = BOWTIE_MAP_HAIRPIN.out.fastq .map { add_suffix(it, "genome") } emit: - fasta_mature = FORMAT_MATURE.out.formatted_fasta // channel: [ val(meta), path(fasta) ] + fasta_mature = FORMAT_MATURE.out.formatted_fasta // channel: [ val(meta), path(fasta) ] fasta_hairpin = FORMAT_HAIRPIN.out.formatted_fasta // channel: [ val(meta), path(fasta) ] - unmapped = ch_reads_genome // channel: [ val(meta), path(bam) ] - mature_stats = BAM_STATS_MATURE.out.stats //TODO not used for antything, should we remove them? - hairpin_stats = BAM_STATS_HAIRPIN.out.stats //TODO not used for antything, should we remove them? - mirtop_logs = ch_mirtop_logs // channel: [ val(meta), path(log) ] - versions = ch_versions + unmapped = ch_reads_genome // channel: [ val(meta), path(fastq) ] + mature_stats = BAM_STATS_MATURE.out.stats // channel: [ val(meta), [ stats ] ] + hairpin_stats = BAM_STATS_HAIRPIN.out.stats // channel: [ val(meta), [ stats ] ] + mirtop_logs = ch_mirtop_logs // channel: [ val(meta), path(log) ] + versions = ch_versions // channel: [ versions.yml ] } def add_suffix(row, suffix) { diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 0158cbfd..c752a79b 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -26,23 +26,23 @@ workflow PREPARE_GENOME { ch_versions = Channel.empty() // Parameter channel handling - ch_fasta = val_fasta ? Channel.fromPath(val_fasta, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() - ch_bowtie_index = val_bowtie_index ? 
Channel.fromPath(val_bowtie_index, checkIfExists: true).map{ it -> [ [], it ] }.collect() : Channel.empty() + ch_fasta = val_fasta ? Channel.fromPath(val_fasta, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() + ch_bowtie_index = val_bowtie_index ? Channel.fromPath(val_bowtie_index, checkIfExists: true).map{ it -> [ [], it ] }.collect() : Channel.empty() bool_mirtrace_species = val_mirtrace_species ? true : false bool_has_fasta = val_fasta ? true : false ch_mirtrace_species = val_mirtrace_species ? Channel.value(val_mirtrace_species) : Channel.empty() mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://raw.githubusercontent.com/nf-core/test-datasets/smrnaseq/reference/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false - ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true) : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() ) + ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() ) ch_mirna_adapters = params.with_umi ? [] : Channel.fromPath(val_fastp_known_mirna_adapters, checkIfExists: true).collect() - ch_rrna = val_rrna ? Channel.fromPath(val_rrna).map{ it -> [ [id:'rRNA'], it ] } : Channel.empty() - ch_trna = val_trna ? Channel.fromPath(val_trna).map{ it -> [ [id:'tRNA'], it ] }.collect() : Channel.empty() - ch_cdna = val_cdna ? Channel.fromPath(val_cdna).map{ it -> [ [id:'cDNA'], it ] }.collect() : Channel.empty() - ch_ncrna = val_ncrna ? Channel.fromPath(val_ncrna).map{ it -> [ [id:'ncRNA'], it ] }.collect() : Channel.empty() - ch_pirna = val_pirna ? 
Channel.fromPath(val_pirna).map{ it -> [ [id:'piRNA'], it ] }.collect() : Channel.empty() - ch_other_contamination = val_other_contamination ? Channel.fromPath(val_other_contamination).map{ it -> [ [id:'other'], it ] }.collect() : Channel.empty() + ch_rrna = val_rrna ? Channel.fromPath(val_rrna, checkIfExists: true).map{ it -> [ [id:'rRNA'], it ] }.collect() : Channel.empty() + ch_trna = val_trna ? Channel.fromPath(val_trna, checkIfExists: true).map{ it -> [ [id:'tRNA'], it ] }.collect() : Channel.empty() + ch_cdna = val_cdna ? Channel.fromPath(val_cdna, checkIfExists: true).map{ it -> [ [id:'cDNA'], it ] }.collect() : Channel.empty() + ch_ncrna = val_ncrna ? Channel.fromPath(val_ncrna, checkIfExists: true).map{ it -> [ [id:'ncRNA'], it ] }.collect() : Channel.empty() + ch_pirna = val_pirna ? Channel.fromPath(val_pirna, checkIfExists: true).map{ it -> [ [id:'piRNA'], it ] }.collect() : Channel.empty() + ch_other_contamination = val_other_contamination ? Channel.fromPath(val_other_contamination, checkIfExists: true).map{ it -> [ [id:'other'], it ] }.collect() : Channel.empty() // even if bowtie index is specified, there still needs to be a fasta. // without fasta, no genome analysis. @@ -90,24 +90,24 @@ workflow PREPARE_GENOME { } ch_reference_mature = params.mirgenedb_mature ? Channel.fromPath(params.mirgenedb_mature, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Mature miRNA fasta file not found via --mirgenedb_mature: ${params.mirgenedb_mature}" } ch_reference_hairpin = params.mirgenedb_hairpin ? Channel.fromPath(params.mirgenedb_hairpin, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "Hairpin miRNA fasta file not found via --mirgenedb_hairpin: ${params.mirgenedb_hairpin}" } - ch_mirna_gtf = params.mirgenedb_gff ? 
Channel.fromPath(params.mirgenedb_gff, checkIfExists: true).collect() : { exit 1, "MirGeneDB gff file not found via --mirgenedb_gff: ${params.mirgenedb_gff}"} + ch_mirna_gtf = params.mirgenedb_gff ? Channel.fromPath(params.mirgenedb_gff, checkIfExists: true).map{ it -> [ [id:it.baseName], it ] }.collect() : { exit 1, "MirGeneDB gff file not found via --mirgenedb_gff: ${params.mirgenedb_gff}"} } emit: fasta = ch_fasta // channel: [ val(meta), path(fasta) ] has_fasta = bool_has_fasta // boolean - bowtie_index = ch_bowtie_index // channel: [genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt] + bowtie_index = ch_bowtie_index // channel: [ val(meta), [ path(directory_index) ] ] versions = ch_versions // channel: [ versions.yml ] mirtrace_species = ch_mirtrace_species // channel: [ val(string) ] has_mirtrace_species = bool_mirtrace_species // boolean reference_mature = ch_reference_mature // channel: [ val(meta), path(fasta) ] reference_hairpin = ch_reference_hairpin // channel: [ val(meta), path(fasta) ] - mirna_gtf = ch_mirna_gtf // channel: [ path(GTF) ] - rrna = ch_rrna // channel: [ path(fasta) ] - trna = ch_trna // channel: [ path(fasta) ] - cdna = ch_cdna // channel: [ path(fasta) ] - ncrna = ch_ncrna // channel: [ path(fasta) ] - pirna = ch_pirna // channel: [ path(fasta) ] - other_contamination = ch_other_contamination // channel: [ path(fasta) ] - mirna_adapters = ch_mirna_adapters // channel: [ path(fasta) ] + mirna_gtf = ch_mirna_gtf // channel: [ val(meta), path(gtf) ] + rrna = ch_rrna // channel: [ val(meta), path(fasta) ] + trna = ch_trna // channel: [ val(meta), path(fasta) ] + cdna = ch_cdna // channel: [ val(meta), path(fasta) ] + ncrna = ch_ncrna // channel: [ val(meta), path(fasta) ] + pirna = ch_pirna // channel: [ val(meta), path(fasta) ] + other_contamination = ch_other_contamination // channel: [ val(meta), path(fasta) ] + mirna_adapters = ch_mirna_adapters // channel: [ path(fasta) ] } diff 
--git a/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf index 0cbb4634..5f84609c 100644 --- a/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_smrnaseq_pipeline/main.nf @@ -38,11 +38,15 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + val_three_prime_adapter // string: Sequencing adapter sequence to use for trimming + val_phred_offset // string: The PHRED quality offset to be used for any input fastq files main: //Channel definitions - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_three_prime_adapter = Channel.value(val_three_prime_adapter) + ch_phred_offset = Channel.value(val_phred_offset) // // Print version and exit if required and dump pipeline parameters to JSON file @@ -102,8 +106,10 @@ workflow PIPELINE_INITIALISATION { } emit: - samplesheet = ch_samplesheet // channel: sample fastqs parsed from --input - versions = ch_versions // channel: [ versions.yml ] + samplesheet = ch_samplesheet // channel: sample fastqs parsed from --input + versions = ch_versions // channel: [ versions.yml ] + three_prime_adapter = ch_three_prime_adapter // channel: [ val(string) ] + phred_offset = ch_phred_offset // channel: [ val(string) ] } /* diff --git a/tests/test_contamination_tech_reps.nf.test.snap b/tests/test_contamination_tech_reps.nf.test.snap index a7ac1a96..91a343f4 100644 --- a/tests/test_contamination_tech_reps.nf.test.snap +++ b/tests/test_contamination_tech_reps.nf.test.snap @@ -34,13 +34,13 @@ }, "software_versions": { "content": [ - "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, 
BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, CAT_FASTQ={cat=8.3}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, 
MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T00:13:46.736133" + "timestamp": "2024-09-30T15:53:12.532061" }, "mirna_quant_bam": { "content": [ @@ -117,4 +117,4 @@ }, "timestamp": "2024-09-20T18:06:26.655506797" } -} \ No newline at end of file +} diff --git a/tests/test_nextflex.nf.test.snap b/tests/test_nextflex.nf.test.snap index 9438f92b..58017db9 100644 --- a/tests/test_nextflex.nf.test.snap +++ b/tests/test_nextflex.nf.test.snap @@ -34,13 +34,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, 
BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T00:29:08.657006" + "timestamp": "2024-09-30T16:10:06.50556" }, "mirna_quant_bam": { "content": [ @@ -142,4 +142,4 @@ }, "timestamp": "2024-09-20T17:11:24.369706104" } -} \ No newline at end of file +} diff --git a/tests/test_skipfastp.nf.test.snap b/tests/test_skipfastp.nf.test.snap index 04904b0b..b2942031 100644 --- a/tests/test_skipfastp.nf.test.snap +++ b/tests/test_skipfastp.nf.test.snap @@ -41,13 +41,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, 
SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTQC_RAW={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T00:53:04.938088" + "timestamp": "2024-09-30T16:13:51.93255" }, "mirna_quant_bam": { "content": [ @@ -142,4 +142,4 @@ }, "timestamp": "2024-09-20T18:29:58.277371938" } -} \ No newline at end of file +} diff --git a/tests/test_umi.nf.test.snap b/tests/test_umi.nf.test.snap index ad477134..7af606d2 100644 --- a/tests/test_umi.nf.test.snap +++ b/tests/test_umi.nf.test.snap @@ -41,13 +41,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, 
FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, TABLE_MERGE={r-base=3.6.2}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, DATATABLE_MERGE={r-base=3.6.2}, FASTP={fastp=0.23.4}, FASTP_LENGTH_FILTER={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, UMICOLLAPSE_FASTQ={umicollapse=1.0.0-1}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T01:02:56.214416" + "timestamp": "2024-09-30T16:23:12.559357" }, "mirna_quant_bam": { "content": [ @@ -160,4 +160,4 @@ }, "timestamp": "2024-09-20T19:12:28.290360163" } -} \ No newline at end of file +} diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index fd3e31aa..e4848c8a 100644 --- 
a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -51,17 +51,19 @@ workflow NFCORE_SMRNASEQ { ch_mirtrace_species // channel: [ val(string) ] ch_reference_mature // channel: [ val(meta), path(fasta) ] ch_reference_hairpin // channel: [ val(meta), path(fasta) ] - ch_mirna_gtf // channel: [ path(GTF) ] + ch_mirna_gtf // channel: [ val(meta), path(gtf) ] ch_fasta // channel: [ val(meta), path(fasta) ] - ch_bowtie_index // channel: [ genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt ] - ch_rrna // channel: [ path(fasta) ] - ch_trna // channel: [ path(fasta) ] + ch_bowtie_index // channel: [ val(meta), [ path(directory_index) ] ] + ch_rrna // channel: [ val(meta), path(fasta) ] + ch_trna // channel: [ val(meta), path(fasta) ] ch_cdna // channel: [ val(meta), path(fasta) ] ch_ncrna // channel: [ val(meta), path(fasta) ] ch_pirna // channel: [ val(meta), path(fasta) ] ch_other_contamination // channel: [ val(meta), path(fasta) ] ch_versions // channel: [ path(versions.yml) ] ch_samplesheet // channel: sample fastqs parsed from --input + ch_three_prime_adapter // channel: [ val(string) ] + ch_phred_offset // channel: [ val(string) ] main: // @@ -145,13 +147,11 @@ workflow NFCORE_SMRNASEQ { // // MODULE: mirtrace QC // - three_prime_adapter = Channel.value(params.three_prime_adapter) - phred_offset = Channel.value(params.phred_offset) ch_mirtrace_config = ch_reads_for_mirna .transpose() - .combine(three_prime_adapter) - .combine(phred_offset) + .combine(ch_three_prime_adapter) + .combine(ch_phred_offset) .collectFile { meta, reads, adapter, phred -> def config_filename = "${meta.id}.data" [ config_filename, "./${reads.getFileName().toString()},${meta.id},${adapter},${phred}\n" ] @@ -217,11 +217,11 @@ workflow NFCORE_SMRNASEQ { genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) - hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } - mature_clean = 
MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } + ch_hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } + ch_mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } - ch_mature_hairpin = mature_clean - .combine(hairpin_clean) + ch_mature_hairpin = ch_mature_clean + .combine(ch_hairpin_clean) .map { mature, hairpin -> [[id: 'mature_hairpin'], mature, hairpin, []] }