From d783ef9deb98dbd4adfabb5ab0858ba60a658057 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Tue, 10 May 2022 14:35:54 +0200 Subject: [PATCH 001/117] REVERT CHANGES Reverting changes to a non-linted version and added the umitools modules. --- modules.json | 8 ++- .../nf-core/modules/umitools/dedup/main.nf | 41 +++++++++++++ .../nf-core/modules/umitools/dedup/meta.yml | 59 +++++++++++++++++++ .../nf-core/modules/umitools/extract/main.nf | 55 +++++++++++++++++ .../nf-core/modules/umitools/extract/meta.yml | 47 +++++++++++++++ 5 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/modules/umitools/dedup/main.nf create mode 100644 modules/nf-core/modules/umitools/dedup/meta.yml create mode 100644 modules/nf-core/modules/umitools/extract/main.nf create mode 100644 modules/nf-core/modules/umitools/extract/meta.yml diff --git a/modules.json b/modules.json index 81208b16..071935f8 100644 --- a/modules.json +++ b/modules.json @@ -32,7 +32,13 @@ }, "trimgalore": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "umitools/dedup": { + "git_sha": "f425aa3cea10015fe9b345b9d6dcc2336b53155f" + }, + "umitools/extract": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/umitools/dedup/main.nf b/modules/nf-core/modules/umitools/dedup/main.nf new file mode 100644 index 00000000..dfcbcf2f --- /dev/null +++ b/modules/nf-core/modules/umitools/dedup/main.nf @@ -0,0 +1,41 @@ +process UMITOOLS_DEDUP { + tag "$meta.id" + label "process_medium" + + conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : + 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance + tuple val(meta), path("*per_umi.tsv") , emit: tsv_per_umi + tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + """ + umi_tools \\ + dedup \\ + -I $bam \\ + -S ${prefix}.bam \\ + --output-stats $prefix \\ + $paired \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/umitools/dedup/meta.yml b/modules/nf-core/modules/umitools/dedup/meta.yml new file mode 100644 index 00000000..eee8952f --- /dev/null +++ b/modules/nf-core/modules/umitools/dedup/meta.yml @@ -0,0 +1,59 @@ +name: umitools_dedup +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umitools + - deduplication +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. 
+ pattern: "*.{bai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - tsv_edit_distance: + type: file + description: Reports the (binned) average edit distance between the UMIs at each position. + pattern: "*edit_distance.tsv" + - tsv_per_umi: + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" + - tsv_umi_per_position: + type: file + description: Tabulates the counts for unique combinations of UMI and position. + pattern: "*per_position.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" + - "@klkeys" diff --git a/modules/nf-core/modules/umitools/extract/main.nf b/modules/nf-core/modules/umitools/extract/main.nf new file mode 100644 index 00000000..22a405b9 --- /dev/null +++ b/modules/nf-core/modules/umitools/extract/main.nf @@ -0,0 +1,55 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_low" + + conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : + 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/umitools/extract/meta.yml b/modules/nf-core/modules/umitools/extract/meta.yml new file mode 100644 index 00000000..7fc23f72 --- /dev/null +++ b/modules/nf-core/modules/umitools/extract/meta.yml @@ -0,0 +1,47 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - umitools + - extract +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" From 1043932a309d7ce573973225f16e62e4f7f82e3e Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Tue, 10 May 2022 15:58:50 +0200 Subject: [PATCH 002/117] INCLUDE UMITOOLS WORKFLOW Added the umitools workflow and integrated it into the smrnaseq workflow --- .../nf-core/fastqc_umitools_trimgalore.nf | 78 +++++++++++++++++++ workflows/smrnaseq.nf | 14 ++-- 2 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 subworkflows/nf-core/fastqc_umitools_trimgalore.nf diff --git a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf new file mode 100644 index 00000000..ca158e7a --- /dev/null +++ b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf @@ -0,0 +1,78 @@ +// +// Read QC, UMI extraction and trimming +// + +nextflow.enable.dsl=2 + +include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../modules/nf-core/modules/umitools/extract/main' +include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' + +workflow FASTQC_UMITOOLS_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_trimming // boolean: true/false + umi_discard_read // integer: 0, 
1 or 2 + + main: + + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC ( reads ).html.set { fastqc_html } + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi) { + UMITOOLS_EXTRACT ( reads ).reads.set { umi_reads } + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { meta, reads -> + if (!meta.single_end) { + meta['single_end'] = true + reads = reads[umi_discard_read % 2] + } + return [ meta, reads ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + if (!skip_trimming) { + TRIMGALORE ( umi_reads ).reads.set { trim_reads } + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index ff8669bf..1613425c 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -55,7 +55,7 @@ if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: 
${params.hairpin}" } include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastqc_umitools_trimgalore' include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' include { GENOME_QUANT } from '../subworkflows/local/genome_quant' include { MIRTRACE } from '../subworkflows/local/mirtrace' @@ -133,14 +133,16 @@ workflow SMRNASEQ { // // SUBWORKFLOW: Read QC, extract UMI and trim adapters // - FASTQC_TRIMGALORE ( + FASTQC_UMITOOLS_TRIMGALORE ( ch_cat_fastq, params.skip_fastqc || params.skip_qc, + params.with_umi, - params.skip_trimming + params.skip_trimming, + params.umi_discard_read ) - ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) + ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions) - reads_for_mirna = FASTQC_TRIMGALORE.out.reads + reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads MIRNA_QUANT ( reference_mature, reference_hairpin, @@ -161,7 +163,7 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) if (!params.skip_mirdeep) { - MIRDEEP2 (FASTQC_TRIMGALORE.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.indices, MIRNA_QUANT.out.fasta_hairpin, MIRNA_QUANT.out.fasta_mature) + MIRDEEP2 (FASTQC_UMITOOLS_TRIMGALORE.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.indices, MIRNA_QUANT.out.fasta_hairpin, MIRNA_QUANT.out.fasta_mature) ch_versions = ch_versions.mix(MIRDEEP2.out.versions) } } @@ -186,7 +188,7 @@ workflow SMRNASEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = 
ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.hairpin_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) From 27fd482b7eba23b47b8526a1eb32bee489bc3d53 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Fri, 13 May 2022 13:03:20 +0200 Subject: [PATCH 003/117] ADD DOCUMENTATION Add additional documentation to use UMI tools as part of the pipeline. Most of the documentation has been copied from nf-core/rnaseq. --- CHANGELOG.md | 16 +++++++++++----- README.md | 17 +++++++++-------- docs/output.md | 16 ++++++++++++++++ nextflow.config | 8 ++++++++ nextflow_schema.json | 45 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 89 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1b6875e..db1eec43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,14 +21,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script +- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline. ### Parameters -| Old parameter | New parameter | -| -------------------- | ---------------- | -| `--conda` | `--enable_conda` | -| `--clusterOptions` | | -| `--publish_dir_mode` | | +| Old parameter | New parameter | +| -------------------- | --------------------------- | +| `--conda` | `--enable_conda` | +| `--clusterOptions` | | +| `--publish_dir_mode` | | +| | `--with_umi` | +| | `--umitools_extract_method` | +| | `--umitools_bc_pattern` | +| | `--umi_discard_read` | +| | `--save_umi_intermeds` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. 
diff --git a/README.md b/README.md index 6d3efa52..e4294f99 100644 --- a/README.md +++ b/README.md @@ -31,27 +31,28 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Pipeline summary 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) +2. UMI barcode extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +3. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) 1. Insert Size calculation 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -3. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -4. Alignment against miRBase hairpin +4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) +5. Alignment against miRBase hairpin 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Post-alignment processing of miRBase hairpin +6. Post-alignment processing of miRBase hairpin 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 2. Analysis on miRBase hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - TMM normalization and a table of top expression hairpin - MDS plot clustering samples - Heatmap of sample similarities 3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop)) -6. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) +7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 1. 
Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) -7. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) +8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) 1. Mapping against reference genome with the mapper module 2. Known and novel miRNA discovery with the mirdeep2 module -8. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -9. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) +10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Quick Start diff --git a/docs/output.md b/docs/output.md index ce1f8347..79f1d6d9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -13,6 +13,7 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [FastQC](#fastqc) - read quality control +- [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction - [TrimGalore](#trimgalore) - adapter trimming - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins) - [SAMtools](#samtools) - alignment result processing and feature counting @@ -39,6 +40,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) +## UMI-tools extract + +
+Output files + +- `umitools/` + - `*.fastq.gz`: If `--save_umi_intermeds` is specified, FastQ files **after** UMI extraction will be placed in this directory. + - `*.log`: Log file generated by the UMI-tools `extract` command. + +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools dedup](#umi-tools-dedup) section. + +To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. + ## TrimGalore [TrimGalore](http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) is used for removal of adapter contamination and trimming of low quality regions. TrimGalore uses [Cutadapt](https://github.com/marcelm/cutadapt) for adapter trimming and runs FastQC after it finishes. diff --git a/nextflow.config b/nextflow.config index c5fa807d..4d724120 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,14 @@ params { mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" + // UMI handling + with_umi = false + skip_umi_extract = false + umitools_extract_method = 'string' + umitools_bc_pattern = null + umi_discard_read = null + save_umi_intermeds = false + // Trimming options clip_r1 = 0 three_prime_clip_r1 = 0 diff --git a/nextflow_schema.json b/nextflow_schema.json index 027f1b37..5019408f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -50,6 +50,48 @@ } } }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "properties": { + "with_umi": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." 
+ }, + "umitools_extract_method": { + "type": "string", + "default": "string", + "fa_icon": "fas fa-barcode", + "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" + }, + "skip_umi_extract": { + "type": "boolean", + "fa_icon": "fas fa-compress-alt", + "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run." + }, + "umitools_bc_pattern": { + "type": "string", + "fa_icon": "fas fa-barcode", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", + "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." + }, + "umi_discard_read": { + "type": "integer", + "fa_icon": "fas fa-barcode", + "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." + }, + "save_umi_intermeds": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + } + }, + "fa_icon": "fas fa-barcode" + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -379,6 +421,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/umi_options" + }, { "$ref": "#/definitions/reference_genome_options" }, From ee673b095f667b119bf1102e370b253a37ce8d4a Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Mon, 13 Jun 2022 13:22:37 +0200 Subject: [PATCH 004/117] ADD SAMTOOLS BAM2FQ MODULE The bam2fq module is necessary to convert the deduplicated bam files back into a fastq format to be fed into the existing pipeline. 
--- modules.json | 5 +- .../nf-core/modules/samtools/bam2fq/main.nf | 56 +++++++++++++++++++ .../nf-core/modules/samtools/bam2fq/meta.yml | 55 ++++++++++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/modules/samtools/bam2fq/main.nf create mode 100644 modules/nf-core/modules/samtools/bam2fq/meta.yml diff --git a/modules.json b/modules.json index 071935f8..c328879e 100644 --- a/modules.json +++ b/modules.json @@ -15,6 +15,9 @@ "multiqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, + "samtools/bam2fq": { + "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" + }, "samtools/flagstat": { "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, @@ -41,4 +44,4 @@ } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf new file mode 100644 index 00000000..9301d1d3 --- /dev/null +++ b/modules/nf-core/modules/samtools/bam2fq/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_BAM2FQ { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(inputbam) + val split + + output: + tuple val(meta), path("*.fq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (split){ + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + -1 ${prefix}_1.fq.gz \\ + -2 ${prefix}_2.fq.gz \\ + -0 ${prefix}_other.fq.gz \\ + -s ${prefix}_singleton.fq.gz \\ + $inputbam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml new file mode 100644 index 00000000..319a60cf --- /dev/null +++ b/modules/nf-core/modules/samtools/bam2fq/meta.yml @@ -0,0 +1,55 @@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: None + documentation: http://www.htslib.org/doc/1.1/samtools.html + tool_dev_url: None + doi: "" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" + +authors: + - "@lescai" From 0bc65e456596ba8dca61b62616b351dc04c1af97 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Tue, 14 Jun 2022 14:13:23 +0200 Subject: [PATCH 005/117] ADD UMITOOLS EXTRACT ARGS Added the umitools extract modules.config lines from nf-core/rnaseq to this pipeline. --- conf/modules.config | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c802ccf3..92fadab2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -38,6 +38,30 @@ process { } } +if (params.with_umi && !params.skip_umi_extract) { + process { + withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' { + ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? 
"--bc-pattern='${params.umitools_bc_pattern}'" : '', + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] + ] + } + } +} + // // Genome preparation options // @@ -89,7 +113,7 @@ process { if (!(params.skip_fastqc || params.skip_qc)) { process { - withName: '.*:FASTQC_TRIMGALORE:FASTQC' { + withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:FASTQC' { ext.args = '--quiet' } } @@ -97,7 +121,7 @@ if (!(params.skip_fastqc || params.skip_qc)) { if (!params.skip_trimming) { process { - withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { + withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:TRIMGALORE' { ext.args = '--fastqc' publishDir = [ [ From 8d14f90a58b51460dfb41a47a7487ecbc272d40e Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 15 Jun 2022 09:41:18 +0200 Subject: [PATCH 006/117] UPDATE MODULES.CONFIG Added configurations for umi deduplication. --- conf/modules.config | 97 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 92fadab2..c1a74e21 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -38,30 +38,6 @@ process { } } -if (params.with_umi && !params.skip_umi_extract) { - process { - withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' { - ext.args = [ - params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', - params.umitools_bc_pattern ? 
"--bc-pattern='${params.umitools_bc_pattern}'" : '', - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_umi_intermeds - ] - ] - } - } -} - // // Genome preparation options // @@ -101,6 +77,7 @@ process { // // Read QC and trimming options // + process { withName: 'MIRTRACE_RUN' { publishDir = [ @@ -145,6 +122,78 @@ if (!params.skip_trimming) { } } +if (params.with_umi && !params.skip_umi_extract) { + process { + withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' { + ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] + ] + } + } +} + +// +// UMI tools deduplication +// + +if (params.with_umi) { + process { + withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' { + ext.args = { meta.single_end ? 
'' : '--unpaired-reads=discard --chimeric-pairs=discard' } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/umi_dedup/umitools" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ], + [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_umi_intermeds + ) + ] + ] + } + + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: ( + params.save_umi_intermeds + ) + ] + } + + withName: '.*:DEDUPLICATE_UMIS:BAM_STATS_SAMTOOLS:.*' { + publishDir = [ + path: { "${params.outdir}/umi_dedup/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } +} + // // Quantification // From 23f96d83ea5bfcf40810d58797dc8f3b060a0303 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 15 Jun 2022 09:43:43 +0200 Subject: [PATCH 007/117] INCLUDE UMITOOLS DEDUP WORKFLOW Initial commit of the umi dedup subworkflow. The workflow combines already existing modules of the pipeline and nf-core module to deduplicate the reads by mapping them to the species genome and re-converting them to fastq after deduplication. --- subworkflows/local/umi_dedup.nf | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 subworkflows/local/umi_dedup.nf diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf new file mode 100644 index 00000000..f97b8e8c --- /dev/null +++ b/subworkflows/local/umi_dedup.nf @@ -0,0 +1,69 @@ +// +// Deduplicate the UMI reads by mapping them to the complete genome. 
+// + +include { INDEX_GENOME } from '../../modules/local/bowtie_genome' +include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' +include { BAM_SORT_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_samtools' +include { UMITOOLS_DEDUP } from '../../modules/nf-core/modules/umitools/dedup/main' +include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' + +workflow DEDUPLICATE_UMIS { + take: + fasta + bt_index + reads // channel: [ val(meta), [ reads ] ] + + main: + + ch_versions = Channel.empty() + ch_dedup_stats = Channel.empty() + + if (!bt_index){ + INDEX_GENOME ( fasta ) + bt_indices = INDEX_GENOME.out.bt_indices + fasta_formatted = INDEX_GENOME.out.fasta + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + } else { + bt_indices = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" } + fasta_formatted = fasta + } + + if (bt_indices){ + + reads.view() + + BOWTIE_MAP_GENOME ( reads, bt_indices.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) + + BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + + BAM_SORT_SAMTOOLS.out.bam.view() + ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) + + ch_umi_dedup.view() + + UMITOOLS_DEDUP ( ch_umi_dedup ) + ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) + ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) + + SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false ) + ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) + } + + emit: + reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] + indices = bt_indices + stats = ch_dedup_stats + versions = ch_versions +} + + +def add_suffix(row, suffix) { + def meta = [:] + meta.id 
= "${row[0].id}_${suffix}" + def array = [] + array = [ meta, row[1] ] + return array +} \ No newline at end of file From 944d27789fb85915efd6f5e60f43fcc77667a892 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 15 Jun 2022 09:47:40 +0200 Subject: [PATCH 008/117] INCLUDE UMITOOLS DEDUP includes the optional umitools deduplication step after the read QC. --- workflows/smrnaseq.nf | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 1613425c..27bf842a 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -54,12 +54,13 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INPUT_CHECK } from '../subworkflows/local/input_check' include { FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastqc_umitools_trimgalore' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -135,14 +136,26 @@ workflow SMRNASEQ { // 
FASTQC_UMITOOLS_TRIMGALORE ( ch_cat_fastq, - params.with_umi, - params.umi_discard_read, params.skip_fastqc || params.skip_qc, - params.skip_trimming + params.with_umi, + params.skip_trimming, + params.umi_discard_read ) ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions) reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads + + reads_for_mirna.view() + + if (params.with_umi){ + if (fasta){ + fasta_ch = file(fasta) + DEDUPLICATE_UMIS (fasta_ch, bt_index, FASTQC_UMITOOLS_TRIMGALORE.out.reads) + reads_for_mirna = DEDUPLICATE_UMIS.out.reads + ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) + } + } + MIRNA_QUANT ( reference_mature, reference_hairpin, From ddb3dbaa92b6243b3dc498290062fd3b7f761925 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Wed, 15 Jun 2022 13:55:52 +0200 Subject: [PATCH 009/117] ADD SAMTOOLS SORT CONFIG Added additional configuration to change the output file name of samtools sort. --- conf/modules.config | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c1a74e21..92832223 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -172,8 +172,20 @@ if (params.with_umi) { ] } + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.{bam}', + enabled: ( + params.save_umi_intermeds + ) + ] + } + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.umi_dedup.sorted" } + ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/umi_dedup" }, mode: params.publish_dir_mode, From b2ef66a76772f2299a705a10a42622dbc3d32407 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Wed, 15 Jun 2022 15:28:36 +0200 Subject: [PATCH 010/117] FIX TYPO --- subworkflows/local/umi_dedup.nf | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index f97b8e8c..5886ed52 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -30,8 +30,6 @@ workflow DEDUPLICATE_UMIS { } if (bt_indices){ - - reads.view() BOWTIE_MAP_GENOME ( reads, bt_indices.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) @@ -39,11 +37,7 @@ workflow DEDUPLICATE_UMIS { BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - BAM_SORT_SAMTOOLS.out.bam.view() ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) - - ch_umi_dedup.view() - UMITOOLS_DEDUP ( ch_umi_dedup ) ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) From 29ec7dae7ec06fe74ec1fd0d4745e1e81d06087c Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Wed, 15 Jun 2022 15:28:47 +0200 Subject: [PATCH 011/117] ADD DEDUP DOCUMENTATION Added the documentation detailing the output files of the UMI-tools deduplication step. 
--- docs/output.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 79f1d6d9..e0c2bd67 100644 --- a/docs/output.md +++ b/docs/output.md @@ -15,6 +15,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [FastQC](#fastqc) - read quality control - [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction - [TrimGalore](#trimgalore) - adapter trimming +- [UMI-tools deduplicate](#umi-tools-deduplicate) - read deduplication - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins) - [SAMtools](#samtools) - alignment result processing and feature counting - [edgeR](#edger) - normalization, MDS plot and sample pairwise distance heatmap @@ -51,7 +52,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools dedup](#umi-tools-dedup) section. +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools deduplicate](#umi-tools-deduplicate) section. To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. 
@@ -74,6 +75,20 @@ This is an example of the output we can get: ![cutadapt](images/cutadapt_plot.png) +## UMI-tools deduplicate + +
+Output files + +- `umi_dedup/` + - `*.tsv`: Results statistics files detailing the UMI deduplication results. + - `*.bam`: If `--save_umi_intermeds` is specified, the deduplicated bam files **after** UMI deduplication will be placed in this directory. In addition the sorted and indexed files will be placed there as well. + - `samtools_stats/` + - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are converted into fastq format and then used in the pipeline. + ## Bowtie [Bowtie](http://bowtie-bio.sourceforge.net/index.shtml) is used for mapping adapter trimmed reads against the mature miRNAs and miRNA precursors (hairpins) in [miRBase](http://www.mirbase.org/). From afa1ad76c7ece9d5a521e44a16abe522655c9808 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Wed, 15 Jun 2022 15:38:42 +0200 Subject: [PATCH 012/117] ADD DEDUP STEP --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e4294f99..e3ceb707 100644 --- a/README.md +++ b/README.md @@ -35,24 +35,25 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) 1. Insert Size calculation 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Alignment against miRBase hairpin +4. UMI barcode deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +5. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) +6. Alignment against miRBase hairpin 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -6. 
Post-alignment processing of miRBase hairpin +7. Post-alignment processing of miRBase hairpin 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 2. Analysis on miRBase hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - TMM normalization and a table of top expression hairpin - MDS plot clustering samples - Heatmap of sample similarities 3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop)) -7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) +8. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) -8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) +9. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) 1. Mapping against reference genome with the mapper module 2. Known and novel miRNA discovery with the mirdeep2 module -9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +10. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) +11. 
Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Quick Start From c72ac5be2993be2a65e90c0d4bc99385fd2d661b Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Wed, 15 Jun 2022 15:43:39 +0200 Subject: [PATCH 013/117] ADD UMITOOLS VERSION --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db1eec43..32df9aaf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script -- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline. +- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline and extend the deduplication step. ### Parameters @@ -61,6 +61,7 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | `seqkit` | 0.16.0 | 2.0.0 | | `trim-galore` | 0.6.6 | 0.6.7 | | `bioconvert` | - | 0.4.3 | +| `umi_tools` | - | 1.1.2 | | `htseq` | - | - | | `markdown` | - | - | | `pymdown-extensions` | - | - | From f44228931a3342e700b6e900e7ddfdcda91fc5d4 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Mon, 20 Jun 2022 15:26:09 +0200 Subject: [PATCH 014/117] MERGE DEDUPLICATED AND UNMAPPED READS AFTER DEDUPLICATION After deduplication the reads that remained unaligned to the provided reference genome are merged with the set of deduplicated reads to enable the use of the full spectrum of reads, independent of potential reference bias. 
This behaviour can be deactivated by setting --umi_merge_unmapped false --- conf/modules.config | 2 +- modules/local/join_reads.nf | 21 +++++++++++++++++++++ subworkflows/local/umi_dedup.nf | 25 +++++++++++++++++++------ workflows/smrnaseq.nf | 4 +--- 4 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 modules/local/join_reads.nf diff --git a/conf/modules.config b/conf/modules.config index 92832223..ed78630e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -196,7 +196,7 @@ if (params.with_umi) { ] } - withName: '.*:DEDUPLICATE_UMIS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/umi_dedup/samtools_stats" }, mode: params.publish_dir_mode, diff --git a/modules/local/join_reads.nf b/modules/local/join_reads.nf new file mode 100644 index 00000000..9f68f062 --- /dev/null +++ b/modules/local/join_reads.nf @@ -0,0 +1,21 @@ +process JOIN_FASTQS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::samtools=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' : + 'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }" + + input: + tuple val(meta), path(reads) + tuple val(unmapped_meta), path(unmapped_reads) + + output: + tuple val(meta), path('*_merged.fq.gz'), emit: merged + script: + """ + cat ${reads} ${unmapped_reads} > ${meta.id}_merged.fq.gz + """ + +} diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 5886ed52..0e054d9b 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -3,10 +3,11 @@ // include { INDEX_GENOME } from '../../modules/local/bowtie_genome' -include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' +include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' include { BAM_SORT_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_samtools' include { UMITOOLS_DEDUP } from '../../modules/nf-core/modules/umitools/dedup/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' +include { JOIN_FASTQS } from '../../modules/local/join_reads' workflow DEDUPLICATE_UMIS { take: @@ -31,10 +32,10 @@ workflow DEDUPLICATE_UMIS { if (bt_indices){ - BOWTIE_MAP_GENOME ( reads, bt_indices.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) + UMI_MAP_GENOME ( reads, bt_indices.collect() ) + ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) - BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) + BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) @@ -44,12 +45,24 @@ workflow DEDUPLICATE_UMIS { SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false ) ch_versions = 
ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) + + ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads + + if ( params.umi_merge_unmapped ) { + + JOIN_FASTQS ( + SAMTOOLS_BAM2FQ.out.reads, + UMI_MAP_GENOME.out.unmapped + ) + ch_dedup_reads = JOIN_FASTQS.out.merged + } } emit: - reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] +// reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] + reads = ch_dedup_reads indices = bt_indices - stats = ch_dedup_stats +// stats = ch_dedup_stats versions = ch_versions } diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 27bf842a..54f0e505 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -144,9 +144,7 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions) reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads - - reads_for_mirna.view() - + if (params.with_umi){ if (fasta){ fasta_ch = file(fasta) From f9ca542f8d4710a3b779eda7e366b4ac70ffb2e7 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Mon, 20 Jun 2022 15:29:00 +0200 Subject: [PATCH 015/117] ADD MISSING OPTION --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 4d724120..78df9386 100644 --- a/nextflow.config +++ b/nextflow.config @@ -34,6 +34,7 @@ params { umitools_bc_pattern = null umi_discard_read = null save_umi_intermeds = false + umi_merge_unmapped = true // Trimming options clip_r1 = 0 From b9747170589a60215f57d04bf39426bd5fca7868 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Mon, 20 Jun 2022 15:29:36 +0200 Subject: [PATCH 016/117] ADD NEWLINE --- modules/local/mirdeep2_run.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 8ef6e804..5444ecfc 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -37,4 +37,3 @@ process MIRDEEP2_RUN { END_VERSIONS """ } - From 4610be12d815689d0918f8c335b45dd6cfbd0107 Mon Sep 17 00:00:00 
2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:20:00 +0200 Subject: [PATCH 017/117] CLEAN CODE --- subworkflows/local/umi_dedup.nf | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 0e054d9b..94341a96 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -59,18 +59,8 @@ workflow DEDUPLICATE_UMIS { } emit: -// reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ] reads = ch_dedup_reads indices = bt_indices -// stats = ch_dedup_stats + stats = ch_dedup_stats versions = ch_versions } - - -def add_suffix(row, suffix) { - def meta = [:] - meta.id = "${row[0].id}_${suffix}" - def array = [] - array = [ meta, row[1] ] - return array -} \ No newline at end of file From 67b2caceadeae51cdfc86e5a966dbeee66a4e69f Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:21:45 +0200 Subject: [PATCH 018/117] ADD DOCUMENTATION --- workflows/smrnaseq.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 54f0e505..51e412e8 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -144,7 +144,10 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions) reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads - + + // + // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome + // if (params.with_umi){ if (fasta){ fasta_ch = file(fasta) From 23fc985ab537abe0aa55771a693e831e6eae9f92 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:25:34 +0200 Subject: [PATCH 019/117] ADD UMI_MERGE_UNMAPPED COMMAND --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5019408f..4547be90 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -88,6 +88,11 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": 
"If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + }, + "umi_merge_unmapped": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias" } }, "fa_icon": "fas fa-barcode" From be241eafd97c5943be3614a0b843e28fec6b5be3 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:31:32 +0200 Subject: [PATCH 020/117] FINALIZE DOCUMENTATION Information on the new --umi_merge_unmapped command were added to both the CHANGELOG, as well as the output markdown script. --- CHANGELOG.md | 1 + docs/output.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32df9aaf..8fb364d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--umitools_bc_pattern` | | | `--umi_discard_read` | | | `--save_umi_intermeds` | +| | `--umi_merge_unmapped` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. diff --git a/docs/output.md b/docs/output.md index e0c2bd67..14cc4e6d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -87,7 +87,7 @@ This is an example of the output we can get: - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. -[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. 
The deduplicated reads are converted into fastq format and then used in the pipeline. +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline. ## Bowtie From 8b433f1876749f277cf946f84b52407434fdcbdc Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:37:09 +0200 Subject: [PATCH 021/117] UPDATE MAIL TEMPLATE --- assets/email_template.html | 142 +++++++++++-------------------------- 1 file changed, 42 insertions(+), 100 deletions(-) diff --git a/assets/email_template.html b/assets/email_template.html index 05d590d3..e75e86ac 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,111 +1,53 @@ - - - - + + + + - - - nf-core/smrnaseq Pipeline Report - - -
- + + nf-core/smrnaseq Pipeline Report + + +
-

nf-core/smrnaseq v${version}

-

Run Name: $runName

+ - <% if (!success){ out << """ -
-

nf-core/smrnaseq execution completed unsuccessfully!

+

nf-core/smrnaseq v${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

nf-core/smrnaseq execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ } else { out << """ -
+
${errorReport}
+
+ """ +} else { + out << """ +
nf-core/smrnaseq execution completed successfully! -
- """ } %> +
+ """ +} +%> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
-$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> " - - - - - " }.join("\n") %> - -
- $k - -
$v
-
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
-

nf-core/smrnaseq

-

https://github.com/nf-core/smrnaseq

-
- +

nf-core/smrnaseq

+

https://github.com/nf-core/smrnaseq

+ +
+ + From 0e732ede28b931b4e233d07cc092526753de95c1 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:43:38 +0200 Subject: [PATCH 022/117] CHANGE DAG OUTPUT TO HTML --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 78df9386..7e528b38 100644 --- a/nextflow.config +++ b/nextflow.config @@ -192,7 +192,7 @@ trace { } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { From 8f426b56ac14b90bef00aa63ff57ecdb6c843e1f Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 10:56:12 +0200 Subject: [PATCH 023/117] PLEASE PRETTIER --- assets/email_template.html | 141 ++++++++++++++++++++++++++----------- docs/output.md | 5 +- 2 files changed, 101 insertions(+), 45 deletions(-) diff --git a/assets/email_template.html b/assets/email_template.html index e75e86ac..d856192a 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,53 +1,110 @@ - - - - + + + + - - nf-core/smrnaseq Pipeline Report - - -
+ + nf-core/smrnaseq Pipeline Report + + +
+ - +

nf-core/smrnaseq v${version}

+

Run Name: $runName

-

nf-core/smrnaseq v${version}

-

Run Name: $runName

- -<% if (!success){ - out << """ -
-

nf-core/smrnaseq execution completed unsuccessfully!

+ <% if (!success){ out << """ +
+

nf-core/smrnaseq execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ -} else { - out << """ -
+
${errorReport}
+
+ """ } else { out << """ +
nf-core/smrnaseq execution completed successfully! -
- """ -} -%> +
+ """ } %> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
+$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> "" }.join("\n") %> - -
$k
$v
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> " + + + + + " }.join("\n") %> + +
+ $k + +
$v
+
-

nf-core/smrnaseq

-

https://github.com/nf-core/smrnaseq

- -
- - +

nf-core/smrnaseq

+

https://github.com/nf-core/smrnaseq

+
+ diff --git a/docs/output.md b/docs/output.md index 14cc4e6d..c2b28d06 100644 --- a/docs/output.md +++ b/docs/output.md @@ -83,9 +83,8 @@ This is an example of the output we can get: - `umi_dedup/` - `*.tsv`: Results statistics files detailing the UMI deduplication results. - `*.bam`: If `--save_umi_intermeds` is specified, the deduplicated bam files **after** UMI deduplication will be placed in this directory. In addition the sorted and indexed files will be placed there as well. - - `samtools_stats/` - - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. - + - `samtools_stats/` - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. + [UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline. 
From 039843f42d61801163491d4518ec295203ba9a1f Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 13:06:41 +0200 Subject: [PATCH 024/117] FIX MERGE ERROR --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index f66c3a7c..043796d9 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -166,7 +166,7 @@ workflow SMRNASEQ { } } - reads_for_mirna = FASTQC_TRIMGALORE.out.reads + reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads // // SUBWORKFLOW: remove contaminants from reads // From 53c097c366ca885ab6b7145a33de48a5f1c7fff6 Mon Sep 17 00:00:00 2001 From: CKComputomics Date: Tue, 21 Jun 2022 13:12:22 +0200 Subject: [PATCH 025/117] MAKE PRETTIER HAPPY --- CHANGELOG.md | 29 ++++++++++++++--------------- README.md | 8 ++++---- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03ae1048..cc0ba654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,20 +16,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Parameters -| Old parameter | New parameter | -| ------------- | ------------------------ | -| | `--mirGeneDB` | -| | `--mirGeneDB_species` | -| | `--mirGeneDB_gff` | -| | `--mirGeneDB_mature` | -| | `--mirGeneDB_hairpin` | -| | `--contamination_filter` | -| | `--rrna` | -| | `--trna` | -| | `--cdna` | -| | `--ncrna` | -| | `--pirna` | -| | `--other_contamination` | +| Old parameter | New parameter | +| ------------- | --------------------------- | +| | `--mirGeneDB` | +| | `--mirGeneDB_species` | +| | `--mirGeneDB_gff` | +| | `--mirGeneDB_mature` | +| | `--mirGeneDB_hairpin` | +| | `--contamination_filter` | +| | `--rrna` | +| | `--trna` | +| | `--cdna` | +| | `--ncrna` | +| | `--pirna` | +| | `--other_contamination` | | | `--with_umi` | | | `--umitools_extract_method` | | | `--umitools_bc_pattern` | @@ -37,7 +37,6 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 | | `--save_umi_intermeds` | | | `--umi_merge_unmapped` | - ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua ### Major enhancements diff --git a/README.md b/README.md index 5177598a..f6198652 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,10 @@ On release, automated continuous integration tests run the pipeline on a full-si 9. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 10. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) - 1. Mapping against reference genome with the mapper module - 2. Known and novel miRNA discovery with the mirdeep2 module -11. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -12. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +11. Mapping against reference genome with the mapper module +12. Known and novel miRNA discovery with the mirdeep2 module +13. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) +14. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Quick Start From 608c4140a989b567dd472fb852c08984eac4e2d0 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 22 Jun 2022 15:26:29 +0200 Subject: [PATCH 026/117] ADD NF-CORE CAT Includes the nf-core cat module to replace the custom concatenation module. 
--- modules.json | 3 ++ modules/nf-core/modules/cat/cat/main.nf | 62 ++++++++++++++++++++++++ modules/nf-core/modules/cat/cat/meta.yml | 37 ++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 modules/nf-core/modules/cat/cat/main.nf create mode 100644 modules/nf-core/modules/cat/cat/meta.yml diff --git a/modules.json b/modules.json index c328879e..c74fa935 100644 --- a/modules.json +++ b/modules.json @@ -3,6 +3,9 @@ "homePage": "https://github.com/nf-core/smrnaseq", "repos": { "nf-core/modules": { + "cat/cat": { + "git_sha": "eeda4136c096688d04cc40bb3c70d948213ed641" + }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/cat/cat/main.nf b/modules/nf-core/modules/cat/cat/main.nf new file mode 100644 index 00000000..40e53f3e --- /dev/null +++ b/modules/nf-core/modules/cat/cat/main.nf @@ -0,0 +1,62 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'quay.io/biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cat/cat/meta.yml b/modules/nf-core/modules/cat/cat/meta.yml new file mode 100644 index 00000000..5eeff5a6 --- /dev/null +++ b/modules/nf-core/modules/cat/cat/meta.yml @@ -0,0 +1,37 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + homepage: None + 
documentation: https://man7.org/linux/man-pages/man1/cat.1.html + tool_dev_url: None + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" From 6d305c25ba933082b8a6003a8670fe62c032ad1b Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 22 Jun 2022 15:27:32 +0200 Subject: [PATCH 027/117] REPLACE CUSTOM CAT WITH NF-CORE CAT Implements the use of the nf-core cat module. --- subworkflows/local/umi_dedup.nf | 24 +++++++++++++----------- workflows/smrnaseq.nf | 7 +++++-- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 94341a96..12033274 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -7,7 +7,7 @@ include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie include { BAM_SORT_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_samtools' include { UMITOOLS_DEDUP } from '../../modules/nf-core/modules/umitools/dedup/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' -include { JOIN_FASTQS } from '../../modules/local/join_reads' +include { CAT_CAT } from '../../modules/nf-core/modules/cat/cat/main' workflow DEDUPLICATE_UMIS { take: @@ -22,17 +22,17 @@ workflow DEDUPLICATE_UMIS { if (!bt_index){ INDEX_GENOME ( fasta ) - bt_indices = INDEX_GENOME.out.bt_indices + bt_index = INDEX_GENOME.out.bowtie_indices fasta_formatted = INDEX_GENOME.out.fasta ch_versions = 
ch_versions.mix(INDEX_GENOME.out.versions) } else { - bt_indices = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" } + bt_index = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" } fasta_formatted = fasta } - if (bt_indices){ + if (bt_index){ - UMI_MAP_GENOME ( reads, bt_indices.collect() ) + UMI_MAP_GENOME ( reads, bt_index.collect() ) ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) @@ -49,18 +49,20 @@ workflow DEDUPLICATE_UMIS { ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads if ( params.umi_merge_unmapped ) { + + SAMTOOLS_BAM2FQ.out.reads + .join(UMI_MAP_GENOME.out.unmapped) + .map { meta, file1, file2 -> [meta, [file1, file2]]} + .set { ch_cat } - JOIN_FASTQS ( - SAMTOOLS_BAM2FQ.out.reads, - UMI_MAP_GENOME.out.unmapped - ) - ch_dedup_reads = JOIN_FASTQS.out.merged + CAT_CAT ( ch_cat ) + ch_dedup_reads = CAT_CAT.out.file_out } } emit: reads = ch_dedup_reads - indices = bt_indices + indices = bt_index stats = ch_dedup_stats versions = ch_versions } diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 043796d9..dc30e875 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -160,13 +160,16 @@ workflow SMRNASEQ { if (params.with_umi){ if (fasta){ fasta_ch = file(fasta) - DEDUPLICATE_UMIS (fasta_ch, bt_index, FASTQC_UMITOOLS_TRIMGALORE.out.reads) + DEDUPLICATE_UMIS ( + fasta_ch, + bt_index, + FASTQC_UMITOOLS_TRIMGALORE.out.reads + ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) } } - reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads // // SUBWORKFLOW: remove contaminants from reads // From 57a8dbaab7f3614899952f4f880f6dcfa8c1c242 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 22 Jun 2022 15:30:58 +0200 Subject: [PATCH 028/117] REMOVE UNUSED MODULE 
deletes the now unused conatenation module. --- modules/local/join_reads.nf | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 modules/local/join_reads.nf diff --git a/modules/local/join_reads.nf b/modules/local/join_reads.nf deleted file mode 100644 index 9f68f062..00000000 --- a/modules/local/join_reads.nf +++ /dev/null @@ -1,21 +0,0 @@ -process JOIN_FASTQS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::samtools=1.13' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' : - 'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }" - - input: - tuple val(meta), path(reads) - tuple val(unmapped_meta), path(unmapped_reads) - - output: - tuple val(meta), path('*_merged.fq.gz'), emit: merged - script: - """ - cat ${reads} ${unmapped_reads} > ${meta.id}_merged.fq.gz - """ - -} From 04b852cec33782bc51b05e5ed555cb93787d3871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 12 Oct 2023 09:06:58 +0100 Subject: [PATCH 029/117] Re-organized code mirna_quant to make it simpler to read --- subworkflows/local/mirna_quant.nf | 37 +++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index dfa16ab4..e26839bc 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -33,24 +33,17 @@ workflow MIRNA_QUANT { main: ch_versions = Channel.empty() + + PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed } ch_versions = ch_versions.mix(PARSE_MATURE.out.versions) FORMAT_MATURE ( mirna_parsed ) ch_versions = ch_versions.mix(FORMAT_MATURE.out.versions) - PARSE_HAIRPIN ( hairpin 
).parsed_fasta.set { hairpin_parsed } - ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) - - FORMAT_HAIRPIN ( hairpin_parsed ) - ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) - INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).index.set { mature_bowtie } ch_versions = ch_versions.mix(INDEX_MATURE.out.versions) - INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } - ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) - reads .map { add_suffix(it, "mature") } .dump (tag:'msux') @@ -64,15 +57,28 @@ workflow MIRNA_QUANT { .dump (tag:'hsux') .set { reads_hairpin } - BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) - BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions) + + + PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed } + ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) + + FORMAT_HAIRPIN ( hairpin_parsed ) + ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) + + INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } + ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) + + BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) + BAM_STATS_HAIRPIN ( BOWTIE_MAP_HAIRPIN.out.bam, FORMAT_HAIRPIN.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions) + + BAM_STATS_MATURE.out.idxstats.collect{it[1]} .mix(BAM_STATS_HAIRPIN.out.idxstats.collect{it[1]}) .dump(tag:'edger') @@ -81,6 +87,8 @@ workflow MIRNA_QUANT { .set { edger_input } EDGER_QC ( edger_input ) + + reads .map { add_suffix(it, "seqcluster") } .dump (tag:'ssux') @@ -92,6 +100,9 @@ workflow MIRNA_QUANT { BOWTIE_MAP_SEQCLUSTER ( reads_collapsed, hairpin_bowtie.collect() ) ch_versions = 
ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions) + + + ch_mirtop_logs = Channel.empty() if (params.mirtrace_species){ MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf ) @@ -106,6 +117,8 @@ workflow MIRNA_QUANT { .dump (tag:'gsux') .set { reads_genome } + + emit: fasta_mature = FORMAT_MATURE.out.formatted_fasta fasta_hairpin = FORMAT_HAIRPIN.out.formatted_fasta From f285867b7a1e370148a0de57338fdd7c5a04af6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 12 Oct 2023 09:07:47 +0100 Subject: [PATCH 030/117] Switched to HTML less hairpin.fa and mature.fa --- conf/test.config | 4 ++-- nextflow.config | 4 ++-- nextflow_schema.json | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index 1a81afee..450ef11d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,8 +23,8 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - mature = 'https://mirbase.org/download/CURRENT/mature.fa' - hairpin = 'https://mirbase.org/download/CURRENT/hairpin.fa' + mature = 'https://mirbase.org/download/mature.fa' + hairpin = 'https://mirbase.org/download/hairpin.fa' mirna_gtf = 'https://mirbase.org/download/hsa.gff3' mirtrace_species = 'hsa' protocol = 'illumina' diff --git a/nextflow.config b/nextflow.config index 059feffe..f1a3dbcf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,8 +20,8 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false mirna_gtf = null - mature = "https://mirbase.org/download/CURRENT/mature.fa" - hairpin = "https://mirbase.org/download/CURRENT/hairpin.fa" + mature = "https://mirbase.org/download/mature.fa" + hairpin = "https://mirbase.org/download/hairpin.fa" mirgenedb = false mirgenedb_mature = null mirgenedb_hairpin = null diff --git 
a/nextflow_schema.json b/nextflow_schema.json index d0162e47..cc061902 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -102,7 +102,7 @@ "description": "Path to FASTA file with mature miRNAs.", "fa_icon": "fas fa-wheelchair", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/mature.fa" + "default": "https://mirbase.org/download/mature.fa" }, "mirgenedb_mature": { "type": "string", @@ -114,7 +114,7 @@ "description": "Path to FASTA file with miRNAs precursors.", "fa_icon": "fab fa-cuttlefish", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/hairpin.fa" + "default": "https://mirbase.org/download/hairpin.fa" }, "mirgenedb_hairpin": { "type": "string", From 6948bad72d1355c3704d17f4a2f0df8a305078f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 08:39:16 +0100 Subject: [PATCH 031/117] detect bowtie2 idx instead of bowtie1 idx --- modules/local/bowtie_map_contaminants.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index d10f13b5..e47ee7c7 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -22,7 +22,7 @@ process BOWTIE_MAP_CONTAMINANTS { script: """ - INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` + INDEX=`find -L ./ -name "*.3.bt2" | sed 's/.3.bt2//'` bowtie2 \\ --threads ${task.cpus} \\ --very-sensitive-local \\ From 16bb675f2bf2c5105cc9f956a8c6a93eea09523e Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 10:22:09 +0100 Subject: [PATCH 032/117] print mirQuant inputs --- workflows/smrnaseq.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 049851d0..60c6106f 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -178,6 +178,11 @@ workflow SMRNASEQ { } + print reference_mature + print reference_hairpin + print mirna_gtf + print mirna_reads + MIRNA_QUANT ( [ [:], reference_mature], [ [:], reference_hairpin], From 90527a2ab0bd1580d34f6ab39b643315746929d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 11:29:57 +0100 Subject: [PATCH 033/117] debug mirdeeprun --- subworkflows/local/mirdeep2.nf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index f8098ba5..484beabd 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -23,6 +23,13 @@ workflow MIRDEEP2 { MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index ) ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) + + MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " } + + print "DEBUG fasta:" + fasta + print "DEBUG hairpin:" + hairpin + print "DEBUG mature:" + mature + MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first()) From 39f0597536c3308fbbdef7c0f778ec4d61a772e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 11:32:12 +0100 Subject: [PATCH 034/117] remove previous debug print --- workflows/smrnaseq.nf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 60c6106f..049851d0 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -178,11 +178,6 @@ workflow SMRNASEQ { } - print reference_mature - 
print reference_hairpin - print mirna_gtf - print mirna_reads - MIRNA_QUANT ( [ [:], reference_mature], [ [:], reference_hairpin], From c198eda989caaccd3c86fc5131b0787161dda0ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 12:16:07 +0100 Subject: [PATCH 035/117] add more debug messages --- subworkflows/local/mirdeep2.nf | 6 +++--- workflows/smrnaseq.nf | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index 484beabd..e7ca4789 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -26,9 +26,9 @@ workflow MIRDEEP2 { MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " } - print "DEBUG fasta:" + fasta - print "DEBUG hairpin:" + hairpin - print "DEBUG mature:" + mature + println "DEBUG fasta:" + fasta + println "DEBUG hairpin:" + hairpin + println "DEBUG mature:" + mature MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first()) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 049851d0..a463de5a 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -195,6 +195,12 @@ workflow SMRNASEQ { genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) + FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" } + GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" } + GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" } + MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" } + MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" } + if (!params.skip_mirdeep) { MIRDEEP2 ( FASTQC_FASTP.out.reads, From 85cadb0d53e4eb422934d8e4f379a53a79f99822 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 12:22:15 +0100 Subject: [PATCH 036/117] remove prints --- subworkflows/local/mirdeep2.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index e7ca4789..a08023a0 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -24,11 +24,11 @@ workflow MIRDEEP2 { ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) - MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " } + // MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " } - println "DEBUG fasta:" + fasta - println "DEBUG hairpin:" + hairpin - println "DEBUG mature:" + mature + // println "DEBUG fasta:" + fasta + // println "DEBUG hairpin:" + hairpin + // println "DEBUG mature:" + mature MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first()) From 7c703c9b0a78781ed9d80fc5db42967e1376e7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 12:34:32 +0100 Subject: [PATCH 037/117] remove metq from mature and hairpin --- workflows/smrnaseq.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index a463de5a..be8ae374 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -195,11 +195,15 @@ workflow SMRNASEQ { genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) + hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } + mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" } GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" } 
GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" } - MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" } - MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" } + // MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" } + // MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" } + hairpin_clean.view { it -> println "DEBUG ==> hairpin_clean = $it" } + mature_clean.view { it -> println "DEBUG ==> mature_clean = $it" } if (!params.skip_mirdeep) { MIRDEEP2 ( From e8d04d21a4b6ce742389cfdf2fd73daca27523fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 12:37:32 +0100 Subject: [PATCH 038/117] connect clean hairpin / mature to mirdeep2 --- workflows/smrnaseq.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index be8ae374..f4415d12 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -210,8 +210,10 @@ workflow SMRNASEQ { FASTQC_FASTP.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), - MIRNA_QUANT.out.fasta_hairpin, - MIRNA_QUANT.out.fasta_mature + // MIRNA_QUANT.out.fasta_hairpin, + // MIRNA_QUANT.out.fasta_mature + hairpin_clean, + mature_clean ) ch_versions = ch_versions.mix(MIRDEEP2.out.versions) } From 9285dfbe175e269f9dbd6e051802e3c537b2798c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 12:41:06 +0100 Subject: [PATCH 039/117] remove meta from fasta in mirdeep2_run --- modules/local/mirdeep2_run.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index e4e2aaaf..b676aa10 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -10,7 +10,7 @@ process MIRDEEP2_RUN 
{ 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" input: - tuple val(meta2), path(fasta) + path(fasta) tuple path(reads), path(arf) path hairpin path mature From 3a3f11bf057c9422d8e7eb40425b8e2c7f6ac704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 16 Oct 2023 14:27:57 +0100 Subject: [PATCH 040/117] Clean debug prints --- subworkflows/local/mirdeep2.nf | 7 ------- workflows/smrnaseq.nf | 9 --------- 2 files changed, 16 deletions(-) diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index a08023a0..f8098ba5 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -23,13 +23,6 @@ workflow MIRDEEP2 { MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index ) ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) - - // MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " } - - // println "DEBUG fasta:" + fasta - // println "DEBUG hairpin:" + hairpin - // println "DEBUG mature:" + mature - MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first()) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index f4415d12..17eb985d 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -197,21 +197,12 @@ workflow SMRNASEQ { hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } - FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" } - GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" } - GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" } - // MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" } - // MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" } - hairpin_clean.view { it -> 
println "DEBUG ==> hairpin_clean = $it" } - mature_clean.view { it -> println "DEBUG ==> mature_clean = $it" } if (!params.skip_mirdeep) { MIRDEEP2 ( FASTQC_FASTP.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), - // MIRNA_QUANT.out.fasta_hairpin, - // MIRNA_QUANT.out.fasta_mature hairpin_clean, mature_clean ) From c6efa86cf50dc2f359be18b309bea2bfe9590382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 2023 09:30:51 +0100 Subject: [PATCH 041/117] update bowtie map contaminant with right options --- modules/local/bowtie_map_contaminants.nf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index e47ee7c7..10f5dadd 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -22,16 +22,17 @@ process BOWTIE_MAP_CONTAMINANTS { script: """ - INDEX=`find -L ./ -name "*.3.bt2" | sed 's/.3.bt2//'` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` bowtie2 \\ + -x \$INDEX \\ + -U ${reads} \\ --threads ${task.cpus} \\ + --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ --very-sensitive-local \\ -k 1 \\ - -x \$INDEX \\ - --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ - ${reads} \\ + -S ${meta.id}.filter.contaminant.sam \\ ${args} \\ - -S ${meta.id}.filter.contaminant.sam > ${meta.id}.contaminant_bowtie.log 2>&1 + > ${meta.id}.contaminant_bowtie.log 2>&1 # extracting number of reads from bowtie logs awk -v type=${contaminant_type} 'BEGIN{tot=0} {if(NR==4 || NR == 5){tot += \$1}} END {print "\\""type"\\": "tot }' ${meta.id}.contaminant_bowtie.log | tr -d , > filtered.${meta.id}_${contaminant_type}.stats From 0be5a172404e76da496418afc23e7ff0c061f301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 2023 09:45:33 +0100 Subject: [PATCH 042/117] define args variable --- 
modules/local/bowtie_map_contaminants.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index 10f5dadd..d744b1fd 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -21,6 +21,8 @@ process BOWTIE_MAP_CONTAMINANTS { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: "" + """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` bowtie2 \\ From 13fc7a47725374e1dded9a9f42ff5815657ec033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 2023 14:49:44 +0100 Subject: [PATCH 043/117] FIx? - include fastqc trim to multiQC --- workflows/smrnaseq.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 17eb985d..f73bc79c 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -230,7 +230,8 @@ workflow SMRNASEQ { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_trim_zip.collect{it[1]}.ifEmpty([])) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([])) From 71590dba904f39fa3d524f3eefee4c0df7d4f2f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 
2023 15:32:04 +0100 Subject: [PATCH 044/117] Refresh pipeline summary --- README.md | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 44066051..dea262d8 100644 --- a/README.md +++ b/README.md @@ -26,29 +26,38 @@ You can find numerous talks on the nf-core events page from various topics inclu ## Pipeline summary -1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) - 1. Insert Size calculation - 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) -4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Alignment against miRBase hairpin - 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -6. Post-alignment processing of miRBase hairpin - 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 2. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - - TMM normalization and a table of top expression hairpin - - MDS plot clustering samples - - Heatmap of sample similarities - 3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop)) -7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) -8. 
Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) - 1. Mapping against reference genome with the mapper module - 2. Known and novel miRNA discovery with the mirdeep2 module -9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +1. Quality check and triming + 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) + 2. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) + 3. Trim read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. miRNA QC ([`miRTrace`](https://github.com/friedlanderlab/mirtrace)) +3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) (Optional) + 1. rRNA filtration + 2. tRNA filtration + 3. cDNA filtration + 4. ncRNA filtration + 5. piRNA filtration + 6. Others filtration +4. miRNA quantification + - EdgeR + 1. Reads alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against Mature miRNA ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Unmapped reads (from reads vs mature miRNA) alignment against miRBase hairpin + 4. Post-alignment processing of alignment against Hairpin ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 5. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) + - TMM normalization and a table of top expression hairpin + - MDS plot clustering samples + - Heatmap of sample similarities + - Mirtop quantification + 1. Read collapsing ([`seqcluster`](https://github.com/lpantano/seqcluster)) + 2. miRNA and isomiR annotation ([`mirtop`](https://github.com/miRTop/mirtop)) +5. Genome Quantification (Optional) + 1. 
Reads alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +6. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) (Optional) + 1. Mapping against reference genome with the mapper module + 2. Known and novel miRNA discovery with the mirdeep2 module +7. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Usage From 892ab8924217539ce33679b89eaffa1ff3e2fe30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 2023 18:33:33 +0100 Subject: [PATCH 045/117] Fix version: missing quotes and misaligned END_VER --- modules/local/blat_mirna.nf | 2 +- modules/local/mirdeep2_prepare.nf | 2 +- modules/local/mirdeep2_run.nf | 2 +- modules/local/mirtop_quant.nf | 2 +- modules/local/mirtrace.nf | 2 +- modules/local/parse_fasta_mirna.nf | 2 +- modules/local/seqcluster_collapse.nf | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/blat_mirna.nf b/modules/local/blat_mirna.nf index 7f8a2324..aa0d3d51 100644 --- a/modules/local/blat_mirna.nf +++ b/modules/local/blat_mirna.nf @@ -53,7 +53,7 @@ process BLAT_MIRNA { blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants > filtered.fa -cat <<-END_VERSIONS > versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }') END_VERSIONS diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf index 7e2f2437..124b5b63 100644 --- a/modules/local/mirdeep2_prepare.nf +++ 
b/modules/local/mirdeep2_prepare.nf @@ -23,7 +23,7 @@ process MIRDEEP2_PIGZ { pigz -f -d -p $task.cpus $reads cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) END_VERSIONS """ diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index b676aa10..17a484c1 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -35,7 +35,7 @@ process MIRDEEP2_RUN { -z _${reads.simpleName} cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirdeep2: \$(echo "$VERSION") END_VERSIONS """ diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index e97d6a09..ab38c93d 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -34,7 +34,7 @@ process MIRTOP_QUANT { mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //') END_VERSIONS """ diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf index f576ebc0..95989293 100644 --- a/modules/local/mirtrace.nf +++ b/modules/local/mirtrace.nf @@ -43,7 +43,7 @@ process MIRTRACE_RUN { --force cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirtrace: \$(echo \$(mirtrace -v 2>&1)) END_VERSIONS """ diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index a0bbc75e..ad63401e 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -34,7 +34,7 @@ process PARSE_FASTA_MIRNA { seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index 39f6ce85..82470e5a 
100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -25,7 +25,7 @@ process SEQCLUSTER_SEQUENCES { mv collapsed/*.fastq.gz final/. cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqcluster: \$(echo \$(seqcluster --version 2>&1) | sed 's/^.*seqcluster //') END_VERSIONS """ From 05ae65ad2f2e9ad9c1117f55697298266389a0f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Tue, 17 Oct 2023 18:34:03 +0100 Subject: [PATCH 046/117] Update doc with HTML less fa --- docs/usage.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 9bfe5494..b0b7e647 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -27,10 +27,10 @@ It should point to the 3-letter species name used by [miRBase](https://www.mirba Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. - `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3` -- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa` -- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa` +- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/mature.fa` +- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/hairpin.fa` -If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below . +If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below. 
- `mirgenedb_gff`: The data can not be downloaded automatically (URLs are created with short term tokens in it), thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `--mirgenedb_species`. - `mirgenedb_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. From e6a0b75b4ae05dc2d1203265b3ea53a8c883b203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Wed, 18 Oct 2023 08:18:57 +0100 Subject: [PATCH 047/117] update mirtrace ouput directory --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 556d8509..ba33f736 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -80,7 +80,7 @@ process { process { withName: 'MIRTRACE_RUN' { publishDir = [ - path: { "${params.outdir}/mirtrace/${meta.id}" }, + path: { "${params.outdir}/mirtrace" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] From dd979483a127713a273590c93ed4c706bbfcc187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Wed, 18 Oct 2023 10:05:37 +0100 Subject: [PATCH 048/117] update fastqc config --- conf/modules.config | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ba33f736..8620836e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -87,14 +87,6 @@ process { } } -if (!(params.skip_fastqc)) { - process { - withName: '.*:FASTQC_FASTP:FASTQC_.*' { - ext.args = '--quiet' - } - } -} - if (!params.skip_fastp) { process { withName: 'FASTP' { @@ -130,6 +122,14 @@ if (!params.skip_fastp) { if (!params.skip_fastqc) { process { + withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ From b59a73f317442b7dceb6f8e2772a6bb0240eaa98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Wed, 18 Oct 2023 10:57:08 +0100 Subject: [PATCH 049/117] Update mirdeep2 publish_dir path --- conf/modules.config | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 8620836e..21b16110 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -236,7 +236,14 @@ if (!params.skip_mirdeep) { process { withName: 'MIRDEEP2_MAPPER' { publishDir = [ - path: { "${params.outdir}/mirdeep" }, + path: { "${params.outdir}/mirdeep2/mapper" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MIRDEEP2_RUN' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/run" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] From c835f592a7583222f77c9d23f9edf81c1067791e Mon Sep 17 00:00:00 2001 From: Fabian Hausmann Date: Tue, 28 Nov 2023 09:55:00 +0100 Subject: [PATCH 050/117] Fix stats not executed from genome --- subworkflows/local/genome_quant.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index 967b2757..b310c76a 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -8,7 +8,7 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie workflow GENOME_QUANT { take: - fasta + fasta index reads // channel: [ val(meta), [ reads ] ] @@ -28,8 +28,10 @@ workflow GENOME_QUANT { if (bowtie_index){ BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - - BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) + fasta_formatted + .map { file -> tuple(file.baseName, file) } + .set { sort_input } + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, sort_input ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) } From 7278013731567b2d516e9046438233c8f818c971 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 19 Dec 2023 18:36:51 +0000 Subject: [PATCH 051/117] Template update for nf-core/tools version 2.11 --- .github/CONTRIBUTING.md | 3 + .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/ci.yml | 2 +- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting.yml | 12 +- .gitpod.yml | 4 +- CHANGELOG.md | 2 +- README.md | 17 +-- assets/multiqc_config.yml | 6 +- assets/slackreport.json | 2 +- conf/modules.config | 2 +- docs/usage.md | 4 +- lib/NfcoreTemplate.groovy | 32 ++--- modules.json | 6 +- .../dumpsoftwareversions/environment.yml | 7 ++ .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 7 +- .../dumpsoftwareversions/tests/main.nf.test | 38 ++++++ 
.../tests/main.nf.test.snap | 27 +++++ .../dumpsoftwareversions/tests/tags.yml | 2 + modules/nf-core/fastqc/environment.yml | 7 ++ modules/nf-core/fastqc/main.nf | 10 +- modules/nf-core/fastqc/meta.yml | 5 + modules/nf-core/fastqc/tests/main.nf.test | 109 ++++++++++++++++++ .../nf-core/fastqc/tests/main.nf.test.snap | 10 ++ modules/nf-core/fastqc/tests/tags.yml | 2 + modules/nf-core/multiqc/environment.yml | 7 ++ modules/nf-core/multiqc/main.nf | 8 +- modules/nf-core/multiqc/meta.yml | 11 +- modules/nf-core/multiqc/tests/main.nf.test | 63 ++++++++++ modules/nf-core/multiqc/tests/tags.yml | 2 + nextflow.config | 12 +- 32 files changed, 361 insertions(+), 69 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml create mode 100644 modules/nf-core/fastqc/environment.yml create mode 100644 modules/nf-core/fastqc/tests/main.nf.test create mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqc/tests/tags.yml create mode 100644 modules/nf-core/multiqc/environment.yml create mode 100644 modules/nf-core/multiqc/tests/main.nf.test create mode 100644 modules/nf-core/multiqc/tests/tags.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4d09370b..bd134c88 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. 
+ When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e278390b..3cdbf2b7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d40ffd75..60f93b48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ae6a0d7b..0cd44ed6 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: 
actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..acf72695 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index d6c4fb89..809d1ec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.3 - [date] +## v2.3dev - [date] Initial release of nf-core/smrnaseq, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index e5dc1d80..d0b5ad21 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,8 @@ ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - + diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 267c3cc9..b92cb9e7 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,9 +1,7 @@ report_comment: > - - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. 
For information about how to interpret these results, please see the - documentation. - + documentation. report_section_order: "nf-core-smrnaseq-methods-description": order: -1000 diff --git a/assets/slackreport.json b/assets/slackreport.json index 214c7fa9..1884b21a 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/smrnaseq v${version} - ${runName}", + "author_name": "nf-core/smrnaseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index 39e81386..d91c6aba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,7 +39,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index 8878af27..f14fb92e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,7 +20,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -33,7 +33,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write 
summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index 98249a7b..0606570e 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + 
"git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..f0c63f69 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + 
input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..4274ed57 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 
'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..b9e8f926 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the 
md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..bc0bdb5b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? 
"--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..c2dad217 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index 79a64b2a..b01c59fa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false @@ -82,6 +82,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -104,13 +105,13 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + runOptions = '-u $(id -u):$(id -g)' } arm { docker.runOptions = 
'-u $(id -u):$(id -g) --platform=linux/amd64' @@ -181,7 +182,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -204,6 +205,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -229,7 +233,7 @@ manifest { description = """Small RNA-Seq Best Practice Analysis Pipeline.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.2.3' + version = '2.3dev' doi = '' } From ec3d44615456f2ce58094c587b4353c502072d6c Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 20 Dec 2023 17:05:35 +0000 Subject: [PATCH 052/117] Template update for nf-core/tools version 2.11.1 --- .../{release-announcments.yml => release-announcements.yml} | 0 nextflow.config | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{release-announcments.yml => release-announcements.yml} (100%) diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/nextflow.config b/nextflow.config index b01c59fa..a473a3b6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,10 +111,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id 
-u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From ce28d5a06fdf9e891c51d717893b4869f3b44a4d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 13:01:04 +0000 Subject: [PATCH 053/117] First batch of updates to 2024 --- modules.json | 33 ++- modules/nf-core/fastp/environment.yml | 1 + modules/nf-core/fastp/main.nf | 26 +- modules/nf-core/fastp/tests/main.nf.test | 241 ++++++++++++++++++ modules/nf-core/fastp/tests/main.nf.test.snap | 55 ++++ modules/nf-core/modules/cat/cat/main.nf | 62 ----- modules/nf-core/modules/cat/cat/meta.yml | 37 --- .../nf-core/modules/samtools/bam2fq/main.nf | 56 ---- .../nf-core/modules/samtools/bam2fq/meta.yml | 55 ---- .../nf-core/modules/umitools/dedup/main.nf | 41 --- .../nf-core/modules/umitools/dedup/meta.yml | 59 ----- .../nf-core/modules/umitools/extract/main.nf | 55 ---- .../nf-core/modules/umitools/extract/meta.yml | 47 ---- modules/nf-core/multiqc/environment.yml | 3 +- modules/nf-core/multiqc/main.nf | 10 +- modules/nf-core/multiqc/meta.yml | 3 +- 16 files changed, 357 insertions(+), 427 deletions(-) delete mode 100644 modules/nf-core/modules/cat/cat/main.nf delete mode 100644 modules/nf-core/modules/cat/cat/meta.yml delete mode 100644 modules/nf-core/modules/samtools/bam2fq/main.nf delete mode 100644 modules/nf-core/modules/samtools/bam2fq/meta.yml delete mode 100644 modules/nf-core/modules/umitools/dedup/main.nf delete mode 100644 modules/nf-core/modules/umitools/dedup/meta.yml delete mode 100644 modules/nf-core/modules/umitools/extract/main.nf delete mode 100644 modules/nf-core/modules/umitools/extract/meta.yml diff --git a/modules.json b/modules.json index 1250670b..4a236584 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2", + 
"installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", @@ -17,17 +22,22 @@ }, "fastp": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "fastqc": { "branch": "master", "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "multiqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "installed_by": ["modules"] + }, + "samtools/bam2fq": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": ["modules"] }, "samtools/flagstat": { @@ -54,6 +64,16 @@ "branch": "master", "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules", "bam_stats_samtools"] + }, + "umitools/dedup": { + "branch": "master", + "git_sha": "9d489648a7425fa18d8bed18935442c104d8733c", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] } } }, @@ -68,6 +88,11 @@ "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", "installed_by": ["subworkflows", "bam_sort_stats_samtools"] + }, + "fastq_fastqc_umitools_fastp": { + "branch": "master", + "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee", + "installed_by": ["subworkflows"] } } } diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml index 19ccec25..70389e66 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/fastp/environment.yml @@ -1,3 +1,4 @@ +name: fastp channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastp/main.nf 
b/modules/nf-core/fastp/main.nf index ca5f100f..2a3b679e 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastp/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" @@ -45,7 +45,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -66,7 +66,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -91,7 +91,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -99,4 +99,22 @@ process FASTP { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? 
"touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index f610b735..17dce8ac 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -57,6 +57,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { 
file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -127,6 +188,67 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + 
process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -181,6 +303,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -399,6 +581,65 @@ 
nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 
0fa68c7d..1b7d2419 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -1,4 +1,19 @@ { + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2023-12-21T09:44:37.202512" + }, "fastp test_fastp_interleaved_json": { "content": [ [ @@ -13,6 +28,22 @@ ], "timestamp": "2023-10-17T11:04:45.794175881" }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2023-12-21T09:53:45.237014" + }, "test_fastp_single_end_json": { "content": [ [ @@ -35,6 +66,30 @@ ], "timestamp": "2023-10-17T11:04:10.582076024" }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2023-12-21T09:48:43.148485" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2023-12-21T09:20:07.254788" + }, "test_fastp_single_end_trim_fail_json": { "content": [ [ diff --git a/modules/nf-core/modules/cat/cat/main.nf b/modules/nf-core/modules/cat/cat/main.nf deleted file mode 100644 index 40e53f3e..00000000 --- a/modules/nf-core/modules/cat/cat/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process CAT_CAT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'quay.io/biocontainers/pigz:2.3.4' }" - - input: - tuple val(meta), path(files_in) - - output: - tuple val(meta), path("${prefix}"), emit: file_out - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def file_list = files_in.collect { it.toString() } - - // | input | output | command1 | command2 | - // |-----------|------------|----------|----------| - // | gzipped | gzipped | cat | | - // | ungzipped | ungzipped | cat | | - // | gzipped | ungzipped | zcat | | - // | ungzipped | gzipped | cat | pigz | - - // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" - out_zip = prefix.endsWith('.gz') - in_zip = file_list[0].endsWith('.gz') - command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' - command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' - """ - $command1 \\ - $args \\ - ${file_list.join(' ')} \\ - $command2 \\ - > ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - - stub: - def file_list = files_in.collect { it.toString() } - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" - """ - touch $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/cat/cat/meta.yml b/modules/nf-core/modules/cat/cat/meta.yml deleted file mode 100644 index 5eeff5a6..00000000 --- a/modules/nf-core/modules/cat/cat/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: cat_cat -description: A module for concatenation of gzipped or uncompressed files -keywords: - - concatenate - - gzip - - cat -tools: - - cat: - description: Just concatenation - homepage: None 
- documentation: https://man7.org/linux/man-pages/man1/cat.1.html - tool_dev_url: None - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" - -authors: - - "@erikrikarddaniel" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf deleted file mode 100644 index 9301d1d3..00000000 --- a/modules/nf-core/modules/samtools/bam2fq/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SAMTOOLS_BAM2FQ { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(inputbam) - val split - - output: - tuple val(meta), path("*.fq.gz"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - if (split){ - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - -1 ${prefix}_1.fq.gz \\ - -2 ${prefix}_2.fq.gz \\ - -0 ${prefix}_other.fq.gz \\ - -s ${prefix}_singleton.fq.gz \\ - $inputbam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } else { - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml deleted file mode 100644 index 319a60cf..00000000 --- a/modules/nf-core/modules/samtools/bam2fq/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: samtools_bam2fq -description: | - The module uses bam2fq method from samtools to - convert a SAM, BAM or CRAM file to FASTQ format -keywords: - - bam2fq - - samtools - - fastq -tools: - - samtools: - description: Tools for dealing with SAM, BAM and CRAM files - homepage: None - documentation: http://www.htslib.org/doc/1.1/samtools.html - tool_dev_url: None - doi: "" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - inputbam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - split: - type: boolean - description: | - TRUE/FALSE value to indicate if reads should be separated into - /1, /2 and if present other, or singleton. - Note: choosing TRUE will generate 4 different files. - Choosing FALSE will produce a single file, which will be interleaved in case - the input contains paired reads. - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads: - type: file - description: | - FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) - or a single interleaved .fq.gz file if the user chooses not to split the reads. - pattern: "*.fq.gz" - -authors: - - "@lescai" diff --git a/modules/nf-core/modules/umitools/dedup/main.nf b/modules/nf-core/modules/umitools/dedup/main.nf deleted file mode 100644 index dfcbcf2f..00000000 --- a/modules/nf-core/modules/umitools/dedup/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process UMITOOLS_DEDUP { - tag "$meta.id" - label "process_medium" - - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - """ - umi_tools \\ - dedup \\ - -I $bam \\ - -S ${prefix}.bam \\ - --output-stats $prefix \\ - $paired \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/umitools/dedup/meta.yml b/modules/nf-core/modules/umitools/dedup/meta.yml deleted file mode 100644 index eee8952f..00000000 --- a/modules/nf-core/modules/umitools/dedup/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. -keywords: - - umitools - - deduplication -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. 
- pattern: "*.{bai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" - - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" - - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" - - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" - - "@klkeys" diff --git a/modules/nf-core/modules/umitools/extract/main.nf b/modules/nf-core/modules/umitools/extract/main.nf deleted file mode 100644 index 22a405b9..00000000 --- a/modules/nf-core/modules/umitools/extract/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process UMITOOLS_EXTRACT { - tag "$meta.id" - label "process_low" - - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fastq.gz"), emit: reads - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - umi_tools \\ - extract \\ - -I $reads \\ - -S ${prefix}.umi_extract.fastq.gz \\ - $args \\ - > ${prefix}.umi_extract.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ - } else { - """ - umi_tools \\ - extract \\ - -I ${reads[0]} \\ - --read2-in=${reads[1]} \\ - -S ${prefix}.umi_extract_1.fastq.gz \\ - --read2-out=${prefix}.umi_extract_2.fastq.gz \\ - $args \\ - > ${prefix}.umi_extract.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/umitools/extract/meta.yml b/modules/nf-core/modules/umitools/extract/meta.yml deleted file mode 100644 index 7fc23f72..00000000 --- a/modules/nf-core/modules/umitools/extract/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: umitools_extract -description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place -keywords: - - umitools - - extract -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: list - description: | - List of input FASTQ files whose UMIs will be extracted. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" - - log: - type: file - description: Logfile for umi_tools - pattern: "*.{log}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 9d0e6b20..7625b752 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,7 @@ +name: multiqc channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 485b3ba8..1b9f7c43 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda 'modules/nf-core/multiqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? 
"--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index a61223ed..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC From fed23d1644862c285001275cc421071c4a0911ec Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 13:02:09 +0000 Subject: [PATCH 054/117] Add missing modules / subworkflows --- modules/nf-core/cat/cat/environment.yml | 7 + modules/nf-core/cat/cat/main.nf | 70 +++++++ modules/nf-core/cat/cat/meta.yml | 36 ++++ modules/nf-core/cat/cat/tests/main.nf.test | 179 ++++++++++++++++++ .../nf-core/cat/cat/tests/main.nf.test.snap | 121 ++++++++++++ .../cat/tests/nextflow_unzipped_zipped.config | 6 + .../cat/tests/nextflow_zipped_unzipped.config | 8 + modules/nf-core/cat/cat/tests/tags.yml | 2 + modules/nf-core/multiqc/tests/main.nf.test | 83 ++++++++ .../nf-core/multiqc/tests/main.nf.test.snap | 21 ++ modules/nf-core/multiqc/tests/tags.yml | 2 + modules/nf-core/samtools/bam2fq/main.nf | 56 ++++++ modules/nf-core/samtools/bam2fq/meta.yml | 55 ++++++ modules/nf-core/umitools/dedup/main.nf | 41 ++++ modules/nf-core/umitools/dedup/meta.yml | 59 ++++++ modules/nf-core/umitools/extract/main.nf | 55 ++++++ modules/nf-core/umitools/extract/meta.yml | 47 +++++ .../fastq_fastqc_umitools_fastp/main.nf | 140 ++++++++++++++ 
.../fastq_fastqc_umitools_fastp/meta.yml | 128 +++++++++++++ .../tests/main.nf.test | 60 ++++++ .../tests/main.nf.test.snap | 81 ++++++++ .../tests/tags.yml | 2 + 22 files changed, 1259 insertions(+) create mode 100644 modules/nf-core/cat/cat/environment.yml create mode 100644 modules/nf-core/cat/cat/main.nf create mode 100644 modules/nf-core/cat/cat/meta.yml create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config create mode 100644 modules/nf-core/cat/cat/tests/tags.yml create mode 100644 modules/nf-core/multiqc/tests/main.nf.test create mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/multiqc/tests/tags.yml create mode 100644 modules/nf-core/samtools/bam2fq/main.nf create mode 100644 modules/nf-core/samtools/bam2fq/meta.yml create mode 100644 modules/nf-core/umitools/dedup/main.nf create mode 100644 modules/nf-core/umitools/dedup/meta.yml create mode 100644 modules/nf-core/umitools/extract/main.nf create mode 100644 modules/nf-core/umitools/extract/meta.yml create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 00000000..17a04ef2 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..970ab760 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,70 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..00a8db0b --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..ed5a4f12 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = 
path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert 
snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..423571ba --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + 
"MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": 
"2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..d0438eda --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert 
snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..d37e7304 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:02:49.911994" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:03:14.524346" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml 
b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf new file mode 100644 index 00000000..9301d1d3 --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_BAM2FQ { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(inputbam) + val split + + output: + tuple val(meta), path("*.fq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (split){ + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + -1 ${prefix}_1.fq.gz \\ + -2 ${prefix}_2.fq.gz \\ + -0 ${prefix}_other.fq.gz \\ + -s ${prefix}_singleton.fq.gz \\ + $inputbam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml new file mode 100644 index 00000000..319a60cf --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -0,0 +1,55 
@@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: None + documentation: http://www.htslib.org/doc/1.1/samtools.html + tool_dev_url: None + doi: "" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" + +authors: + - "@lescai" diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf new file mode 100644 index 00000000..dfcbcf2f --- /dev/null +++ b/modules/nf-core/umitools/dedup/main.nf @@ -0,0 +1,41 @@ +process UMITOOLS_DEDUP { + tag "$meta.id" + label "process_medium" + + conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : + 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance + tuple val(meta), path("*per_umi.tsv") , emit: tsv_per_umi + tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + """ + umi_tools \\ + dedup \\ + -I $bam \\ + -S ${prefix}.bam \\ + --output-stats $prefix \\ + $paired \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml new file mode 100644 index 00000000..eee8952f --- /dev/null +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -0,0 +1,59 @@ +name: umitools_dedup +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umitools + - deduplication +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. 
+ pattern: "*.{bai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - tsv_edit_distance: + type: file + description: Reports the (binned) average edit distance between the UMIs at each position. + pattern: "*edit_distance.tsv" + - tsv_per_umi: + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" + - tsv_umi_per_position: + type: file + description: Tabulates the counts for unique combinations of UMI and position. + pattern: "*per_position.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" + - "@klkeys" diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 00000000..22a405b9 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,55 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_low" + + conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : + 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 00000000..7fc23f72 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,47 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - umitools + - extract +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf new file mode 100644 index 00000000..3dbb27ea --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -0,0 +1,140 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +workflow FASTQ_FASTQC_UMITOOLS_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + skip_trimming // boolean: true/false + adapter_fasta // file: adapter.fasta + 
save_trimmed_fail // boolean: true/false + save_merged // boolean: true/false + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT ( + reads + ) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + trim_read_count = Channel.empty() + if (!skip_trimming) { + FASTP ( + umi_reads, + adapter_fasta, + save_trimmed_fail, + save_merged + ) + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + FASTP + .out + .reads + .join(trim_json) + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() } + .map { meta, reads, num_reads -> [ meta, reads ] } 
+ .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + + if (!skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml new file mode 100644 index 00000000..220e8db1 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -0,0 +1,128 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# yaml-language-server: $schema=yaml-schema.json +name: "fastq_fastqc_umitools_fastp" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - fastp +components: + - fastqc + - umitools/extract + - fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extraction + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - skip_trimming: + type: boolean + description: | + Allows to skip fastp execution + - adapter_fasta: + type: file + description: | + Fasta file of adapter sequences + - save_trimmed_fail: + type: boolean + description: | + Save trimmed fastqs of failed samples + - save_merged: + type: boolean + description: | + Save merged fastqs + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer reads than this will be filtered out of the "reads" output channel +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + + + + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
+ pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_json: + type: file + description: FastP Trimming report + pattern: "*.{fastp.json}" + - trim_html: + type: file + description: FastP Trimming report + pattern: "*.{fastp.html}" + - log: + type: file + description: Logfile FastP + pattern: "*.{fastp.log}" + - trim_reads_fail: + type: file + description: Trimmed fastq files failing QC + pattern: "*.{fastq.gz}" + - trim_reads_merged: + type: file + description: Trimmed and merged fastq files + pattern: "*.{fastq.gz}" + - trim_read_count: + type: integer + description: Number of reads after trimming + - fastqc_trim_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_trim_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robsyme" +maintainers: + - "@robsyme" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test new file mode 100644 index 00000000..cdd73984 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP" + script "../main.nf" + workflow "FASTQ_FASTQC_UMITOOLS_FASTP" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_fastqc_umitools_fastp" + tag "fastq_fastqc_umitools_fastp" + tag "fastqc" + tag "umitools/extract" + tag "fastp" + + + test("sarscov2 paired-end [fastq]") { + + when { + workflow { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = false // skip_fastqc + input[2] = false // with_umi + input[3] = false // skip_umi_extract + input[4] = 1 // umi_discard_read + input[5] = false // skip_trimming + input[6] = [] // adapter_fasta + input[7] = false // save_trimmed_fail + input[8] = false // save_merged + input[9] = 1 // min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.umi_log).match("umi_log") }, + { assert snapshot(workflow.out.trim_json).match("trim_json") }, + { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, + { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..38a65aeb --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "trim_reads_merged": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.26920982" + }, + "trim_reads_fail": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.25861515" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + 
"versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "timestamp": "2023-11-26T02:28:26.30891403" + }, + "trim_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.24768259" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ] + ], + "timestamp": "2023-12-04T11:30:32.061644815" + }, + "umi_log": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-26T02:28:26.238536" + }, + "trim_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 198 + ] + ] + ], + "timestamp": "2023-11-26T02:28:26.27984169" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml new file mode 100644 index 00000000..84a4b567 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_fastqc_umitools_fastp: + - subworkflows/nf-core/fastq_fastqc_umitools_fastp/** From be1590056b38ad9744bd73d38f1fc45413693afd Mon Sep 17 00:00:00 2001 From: Fabian Hausmann Date: Thu, 11 Jan 2024 14:06:44 +0100 Subject: [PATCH 055/117] Add issue to changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47cb7b72..a966783d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [dev](https://github.com/nf-core/smrnaseq/branch/dev) -- _nothing yet done_ +- [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`) ## 
[v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03 From 1d4fd076d9a2e1382e847e42156196f6044f20fc Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 13:07:25 +0000 Subject: [PATCH 056/117] Bringing this to 2024 --- .github/CONTRIBUTING.md | 3 + .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/linting.yml | 12 +- assets/multiqc_config.yml | 4 +- lib/NfcoreTemplate.groovy | 32 +- modules.json | 38 +-- modules/nf-core/cat/fastq/environment.yml | 1 + modules/nf-core/cat/fastq/main.nf | 2 +- .../dumpsoftwareversions/environment.yml | 3 +- .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 2 +- .../dumpsoftwareversions/tests/main.nf.test | 7 +- .../tests/main.nf.test.snap | 50 +-- modules/nf-core/fastqc/environment.yml | 1 + modules/nf-core/fastqc/main.nf | 6 +- modules/nf-core/fastqc/tests/main.nf.test | 203 +++++++++++- .../nf-core/fastqc/tests/main.nf.test.snap | 12 +- .../nf-core/samtools/bam2fq/environment.yml | 7 + modules/nf-core/samtools/bam2fq/main.nf | 6 +- modules/nf-core/samtools/bam2fq/meta.yml | 8 +- .../samtools/bam2fq/tests/main.nf.test | 71 ++++ .../samtools/bam2fq/tests/main.nf.test.snap | 49 +++ .../samtools/bam2fq/tests/nextflow.config | 3 + .../nf-core/samtools/bam2fq/tests/tags.yml | 2 + .../nf-core/samtools/flagstat/environment.yml | 3 +- modules/nf-core/samtools/flagstat/main.nf | 6 +- .../samtools/flagstat/tests/main.nf.test | 36 +++ .../samtools/flagstat/tests/main.nf.test.snap | 16 + .../nf-core/samtools/flagstat/tests/tags.yml | 2 + .../nf-core/samtools/idxstats/environment.yml | 3 +- modules/nf-core/samtools/idxstats/main.nf | 6 +- .../samtools/idxstats/tests/main.nf.test | 36 +++ .../samtools/idxstats/tests/main.nf.test.snap | 16 + .../nf-core/samtools/idxstats/tests/tags.yml | 2 + .../nf-core/samtools/index/environment.yml | 3 +- modules/nf-core/samtools/index/main.nf | 6 +- .../samtools/index/tests/csi.nextflow.config | 7 + 
.../nf-core/samtools/index/tests/main.nf.test | 87 +++++ .../samtools/index/tests/main.nf.test.snap | 28 ++ modules/nf-core/samtools/index/tests/tags.yml | 2 + modules/nf-core/samtools/sort/environment.yml | 3 +- modules/nf-core/samtools/sort/main.nf | 6 +- .../nf-core/samtools/sort/tests/main.nf.test | 5 +- .../samtools/sort/tests/main.nf.test.snap | 19 +- .../nf-core/samtools/stats/environment.yml | 3 +- modules/nf-core/samtools/stats/main.nf | 6 +- .../nf-core/samtools/stats/tests/main.nf.test | 2 +- .../samtools/stats/tests/main.nf.test.snap | 20 +- .../nf-core/umitools/dedup/environment.yml | 7 + modules/nf-core/umitools/dedup/main.nf | 43 ++- modules/nf-core/umitools/dedup/meta.yml | 82 +++-- .../nf-core/umitools/extract/environment.yml | 7 + modules/nf-core/umitools/extract/main.nf | 13 +- modules/nf-core/umitools/extract/meta.yml | 57 ++-- .../umitools/extract/tests/main.nf.test | 35 ++ .../umitools/extract/tests/main.nf.test.snap | 10 + .../umitools/extract/tests/nextflow.config | 9 + .../nf-core/umitools/extract/tests/tags.yml | 2 + .../tests/main.nf.test | 14 +- .../tests/main.nf.test.snap | 302 +++++------------- .../bam_sort_stats_samtools/tests/tags.yml | 2 +- .../bam_stats_samtools/tests/main.nf.test | 102 ++++++ .../tests/main.nf.test.snap | 128 ++++++++ .../nf-core/bam_stats_samtools/tests/tags.yml | 2 + 64 files changed, 1225 insertions(+), 442 deletions(-) create mode 100644 modules/nf-core/samtools/bam2fq/environment.yml create mode 100644 modules/nf-core/samtools/bam2fq/tests/main.nf.test create mode 100644 modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/bam2fq/tests/nextflow.config create mode 100644 modules/nf-core/samtools/bam2fq/tests/tags.yml create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/flagstat/tests/tags.yml create mode 100644 
modules/nf-core/samtools/idxstats/tests/main.nf.test create mode 100644 modules/nf-core/samtools/idxstats/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/idxstats/tests/tags.yml create mode 100644 modules/nf-core/samtools/index/tests/csi.nextflow.config create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/index/tests/tags.yml create mode 100644 modules/nf-core/umitools/dedup/environment.yml create mode 100644 modules/nf-core/umitools/extract/environment.yml create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test.snap create mode 100644 modules/nf-core/umitools/extract/tests/nextflow.config create mode 100644 modules/nf-core/umitools/extract/tests/tags.yml create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/tags.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4d09370b..bd134c88 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e278390b..3cdbf2b7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 
74c7de72..e953a059 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,5 @@ report_comment: > - This report has been generated by the nf-core/smrnaseq - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: "nf-core-smrnaseq-methods-description": order: -1000 diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), 
".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index 4a236584..56255242 100644 --- a/modules.json +++ b/modules.json @@ -12,12 +12,12 @@ }, "cat/fastq": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "fastp": { @@ -27,7 +27,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", 
"installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "multiqc": { @@ -37,42 +37,42 @@ }, "samtools/bam2fq": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] }, "umitools/dedup": { "branch": "master", - "git_sha": "9d489648a7425fa18d8bed18935442c104d8733c", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": 
["fastq_fastqc_umitools_fastp", "modules"] } } @@ -81,13 +81,13 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "7c8eeb2b37a6c6d3ffba0aef55ff60c8718c0ba6", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "subworkflows"] }, "fastq_fastqc_umitools_fastp": { "branch": "master", diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index 222b301f..bff93add 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -1,3 +1,4 @@ +name: cat_fastq channels: - conda-forge - bioconda diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index b75a2e73..3d963784 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/cat/fastq/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index 7ca22161..9b3272bc 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -1,6 +1,7 @@ +name: custom_dumpsoftwareversions channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.15 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 60a19e0e..f2187611 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 9414c32d..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db10..b1e1630b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 8713b921..5f59a936 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - 
"software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ], - "1": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "2": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "versions": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "yml": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-10-11T17:10:02.930699" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index f52a53a0..1787b38a 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,4 @@ +name: fastqc channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 5def8818..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144..ad9bc54f 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,23 +3,21 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] ] """ } @@ -28,14 +26,195 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
Mon 2 Oct 2023
test.gz
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert 
snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } ) } } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { 
assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = [ + [ id:'mysample', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32ce..5ef5afbd 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,20 @@ { + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:48:05.126117287" + }, "versions": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "timestamp": "2023-12-29T02:46:49.507942667" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/bam2fq/environment.yml b/modules/nf-core/samtools/bam2fq/environment.yml new file mode 100644 index 00000000..5297496f --- /dev/null +++ 
b/modules/nf-core/samtools/bam2fq/environment.yml @@ -0,0 +1,7 @@ +name: samtools_bam2fq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf index 9301d1d3..55ffd0cf 100644 --- a/modules/nf-core/samtools/bam2fq/main.nf +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_BAM2FQ { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(inputbam) diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml index 319a60cf..7769046b 100644 --- a/modules/nf-core/samtools/bam2fq/meta.yml +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -9,12 +9,8 @@ keywords: tools: - samtools: description: Tools for dealing with SAM, BAM and CRAM files - homepage: None documentation: http://www.htslib.org/doc/1.1/samtools.html - tool_dev_url: None - doi: "" licence: ["MIT"] - input: - meta: type: map @@ -33,7 +29,6 @@ input: Note: choosing TRUE will generate 4 different files. Choosing FALSE will produce a single file, which will be interleaved in case the input contains paired reads. - output: - meta: type: map @@ -50,6 +45,7 @@ output: FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) or a single interleaved .fq.gz file if the user chooses not to split the reads. 
pattern: "*.fq.gz" - authors: - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test b/modules/nf-core/samtools/bam2fq/tests/main.nf.test new file mode 100644 index 00000000..cd65abbe --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test @@ -0,0 +1,71 @@ +nextflow_process { + + name "Test Process SAMTOOLS_BAM2FQ" + script "../main.nf" + process "SAMTOOLS_BAM2FQ" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/bam2fq" + + config "./nextflow.config" + + test("homo_sapiens - bam, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.reads[0][1]).linesGzip[0..6], + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - bam, true") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) + ] + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads[0][1].collect{ + if(it ==~ /.*(other|singleton)\.fq\.gz$/) { + return file(it).name + } + return path(it).linesGzip[0..6] + }, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap new file mode 100644 index 00000000..1f824503 --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "homo_sapiens - bam, false": { + "content": [ + [ + "@922332/1\tRX:Z:ATTTCAG-TATTATT", + 
"GAGAGGATCTCGTGTAGAAATTGCTTTGAGCTGTTCTTTGTCATTTTCCCTTAATTCATTGTCTCTAGCTAGTCTGTTACTCTGTAAAATAAAATAATAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTTAAGGTCAGTG", + "+", + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + + stub: + """ + touch ${prefix}.bam + touch ${prefix}.log + touch ${prefix}_edit_distance.tsv + touch ${prefix}_per_umi.tsv + touch ${prefix}_per_position.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml index eee8952f..38d3fd46 100644 --- a/modules/nf-core/umitools/dedup/meta.yml +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -3,57 +3,69 @@ description: Deduplicate reads based on the mapping co-ordinate and the UMI atta keywords: - umitools - deduplication + - dedup tools: - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] input: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. 
- pattern: "*.{bam}" + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" + - get_output_stats: + type: boolean + description: | + Whether or not to generate output stats. output: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - log: + type: file + description: File with logging information + pattern: "*.{log}" - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" + type: file + description: Reports the (binned) average edit distance between the UMIs at each position. + pattern: "*edit_distance.tsv" - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" + type: file + description: Tabulates the counts for unique combinations of UMI and position. 
+ pattern: "*per_position.tsv" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" - "@klkeys" +maintainers: + - "@drpatelh" + - "@grst" + - "@klkeys" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 00000000..7d08ac0e --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 22a405b9..4bd79e79 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -1,11 +1,12 @@ process UMITOOLS_EXTRACT { tag "$meta.id" - label "process_low" + label "process_single" + label "process_long" - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" input: tuple val(meta), path(reads) @@ -32,7 +33,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } else { @@ -48,7 +49,7 @@ process UMITOOLS_EXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) END_VERSIONS """ } diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index 7fc23f72..7695b271 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -1,47 +1,48 @@ name: umitools_extract description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place keywords: - - umitools + - UMI + - barcode - extract + - umitools tools: - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: "MIT" input: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - reads: - type: list - description: | - List of input FASTQ files whose UMIs will be extracted. + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. output: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: file - description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" - log: - type: file - description: Logfile for umi_tools - pattern: "*.{log}" + type: file + description: Logfile for umi_tools + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 00000000..22242d1d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..6d5944f1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb" + ] + ], + "timestamp": "2023-12-08T09:41:43.540658352" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 00000000..c866f5a0 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 00000000..c3fb23de --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test index a8a13f2a..59b749d8 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -5,7 +5,9 @@ nextflow_workflow { workflow "BAM_SORT_STATS_SAMTOOLS" tag "subworkflows" tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" tag "bam_sort_stats_samtools" + tag 
"subworkflows/bam_stats_samtools" tag "bam_stats_samtools" tag "samtools" tag "samtools/index" @@ -35,7 +37,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } ) } } @@ -61,7 +67,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } ) } } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap index 50ffde60..77afbf17 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -1,236 +1,86 @@ { - "test_bam_sort_stats_samtools_single_end": { + "test_bam_sort_stats_samtools_paired_end_flagstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" ] - } + ] ], - "timestamp": "2023-10-18T09:34:31.989804787" + "timestamp": "2023-10-22T20:25:03.687121177" }, - "test_bam_sort_stats_samtools_paired_end": { + "test_bam_sort_stats_samtools_paired_end_idxstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - 
"1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" ] - } + ] ], - "timestamp": "2023-10-18T09:34:57.682759147" + "timestamp": "2023-10-22T20:25:03.709648916" + }, + 
"test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f281507081517414eb1a04b2d9c855b2" + ] + ] + ], + "timestamp": "2023-12-04T11:06:50.951881479" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,e32e7e49dce1fbe327a89e0fb7bc01b1" + ] + ] + ], + "timestamp": "2023-12-04T11:06:59.253905951" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2023-10-22T20:25:58.451364604" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2023-10-22T20:25:58.416859285" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml index a8274109..30b69d6a 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -1,2 +1,2 @@ -bam_sort_stats_samtools: +subworkflows/bam_sort_stats_samtools: - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..97210890 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag 
"samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..d3af1376 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,49e2b43344ff92bc4c02463a58f7ba4a" + ] + ] + ], + "timestamp": "2023-12-04T11:07:13.965061942" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2023-11-06T09:31:11.668517251" + }, + "test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.340046381" + }, + 
"test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,5a6667d97806e5002731e9cf23674fad" + ] + ] + ], + "timestamp": "2023-12-04T11:07:06.676820877" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-11-06T09:31:11.68246157" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.349439801" + }, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,2cf2fe93596ee3d74f946097b204a629" + ] + ] + ], + "timestamp": "2023-12-04T11:07:22.30295557" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** From 285cfd0d12a9f02286052ffbe36e933060a1747f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 13:30:57 +0000 Subject: [PATCH 057/117] Fixing linting errors --- assets/multiqc_config.yml | 5 +-- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/meta.yml | 1 - modules/nf-core/multiqc/tests/main.nf.test | 48 +++++++++++++++------- 4 files changed, 37 insertions(+), 19 deletions(-) diff 
--git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 71a77e0b..11c1b997 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,7 @@ report_comment: > - - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-smrnaseq-methods-description": diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5b..7625b752 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.18 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660e..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad217..d0438eda 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -7,12 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -26,20 +23,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert 
path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -53,9 +47,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } ) } From e1b232f42c6db1dc701233ab2f7befe31c62aff7 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 13:54:21 +0000 Subject: [PATCH 058/117] Follow the lead, Timmy --- subworkflows/local/umi_dedup.nf | 8 +- .../nf-core/fastqc_umitools_trimgalore.nf | 78 ------------------- workflows/smrnaseq.nf | 40 ++++------ 3 files changed, 19 insertions(+), 107 deletions(-) delete mode 
100644 subworkflows/nf-core/fastqc_umitools_trimgalore.nf diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 12033274..8712f526 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -1,10 +1,10 @@ -// +// // Deduplicate the UMI reads by mapping them to the complete genome. // include { INDEX_GENOME } from '../../modules/local/bowtie_genome' include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' -include { BAM_SORT_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_samtools' +include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' include { UMITOOLS_DEDUP } from '../../modules/nf-core/modules/umitools/dedup/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' include { CAT_CAT } from '../../modules/nf-core/modules/cat/cat/main' @@ -31,7 +31,7 @@ workflow DEDUPLICATE_UMIS { } if (bt_index){ - + UMI_MAP_GENOME ( reads, bt_index.collect() ) ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) @@ -54,7 +54,7 @@ workflow DEDUPLICATE_UMIS { .join(UMI_MAP_GENOME.out.unmapped) .map { meta, file1, file2 -> [meta, [file1, file2]]} .set { ch_cat } - + CAT_CAT ( ch_cat ) ch_dedup_reads = CAT_CAT.out.file_out } diff --git a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf deleted file mode 100644 index ca158e7a..00000000 --- a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf +++ /dev/null @@ -1,78 +0,0 @@ -// -// Read QC, UMI extraction and trimming -// - -nextflow.enable.dsl=2 - -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { UMITOOLS_EXTRACT } from '../../modules/nf-core/modules/umitools/extract/main' -include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' - -workflow FASTQC_UMITOOLS_TRIMGALORE { - take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - 
with_umi // boolean: true/false - skip_trimming // boolean: true/false - umi_discard_read // integer: 0, 1 or 2 - - main: - - ch_versions = Channel.empty() - fastqc_html = Channel.empty() - fastqc_zip = Channel.empty() - if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } - fastqc_zip = FASTQC.out.zip - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } - - umi_reads = reads - umi_log = Channel.empty() - if (with_umi) { - UMITOOLS_EXTRACT ( reads ).reads.set { umi_reads } - umi_log = UMITOOLS_EXTRACT.out.log - ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) - - // Discard R1 / R2 if required - if (umi_discard_read in [1,2]) { - UMITOOLS_EXTRACT - .out - .reads - .map { meta, reads -> - if (!meta.single_end) { - meta['single_end'] = true - reads = reads[umi_discard_read % 2] - } - return [ meta, reads ] - } - .set { umi_reads } - } - } - - trim_reads = umi_reads - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = Channel.empty() - if (!skip_trimming) { - TRIMGALORE ( umi_reads ).reads.set { trim_reads } - trim_html = TRIMGALORE.out.html - trim_zip = TRIMGALORE.out.zip - trim_log = TRIMGALORE.out.log - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - - fastqc_html // channel: [ val(meta), [ html ] ] - fastqc_zip // channel: [ val(meta), [ zip ] ] - - umi_log // channel: [ val(meta), [ log ] ] - - trim_html // channel: [ val(meta), [ html ] ] - trim_zip // channel: [ val(meta), [ zip ] ] - trim_log // channel: [ val(meta), [ txt ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} \ No newline at end of file diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 55f7614b..04529c63 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -64,14 +64,14 @@ if (!params.mirgenedb) { if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { 
exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} } -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastqc_umitools_trimgalore' -include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' -include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' +include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' +include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,22 +132,19 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) // - // SUBWORKFLOW: Read QC and trim adapters + // SUBWORKFLOW: Read QC, extract UMI and trim adapters & dedup UMIs if necessary / desired by the user // - // - // SUBWORKFLOW: Read QC, extract UMI and trim adapters - // - FASTQC_UMITOOLS_FASTP ( + FASTQ_FASTQC_UMITOOLS_FASTP ( ch_cat_fastq, params.skip_fastqc || params.skip_qc, params.with_umi, params.skip_trimming, params.umi_discard_read ) - ch_versions = ch_versions.mix(FASTQC_UMITOOLS_FASTP.out.versions) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - reads_for_mirna = FASTQC_UMITOOLS_FASTP.out.reads 
+ reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads // // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome @@ -156,8 +153,8 @@ workflow SMRNASEQ { if (fasta){ fasta_ch = file(fasta) DEDUPLICATE_UMIS ( - fasta_ch, - bt_index, + fasta_ch, + bt_index, FASTQC_UMITOOLS_FASTP.out.reads ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads @@ -165,18 +162,11 @@ workflow SMRNASEQ { } } - FASTQC_UMITOOLS_FASTP ( - ch_cat_fastq, - ch_fastp_adapters, - false, - false - ) - ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions) // // SUBWORKFLOW: mirtrace QC // - FASTQC_FASTP.out.adapterseq + FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq .join( FASTQC_FASTP.out.reads ) .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } .groupTuple() From 2828bd2a05dc1d6282dfaa5f4d7a5be96482ae18 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 11 Jan 2024 14:16:18 +0000 Subject: [PATCH 059/117] Improvements to channel handling --- subworkflows/local/umi_dedup.nf | 7 ++++--- workflows/smrnaseq.nf | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 8712f526..5a5a393f 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -5,9 +5,10 @@ include { INDEX_GENOME } from '../../modules/local/bowtie_genome' include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' -include { UMITOOLS_DEDUP } from '../../modules/nf-core/modules/umitools/dedup/main' -include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/modules/samtools/bam2fq/main' -include { CAT_CAT } from '../../modules/nf-core/modules/cat/cat/main' +include { UMITOOLS_DEDUP } from '../../modules/nf-core/umitools/dedup/main' +include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' +include { CAT_CAT } from 
'../../modules/nf-core/cat/cat/main' + workflow DEDUPLICATE_UMIS { take: diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 04529c63..250f1991 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -137,14 +137,21 @@ workflow SMRNASEQ { FASTQ_FASTQC_UMITOOLS_FASTP ( ch_cat_fastq, - params.skip_fastqc || params.skip_qc, + params.skip_fastqc, params.with_umi, + params.skip_umi_extract, + params.umi_discard_read, params.skip_trimming, - params.umi_discard_read + params.umi_discard_read, + params.skip_trimming, + params.adapter_fasta, + params.save_trimmed_fail, + params.save_merged, + params.min_trimmed_reads ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads // // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome @@ -155,7 +162,7 @@ workflow SMRNASEQ { DEDUPLICATE_UMIS ( fasta_ch, bt_index, - FASTQC_UMITOOLS_FASTP.out.reads + FASTQC_UMITOOLS_FASTP.out.trim_reads ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) @@ -167,7 +174,7 @@ workflow SMRNASEQ { // SUBWORKFLOW: mirtrace QC // FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq - .join( FASTQC_FASTP.out.reads ) + .join( FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ) .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } .groupTuple() .set { ch_mirtrace_inputs } From ff703f10d4bd122210ac0dbe5e1eac55d0e3cf04 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 08:59:12 +0000 Subject: [PATCH 060/117] Update to latest subworkflow version --- modules.json | 2 +- .../fastq_fastqc_umitools_fastp/main.nf | 14 +++++++++- .../fastq_fastqc_umitools_fastp/meta.yml | 11 ++++---- .../tests/main.nf.test | 1 + .../tests/main.nf.test.snap | 28 ++++++++++++++----- workflows/smrnaseq.nf | 4 +-- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/modules.json b/modules.json 
index 56255242..40eda6cc 100644 --- a/modules.json +++ b/modules.json @@ -91,7 +91,7 @@ }, "fastq_fastqc_umitools_fastp": { "branch": "master", - "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee", + "git_sha": "668185ddcd2d9084c819691c99020360e0f029a0", "installed_by": ["subworkflows"] } } diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 3dbb27ea..49570521 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -17,6 +17,11 @@ def getFastpReadsAfterFiltering(json_file) { return json['after_filtering']['total_reads'].toLong() } +def getFastpAdapterSequence(json_file){ + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('adapter_cutting') + return json['read1_adapter_sequence'] +} + workflow FASTQ_FASTQC_UMITOOLS_FASTP { take: reads // channel: [ val(meta), [ reads ] ] @@ -25,7 +30,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { skip_umi_extract // boolean: true/false umi_discard_read // integer: 0, 1 or 2 skip_trimming // boolean: true/false - adapter_fasta // file: adapter.fasta + adapter_fasta // file: adapter.fasta save_trimmed_fail // boolean: true/false save_merged // boolean: true/false min_trimmed_reads // integer: > 0 @@ -75,6 +80,8 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { fastqc_trim_html = Channel.empty() fastqc_trim_zip = Channel.empty() trim_read_count = Channel.empty() + adapter_seq = Channel.empty() + if (!skip_trimming) { FASTP ( umi_reads, @@ -108,6 +115,10 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { .map { meta, reads, num_reads -> [ meta, num_reads ] } .set { trim_read_count } + trim_json + .map { meta, json -> [meta, getFastpAdapterSequence(json)] } + .set { adapter_seq } + if (!skip_fastqc) { FASTQC_TRIM ( trim_reads @@ -125,6 +136,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { fastqc_raw_zip // channel: [ val(meta), [ zip ] ] umi_log // channel: [ val(meta), [ log ] ] 
+ adapter_seq // channel: [ val(meta), [ adapter_seq] ] trim_json // channel: [ val(meta), [ json ] ] trim_html // channel: [ val(meta), [ html ] ] diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml index 220e8db1..9308fe9b 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -43,7 +43,7 @@ input: - skip_trimming: type: boolean description: | - Allows to skip trimgalore execution + Allows to skip FastP execution - adapter_fasta: type: file description: | @@ -70,10 +70,7 @@ output: type: file description: > Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - - - - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. pattern: "*.{fastq.gz}" - fastqc_html: type: file @@ -118,6 +115,10 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" + - adapter_seq: + type: string + description: | + Adapter Sequence found in read1 - versions: type: file description: File containing software versions diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test index cdd73984..91dec8c5 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -45,6 +45,7 @@ nextflow_workflow { { assert snapshot(workflow.out.trim_json).match("trim_json") }, { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert snapshot(workflow.out.adapter_seq).match("adapter_seq") }, { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") }, { assert 
snapshot(workflow.out.versions).match("versions") }, diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap index 38a65aeb..973746a3 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -5,7 +5,7 @@ ] ], - "timestamp": "2023-11-26T02:28:26.26920982" + "timestamp": "2024-01-12T08:38:50.041635573" }, "trim_reads_fail": { "content": [ @@ -13,7 +13,7 @@ ] ], - "timestamp": "2023-11-26T02:28:26.25861515" + "timestamp": "2024-01-12T08:38:50.033284693" }, "versions": { "content": [ @@ -23,7 +23,7 @@ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" ] ], - "timestamp": "2023-11-26T02:28:26.30891403" + "timestamp": "2024-01-12T08:38:50.121510557" }, "trim_json": { "content": [ @@ -37,7 +37,21 @@ ] ] ], - "timestamp": "2023-11-26T02:28:26.24768259" + "timestamp": "2024-01-12T08:38:50.024410724" + }, + "adapter_seq": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "unspecified" + ] + ] + ], + "timestamp": "2024-01-12T08:38:50.08674429" }, "reads": { "content": [ @@ -54,7 +68,7 @@ ] ] ], - "timestamp": "2023-12-04T11:30:32.061644815" + "timestamp": "2024-01-12T08:38:49.994419936" }, "umi_log": { "content": [ @@ -62,7 +76,7 @@ ] ], - "timestamp": "2023-11-26T02:28:26.238536" + "timestamp": "2024-01-12T08:38:50.017720214" }, "trim_read_count": { "content": [ @@ -76,6 +90,6 @@ ] ] ], - "timestamp": "2023-11-26T02:28:26.27984169" + "timestamp": "2024-01-12T08:38:50.102326089" } } \ No newline at end of file diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 250f1991..5ea0df5e 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -173,9 +173,9 @@ workflow SMRNASEQ { // // SUBWORKFLOW: mirtrace QC // - FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq + FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq .join( 
FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ) - .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } + .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] } .groupTuple() .set { ch_mirtrace_inputs } From 313e7d3f899b4e400912480f50c20c40b4bd7d34 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 09:19:36 +0000 Subject: [PATCH 061/117] Cleaning up --- nextflow.config | 1 + nextflow_schema.json | 11 ++--------- workflows/smrnaseq.nf | 6 ++---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/nextflow.config b/nextflow.config index a223bc26..886011cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,6 +47,7 @@ params { fastp_min_length = 17 fastp_known_mirna_adapters = "$projectDir/assets/known_adapters.fa" save_trimmed_fail = false + save_merged = false skip_fastqc = false skip_multiqc = false skip_mirdeep = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 33b150b4..d736635e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -92,7 +92,8 @@ "umi_merge_unmapped": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias" + "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias", + "default": true } }, "fa_icon": "fas fa-barcode" @@ -209,7 +210,6 @@ }, "three_prime_adapter": { "type": "string", - "default": "TGGAATTCTCGGGTGCCAAGG", "fa_icon": "fas fa-text-width", "description": "Sequencing adapter sequence to use for trimming." 
}, @@ -408,14 +408,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -439,7 +437,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -454,7 +451,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -493,7 +489,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -501,7 +496,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -509,7 +503,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 5ea0df5e..11fb44e5 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -142,16 +142,14 @@ workflow SMRNASEQ { params.skip_umi_extract, params.umi_discard_read, params.skip_trimming, - params.umi_discard_read, - params.skip_trimming, - params.adapter_fasta, + params.fastp_known_mirna_adapters, params.save_trimmed_fail, params.save_merged, params.min_trimmed_reads ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads + reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads // // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome From 5e3f0824f619aa7d94b2e2a25a6b190e6265b32b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 09:30:47 +0000 Subject: [PATCH 062/117] Some more fixes / cleanup --- nextflow.config | 3 ++- nextflow_schema.json | 3 +++ workflows/smrnaseq.nf | 12 ++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 886011cd..e476d297 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,7 +54,8 @@ params { skip_fastp = false save_reference = false fastp_max_length = 40 - + min_trimmed_reads = 10 + // Contamination filtering filter_contamination = false rrna = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d736635e..a846609e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -505,6 +505,9 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
+ }, + "save_merged": { + "type": "boolean" } } } diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 11fb44e5..bf9c3ac8 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -141,7 +141,7 @@ workflow SMRNASEQ { params.with_umi, params.skip_umi_extract, params.umi_discard_read, - params.skip_trimming, + params.skip_fastp, params.fastp_known_mirna_adapters, params.save_trimmed_fail, params.save_merged, @@ -185,7 +185,7 @@ workflow SMRNASEQ { // SUBWORKFLOW: remove contaminants from reads // contamination_stats = Channel.empty() - mirna_reads = FASTQC_FASTP.out.reads + mirna_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads if (params.filter_contamination){ CONTAMINANT_FILTER ( reference_hairpin, @@ -195,7 +195,7 @@ workflow SMRNASEQ { params.ncrna, params.pirna, params.other_contamination, - FASTQC_FASTP.out.reads + FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ) contamination_stats = CONTAMINANT_FILTER.out.filter_stats @@ -223,7 +223,7 @@ workflow SMRNASEQ { if (!params.skip_mirdeep) { MIRDEEP2 ( - FASTQC_UMITOOLS_FASTP.out.reads, + FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), MIRNA_QUANT.out.fasta_hairpin, @@ -252,8 +252,8 @@ workflow SMRNASEQ { ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([])) 
ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([])) From 6240ec0ac8d10372491a3400b6881540a62674f6 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 09:31:48 +0000 Subject: [PATCH 063/117] Clean --- nextflow.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index e476d297..ef869a71 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { save_reference = false fastp_max_length = 40 min_trimmed_reads = 10 - + // Contamination filtering filter_contamination = false rrna = null @@ -65,7 +65,6 @@ params { pirna = null other_contamination = null - // MultiQC options multiqc_config = null multiqc_title = null From bfcf4863db204e94c604bf0c4b0ac0285105ec76 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 09:55:58 +0000 Subject: [PATCH 064/117] Improved error handling --- subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 49570521..764ce013 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -18,8 +18,13 @@ def getFastpReadsAfterFiltering(json_file) { } def getFastpAdapterSequence(json_file){ - def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('adapter_cutting') - return json['read1_adapter_sequence'] + def Map json = (Map) new JsonSlurper().parseText(json_file.text) + try{ + adapter = json['adapter_cutting']['read1_adapter_sequence'] + } catch(Exception ex){ + adapter = "" + } + return adapter } workflow FASTQ_FASTQC_UMITOOLS_FASTP { From d5d1bfcd31c209a8ea39b5fe3901d726c06e9d0d Mon Sep 17 00:00:00 2001 From: Alexander 
Peltzer Date: Fri, 12 Jan 2024 11:07:48 +0100 Subject: [PATCH 065/117] Update nextflow_schema.json --- nextflow_schema.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index a846609e..b1106dcd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -241,6 +241,12 @@ "default": "${projectDir}/assets/known_adapters.fa", "description": "FastA with known miRNA adapter sequences for adapter trimming", "fa_icon": "far fa-question-circle" + }, + "min_trimmed_reads": { + "type": "integer", + "default": 10, + "fa_icon": "far fa-window-minimize", + "description": "Minimum number of reads required in input file to use it" } } }, From 22f07dead81b26b3664c054b88e43b5e4610b2fe Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 10:48:46 +0000 Subject: [PATCH 066/117] Fix for UMI fasta --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 9caf0d5e..314c8d75 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -155,7 +155,7 @@ workflow SMRNASEQ { // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome // if (params.with_umi){ - if (fasta){ + if (params.fasta){ fasta_ch = file(fasta) DEDUPLICATE_UMIS ( fasta_ch, From 8fb0ae060ca6596d9245fcbff6df4db2bf17513a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 10:54:45 +0000 Subject: [PATCH 067/117] Fix params.fasta --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 314c8d75..ee62cb26 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -156,7 +156,7 @@ workflow SMRNASEQ { // if (params.with_umi){ if (params.fasta){ - fasta_ch = file(fasta) + fasta_ch = file(params.fasta) DEDUPLICATE_UMIS ( fasta_ch, bt_index, From 227af283d143ca3325344ed4c76abc674ab5e5fe Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 
Jan 2024 11:09:07 +0000 Subject: [PATCH 068/117] Minor updates --- modules/local/edger_qc.nf | 2 +- modules/local/mirdeep2_prepare.nf | 6 +++--- workflows/smrnaseq.nf | 9 ++++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf index 729d5eed..5560de23 100644 --- a/modules/local/edger_qc.nf +++ b/modules/local/edger_qc.nf @@ -1,7 +1,7 @@ process EDGER_QC { label 'process_medium' - conda 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36' + conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' : 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf index 124b5b63..ce66b9f1 100644 --- a/modules/local/mirdeep2_prepare.nf +++ b/modules/local/mirdeep2_prepare.nf @@ -3,10 +3,10 @@ process MIRDEEP2_PIGZ { tag "$meta.id" // TODO maybe create a mulled container and uncompress within mirdeep2_mapper? - conda 'bioconda::bioconvert=0.4.3' + conda 'bioconda::bioconvert=1.1.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bioconvert:0.4.3--py_0' : - 'biocontainers/bioconvert:0.4.3--py_0' }" + 'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' : + 'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index ee62cb26..95e5b810 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -72,6 +72,7 @@ include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' include { GENOME_QUANT } from '../subworkflows/local/genome_quant' include { MIRTRACE } from '../subworkflows/local/mirtrace' include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { INDEX_GENOME } from '../../modules/local/bowtie_genome' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -157,9 +158,15 @@ workflow SMRNASEQ { if (params.with_umi){ if (params.fasta){ fasta_ch = file(params.fasta) + + //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs. 
+ INDEX_GENOME ( fasta ) + + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + DEDUPLICATE_UMIS ( fasta_ch, - bt_index, + INDEX_GENOME.out.bowtie_indices, FASTQC_UMITOOLS_FASTP.out.trim_reads ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads From eed08955d5fd4c018d216d772e653df0f209307a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 11:11:05 +0000 Subject: [PATCH 069/117] Fixing some lint warnings --- modules/local/bowtie_contaminants.nf | 2 +- modules/local/bowtie_genome.nf | 2 +- modules/local/bowtie_map_contaminants.nf | 2 +- modules/local/bowtie_mirna.nf | 2 +- modules/local/filter_stats.nf | 2 +- modules/local/seqcluster_collapse.nf | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/bowtie_contaminants.nf b/modules/local/bowtie_contaminants.nf index e6a594a7..cf02de31 100644 --- a/modules/local/bowtie_contaminants.nf +++ b/modules/local/bowtie_contaminants.nf @@ -4,7 +4,7 @@ process INDEX_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2'}" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2'}" input: path fasta diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 91a6cd53..4e1ea7ca 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -4,7 +4,7 @@ process INDEX_GENOME { conda 'bioconda::bowtie=1.3.1-4' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' : 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index d744b1fd..c9863ab3 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -4,7 +4,7 @@ process BOWTIE_MAP_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 2be45bb8..cb7f0d0f 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -3,7 +3,7 @@ process INDEX_MIRNA { conda 'bioconda::bowtie=1.3.0-2' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' : 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index 18e7016b..1e5494cd 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -4,7 +4,7 @@ process FILTER_STATS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index 82470e5a..826f8cef 100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -5,7 +5,7 @@ process SEQCLUSTER_SEQUENCES { conda 'bioconda::seqcluster=1.2.9-0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' : - 'biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }" + 'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }" input: tuple val(meta), path(reads) From 2cc2a0df4df8462b0ba6c658801afb19988bd898 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 11:18:23 +0000 Subject: [PATCH 070/117] Adding in versions for filter stats --- modules/local/filter_stats.nf | 8 ++ subworkflows/local/contaminant_filter.nf | 4 +- subworkflows/local/fastqc_fastp.nf | 116 ----------------------- 3 files changed, 10 insertions(+), 118 deletions(-) delete mode 100644 subworkflows/local/fastqc_fastp.nf diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index 1e5494cd..4c46f51d 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -13,6 +13,7 @@ process FILTER_STATS { output: path "*_mqc.yaml" , emit: stats tuple val(meta), path('*.filtered.fastq.gz'), emit: reads + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,5 +25,12 @@ process FILTER_STATS { tr '\n' ', ' | \\ awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: 
\\"bargraph\\"\\npconfig:\\n id: \\"contamination_filter_plot\\"\\n title: \\"Contamination Plot\\"\\n ylab: \\"Number of reads\\"\\ndata:\\n "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml gzip -c ${reads} > ${meta.id}.filtered.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(cat --version | grep 'cat ' |sed 's/cat (GNU coreutils) //') + gzip: \$(gzip --version | grep "gzip" | sed 's/gzip //') + tr: \$(tr --version | grep 'tr ' |sed 's/tr (GNU coreutils) //') + END_VERSIONS """ } diff --git a/subworkflows/local/contaminant_filter.nf b/subworkflows/local/contaminant_filter.nf index 383c85ad..dede115d 100644 --- a/subworkflows/local/contaminant_filter.nf +++ b/subworkflows/local/contaminant_filter.nf @@ -123,6 +123,6 @@ workflow CONTAMINANT_FILTER { emit: filtered_reads = FILTER_STATS.out.reads - versions = ch_versions + versions = ch_versions.mix(FILTER_STATS.out.versions) filter_stats = FILTER_STATS.out.stats -} \ No newline at end of file +} diff --git a/subworkflows/local/fastqc_fastp.nf b/subworkflows/local/fastqc_fastp.nf deleted file mode 100644 index 9e4d952e..00000000 --- a/subworkflows/local/fastqc_fastp.nf +++ /dev/null @@ -1,116 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' - -// -// Function that parses fastp json output file to get total number of reads after trimming -// -import groovy.json.JsonSlurper - -def getFastpReadsAfterFiltering(json_file) { - return new JsonSlurper().parseText(json_file.text) - ?.get('summary') - ?.get('after_filtering') - ?.get('total_reads') - ?.toInteger() -} - -String getFastpAdapterSequence(json_file){ - return new JsonSlurper().parseText(json_file.text) - ?.get('adapter_cutting') - ?.get('read1_adapter_sequence') -} - -workflow FASTQC_FASTP { 
- take: - reads // channel: [ val(meta), [ reads ] ] - adapter_list // channel: [ path/to/adapters.fa ] - save_trimmed_fail // value: boolean - save_merged // value: boolean - - - main: - - ch_versions = Channel.empty() - fastqc_raw_html = Channel.empty() - fastqc_raw_zip = Channel.empty() - adapterseq = reads.map { meta, _ -> [meta, null] } - if (!params.skip_fastqc) { - FASTQC_RAW ( - reads - ) - fastqc_raw_html = FASTQC_RAW.out.html - fastqc_raw_zip = FASTQC_RAW.out.zip - ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - } - - trim_reads = reads - trim_json = Channel.empty() - trim_html = Channel.empty() - trim_log = Channel.empty() - trim_reads_fail = Channel.empty() - trim_reads_merged = Channel.empty() - fastqc_trim_html = Channel.empty() - fastqc_trim_zip = Channel.empty() - if (!params.skip_fastp) { - FASTP ( - reads, - adapter_list, - save_trimmed_fail, - save_merged - ) - trim_reads = FASTP.out.reads - trim_json = FASTP.out.json - trim_html = FASTP.out.html - trim_log = FASTP.out.log - trim_reads_fail = FASTP.out.reads_fail - trim_reads_merged = FASTP.out.reads_merged - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - - // - // Filter empty FastQ files after adapter trimming so FastQC doesn't fail - // - trim_reads - .join(trim_json) - .map { - meta, reads, json -> - if (getFastpReadsAfterFiltering(json) > 0) { - [ meta, reads ] - } - } - .set { trim_reads } - - trim_json - .map { meta, json -> [meta, getFastpAdapterSequence(json)] } - .set { adapterseq } - - if (!params.skip_fastqc) { - FASTQC_TRIM ( - trim_reads - ) - fastqc_trim_html = FASTQC_TRIM.out.html - fastqc_trim_zip = FASTQC_TRIM.out.zip - ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) - } - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trim_json // channel: [ val(meta), [ json ] ] - trim_html // channel: [ val(meta), [ html ] ] - trim_log // channel: [ val(meta), [ log ] ] - trim_reads_fail // channel: [ val(meta), [ fastq.gz 
] ] - trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] - adapterseq // channel: [ val(meta), [ adapterseq ] ] - - fastqc_raw_html // channel: [ val(meta), [ html ] ] - fastqc_raw_zip // channel: [ val(meta), [ zip ] ] - fastqc_trim_html // channel: [ val(meta), [ html ] ] - fastqc_trim_zip // channel: [ val(meta), [ zip ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} From bf6e8bde711df521183e67a7ed3ef6364185bacb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 11:21:49 +0000 Subject: [PATCH 071/117] Fixing more conda imports --- modules/local/bowtie_genome.nf | 6 +++--- modules/local/bowtie_mirna.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 4e1ea7ca..5ccb0afa 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -2,10 +2,10 @@ process INDEX_GENOME { tag "$fasta" label 'process_medium' - conda 'bioconda::bowtie=1.3.1-4' + conda 'bioconda::bowtie=1.3.1-6' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' : + 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index cb7f0d0f..08521095 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -1,10 +1,10 @@ process INDEX_MIRNA { label 'process_medium' - conda 'bioconda::bowtie=1.3.0-2' + conda 'bioconda::bowtie=1.3.1-6' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' : + 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" input: tuple val(meta2), path(fasta) From 454504d1b415bb2b88dd5d7bda25d5599d3e0145 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 11:28:40 +0000 Subject: [PATCH 072/117] More updates of modules --- modules/local/bowtie_genome.nf | 4 ++-- modules/local/bowtie_mirna.nf | 4 ++-- modules/local/format_fasta_mirna.nf | 6 +++--- modules/local/mirdeep2_mapper.nf | 2 +- modules/local/mirdeep2_run.nf | 2 +- modules/local/parse_fasta_mirna.nf | 6 +++--- modules/local/seqcluster_collapse.nf | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 5ccb0afa..60f33f1a 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -2,9 +2,9 @@ process INDEX_GENOME { tag "$fasta" label 'process_medium' - conda 'bioconda::bowtie=1.3.1-6' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' : + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" input: diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 08521095..2fbf1588 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -1,9 +1,9 @@ process INDEX_MIRNA { label 'process_medium' - conda 'bioconda::bowtie=1.3.1-6' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' : + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" input: diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf index 489879a5..67461d64 100644 --- a/modules/local/format_fasta_mirna.nf +++ b/modules/local/format_fasta_mirna.nf @@ -4,10 +4,10 @@ process FORMAT_FASTA_MIRNA { tag "$fasta" label 'process_medium' - conda 'bioconda::fastx_toolkit=0.0.14-9' + conda 'bioconda::fastx_toolkit=0.0.14' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--he1b5a44_8' : - 'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" + 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--hdbdd923_11' : + 'biocontainers/fastx_toolkit:0.0.14--hdbdd923_11' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf index 842af6e6..19a9c5dc 100644 --- a/modules/local/mirdeep2_mapper.nf +++ b/modules/local/mirdeep2_mapper.nf @@ -4,7 +4,7 @@ process MIRDEEP2_MAPPER { label 'process_medium' tag "$meta.id" - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 9b18ed7d..442f26f3 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -4,7 +4,7 @@ process MIRDEEP2_RUN { label 'process_medium' errorStrategy 'ignore' - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index ad63401e..60665251 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -1,10 +1,10 @@ process PARSE_FASTA_MIRNA { label 'process_medium' - conda 'bioconda::seqkit=2.3.1' + conda 'bioconda::seqkit=2.6.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0' : - 'biocontainers/seqkit:2.3.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0' : + 'biocontainers/seqkit:2.6.1--h9ee0642_0' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index 826f8cef..4379654c 100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -2,7 +2,7 @@ process SEQCLUSTER_SEQUENCES { label 'process_medium' tag "$meta.id" - conda 'bioconda::seqcluster=1.2.9-0' + conda 'bioconda::seqcluster=1.2.9' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' : 'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }" From 7eefccaa5d22d14ab9f9b4a5b6bd58a0d7300c8a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 12:03:37 +0000 Subject: [PATCH 073/117] Fix --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 95e5b810..51a4a2cd 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -72,7 +72,7 @@ include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' include { GENOME_QUANT } from '../subworkflows/local/genome_quant' include { MIRTRACE } from '../subworkflows/local/mirtrace' include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' -include { INDEX_GENOME } from '../../modules/local/bowtie_genome' +include { INDEX_GENOME } from '../modules/local/bowtie_genome' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From f118456bbfaa4e92a02cfc44f5bd109ca5e932b2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 12:07:00 +0000 Subject: [PATCH 074/117] Fix --- modules/local/bowtie_genome.nf | 2 +- modules/local/bowtie_mirna.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 60f33f1a..17ea9253 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -5,7 +5,7 @@ process INDEX_GENOME { conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : - 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 2fbf1588..06756bef 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -4,7 +4,7 @@ process INDEX_MIRNA { conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : - 'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }" + 'biocontainers/bowtie:1.3.1--py310h30d4ff4_6' }" input: tuple val(meta2), path(fasta) From cbbc6d8a307e4dea4c7c62f477250dce9467bbbb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 12:16:56 +0000 Subject: [PATCH 075/117] Another fix --- modules/local/bowtie_mirna.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 06756bef..733d816e 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -4,7 +4,7 @@ process INDEX_MIRNA { conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : - 'biocontainers/bowtie:1.3.1--py310h30d4ff4_6' }" + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) From ea6d05eebd83cef1220f774b982f5e8a7236838f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 12:39:04 +0000 Subject: [PATCH 076/117] Yeah fasta --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 51a4a2cd..b3de8acc 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -160,7 +160,7 @@ workflow SMRNASEQ { fasta_ch = file(params.fasta) //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs. - INDEX_GENOME ( fasta ) + INDEX_GENOME ( fasta_ch ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 6df081f83eadc4e48fe062ed6a802d683b6aa8ab Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 12:47:28 +0000 Subject: [PATCH 077/117] Another one in the umi workflow --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index b3de8acc..a9f34706 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -167,7 +167,7 @@ workflow SMRNASEQ { DEDUPLICATE_UMIS ( fasta_ch, INDEX_GENOME.out.bowtie_indices, - FASTQC_UMITOOLS_FASTP.out.trim_reads + FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) From 43d3aba11c6489baa9df94e1b0017338ea30ce6f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 12 Jan 2024 20:39:33 +0000 Subject: [PATCH 078/117] Update edger_qc --- modules/local/edger_qc.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf index 5560de23..8c311457 100644 --- a/modules/local/edger_qc.nf +++ 
b/modules/local/edger_qc.nf @@ -3,8 +3,8 @@ process EDGER_QC { conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' : - 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' : + 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' }" input: path input_files From 2059ed4cf72950cfb1e30000412b0e9aaf19163f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 08:08:29 +0000 Subject: [PATCH 079/117] Shorten test.config --- conf/test.config | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 450ef11d..e699776b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,9 +23,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - mature = 'https://mirbase.org/download/mature.fa' - hairpin = 'https://mirbase.org/download/hairpin.fa' - mirna_gtf = 'https://mirbase.org/download/hsa.gff3' + mirtrace_species = 'hsa' protocol = 'illumina' skip_mirdeep = true From 945c3182d27150a0dcabd63b581d5573ac4fa595 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 08:10:00 +0000 Subject: [PATCH 080/117] Adding in UMI tests --- .github/workflows/ci.yml | 1 + conf/test_umi.config | 31 +++++++++++++++++++++++++++++++ nextflow.config | 1 + 3 files 
changed, 33 insertions(+) create mode 100644 conf/test_umi.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 885ae9b6..51c3ee37 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,7 @@ jobs: profile: - "test" - "test_no_genome" + - "test_umi" steps: - name: Check out pipeline code uses: actions/checkout@v4 diff --git a/conf/test_umi.config b/conf/test_umi.config new file mode 100644 index 00000000..a50ddb7a --- /dev/null +++ b/conf/test_umi.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_umi.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' + + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + +} diff --git a/nextflow.config b/nextflow.config index 4143e3a0..b32d5ecf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -230,6 +230,7 @@ profiles { } test { includeConfig 'conf/test.config' } + test_umi { includeConfig 'conf/test_umi.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } } From e43e4cfcdf103c49588276c031d91a15bc9b132e Mon Sep 17 00:00:00 2001 From: 
Alexander Peltzer Date: Wed, 17 Jan 2024 08:19:40 +0000 Subject: [PATCH 081/117] Addressing some reviews --- CHANGELOG.md | 1 + conf/modules.config | 6 ++--- subworkflows/local/umi_dedup.nf | 39 ++++++++++++++++----------------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ebcea04..611c5338 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`) - [[#164]](https://github.com/nf-core/smrnaseq/pull/164) - UMI Handling Feature implemented in the pipeline - [[#302]](https://github.com/nf-core/smrnaseq/pull/302) - Merged in nf-core template v2.11.1 +- [[#294]](https://github.com/nf-core/smrnaseq/pull/294) - Fixed contamination screening issues ### Parameters diff --git a/conf/modules.config b/conf/modules.config index b54a3534..769f62f8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -131,7 +131,7 @@ if (!params.skip_fastp) { if (!params.skip_fastqc) { process { - withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' { + withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/raw" }, @@ -139,7 +139,7 @@ if (!params.skip_fastp) { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { + withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/trim" }, @@ -153,7 +153,7 @@ if (!params.skip_fastp) { if (params.with_umi && !params.skip_umi_extract) { process { - withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' { + withName: '.*:FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { ext.args = [ params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', params.umitools_bc_pattern ? 
"--bc-pattern='${params.umitools_bc_pattern}'" : '', diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 5a5a393f..db9065ea 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -31,36 +31,35 @@ workflow DEDUPLICATE_UMIS { fasta_formatted = fasta } - if (bt_index){ - UMI_MAP_GENOME ( reads, bt_index.collect() ) - ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) + UMI_MAP_GENOME ( reads, bt_index.collect() ) + ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) - BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) - UMITOOLS_DEDUP ( ch_umi_dedup ) - ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) - ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) + ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) + UMITOOLS_DEDUP ( ch_umi_dedup ) + ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) + ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) - SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false ) - ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) + SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false ) + ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) - ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads + ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads - if ( params.umi_merge_unmapped ) { + if ( params.umi_merge_unmapped ) { - SAMTOOLS_BAM2FQ.out.reads - .join(UMI_MAP_GENOME.out.unmapped) - .map { meta, file1, file2 -> [meta, [file1, file2]]} - .set { ch_cat } + 
SAMTOOLS_BAM2FQ.out.reads + .join(UMI_MAP_GENOME.out.unmapped) + .map { meta, file1, file2 -> [meta, [file1, file2]]} + .set { ch_cat } - CAT_CAT ( ch_cat ) - ch_dedup_reads = CAT_CAT.out.file_out - } + CAT_CAT ( ch_cat ) + ch_dedup_reads = CAT_CAT.out.file_out } + emit: reads = ch_dedup_reads indices = bt_index From 3b21fe1a504dc7229d2f49d33bc7d570351bf617 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 08:39:29 +0000 Subject: [PATCH 082/117] Add UMI regex --- conf/test_umi.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/test_umi.config b/conf/test_umi.config index a50ddb7a..a7a59e75 100644 --- a/conf/test_umi.config +++ b/conf/test_umi.config @@ -28,4 +28,10 @@ params { protocol = 'illumina' skip_mirdeep = true + //UMI Specific testcase + with_umi = true + umitools_extract_method = 'regex' + umitools_bc_pattern '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' + save_umi_intermeds = true + } From 8e21df8efa67c7e103fc34d5239c6e9a1a716892 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 08:57:18 +0000 Subject: [PATCH 083/117] Some more adjustments to make nf-core modules work --- conf/test_umi.config | 2 +- nextflow.config | 1 + nextflow_schema.json | 5 +++++ subworkflows/local/umi_dedup.nf | 9 +++++---- workflows/smrnaseq.nf | 3 ++- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/conf/test_umi.config b/conf/test_umi.config index a7a59e75..53a58f41 100644 --- a/conf/test_umi.config +++ b/conf/test_umi.config @@ -31,7 +31,7 @@ params { //UMI Specific testcase with_umi = true umitools_extract_method = 'regex' - umitools_bc_pattern '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' + umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' save_umi_intermeds = true } diff --git a/nextflow.config b/nextflow.config index b32d5ecf..0a6d0945 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,6 +38,7 @@ params { umi_discard_read = null save_umi_intermeds = false umi_merge_unmapped = 
true + umi_stats = true // Trimming options clip_r1 = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a8eebb26..cd8fa7a5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -94,6 +94,11 @@ "fa_icon": "fas fa-save", "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias", "default": true + }, + "umi_stats": { + "type": "boolean", + "default": true, + "description": "Compute UMI statistics for MultiQC" } }, "fa_icon": "fas fa-barcode" diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index db9065ea..1c88cde0 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -15,6 +15,7 @@ workflow DEDUPLICATE_UMIS { fasta bt_index reads // channel: [ val(meta), [ reads ] ] + val_get_dedup_stats //boolean true/false main: @@ -35,11 +36,11 @@ workflow DEDUPLICATE_UMIS { UMI_MAP_GENOME ( reads, bt_index.collect() ) ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) - BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + BAM_SORT_STATS_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai) - UMITOOLS_DEDUP ( ch_umi_dedup ) + //ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) + UMITOOLS_DEDUP ( BAM_SORT_STATS_SAMTOOLS.out.bam, val_get_dedup_stats) ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index a9f34706..cb46b44d 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -167,7 +167,8 @@ workflow SMRNASEQ { 
DEDUPLICATE_UMIS ( fasta_ch, INDEX_GENOME.out.bowtie_indices, - FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads + FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, + params.umi_stats ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) From fc0119558de7a6a73f85241000fda4fa368a828b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 09:00:44 +0000 Subject: [PATCH 084/117] Fix meta2 --- subworkflows/local/umi_dedup.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 1c88cde0..3d12fd99 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -23,7 +23,7 @@ workflow DEDUPLICATE_UMIS { ch_dedup_stats = Channel.empty() if (!bt_index){ - INDEX_GENOME ( fasta ) + INDEX_GENOME ( [ [:], fasta ] ) bt_index = INDEX_GENOME.out.bowtie_indices fasta_formatted = INDEX_GENOME.out.fasta ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 14109235504f62bd912373d785af21027c96a4ca Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 09:05:31 +0000 Subject: [PATCH 085/117] Passing on meta2 --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index cb46b44d..ad291593 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -160,7 +160,7 @@ workflow SMRNASEQ { fasta_ch = file(params.fasta) //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs. 
- INDEX_GENOME ( fasta_ch ) + INDEX_GENOME ( [ [:], fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 12be1865b66035915f7b1086a723817b60bf5d2b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 09:05:49 +0000 Subject: [PATCH 086/117] use the channels, luke --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index ad291593..872de5f3 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -160,7 +160,7 @@ workflow SMRNASEQ { fasta_ch = file(params.fasta) //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs. - INDEX_GENOME ( [ [:], fasta ] ) + INDEX_GENOME ( [ [:], fasta_ch ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) From 96adf135bedb6ed20394537b9915ca38cb89af01 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 20:42:21 +0000 Subject: [PATCH 087/117] Add better changelog with deps --- CHANGELOG.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 611c5338..fc286f02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--save_umi_intermeds` | | | `--umi_merge_unmapped` | +### Software dependencies + +| Dependency | Old version | New version | +| ------------ | ----------- | ----------- | +| `multiqc` | 1.15 | 1.19 | +| `edgeR` | 3.36.0 | 4.0.2 | +| `limma` | 3.50.0 | 3.58.1 | +| `bioconvert` | 0.4.3 | 1.1.1 | +| `mirdeep` | 2.0.1 | 2.0.1.3 | +| `seqkit` | 2.3.1 | 2.6.1 | +| `fastqc` | 0.11.4 | 0.12.1 | +| `samtools` | 1.17 | 1.18 | +| `umitools` | | 1.1.4 | + ## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03 - Update template to 2.10 @@ -119,7 +133,6 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | `seqkit` | 0.16.0 | 2.0.0 | | 
`trim-galore` | 0.6.6 | 0.6.7 | | `bioconvert` | - | 0.4.3 | -| `umi_tools` | - | 1.1.2 | | `htseq` | - | - | | `markdown` | - | - | | `pymdown-extensions` | - | - | From 5c7227a30de536d24228ff3b516213f852a05065 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 20:42:44 +0000 Subject: [PATCH 088/117] Add updated modules --- modules.json | 8 +- modules/nf-core/cat/cat/main.nf | 11 +- modules/nf-core/cat/cat/tests/main.nf.test | 6 +- .../nf-core/cat/cat/tests/main.nf.test.snap | 92 +++++---- modules/nf-core/cat/fastq/tests/main.nf.test | 63 +++--- .../nf-core/cat/fastq/tests/main.nf.test.snap | 185 +++++++++++++----- modules/nf-core/fastp/tests/main.nf.test | 95 +++++---- modules/nf-core/fastp/tests/main.nf.test.snap | 16 +- 8 files changed, 295 insertions(+), 181 deletions(-) diff --git a/modules.json b/modules.json index 40eda6cc..f11503fc 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "cat/cat": { "branch": "master", - "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2", + "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { @@ -22,13 +22,13 @@ }, "fastp": { "branch": "master", - "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520", + "git_sha": "1799e452de650f6fb8890d25829bca23014b0728", "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "fastqc": { "branch": "master", "git_sha": "617777a807a1770f73deb38c80004bac06807eef", - "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + "installed_by": ["fastq_fastqc_umitools_fastp"] }, "multiqc": { "branch": "master", diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 970ab760..adbdbd7b 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ 
-22,6 +22,8 @@ process CAT_CAT { def args2 = task.ext.args2 ?: '' def file_list = files_in.collect { it.toString() } + // choose appropriate concatenation tool depending on input and output format + // | input | output | command1 | command2 | // |-----------|------------|----------|----------| // | gzipped | gzipped | cat | | @@ -30,7 +32,7 @@ process CAT_CAT { // | ungzipped | gzipped | cat | pigz | // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' @@ -68,3 +70,10 @@ process CAT_CAT { END_VERSIONS """ } + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index ed5a4f12..aaae04f9 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -83,8 +83,7 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + { assert snapshot(process.out).match() } ) } } @@ -142,8 +141,7 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + { assert snapshot(process.out).match() } ) } } diff --git 
a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 423571ba..0c9bfe8d 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,10 +1,4 @@ { - "test_cat_unzipped_zipped_size": { - "content": [ - 375 - ], - "timestamp": "2023-10-16T14:33:08.049445686" - }, "test_cat_unzipped_unzipped": { "content": [ { @@ -67,31 +61,36 @@ ], "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped_lines": { - "content": [ - [ - "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", - "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", - "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", - "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" - ] - ], - "timestamp": "2023-10-16T14:32:33.629048645" - }, - "test_cat_unzipped_zipped_lines": { + "test_cat_zipped_zipped": { "content": [ - [ - ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", - "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", - "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", - 
"TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", - "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", - "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } ], - "timestamp": "2023-10-16T14:33:08.038830506" + "timestamp": "2024-01-12T14:02:02.999254641" }, "test_cat_one_file_unzipped_zipped_lines": { "content": [ @@ -106,16 +105,41 @@ ], "timestamp": "2023-10-16T14:33:21.39642399" }, - "test_cat_zipped_zipped_size": { + "test_cat_unzipped_zipped": { "content": [ - 78 + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } ], - "timestamp": "2023-10-16T14:32:33.641869244" + "timestamp": "2024-01-12T14:08:26.948048418" }, "test_cat_one_file_unzipped_zipped_size": { "content": [ 374 ], - "timestamp": "2023-10-16T14:33:21.4094373" + "timestamp": "2024-01-12T14:10:22.445700266" } -} \ No newline at end of file +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index f5f94182..dab2e14c 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', 
single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -28,8 +28,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -42,13 +41,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -56,8 +55,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { 
assert snapshot(process.out).match() } ) } } @@ -70,11 +68,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -82,8 +80,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -96,13 +93,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -110,8 +107,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() 
}, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -124,10 +120,10 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -135,8 +131,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index ec2342e5..43dfe28f 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -1,78 +1,169 @@ { "test_cat_fastq_single_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:12.990284837" + "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" 
+ ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:31.554568147" + "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:49.629360033" + "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ [ - "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", - "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:40.711617539" + "timestamp": "2024-01-17T17:33:33.031555" }, 
"test_cat_fastq_paired_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ [ - "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", - "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-18T07:53:20.923560211" + "timestamp": "2024-01-17T17:32:02.270935" } } \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index 17dce8ac..dcf4fd6f 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -19,11 +19,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -89,11 +88,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = 
adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -135,12 +133,11 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -220,12 +217,11 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -266,10 +262,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] - ] - + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -336,10 +332,10 @@ nextflow_process { save_trimmed_fail 
= false save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] - ] - + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -380,9 +376,10 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -440,13 +437,11 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -519,11 +514,11 @@ nextflow_process { adapter_fasta = [] save_trimmed_fail = false save_merged = true - - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] 
- ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -613,10 +608,11 @@ nextflow_process { save_trimmed_fail = false save_merged = true - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -653,14 +649,15 @@ nextflow_process { } process { """ - adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) save_trimmed_fail = false save_merged = true - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 1b7d2419..6a71b680 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -12,7 +12,7 @@ "{id=test, single_end=false}" ] ], - "timestamp": "2023-12-21T09:44:37.202512" + "timestamp": "2024-01-17T18:07:15.398827" }, "fastp test_fastp_interleaved_json": { "content": [ @@ -26,7 +26,7 @@ ] ] ], - "timestamp": "2023-10-17T11:04:45.794175881" + "timestamp": "2024-01-17T18:08:06.123035" }, "test_fastp_paired_end_merged-for_stub_match": { "content": [ @@ -42,7 +42,7 @@ "{id=test, single_end=false}" ] ], - "timestamp": "2023-12-21T09:53:45.237014" + "timestamp": "2024-01-17T18:10:13.467574" }, "test_fastp_single_end_json": { "content": [ @@ -56,7 +56,7 @@ ] ] ], - "timestamp": "2023-10-17T11:04:10.566343705" + "timestamp": "2024-01-17T18:06:00.223817" }, "versions": { "content": [ @@ -64,7 +64,7 @@ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], - "timestamp": "2023-10-17T11:04:10.582076024" + "timestamp": "2024-01-17T18:06:00.248422" }, "test_fastp_interleaved-for_stub_match": { "content": [ @@ -76,7 +76,7 @@ "{id=test, single_end=true}" ] ], - "timestamp": "2023-12-21T09:48:43.148485" + "timestamp": "2024-01-17T18:08:06.127974" }, "test_fastp_single_end-for_stub_match": { "content": [ @@ -88,7 +88,7 @@ "{id=test, single_end=true}" ] ], - "timestamp": "2023-12-21T09:20:07.254788" + "timestamp": "2024-01-17T18:06:00.244202" }, "test_fastp_single_end_trim_fail_json": { "content": [ @@ -102,6 +102,6 @@ ] ] ], - "timestamp": "2023-10-17T11:05:00.379878948" + "timestamp": "2024-01-17T18:08:41.942317" } } \ No newline at end of file From 8d19dbf47604d5cd2acd898586b23d093102766f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 
2024 20:43:07 +0000 Subject: [PATCH 089/117] Add updated modules.json --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index f11503fc..9d68299f 100644 --- a/modules.json +++ b/modules.json @@ -91,7 +91,7 @@ }, "fastq_fastqc_umitools_fastp": { "branch": "master", - "git_sha": "668185ddcd2d9084c819691c99020360e0f029a0", + "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c", "installed_by": ["subworkflows"] } } From 84da917bc464f02147a40515ebad4942880a4d2c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 17 Jan 2024 20:53:29 +0000 Subject: [PATCH 090/117] Fixing the error :) --- subworkflows/local/umi_dedup.nf | 3 +-- workflows/smrnaseq.nf | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 3d12fd99..4a7f80f9 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -24,11 +24,10 @@ workflow DEDUPLICATE_UMIS { if (!bt_index){ INDEX_GENOME ( [ [:], fasta ] ) - bt_index = INDEX_GENOME.out.bowtie_indices + bt_index = INDEX_GENOME.out.index fasta_formatted = INDEX_GENOME.out.fasta ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) } else { - bt_index = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" } fasta_formatted = fasta } diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 872de5f3..02501098 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -166,7 +166,7 @@ workflow SMRNASEQ { DEDUPLICATE_UMIS ( fasta_ch, - INDEX_GENOME.out.bowtie_indices, + INDEX_GENOME.out.index, FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, params.umi_stats ) From e95f075895e77325244073017c20b474bb547730 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 18 Jan 2024 07:58:51 +0000 Subject: [PATCH 091/117] [automated] Fix linting with Prettier --- .devcontainer/devcontainer.json | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..4a9bc5c7 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } From c51f54d491fbcb192c270ebad97da5b5a0d8c5b1 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 18 Jan 2024 08:21:09 +0000 Subject: [PATCH 092/117] This should fix umitools extract --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 769f62f8..6e6820f5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -153,7 +153,7 @@ if (!params.skip_fastp) { if (params.with_umi && !params.skip_umi_extract) { process { - withName: '.*:FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { ext.args = [ params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', params.umitools_bc_pattern ? 
"--bc-pattern='${params.umitools_bc_pattern}'" : '', From 3776e1834a37c5a3d32df3ebbceb5f89d1312bc4 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 18 Jan 2024 08:29:52 +0000 Subject: [PATCH 093/117] Improve more modules --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6e6820f5..23406e58 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -201,7 +201,7 @@ if (params.with_umi) { ] } - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/umi_dedup" }, @@ -213,7 +213,7 @@ if (params.with_umi) { ] } - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/umi_dedup" }, @@ -225,7 +225,7 @@ if (params.with_umi) { ] } - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/umi_dedup/samtools_stats" }, mode: params.publish_dir_mode, From a5daf6b74ffe4649403b6921e2c6aae88aec357d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 18 Jan 2024 08:30:52 +0000 Subject: [PATCH 094/117] Less lines --- subworkflows/local/mirna_quant.nf | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index e26839bc..fc6942fe 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -33,8 +33,6 @@ workflow MIRNA_QUANT { main: ch_versions = Channel.empty() - - PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed } ch_versions = ch_versions.mix(PARSE_MATURE.out.versions) @@ -78,7 +76,6 @@ workflow MIRNA_QUANT { 
ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions) - BAM_STATS_MATURE.out.idxstats.collect{it[1]} .mix(BAM_STATS_HAIRPIN.out.idxstats.collect{it[1]}) .dump(tag:'edger') @@ -87,8 +84,6 @@ workflow MIRNA_QUANT { .set { edger_input } EDGER_QC ( edger_input ) - - reads .map { add_suffix(it, "seqcluster") } .dump (tag:'ssux') @@ -100,9 +95,6 @@ workflow MIRNA_QUANT { BOWTIE_MAP_SEQCLUSTER ( reads_collapsed, hairpin_bowtie.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions) - - - ch_mirtop_logs = Channel.empty() if (params.mirtrace_species){ MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf ) @@ -117,8 +109,6 @@ workflow MIRNA_QUANT { .dump (tag:'gsux') .set { reads_genome } - - emit: fasta_mature = FORMAT_MATURE.out.formatted_fasta fasta_hairpin = FORMAT_HAIRPIN.out.formatted_fasta From 4baf752461389e77f0bc7b9c036ebd40283596e2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 18 Jan 2024 09:31:57 +0100 Subject: [PATCH 095/117] Apply suggestions from code review Co-authored-by: Maxime U Garcia --- nextflow.config | 8 ++++---- workflows/smrnaseq.nf | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/nextflow.config b/nextflow.config index 0a6d0945..4d7eb423 100644 --- a/nextflow.config +++ b/nextflow.config @@ -247,8 +247,6 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } @@ -276,8 +274,10 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Set default registry for Docker and Podman independent of -profile // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers -docker.registry = 'quay.io' -podman.registry = 'quay.io' 
+apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 02501098..4b81796c 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -64,15 +64,15 @@ if (!params.mirgenedb) { if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} } -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INPUT_CHECK } from '../subworkflows/local/input_check' include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' -include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' -include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' -include { INDEX_GENOME } from '../modules/local/bowtie_genome' +include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' +include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { INDEX_GENOME } from '../modules/local/bowtie_genome' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 533a0f35e6e78d41b496f12af4f6004263a3b20f Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 10:16:06 
+0100 Subject: [PATCH 096/117] Fix input cardinality --- subworkflows/local/umi_dedup.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 4a7f80f9..f8981758 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -38,8 +38,8 @@ workflow DEDUPLICATE_UMIS { BAM_SORT_STATS_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - //ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) - UMITOOLS_DEDUP ( BAM_SORT_STATS_SAMTOOLS.out.bam, val_get_dedup_stats) + ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) + UMITOOLS_DEDUP ( ch_umi_dedup, val_get_dedup_stats) ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) From 60052b62d0bc07f6b31e1f68e0859c77713e88bf Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 10:37:26 +0100 Subject: [PATCH 097/117] Compute index only once --- subworkflows/local/genome_quant.nf | 27 ++++++--------------- subworkflows/local/umi_dedup.nf | 11 --------- workflows/smrnaseq.nf | 39 ++++++++++++++++-------------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index b310c76a..0f97f16b 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -8,32 +8,19 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie workflow GENOME_QUANT { take: - fasta - index + bowtie_index + fasta_formatted // fasta as generated by bowtie index step reads // channel: [ val(meta), [ reads ] ] main: ch_versions = Channel.empty() - if (!index){ - INDEX_GENOME ( [ [:], fasta ] ) - bowtie_index = 
INDEX_GENOME.out.index - fasta_formatted = INDEX_GENOME.out.fasta - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - } else { - bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${index}" } - fasta_formatted = fasta - } + BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - if (bowtie_index){ - BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - fasta_formatted - .map { file -> tuple(file.baseName, file) } - .set { sort_input } - BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, sort_input ) - ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - } + ch_fasta_formatted_for_sort = fasta_formatted .map { file -> tuple(file.baseName, file) } + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, ch_fasta_formatted_for_sort ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: fasta = fasta_formatted diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index f8981758..2eda1b60 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -12,7 +12,6 @@ include { CAT_CAT } from '../../modules/nf-core/cat/ workflow DEDUPLICATE_UMIS { take: - fasta bt_index reads // channel: [ val(meta), [ reads ] ] val_get_dedup_stats //boolean true/false @@ -22,16 +21,6 @@ workflow DEDUPLICATE_UMIS { ch_versions = Channel.empty() ch_dedup_stats = Channel.empty() - if (!bt_index){ - INDEX_GENOME ( [ [:], fasta ] ) - bt_index = INDEX_GENOME.out.index - fasta_formatted = INDEX_GENOME.out.fasta - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - } else { - fasta_formatted = fasta - } - - UMI_MAP_GENOME ( reads, bt_index.collect() ) ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 4b81796c..56ab5a05 100644 
--- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -150,29 +150,32 @@ workflow SMRNASEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + ch_fasta = file(params.fasta) reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + //Prepare bowtie index, unless specified + //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT + if(params.bowtie_index) { + ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } + ch_fasta_formatted = ch_fasta + } else { + INDEX_GENOME ( [ [:], ch_fasta ] ) + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + ch_bowtie_index = INDEX_GENOME.out.index + ch_fasta_formatted = INDEX_GENOME.out.fasta + } + // // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome // if (params.with_umi){ - if (params.fasta){ - fasta_ch = file(params.fasta) - - //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs. 
- INDEX_GENOME ( [ [:], fasta_ch ] ) - - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - - DEDUPLICATE_UMIS ( - fasta_ch, - INDEX_GENOME.out.index, - FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, - params.umi_stats - ) - reads_for_mirna = DEDUPLICATE_UMIS.out.reads - ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) - } + DEDUPLICATE_UMIS ( + ch_bowtie_index, + FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, + params.umi_stats + ) + reads_for_mirna = DEDUPLICATE_UMIS.out.reads + ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) } @@ -225,7 +228,7 @@ workflow SMRNASEQ { // genome_stats = Channel.empty() if (params.fasta){ - GENOME_QUANT ( file(params.fasta), params.bowtie_index, MIRNA_QUANT.out.unmapped ) + GENOME_QUANT ( ch_bowtie_index, ch_fasta_formatted, MIRNA_QUANT.out.unmapped ) genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) From f04ef2e5ae0faf05cc87202c9cc2fe4df9a8dd6e Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 10:47:32 +0100 Subject: [PATCH 098/117] Check that fasta is provided when UMIs are used --- workflows/smrnaseq.nf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 56ab5a05..0fd20636 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -150,19 +150,22 @@ workflow SMRNASEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - ch_fasta = file(params.fasta) + if(params.with_umi && !params.fasta) { + error "Specifying a genome fasta is required for UMI deduplication" + } + ch_fasta = params.fasta ? 
file(params.fasta): [] reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads //Prepare bowtie index, unless specified //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT if(params.bowtie_index) { ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } - ch_fasta_formatted = ch_fasta } else { INDEX_GENOME ( [ [:], ch_fasta ] ) ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) ch_bowtie_index = INDEX_GENOME.out.index - ch_fasta_formatted = INDEX_GENOME.out.fasta + // set to reformatted fasta as generated by `bowtie index` + ch_fasta = INDEX_GENOME.out.fasta } // @@ -228,7 +231,7 @@ workflow SMRNASEQ { // genome_stats = Channel.empty() if (params.fasta){ - GENOME_QUANT ( ch_bowtie_index, ch_fasta_formatted, MIRNA_QUANT.out.unmapped ) + GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped ) genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) From 41bd0d06decdc62bb208e8ddffea48b29d33bb51 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 10:50:19 +0100 Subject: [PATCH 099/117] Acually use deduplicated reads --- workflows/smrnaseq.nf | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 0fd20636..f740ab8d 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -154,7 +154,7 @@ workflow SMRNASEQ { error "Specifying a genome fasta is required for UMI deduplication" } ch_fasta = params.fasta ? 
file(params.fasta): [] - reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads //Prepare bowtie index, unless specified //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT @@ -174,10 +174,10 @@ workflow SMRNASEQ { if (params.with_umi){ DEDUPLICATE_UMIS ( ch_bowtie_index, - FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, + ch_reads_for_mirna, params.umi_stats ) - reads_for_mirna = DEDUPLICATE_UMIS.out.reads + ch_reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) } @@ -186,7 +186,7 @@ workflow SMRNASEQ { // SUBWORKFLOW: mirtrace QC // FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq - .join( FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ) + .join( ch_reads_for_mirna ) .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] } .groupTuple() .set { ch_mirtrace_inputs } @@ -199,7 +199,6 @@ workflow SMRNASEQ { // SUBWORKFLOW: remove contaminants from reads // contamination_stats = Channel.empty() - mirna_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads if (params.filter_contamination){ CONTAMINANT_FILTER ( reference_hairpin, @@ -209,7 +208,7 @@ workflow SMRNASEQ { params.ncrna, params.pirna, params.other_contamination, - FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + ch_reads_for_mirna ) contamination_stats = CONTAMINANT_FILTER.out.filter_stats @@ -240,7 +239,7 @@ workflow SMRNASEQ { if (!params.skip_mirdeep) { MIRDEEP2 ( - FASTQ_FASTQC_UMITOOLS_FASTP.out.reads, + ch_reads_for_mirna, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), hairpin_clean, From 6213f5f06245917975a13f0d07fd19aebba36bee Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 11:01:49 +0100 Subject: [PATCH 100/117] Use contaminant-filtered reads downstream --- modules/local/mirtrace.nf | 2 +- workflows/smrnaseq.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf index 
95989293..500de058 100644 --- a/modules/local/mirtrace.nf +++ b/modules/local/mirtrace.nf @@ -44,7 +44,7 @@ process MIRTRACE_RUN { cat <<-END_VERSIONS > versions.yml "${task.process}": - mirtrace: \$(echo \$(mirtrace -v 2>&1)) + mirtrace: \$(echo \$(mirtrace -v)) END_VERSIONS """ diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index f740ab8d..29010308 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -213,7 +213,7 @@ workflow SMRNASEQ { contamination_stats = CONTAMINANT_FILTER.out.filter_stats ch_versions = ch_versions.mix(CONTAMINANT_FILTER.out.versions) - mirna_reads = CONTAMINANT_FILTER.out.filtered_reads + ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads } @@ -221,7 +221,7 @@ workflow SMRNASEQ { [ [:], reference_mature], [ [:], reference_hairpin], mirna_gtf, - mirna_reads + ch_reads_for_mirna ) ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions.ifEmpty(null)) From ed8ad08092229f322c0f1ddb4d6dc515bc5049fe Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 11:19:13 +0100 Subject: [PATCH 101/117] Ensure fasta is optional --- workflows/smrnaseq.nf | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 29010308..7c1103d4 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -156,16 +156,20 @@ workflow SMRNASEQ { ch_fasta = params.fasta ? 
file(params.fasta): [] ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads - //Prepare bowtie index, unless specified - //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT - if(params.bowtie_index) { - ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } - } else { - INDEX_GENOME ( [ [:], ch_fasta ] ) - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - ch_bowtie_index = INDEX_GENOME.out.index - // set to reformatted fasta as generated by `bowtie index` - ch_fasta = INDEX_GENOME.out.fasta + // even if bowtie index is specified, there still needs to be a fasta. + // without fasta, no genome analysis. + if(params.fasta) { + //Prepare bowtie index, unless specified + //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT + if(params.bowtie_index) { + ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } + } else { + INDEX_GENOME ( [ [:], ch_fasta ] ) + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + ch_bowtie_index = INDEX_GENOME.out.index + // set to reformatted fasta as generated by `bowtie index` + ch_fasta = INDEX_GENOME.out.fasta + } } // From ed99b7ea082557c8969117a373d2ba7eb2205502 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 11:41:23 +0100 Subject: [PATCH 102/117] Attempt to disable umi_stats on CI --- conf/test_umi.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_umi.config b/conf/test_umi.config index 53a58f41..7d9e6e53 100644 --- a/conf/test_umi.config +++ b/conf/test_umi.config @@ -33,5 +33,6 @@ params { umitools_extract_method = 'regex' umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' save_umi_intermeds = true + umi_stats = false // takes too much memory for CI } From 1c278fe0e755a102de7d2c299bed02925699a2d2 Mon Sep 17 00:00:00 
2001 From: Gregor Sturm Date: Fri, 19 Jan 2024 09:32:03 +0100 Subject: [PATCH 103/117] increase limits for umitools dedup --- conf/modules.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 23406e58..b64c6691 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -199,6 +199,8 @@ if (params.with_umi) { ) ] ] + time = { check_max( 120.h , 'time' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } } withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { From 91e67f2520715b5e12c14e9fb213905b49df85c3 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 22 Jan 2024 12:28:34 +0000 Subject: [PATCH 104/117] Adding in possibiltiy to use different method for UMI grouping --- conf/modules.config | 2 +- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index b64c6691..28790647 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) { if (params.with_umi) { process { withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' { - ext.args = { meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard' } + ext.args = { meta.single_end ? 
'--method $params.method' : '--method $params.method --unpaired-reads=discard --chimeric-pairs=discard' } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ [ diff --git a/nextflow.config b/nextflow.config index 4d7eb423..82058ae6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,6 +36,7 @@ params { umitools_extract_method = 'string' umitools_bc_pattern = null umi_discard_read = null + umitools_method = 'directional' save_umi_intermeds = false umi_merge_unmapped = true umi_stats = true diff --git a/nextflow_schema.json b/nextflow_schema.json index cd8fa7a5..a5434a37 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -68,6 +68,13 @@ "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" }, + "umitools_method": { + "type": "string", + "default": "directional", + "description": "UMI grouping method", + "fa_icon": "fas fa-layer-group", + "help_text": "Available options are unique, percentile, cluster, adjacency or directional." 
+ }, "skip_umi_extract": { "type": "boolean", "fa_icon": "fas fa-compress-alt", From cfac917f8f03e74001bc518aa38041d5d147928f Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 13:41:22 +0100 Subject: [PATCH 105/117] Install umicollapse instead of umitools dedup --- modules.json | 82 ++++++++++++++----- .../dedup => umicollapse}/environment.yml | 4 +- modules/nf-core/umicollapse/main.nf | 50 +++++++++++ modules/nf-core/umicollapse/meta.yml | 51 ++++++++++++ modules/nf-core/umitools/dedup/main.nf | 62 -------------- modules/nf-core/umitools/dedup/meta.yml | 71 ---------------- 6 files changed, 165 insertions(+), 155 deletions(-) rename modules/nf-core/{umitools/dedup => umicollapse}/environment.yml (56%) create mode 100644 modules/nf-core/umicollapse/main.nf create mode 100644 modules/nf-core/umicollapse/meta.yml delete mode 100644 modules/nf-core/umitools/dedup/main.nf delete mode 100644 modules/nf-core/umitools/dedup/meta.yml diff --git a/modules.json b/modules.json index 9d68299f..53f34207 100644 --- a/modules.json +++ b/modules.json @@ -8,72 +8,107 @@ "cat/cat": { "branch": "master", "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastp": { "branch": "master", "git_sha": "1799e452de650f6fb8890d25829bca23014b0728", - "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + "installed_by": [ + "fastq_fastqc_umitools_fastp", + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "617777a807a1770f73deb38c80004bac06807eef", - "installed_by": ["fastq_fastqc_umitools_fastp"] + "installed_by": [ + "fastq_fastqc_umitools_fastp" + ] }, 
"multiqc": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/bam2fq": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_sort_stats_samtools", "modules"] + "installed_by": [ + "bam_sort_stats_samtools", + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_sort_stats_samtools", "modules"] + "installed_by": [ + "bam_sort_stats_samtools", + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_stats_samtools", "modules"] + "installed_by": [ + "bam_stats_samtools", + "modules" + ] }, - "umitools/dedup": { + "umicollapse": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["modules"] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": [ + "modules" + ] }, "umitools/extract": { "branch": "master", "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] + "installed_by": [ + "fastq_fastqc_umitools_fastp", + "modules" + ] } } }, @@ -82,20 +117,27 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", 
- "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_stats_samtools": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["bam_sort_stats_samtools", "subworkflows"] + "installed_by": [ + "bam_sort_stats_samtools", + "subworkflows" + ] }, "fastq_fastqc_umitools_fastp": { "branch": "master", "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/umitools/dedup/environment.yml b/modules/nf-core/umicollapse/environment.yml similarity index 56% rename from modules/nf-core/umitools/dedup/environment.yml rename to modules/nf-core/umicollapse/environment.yml index f443735f..8dbc65dc 100644 --- a/modules/nf-core/umitools/dedup/environment.yml +++ b/modules/nf-core/umicollapse/environment.yml @@ -1,7 +1,7 @@ -name: umitools_dedup +name: umicollapse channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::umi_tools=1.1.4 + - bioconda::umicollapse=1.0.0 diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf new file mode 100644 index 00000000..01ad2541 --- /dev/null +++ b/modules/nf-core/umicollapse/main.nf @@ -0,0 +1,50 @@ +process UMICOLLAPSE { + tag "$meta.id" + label "process_high_memory" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umicollapse:1.0.0--hdfd78af_1' : + 'biocontainers/umicollapse:1.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.log"), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + umicollapse \\ + bam \\ + -i $bam \\ + -o ${prefix}.bam \\ + $args + + mv .command.log ${prefix}_UMICollapse.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dedup.bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml new file mode 100644 index 00000000..528bc0ee --- /dev/null +++ b/modules/nf-core/umicollapse/meta.yml @@ -0,0 +1,51 @@ +--- +name: "umicollapse" +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umicollapse + - deduplication + - genomics +tools: + - "umicollapse": + description: "UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)." + homepage: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + documentation: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + tool_dev_url: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + doi: "10.7717/peerj.8275" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@CharlotteAnne" + - "@chris-cheshire" +maintainers: + - "@CharlotteAnne" + - "@chris-cheshire" diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf deleted file mode 100644 index 64ab8f98..00000000 --- a/modules/nf-core/umitools/dedup/main.nf +++ /dev/null @@ -1,62 +0,0 @@ -process UMITOOLS_DEDUP { - tag "$meta.id" - label "process_medium" - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : - 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" - - input: - tuple val(meta), path(bam), path(bai) - val get_output_stats - - output: - tuple val(meta), path("${prefix}.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - stats = get_output_stats ? 
"--output-stats ${prefix}" : "" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" - - if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} - """ - PYTHONHASHSEED=0 umi_tools \\ - dedup \\ - -I $bam \\ - -S ${prefix}.bam \\ - -L ${prefix}.log \\ - $stats \\ - $paired \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) - END_VERSIONS - """ - - stub: - """ - touch ${prefix}.bam - touch ${prefix}.log - touch ${prefix}_edit_distance.tsv - touch ${prefix}_per_umi.tsv - touch ${prefix}_per_position.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml deleted file mode 100644 index 38d3fd46..00000000 --- a/modules/nf-core/umitools/dedup/meta.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. -keywords: - - umitools - - deduplication - - dedup -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes - - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" - - get_output_stats: - type: boolean - description: | - Whether or not to generate output stats. 
-output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" - - log: - type: file - description: File with logging information - pattern: "*.{log}" - - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" - - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" - - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@klkeys" -maintainers: - - "@drpatelh" - - "@grst" - - "@klkeys" From b9208eace2d4c052a9496084002057878cd504c2 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 13:55:37 +0100 Subject: [PATCH 106/117] Switch to umicollapse --- conf/modules.config | 23 ++++++++--------------- nextflow.config | 3 +-- nextflow_schema.json | 9 ++------- subworkflows/local/umi_dedup.nf | 11 ++++------- 4 files changed, 15 insertions(+), 31 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 28790647..9fe19d49 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -181,23 +181,16 @@ if (params.with_umi && !params.skip_umi_extract) { if (params.with_umi) { process { - withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' { - ext.args = { meta.single_end ? '--method $params.method' : '--method $params.method --unpaired-reads=discard --chimeric-pairs=discard' } + withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { + ext.args = { meta.single_end ? 
'--algo ${params.umitools_method}' : '--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric' } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ - [ - path: { "${params.outdir}/umi_dedup/umitools" }, - mode: params.publish_dir_mode, - pattern: '*.tsv' - ], - [ - path: { "${params.outdir}/umi_dedup" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_umi_intermeds - ) - ] + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_umi_intermeds + ) ] time = { check_max( 120.h , 'time' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } diff --git a/nextflow.config b/nextflow.config index 82058ae6..0c3d8b45 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,10 +36,9 @@ params { umitools_extract_method = 'string' umitools_bc_pattern = null umi_discard_read = null - umitools_method = 'directional' + umitools_method = 'dir' save_umi_intermeds = false umi_merge_unmapped = true - umi_stats = true // Trimming options clip_r1 = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a5434a37..c4d62d96 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -70,10 +70,10 @@ }, "umitools_method": { "type": "string", - "default": "directional", + "default": "dir", "description": "UMI grouping method", "fa_icon": "fas fa-layer-group", - "help_text": "Available options are unique, percentile, cluster, adjacency or directional." 
+ "help_text": "Available options are dir, cc, adj" }, "skip_umi_extract": { "type": "boolean", @@ -101,11 +101,6 @@ "fa_icon": "fas fa-save", "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias", "default": true - }, - "umi_stats": { - "type": "boolean", - "default": true, - "description": "Compute UMI statistics for MultiQC" } }, "fa_icon": "fas fa-barcode" diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 2eda1b60..01fe678c 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -5,7 +5,7 @@ include { INDEX_GENOME } from '../../modules/local/bowtie_genome' include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' -include { UMITOOLS_DEDUP } from '../../modules/nf-core/umitools/dedup/main' +include { UMICOLLAPSE } from '../../modules/nf-core/umicollapse/main' include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' @@ -19,7 +19,6 @@ workflow DEDUPLICATE_UMIS { main: ch_versions = Channel.empty() - ch_dedup_stats = Channel.empty() UMI_MAP_GENOME ( reads, bt_index.collect() ) ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) @@ -28,11 +27,10 @@ workflow DEDUPLICATE_UMIS { ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) - UMITOOLS_DEDUP ( ch_umi_dedup, val_get_dedup_stats) - ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) - ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position) + UMICOLLAPSE ( ch_umi_dedup, val_get_dedup_stats) + ch_versions = 
ch_versions.mix(UMICOLLAPSE.out.versions) - SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false ) + SAMTOOLS_BAM2FQ ( UMICOLLAPSE.out.bam, false ) ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads @@ -52,6 +50,5 @@ workflow DEDUPLICATE_UMIS { emit: reads = ch_dedup_reads indices = bt_index - stats = ch_dedup_stats versions = ch_versions } From 65f7f0e0cc812c9921129a0200ee457189fa75d2 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 14:15:05 +0100 Subject: [PATCH 107/117] Switch to umicollapse --- conf/modules.config | 4 +--- conf/test_umi.config | 2 -- subworkflows/local/umi_dedup.nf | 3 +-- workflows/smrnaseq.nf | 1 - 4 files changed, 2 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9fe19d49..a5f7ea37 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) { if (params.with_umi) { process { withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { - ext.args = { meta.single_end ? '--algo ${params.umitools_method}' : '--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric' } + ext.args = { meta.single_end ? 
"--algo ${params.umitools_method}" : "--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric" } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ path: { "${params.outdir}/umi_dedup" }, @@ -192,8 +192,6 @@ if (params.with_umi) { params.save_umi_intermeds ) ] - time = { check_max( 120.h , 'time' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } } withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { diff --git a/conf/test_umi.config b/conf/test_umi.config index 7d9e6e53..48f61181 100644 --- a/conf/test_umi.config +++ b/conf/test_umi.config @@ -33,6 +33,4 @@ params { umitools_extract_method = 'regex' umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' save_umi_intermeds = true - umi_stats = false // takes too much memory for CI - } diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 01fe678c..5ef4b908 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -14,7 +14,6 @@ workflow DEDUPLICATE_UMIS { take: bt_index reads // channel: [ val(meta), [ reads ] ] - val_get_dedup_stats //boolean true/false main: @@ -27,7 +26,7 @@ workflow DEDUPLICATE_UMIS { ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) - UMICOLLAPSE ( ch_umi_dedup, val_get_dedup_stats) + UMICOLLAPSE(ch_umi_dedup) ch_versions = ch_versions.mix(UMICOLLAPSE.out.versions) SAMTOOLS_BAM2FQ ( UMICOLLAPSE.out.bam, false ) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 7c1103d4..58a4e8ad 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -179,7 +179,6 @@ workflow SMRNASEQ { DEDUPLICATE_UMIS ( ch_bowtie_index, ch_reads_for_mirna, - params.umi_stats ) ch_reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) From 82436515987080d4cf132cfbc1b31c7c2ade9d8f Mon Sep 17 00:00:00 2001 From: 
nf-core-bot Date: Mon, 22 Jan 2024 13:16:26 +0000 Subject: [PATCH 108/117] [automated] Fix linting with Prettier --- modules.json | 78 ++++++++++++---------------------------------------- 1 file changed, 18 insertions(+), 60 deletions(-) diff --git a/modules.json b/modules.json index 53f34207..03697a05 100644 --- a/modules.json +++ b/modules.json @@ -8,107 +8,72 @@ "cat/cat": { "branch": "master", "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastp": { "branch": "master", "git_sha": "1799e452de650f6fb8890d25829bca23014b0728", - "installed_by": [ - "fastq_fastqc_umitools_fastp", - "modules" - ] + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "fastqc": { "branch": "master", "git_sha": "617777a807a1770f73deb38c80004bac06807eef", - "installed_by": [ - "fastq_fastqc_umitools_fastp" - ] + "installed_by": ["fastq_fastqc_umitools_fastp"] }, "multiqc": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/bam2fq": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": 
["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_sort_stats_samtools", - "modules" - ] + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_stats_samtools", - "modules" - ] + "installed_by": ["bam_stats_samtools", "modules"] }, "umicollapse": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": [ - "fastq_fastqc_umitools_fastp", - "modules" - ] + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] } } }, @@ -117,27 +82,20 @@ "bam_sort_stats_samtools": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "bam_sort_stats_samtools", - "subworkflows" - ] + "installed_by": ["bam_sort_stats_samtools", "subworkflows"] }, "fastq_fastqc_umitools_fastp": { "branch": "master", "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From ea68c8e6864ecfffeee684915ecc93fca2cd184c Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 15:03:13 +0100 Subject: [PATCH 109/117] Don't merge deduplicated fastqs with unmapped fastqs --- CHANGELOG.md | 3 ++- docs/output.md | 2 +- 
nextflow.config | 1 - nextflow_schema.json | 6 ------ subworkflows/local/umi_dedup.nf | 12 ------------ 5 files changed, 3 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc286f02..10fc5720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ------------- | --------------------------- | | | `--with_umi` | | | `--umitools_extract_method` | +| | `--umitools_method` | +| | `--skip_umi_extract` | | | `--umitools_bc_pattern` | | | `--umi_discard_read` | | | `--save_umi_intermeds` | -| | `--umi_merge_unmapped` | ### Software dependencies diff --git a/docs/output.md b/docs/output.md index fc9d14ef..537f17c5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -83,7 +83,7 @@ FastP can automatically detect adapter sequences when not specified directly by - `samtools_stats/` - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. -[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline. +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. 
Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format. The resulting fastq files are used in the remaining steps of the pipeline. ## Bowtie2 diff --git a/nextflow.config b/nextflow.config index 0c3d8b45..0ea71080 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,7 +38,6 @@ params { umi_discard_read = null umitools_method = 'dir' save_umi_intermeds = false - umi_merge_unmapped = true // Trimming options clip_r1 = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c4d62d96..204376ab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -95,12 +95,6 @@ "type": "boolean", "fa_icon": "fas fa-save", "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." 
- }, - "umi_merge_unmapped": { - "type": "boolean", - "fa_icon": "fas fa-save", - "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias", - "default": true } }, "fa_icon": "fas fa-barcode" diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf index 5ef4b908..9f65fa8e 100644 --- a/subworkflows/local/umi_dedup.nf +++ b/subworkflows/local/umi_dedup.nf @@ -34,18 +34,6 @@ workflow DEDUPLICATE_UMIS { ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads - if ( params.umi_merge_unmapped ) { - - SAMTOOLS_BAM2FQ.out.reads - .join(UMI_MAP_GENOME.out.unmapped) - .map { meta, file1, file2 -> [meta, [file1, file2]]} - .set { ch_cat } - - CAT_CAT ( ch_cat ) - ch_dedup_reads = CAT_CAT.out.file_out - } - - emit: reads = ch_dedup_reads indices = bt_index From d493ff7b51adb5184c80d0e89cfce2d5482aa0bb Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 16:07:39 +0100 Subject: [PATCH 110/117] Try to set heap size --- modules/nf-core/umicollapse/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 01ad2541..593463f8 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -25,6 +25,7 @@ process UMICOLLAPSE { """ umicollapse \\ + -Xmx${task.memory.toMega() * 0.8}M \\ bam \\ -i $bam \\ -o ${prefix}.bam \\ From 6fe662941fa3edfa409d98f40e3041a6d22c6392 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 16:13:20 +0100 Subject: [PATCH 111/117] cast heap size to int --- modules/nf-core/umicollapse/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 593463f8..314e0df1 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -25,7 +25,7 @@ process UMICOLLAPSE { """ umicollapse \\ - 
-Xmx${task.memory.toMega() * 0.8}M \\ + -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\ bam \\ -i $bam \\ -o ${prefix}.bam \\ From 58c5b1a5c78c5618d6a7e1d5b4fe3f557c162c9c Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 23 Jan 2024 09:19:42 +0100 Subject: [PATCH 112/117] Increase java stack size --- modules/nf-core/umicollapse/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 314e0df1..fdecd6d9 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -26,6 +26,7 @@ process UMICOLLAPSE { """ umicollapse \\ -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\ + -Xss99M \\ bam \\ -i $bam \\ -o ${prefix}.bam \\ From 53c6ffa861eb9c8894e3859d0fb1ddf673eb9fd2 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 23 Jan 2024 09:37:39 +0100 Subject: [PATCH 113/117] Set jvm options via JAVA_TOOL_OPTIONS --- modules/nf-core/umicollapse/main.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index fdecd6d9..90224df0 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -24,9 +24,8 @@ process UMICOLLAPSE { def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - umicollapse \\ - -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\ - -Xss99M \\ + JAVA_TOOL_OPTIONS="-Xmx${(task.memory.toMega() * 0.8).intValue()}M -Xss99M" \\ + umicollapse \\ bam \\ -i $bam \\ -o ${prefix}.bam \\ From d5192d48a3491504b41fe1af51860d1d9773c632 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 23 Jan 2024 09:52:51 +0100 Subject: [PATCH 114/117] Full manual mode --- modules/nf-core/umicollapse/main.nf | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 90224df0..788f4d1e 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -21,15 +21,17 @@ process UMICOLLAPSE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - JAVA_TOOL_OPTIONS="-Xmx${(task.memory.toMega() * 0.8).intValue()}M -Xss99M" \\ - umicollapse \\ - bam \\ - -i $bam \\ - -o ${prefix}.bam \\ - $args + java \\ + -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\ + -Xss99M \\ + -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\ + bam \\ + -i $bam \\ + -o ${prefix}.bam \\ + $args mv .command.log ${prefix}_UMICollapse.log From 3d8219ead8e53ee0c5ec8b1895defaaca119d1a8 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 23 Jan 2024 12:58:11 +0100 Subject: [PATCH 115/117] Use two-pass mode for umicollapse --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a5f7ea37..127e0a34 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) { if (params.with_umi) { process { withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { - ext.args = { meta.single_end ? "--algo ${params.umitools_method}" : "--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric" } + ext.args = { meta.single_end ? 
"--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ path: { "${params.outdir}/umi_dedup" }, From 4fb25232db7bed454daac683b0b3dcb77daa4d1f Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 23 Jan 2024 13:24:39 +0100 Subject: [PATCH 116/117] Increase stack size even further --- modules/nf-core/umicollapse/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 788f4d1e..0ae78118 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -23,10 +23,11 @@ process UMICOLLAPSE { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + // Things I tried... """ java \\ - -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\ - -Xss99M \\ + -Xmx${(task.memory.toGiga() - 1).intValue()}G \\ + -Xss1G \\ -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\ bam \\ -i $bam \\ From 2dd1e24ff2c1cc643e71b5e6a3cfdf3dbd47bd33 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 24 Jan 2024 15:24:55 +0000 Subject: [PATCH 117/117] Updated umicollapse to modules provided version :) --- modules.json | 2 +- modules/nf-core/umicollapse/main.nf | 31 ++-- modules/nf-core/umicollapse/meta.yml | 4 + .../nf-core/umicollapse/tests/main.nf.test | 153 ++++++++++++++++++ .../umicollapse/tests/main.nf.test.snap | 36 +++++ .../nf-core/umicollapse/tests/nextflow.config | 8 + .../umicollapse/tests/nextflow_PE.config | 10 ++ .../umicollapse/tests/nextflow_SE.config | 10 ++ modules/nf-core/umicollapse/tests/tags.yml | 2 + 9 files changed, 243 insertions(+), 13 deletions(-) create mode 100644 modules/nf-core/umicollapse/tests/main.nf.test create mode 100644 
modules/nf-core/umicollapse/tests/main.nf.test.snap create mode 100644 modules/nf-core/umicollapse/tests/nextflow.config create mode 100644 modules/nf-core/umicollapse/tests/nextflow_PE.config create mode 100644 modules/nf-core/umicollapse/tests/nextflow_SE.config create mode 100644 modules/nf-core/umicollapse/tests/tags.yml diff --git a/modules.json b/modules.json index 03697a05..8236fcfd 100644 --- a/modules.json +++ b/modules.json @@ -67,7 +67,7 @@ }, "umicollapse": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "b71a681e9ec41cefd330e12b8566b5f5aff4941c", "installed_by": ["modules"] }, "umitools/extract": { diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf index 0ae78118..291d3273 100644 --- a/modules/nf-core/umicollapse/main.nf +++ b/modules/nf-core/umicollapse/main.nf @@ -1,5 +1,6 @@ process UMICOLLAPSE { tag "$meta.id" + label "process_high" label "process_high_memory" conda "${moduleDir}/environment.yml" @@ -12,7 +13,7 @@ process UMICOLLAPSE { output: tuple val(meta), path("*.bam"), emit: bam - tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*_UMICollapse.log"), emit: log path "versions.yml" , emit: versions when: @@ -22,19 +23,25 @@ process UMICOLLAPSE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - - // Things I tried... + // Memory allocation: We need to make sure that both heap and stack size is sufficiently large for + // umicollapse. We set the stack size to 5% of the available memory, the heap size to 90% + // which leaves 5% for stuff happening outside of java without the scheduler killing the process. 
+ def max_heap_size_mega = (task.memory.toMega() * 0.9).intValue() + def max_stack_size_mega = (task.memory.toMega() * 0.05).intValue() """ + # Getting the umicollapse jar file like this because `umicollapse` is a Python wrapper script generated + # by conda that allows to set the heap size (Xmx), but not the stack size (Xss). + # `which` allows us to get the directory that contains `umicollapse`, independent of whether we + # are in a container or conda environment. + UMICOLLAPSE_JAR=\$(dirname \$(which umicollapse))/../share/umicollapse-${VERSION}/umicollapse.jar java \\ - -Xmx${(task.memory.toGiga() - 1).intValue()}G \\ - -Xss1G \\ - -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\ - bam \\ - -i $bam \\ - -o ${prefix}.bam \\ - $args - - mv .command.log ${prefix}_UMICollapse.log + -Xmx${max_heap_size_mega}M \\ + -Xss${max_stack_size_mega}M \\ + -jar \$UMICOLLAPSE_JAR \\ + bam \\ + -i $bam \\ + -o ${prefix}.bam \\ + $args | tee ${prefix}_UMICollapse.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml index 528bc0ee..c45d6932 100644 --- a/modules/nf-core/umicollapse/meta.yml +++ b/modules/nf-core/umicollapse/meta.yml @@ -39,6 +39,10 @@ output: type: file description: BAM file with deduplicated UMIs. pattern: "*.{bam}" + - log: + type: file + description: A log file with the deduplication statistics. 
+ pattern: "*_{UMICollapse.log}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test new file mode 100644 index 00000000..1b8bf7e9 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process UMICOLLAPSE" + script "../main.nf" + process "UMICOLLAPSE" + + tag "modules" + tag "modules_nfcore" + tag "umicollapse" + tag "umitools/extract" + tag "samtools/index" + tag "bwa/index" + tag "bwa/mem" + + test("umicollapse single end test") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_SE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_SE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_PE.config" + process{ + """ + input[0] = 
[ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_PE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap new file mode 100644 index 00000000..60250530 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "umicollapse single end test": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.bam:md5,4e86d60aa82242889ab5f9031418ab2e" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "timestamp": "2024-01-24T13:57:02.801573999" + }, + "umicollapse paired tests": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.bam:md5,54be836ec246073e60212445b4369a91" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "timestamp": "2024-01-24T13:57:24.797928099" + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config 
b/modules/nf-core/umicollapse/tests/nextflow.config new file mode 100644 index 00000000..844edbdc --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config new file mode 100644 index 00000000..ae4c9632 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_PE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config new file mode 100644 index 00000000..d4b94436 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_SE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/tags.yml b/modules/nf-core/umicollapse/tests/tags.yml new file mode 100644 index 00000000..912879c4 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/tags.yml @@ -0,0 +1,2 @@ +umicollapse: + - "modules/nf-core/umicollapse/**"
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls