From d783ef9deb98dbd4adfabb5ab0858ba60a658057 Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Tue, 10 May 2022 14:35:54 +0200
Subject: [PATCH 001/117] REVERT CHANGES

Revert changes to a non-linted version and add the umitools modules.
---
 modules.json                                  |  8 ++-
 .../nf-core/modules/umitools/dedup/main.nf    | 41 +++++++++++++
 .../nf-core/modules/umitools/dedup/meta.yml   | 59 +++++++++++++++++++
 .../nf-core/modules/umitools/extract/main.nf  | 55 +++++++++++++++++
 .../nf-core/modules/umitools/extract/meta.yml | 47 +++++++++++++++
 5 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 modules/nf-core/modules/umitools/dedup/main.nf
 create mode 100644 modules/nf-core/modules/umitools/dedup/meta.yml
 create mode 100644 modules/nf-core/modules/umitools/extract/main.nf
 create mode 100644 modules/nf-core/modules/umitools/extract/meta.yml

diff --git a/modules.json b/modules.json
index 81208b16..071935f8 100644
--- a/modules.json
+++ b/modules.json
@@ -32,7 +32,13 @@
             },
             "trimgalore": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            },
+            "umitools/dedup": {
+                "git_sha": "f425aa3cea10015fe9b345b9d6dcc2336b53155f"
+            },
+            "umitools/extract": {
+                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/modules/umitools/dedup/main.nf b/modules/nf-core/modules/umitools/dedup/main.nf
new file mode 100644
index 00000000..dfcbcf2f
--- /dev/null
+++ b/modules/nf-core/modules/umitools/dedup/main.nf
@@ -0,0 +1,41 @@
+process UMITOOLS_DEDUP {
+    tag "$meta.id"
+    label "process_medium"
+
+    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
+        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.bam")             , emit: bam
+    tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance
+    tuple val(meta), path("*per_umi.tsv")      , emit: tsv_per_umi
+    tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position
+    path  "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def paired = meta.single_end ? "" : "--paired"
+    """
+    umi_tools \\
+        dedup \\
+        -I $bam \\
+        -S ${prefix}.bam \\
+        --output-stats $prefix \\
+        $paired \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/umitools/dedup/meta.yml b/modules/nf-core/modules/umitools/dedup/meta.yml
new file mode 100644
index 00000000..eee8952f
--- /dev/null
+++ b/modules/nf-core/modules/umitools/dedup/meta.yml
@@ -0,0 +1,59 @@
+name: umitools_dedup
+description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords:
+  - umitools
+  - deduplication
+tools:
+  - umi_tools:
+    description: >
+      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
+      and single cell RNA-Seq cell barcodes
+    documentation: https://umi-tools.readthedocs.io/en/latest/
+    license: ["MIT"]
+input:
+  - meta:
+    type: map
+    description: |
+      Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+    type: file
+    description: |
+      BAM file containing reads to be deduplicated via UMIs.
+    pattern: "*.{bam}"
+  - bai:
+    type: file
+    description: |
+      BAM index files corresponding to the input BAM file.
+    pattern: "*.{bai}"
+output:
+  - meta:
+    type: map
+    description: |
+      Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+    type: file
+    description: BAM file with deduplicated UMIs.
+    pattern: "*.{bam}"
+  - tsv_edit_distance:
+    type: file
+    description: Reports the (binned) average edit distance between the UMIs at each position.
+    pattern: "*edit_distance.tsv"
+  - tsv_per_umi:
+    type: file
+    description: UMI-level summary statistics.
+    pattern: "*per_umi.tsv"
+  - tsv_umi_per_position:
+    type: file
+    description: Tabulates the counts for unique combinations of UMI and position.
+    pattern: "*per_position.tsv"
+  - versions:
+    type: file
+    description: File containing software versions
+    pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"
+  - "@klkeys"
diff --git a/modules/nf-core/modules/umitools/extract/main.nf b/modules/nf-core/modules/umitools/extract/main.nf
new file mode 100644
index 00000000..22a405b9
--- /dev/null
+++ b/modules/nf-core/modules/umitools/extract/main.nf
@@ -0,0 +1,55 @@
+process UMITOOLS_EXTRACT {
+    tag "$meta.id"
+    label "process_low"
+
+    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
+        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.fastq.gz"), emit: reads
+    tuple val(meta), path("*.log")     , emit: log
+    path  "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        umi_tools \\
+            extract \\
+            -I $reads \\
+            -S ${prefix}.umi_extract.fastq.gz \\
+            $args \\
+            > ${prefix}.umi_extract.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+        END_VERSIONS
+        """
+    }  else {
+        """
+        umi_tools \\
+            extract \\
+            -I ${reads[0]} \\
+            --read2-in=${reads[1]} \\
+            -S ${prefix}.umi_extract_1.fastq.gz \\
+            --read2-out=${prefix}.umi_extract_2.fastq.gz \\
+            $args \\
+            > ${prefix}.umi_extract.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/modules/umitools/extract/meta.yml b/modules/nf-core/modules/umitools/extract/meta.yml
new file mode 100644
index 00000000..7fc23f72
--- /dev/null
+++ b/modules/nf-core/modules/umitools/extract/meta.yml
@@ -0,0 +1,47 @@
+name: umitools_extract
+description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place
+keywords:
+  - umitools
+  - extract
+tools:
+  - umi_tools:
+    description: >
+      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
+      and single cell RNA-Seq cell barcodes
+    documentation: https://umi-tools.readthedocs.io/en/latest/
+    license: ["MIT"]
+input:
+  - meta:
+    type: map
+    description: |
+      Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+    type: list
+    description: |
+      List of input FASTQ files whose UMIs will be extracted.
+output:
+  - meta:
+    type: map
+    description: |
+      Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+    type: file
+    description: >
+      Extracted FASTQ files. |
+      For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
+        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+    pattern: "*.{fastq.gz}"
+  - log:
+    type: file
+    description: Logfile for umi_tools
+    pattern: "*.{log}"
+  - versions:
+    type: file
+    description: File containing software versions
+    pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"

From 1043932a309d7ce573973225f16e62e4f7f82e3e Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Tue, 10 May 2022 15:58:50 +0200
Subject: [PATCH 002/117] INCLUDE UMITOOLS WORKFLOW

Added the umitools workflow and integrated it into the smrnaseq workflow
---
 .../nf-core/fastqc_umitools_trimgalore.nf     | 78 +++++++++++++++++++
 workflows/smrnaseq.nf                         | 14 ++--
 2 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 subworkflows/nf-core/fastqc_umitools_trimgalore.nf

diff --git a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf
new file mode 100644
index 00000000..ca158e7a
--- /dev/null
+++ b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf
@@ -0,0 +1,78 @@
+//
+// Read QC, UMI extraction and trimming
+//
+
+nextflow.enable.dsl=2
+
+include { FASTQC           } from '../../modules/nf-core/modules/fastqc/main'
+include { UMITOOLS_EXTRACT } from '../../modules/nf-core/modules/umitools/extract/main'
+include { TRIMGALORE       } from '../../modules/nf-core/modules/trimgalore/main'
+
+workflow FASTQC_UMITOOLS_TRIMGALORE {
+    take:
+    reads            // channel: [ val(meta), [ reads ] ]
+    skip_fastqc      // boolean: true/false
+    with_umi         // boolean: true/false
+    skip_trimming    // boolean: true/false
+    umi_discard_read // integer: 0, 1 or 2
+
+    main:
+
+    ch_versions = Channel.empty()
+    fastqc_html = Channel.empty()
+    fastqc_zip  = Channel.empty()
+    if (!skip_fastqc) {
+        FASTQC ( reads ).html.set { fastqc_html }
+        fastqc_zip  = FASTQC.out.zip
+        ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+    }
+
+    umi_reads = reads
+    umi_log   = Channel.empty()
+    if (with_umi) {
+        UMITOOLS_EXTRACT ( reads ).reads.set { umi_reads }
+        umi_log     = UMITOOLS_EXTRACT.out.log
+        ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())
+
+        // Discard R1 / R2 if required
+        if (umi_discard_read in [1,2]) {
+            UMITOOLS_EXTRACT
+                .out
+                .reads
+                .map { meta, fastqs ->
+                    // Build a new meta map instead of mutating the incoming one in place;
+                    // keep R2 (index 1) when R1 is discarded, R1 (index 0) otherwise.
+                    meta.single_end ?
+                        [ meta, fastqs ] :
+                        [ meta + [ single_end: true ], fastqs[umi_discard_read % 2] ]
+                }
+                .set { umi_reads }
+        }
+    }
+
+    trim_reads = umi_reads
+    trim_html  = Channel.empty()
+    trim_zip   = Channel.empty()
+    trim_log   = Channel.empty()
+    if (!skip_trimming) {
+        TRIMGALORE ( umi_reads ).reads.set { trim_reads }
+        trim_html   = TRIMGALORE.out.html
+        trim_zip    = TRIMGALORE.out.zip
+        trim_log    = TRIMGALORE.out.log
+        ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first())
+    }
+
+    emit:
+    reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+    fastqc_html        // channel: [ val(meta), [ html ] ]
+    fastqc_zip         // channel: [ val(meta), [ zip ] ]
+
+    umi_log            // channel: [ val(meta), [ log ] ]
+
+    trim_html          // channel: [ val(meta), [ html ] ]
+    trim_zip           // channel: [ val(meta), [ zip ] ]
+    trim_log           // channel: [ val(meta), [ txt ] ]
+
+    versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index ff8669bf..1613425c 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -55,7 +55,7 @@ if (params.mature) { reference_mature = file(params.mature, checkIfExists: true)
 if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" }
 
 include { INPUT_CHECK       } from '../subworkflows/local/input_check'
-include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore'
+include { FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastqc_umitools_trimgalore'
 include { MIRNA_QUANT       } from '../subworkflows/local/mirna_quant'
 include { GENOME_QUANT      } from '../subworkflows/local/genome_quant'
 include { MIRTRACE          } from '../subworkflows/local/mirtrace'
@@ -133,14 +133,16 @@ workflow SMRNASEQ {
     //
     // SUBWORKFLOW: Read QC, extract UMI and trim adapters
     //
-    FASTQC_TRIMGALORE (
+    FASTQC_UMITOOLS_TRIMGALORE ( // positional args: order must match the subworkflow's take: block
         ch_cat_fastq,
         params.skip_fastqc || params.skip_qc,
-        params.skip_trimming
+        params.with_umi,
+        params.skip_trimming,
+        params.umi_discard_read
     )
-    ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions)
+    ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions)
 
-    reads_for_mirna = FASTQC_TRIMGALORE.out.reads
+    reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
     MIRNA_QUANT (
         reference_mature,
         reference_hairpin,
@@ -161,7 +163,7 @@ workflow SMRNASEQ {
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
 
         if (!params.skip_mirdeep) {
-            MIRDEEP2 (FASTQC_TRIMGALORE.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.indices, MIRNA_QUANT.out.fasta_hairpin, MIRNA_QUANT.out.fasta_mature)
+            MIRDEEP2 (FASTQC_UMITOOLS_TRIMGALORE.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.indices, MIRNA_QUANT.out.fasta_hairpin, MIRNA_QUANT.out.fasta_mature)
             ch_versions = ch_versions.mix(MIRDEEP2.out.versions)
         }
     }
@@ -186,7 +188,7 @@ workflow SMRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
 
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.hairpin_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([]))

From 27fd482b7eba23b47b8526a1eb32bee489bc3d53 Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Fri, 13 May 2022 13:03:20 +0200
Subject: [PATCH 003/117] ADD DOCUMENTATION

Add additional documentation to use UMI tools as part of the pipeline.
Most of the documentation has been copied from nf-core/rnaseq.
---
 CHANGELOG.md         | 16 +++++++++++-----
 README.md            | 17 +++++++++--------
 docs/output.md       | 16 ++++++++++++++++
 nextflow.config      |  8 ++++++++
 nextflow_schema.json | 45 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e1b6875e..db1eec43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,14 +21,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Other enhancements & fixes
 
 - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script
+- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline.
 
 ### Parameters
 
-| Old parameter        | New parameter    |
-| -------------------- | ---------------- |
-| `--conda`            | `--enable_conda` |
-| `--clusterOptions`   |                  |
-| `--publish_dir_mode` |                  |
+| Old parameter        | New parameter               |
+| -------------------- | --------------------------- |
+| `--conda`            | `--enable_conda`            |
+| `--clusterOptions`   |                             |
+| `--publish_dir_mode` |                             |
+|                      | `--with_umi`                |
+|                      | `--umitools_extract_method` |
+|                      | `--umitools_bc_pattern`     |
+|                      | `--umi_discard_read`        |
+|                      | `--save_umi_intermeds`      |
 
 > **NB:** Parameter has been **updated** if both old and new parameter information is present.
 > **NB:** Parameter has been **added** if just the new parameter information is present.
diff --git a/README.md b/README.md
index 6d3efa52..e4294f99 100644
--- a/README.md
+++ b/README.md
@@ -31,27 +31,28 @@ On release, automated continuous integration tests run the pipeline on a full-si
 ## Pipeline summary
 
 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))
+2. UMI barcode extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))
+3. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))
    1. Insert Size calculation
    2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads))
-3. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-4. Alignment against miRBase hairpin
+4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+5. Alignment against miRBase hairpin
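-   1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-   2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+   1. Unaligned reads from step 4 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+   2. Collapsed reads from step 3.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))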
-5. Post-alignment processing of miRBase hairpin
+6. Post-alignment processing of miRBase hairpin
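-   1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
+   1. Basic statistics from step 4 and step 5.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))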
    2. Analysis on miRBase hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html))
       - TMM normalization and a table of top expression hairpin
       - MDS plot clustering samples
       - Heatmap of sample similarities
-   3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop))
+   3. miRNA and isomiR annotation from step 5.1 ([`mirtop`](https://github.com/miRTop/mirtop))
-6. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
    1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
-7. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
+8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
    1. Mapping against reference genome with the mapper module
    2. Known and novel miRNA discovery with the mirdeep2 module
-8. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
-9. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
+9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
+10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
 
 ## Quick Start
 
diff --git a/docs/output.md b/docs/output.md
index ce1f8347..79f1d6d9 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -13,6 +13,7 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
 - [FastQC](#fastqc) - read quality control
+- [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction
 - [TrimGalore](#trimgalore) - adapter trimming
 - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins)
 - [SAMtools](#samtools) - alignment result processing and feature counting
@@ -39,6 +40,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 ![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
 
+## UMI-tools extract
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `umitools/`
+  - `*.fastq.gz`: If `--save_umi_intermeds` is specified, FastQ files **after** UMI extraction will be placed in this directory.
+  - `*.log`: Log file generated by the UMI-tools `extract` command.
+
+</details>
+
+[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools dedup](#umi-tools-dedup) section.
+
+To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`.
+
 ## TrimGalore
 
 [TrimGalore](http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) is used for removal of adapter contamination and trimming of low quality regions. TrimGalore uses [Cutadapt](https://github.com/marcelm/cutadapt) for adapter trimming and runs FastQC after it finishes.
diff --git a/nextflow.config b/nextflow.config
index c5fa807d..4d724120 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -27,6 +27,14 @@ params {
     mature                     = "https://mirbase.org/ftp/CURRENT/mature.fa.gz"
     hairpin                    = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz"
 
+    // UMI handling
+    with_umi                   = false
+    skip_umi_extract           = false
+    umitools_extract_method    = 'string'
+    umitools_bc_pattern        = null
+    umi_discard_read           = null
+    save_umi_intermeds         = false
+
     // Trimming options
     clip_r1                    = 0
     three_prime_clip_r1        = 0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 027f1b37..5019408f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -50,6 +50,48 @@
                 }
             }
         },
+        "umi_options": {
+            "title": "UMI options",
+            "type": "object",
+            "description": "Options for processing reads with unique molecular identifiers",
+            "default": "",
+            "properties": {
+                "with_umi": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-barcode",
+                    "description": "Enable UMI-based read deduplication."
+                },
+                "umitools_extract_method": {
+                    "type": "string",
+                    "default": "string",
+                    "fa_icon": "fas fa-barcode",
+                    "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.",
+                    "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n"
+                },
+                "skip_umi_extract": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-compress-alt",
+                    "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run."
+                },
+                "umitools_bc_pattern": {
+                    "type": "string",
+                    "fa_icon": "fas fa-barcode",
+                    "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).",
+                    "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI."
+                },
+                "umi_discard_read": {
+                    "type": "integer",
+                    "fa_icon": "fas fa-barcode",
+                    "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively."
+                },
+                "save_umi_intermeds": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-save",
+                    "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory."
+                }
+            },
+            "fa_icon": "fas fa-barcode"
+        },
         "reference_genome_options": {
             "title": "Reference genome options",
             "type": "object",
@@ -379,6 +421,9 @@
         {
             "$ref": "#/definitions/input_output_options"
         },
+        {
+            "$ref": "#/definitions/umi_options"
+        },
         {
             "$ref": "#/definitions/reference_genome_options"
         },

From ee673b095f667b119bf1102e370b253a37ce8d4a Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Mon, 13 Jun 2022 13:22:37 +0200
Subject: [PATCH 004/117] ADD SAMTOOLS BAM2FQ MODULE

The bam2fq module is necessary to convert the deduplicated bam
files back into a fastq format to be fed into the existing
pipeline.
---
 modules.json                                  |  5 +-
 .../nf-core/modules/samtools/bam2fq/main.nf   | 56 +++++++++++++++++++
 .../nf-core/modules/samtools/bam2fq/meta.yml  | 55 ++++++++++++++++++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 modules/nf-core/modules/samtools/bam2fq/main.nf
 create mode 100644 modules/nf-core/modules/samtools/bam2fq/meta.yml

diff --git a/modules.json b/modules.json
index 071935f8..c328879e 100644
--- a/modules.json
+++ b/modules.json
@@ -15,6 +15,9 @@
             "multiqc": {
                 "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
             },
+            "samtools/bam2fq": {
+                "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159"
+            },
             "samtools/flagstat": {
                 "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35"
             },
@@ -41,4 +44,4 @@
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf
new file mode 100644
index 00000000..9301d1d3
--- /dev/null
+++ b/modules/nf-core/modules/samtools/bam2fq/main.nf
@@ -0,0 +1,56 @@
+process SAMTOOLS_BAM2FQ {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
+        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
+
+    input:
+    tuple val(meta), path(inputbam)
+    val split
+
+    output:
+    tuple val(meta), path("*.fq.gz"), emit: reads
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (split){
+        """
+        samtools \\
+            bam2fq \\
+            $args \\
+            -@ $task.cpus \\
+            -1 ${prefix}_1.fq.gz \\
+            -2 ${prefix}_2.fq.gz \\
+            -0 ${prefix}_other.fq.gz \\
+            -s ${prefix}_singleton.fq.gz \\
+            $inputbam
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        samtools \\
+            bam2fq \\
+            $args \\
+            -@ $task.cpus \\
+            $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml
new file mode 100644
index 00000000..319a60cf
--- /dev/null
+++ b/modules/nf-core/modules/samtools/bam2fq/meta.yml
@@ -0,0 +1,55 @@
+name: samtools_bam2fq
+description: |
+  The module uses bam2fq method from samtools to
+  convert a SAM, BAM or CRAM file to FASTQ format
+keywords:
+  - bam2fq
+  - samtools
+  - fastq
+tools:
+  - samtools:
+      description: Tools for dealing with SAM, BAM and CRAM files
+      homepage: None
+      documentation: http://www.htslib.org/doc/1.1/samtools.html
+      tool_dev_url: None
+      doi: ""
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - inputbam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - split:
+      type: boolean
+      description: |
+        TRUE/FALSE value to indicate if reads should be separated into
+        /1, /2 and if present other, or singleton.
+        Note: choosing TRUE will generate 4 different files.
+        Choosing FALSE will produce a single file, which will be interleaved in case
+        the input contains paired reads.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: |
+        FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
+        or a single interleaved .fq.gz file if the user chooses not to split the reads.
+      pattern: "*.fq.gz"
+
+authors:
+  - "@lescai"

From 0bc65e456596ba8dca61b62616b351dc04c1af97 Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Tue, 14 Jun 2022 14:13:23 +0200
Subject: [PATCH 005/117] ADD UMITOOLS EXTRACT ARGS

Added the umitools extract modules.config lines from nf-core/rnaseq
to this pipeline.
---
 conf/modules.config | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index c802ccf3..92fadab2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,6 +38,30 @@ process {
     }
 }
 
+if (params.with_umi && !params.skip_umi_extract) {
+    process {
+        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' {
+            ext.args   = [
+                    params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '',
+                    params.umitools_bc_pattern     ? "--bc-pattern='${params.umitools_bc_pattern}'" : '',
+                ].join(' ').trim()
+            publishDir = [
+                [
+                    path: { "${params.outdir}/umitools" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.log"
+                ],
+                [
+                    path: { "${params.outdir}/umitools" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.fastq.gz",
+                    enabled: params.save_umi_intermeds
+                ]
+            ]
+        }
+    }
+}
+
 //
 // Genome preparation options
 //
@@ -89,7 +113,7 @@ process {
 
 if (!(params.skip_fastqc || params.skip_qc)) {
     process {
-        withName: '.*:FASTQC_TRIMGALORE:FASTQC' {
+        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:FASTQC' {
             ext.args = '--quiet'
         }
     }
@@ -97,7 +121,7 @@ if (!(params.skip_fastqc || params.skip_qc)) {
 
 if (!params.skip_trimming) {
     process {
-        withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' {
+        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:TRIMGALORE' {
             ext.args   = '--fastqc'
             publishDir = [
                 [

From 8d14f90a58b51460dfb41a47a7487ecbc272d40e Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Wed, 15 Jun 2022 09:41:18 +0200
Subject: [PATCH 006/117] UPDATE MODULES.CONFIG

Added configurations for umi deduplication.
---
 conf/modules.config | 97 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 73 insertions(+), 24 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 92fadab2..c1a74e21 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,30 +38,6 @@ process {
     }
 }
 
-if (params.with_umi && !params.skip_umi_extract) {
-    process {
-        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' {
-            ext.args   = [
-                    params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '',
-                    params.umitools_bc_pattern     ? "--bc-pattern='${params.umitools_bc_pattern}'" : '',
-                ].join(' ').trim()
-            publishDir = [
-                [
-                    path: { "${params.outdir}/umitools" },
-                    mode: params.publish_dir_mode,
-                    pattern: "*.log"
-                ],
-                [
-                    path: { "${params.outdir}/umitools" },
-                    mode: params.publish_dir_mode,
-                    pattern: "*.fastq.gz",
-                    enabled: params.save_umi_intermeds
-                ]
-            ]
-        }
-    }
-}
-
 //
 // Genome preparation options
 //
@@ -101,6 +77,7 @@ process {
 //
 // Read QC and trimming options
 //
+
 process {
     withName: 'MIRTRACE_RUN' {
         publishDir = [
@@ -145,6 +122,78 @@ if (!params.skip_trimming) {
     }
 }
 
+if (params.with_umi && !params.skip_umi_extract) {
+    process {
+        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' {
+            ext.args   = [
+                    params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '',
+                    params.umitools_bc_pattern     ? "--bc-pattern='${params.umitools_bc_pattern}'" : '',
+                ].join(' ').trim()
+            publishDir = [
+                [
+                    path: { "${params.outdir}/umitools" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.log"
+                ],
+                [
+                    path: { "${params.outdir}/umitools" },
+                    mode: params.publish_dir_mode,
+                    pattern: "*.fastq.gz",
+                    enabled: params.save_umi_intermeds
+                ]
+            ]
+        }
+    }
+}
+
+//
+// UMI tools deduplication
+//
+
+if (params.with_umi) {
+    process {
+        withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' {
+            ext.args = { meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard' }
+            ext.prefix = { "${meta.id}.umi_dedup.sorted" }
+            publishDir = [
+                [
+                    path: { "${params.outdir}/umi_dedup/umitools" },
+                    mode: params.publish_dir_mode,
+                    pattern: '*.tsv'
+                ],
+                [
+                    path: { "${params.outdir}/umi_dedup" },
+                    mode: params.publish_dir_mode,
+                    pattern: '*.bam',
+                    enabled: (
+                        params.save_umi_intermeds
+                    )
+                ]
+            ]
+        }
+
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' {
+            ext.prefix = { "${meta.id}.umi_dedup.sorted" }
+            publishDir = [
+                path: { "${params.outdir}/umi_dedup" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{bai,csi}',
+                enabled: (
+                    params.save_umi_intermeds
+                )
+            ]
+        }
+
+        withName: '.*:DEDUPLICATE_UMIS:BAM_STATS_SAMTOOLS:.*' {
+            publishDir = [
+                path: { "${params.outdir}/umi_dedup/samtools_stats" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{stats,flagstat,idxstats}'
+            ]
+        }
+    }
+}
+
 //
 // Quantification
 //

From 23f96d83ea5bfcf40810d58797dc8f3b060a0303 Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Wed, 15 Jun 2022 09:43:43 +0200
Subject: [PATCH 007/117] INCLUDE UMITOOLS DEDUP WORKFLOW

Initial commit of the umi dedup subworkflow. The workflow combines
already existing modules of the pipeline and nf-core module to
deduplicate the reads by mapping them to the species genome and
re-converting them to fastq after deduplication.
---
 subworkflows/local/umi_dedup.nf | 69 +++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 subworkflows/local/umi_dedup.nf

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
new file mode 100644
index 00000000..f97b8e8c
--- /dev/null
+++ b/subworkflows/local/umi_dedup.nf
@@ -0,0 +1,69 @@
+// 
+// Deduplicate the UMI reads by mapping them to the complete genome.
+//
+
+include { INDEX_GENOME                        } from '../../modules/local/bowtie_genome'
+include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna'
+include { BAM_SORT_SAMTOOLS                   } from '../../subworkflows/nf-core/bam_sort_samtools'
+include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/modules/umitools/dedup/main'
+include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/modules/samtools/bam2fq/main'
+
+workflow DEDUPLICATE_UMIS {
+    take:
+    fasta
+    bt_index
+    reads      // channel: [ val(meta), [ reads ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+    ch_dedup_stats = Channel.empty()
+
+    if (!bt_index){
+        INDEX_GENOME ( fasta )
+        bt_indices      = INDEX_GENOME.out.bt_indices
+        fasta_formatted = INDEX_GENOME.out.fasta
+        ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)
+    } else {
+        bt_indices      = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" }
+        fasta_formatted = fasta
+    }
+
+    if (bt_indices){
+
+        reads.view()
+        
+        BOWTIE_MAP_GENOME ( reads, bt_indices.collect() )
+        ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
+
+        BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() )
+        ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
+
+        BAM_SORT_SAMTOOLS.out.bam.view()
+        ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
+
+        ch_umi_dedup.view()
+
+        UMITOOLS_DEDUP ( ch_umi_dedup )
+        ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
+        ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
+
+        SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false )
+        ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions)
+    }
+
+    emit:
+    reads    = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
+    indices  = bt_indices
+    stats    = ch_dedup_stats
+    versions = ch_versions
+}
+
+
+def add_suffix(row, suffix) {
+    def meta = [:]
+    meta.id           = "${row[0].id}_${suffix}"
+    def array = []
+    array = [ meta, row[1] ]
+    return array
+}
\ No newline at end of file

From 944d27789fb85915efd6f5e60f43fcc77667a892 Mon Sep 17 00:00:00 2001
From: Christian Kubica <chriskub@CK-MBP14USIctx.local>
Date: Wed, 15 Jun 2022 09:47:40 +0200
Subject: [PATCH 008/117] INCLUDE UMITOOLS DEDUP

Includes the optional UMI-tools deduplication step after the read
QC.
---
 workflows/smrnaseq.nf | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 1613425c..27bf842a 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -54,12 +54,13 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
 if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" }
 if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" }
 
-include { INPUT_CHECK       } from '../subworkflows/local/input_check'
+include { INPUT_CHECK                } from '../subworkflows/local/input_check'
 include { FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastqc_umitools_trimgalore'
-include { MIRNA_QUANT       } from '../subworkflows/local/mirna_quant'
-include { GENOME_QUANT      } from '../subworkflows/local/genome_quant'
-include { MIRTRACE          } from '../subworkflows/local/mirtrace'
-include { MIRDEEP2          } from '../subworkflows/local/mirdeep2'
+include { DEDUPLICATE_UMIS           } from '../subworkflows/local/umi_dedup'
+include { MIRNA_QUANT                } from '../subworkflows/local/mirna_quant'
+include { GENOME_QUANT               } from '../subworkflows/local/genome_quant'
+include { MIRTRACE                   } from '../subworkflows/local/mirtrace'
+include { MIRDEEP2                   } from '../subworkflows/local/mirdeep2'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -135,14 +136,26 @@ workflow SMRNASEQ {
     //
     FASTQC_UMITOOLS_TRIMGALORE (
         ch_cat_fastq,
-        params.with_umi,
-        params.umi_discard_read,
         params.skip_fastqc || params.skip_qc,
-        params.skip_trimming
+        params.with_umi,
+        params.skip_trimming,
+        params.umi_discard_read
     )
     ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions)
 
     reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
+
+    reads_for_mirna.view()
+
+    if (params.with_umi){
+        if (fasta){
+            fasta_ch = file(fasta)
+            DEDUPLICATE_UMIS (fasta_ch, bt_index, FASTQC_UMITOOLS_TRIMGALORE.out.reads)
+            reads_for_mirna = DEDUPLICATE_UMIS.out.reads
+            ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
+        }
+    }
+
     MIRNA_QUANT (
         reference_mature,
         reference_hairpin,

From ddb3dbaa92b6243b3dc498290062fd3b7f761925 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Wed, 15 Jun 2022 13:55:52 +0200
Subject: [PATCH 009/117] ADD SAMTOOLS SORT CONFIG

Added additional configuration to change the output file name of
samtools sort.
---
 conf/modules.config | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index c1a74e21..92832223 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -172,8 +172,20 @@ if (params.with_umi) {
             ]
         }
 
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' {
+            ext.prefix = { "${meta.id}.sorted" }
+            publishDir = [
+                path: { "${params.outdir}/umi_dedup" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{bam}',
+                enabled: (
+                    params.save_umi_intermeds
+                )
+            ]
+        }
+
         withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' {
-            ext.prefix = { "${meta.id}.umi_dedup.sorted" }
+            ext.prefix = { "${meta.id}.sorted" }
             publishDir = [
                 path: { "${params.outdir}/umi_dedup" },
                 mode: params.publish_dir_mode,

From b2ef66a76772f2299a705a10a42622dbc3d32407 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Wed, 15 Jun 2022 15:28:36 +0200
Subject: [PATCH 010/117] FIX TYPO

---
 subworkflows/local/umi_dedup.nf | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index f97b8e8c..5886ed52 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -30,8 +30,6 @@ workflow DEDUPLICATE_UMIS {
     }
 
     if (bt_indices){
-
-        reads.view()
         
         BOWTIE_MAP_GENOME ( reads, bt_indices.collect() )
         ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
@@ -39,11 +37,7 @@ workflow DEDUPLICATE_UMIS {
         BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() )
         ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
 
-        BAM_SORT_SAMTOOLS.out.bam.view()
         ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
-
-        ch_umi_dedup.view()
-
         UMITOOLS_DEDUP ( ch_umi_dedup )
         ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
         ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)

From 29ec7dae7ec06fe74ec1fd0d4745e1e81d06087c Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Wed, 15 Jun 2022 15:28:47 +0200
Subject: [PATCH 011/117] ADD DEDUP DOCUMENTATION

Added the documentation detailing the output files of the UMI-tools
deduplication step.
---
 docs/output.md | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 79f1d6d9..e0c2bd67 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -15,6 +15,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [FastQC](#fastqc) - read quality control
 - [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction
 - [TrimGalore](#trimgalore) - adapter trimming
+- [UMI-tools deduplicate](#umi-tools-deduplicate) - read deduplication
 - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins)
 - [SAMtools](#samtools) - alignment result processing and feature counting
 - [edgeR](#edger) - normalization, MDS plot and sample pairwise distance heatmap
@@ -51,7 +52,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 </details>
 
-[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools dedup](#umi-tools-dedup) section.
+[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools deduplicate](#umi-tools-deduplicate) section.
 
 To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`.
 
@@ -74,6 +75,20 @@ This is an example of the output we can get:
 
 ![cutadapt](images/cutadapt_plot.png)
 
+## UMI-tools deduplicate
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `umi_dedup/`
+  - `*.tsv`: Results statistics files detailing the UMI deduplication results.
+  - `*.bam`: If `--save_umi_intermeds` is specified, the deduplicated bam files **after** UMI deduplication will be placed in this directory. In addition the sorted and indexed files will be placed there as well.
+  - `samtools_stats/`
+    - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication.
+</details>
+
+[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are converted into fastq format and then used in the pipeline.
+
 ## Bowtie
 
 [Bowtie](http://bowtie-bio.sourceforge.net/index.shtml) is used for mapping adapter trimmed reads against the mature miRNAs and miRNA precursors (hairpins) in [miRBase](http://www.mirbase.org/).

From afa1ad76c7ece9d5a521e44a16abe522655c9808 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Wed, 15 Jun 2022 15:38:42 +0200
Subject: [PATCH 012/117] ADD DEDUP STEP

---
 README.md | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index e4294f99..e3ceb707 100644
--- a/README.md
+++ b/README.md
@@ -35,24 +35,25 @@ On release, automated continuous integration tests run the pipeline on a full-si
 3. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))
    1. Insert Size calculation
    2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads))
-4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-5. Alignment against miRBase hairpin
+4. UMI barcode deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))
+5. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+6. Alignment against miRBase hairpin
    1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
    2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-6. Post-alignment processing of miRBase hairpin
+7. Post-alignment processing of miRBase hairpin
    1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
    2. Analysis on miRBase hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html))
       - TMM normalization and a table of top expression hairpin
       - MDS plot clustering samples
       - Heatmap of sample similarities
    3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop))
-7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+8. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
    1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
-8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
+9. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
    1. Mapping against reference genome with the mapper module
    2. Known and novel miRNA discovery with the mirdeep2 module
-9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
-10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
+10. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
+11. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
 
 ## Quick Start
 

From c72ac5be2993be2a65e90c0d4bc99385fd2d661b Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Wed, 15 Jun 2022 15:43:39 +0200
Subject: [PATCH 013/117] ADD UMITOOLS VERSION

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db1eec43..32df9aaf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,7 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Other enhancements & fixes
 
 - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script
-- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline.
+- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline and extend the deduplication step.
 
 ### Parameters
 
@@ -61,6 +61,7 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi
 | `seqkit`             | 0.16.0      | 2.0.0       |
 | `trim-galore`        | 0.6.6       | 0.6.7       |
 | `bioconvert`         | -           | 0.4.3       |
+| `umi_tools`          | -           | 1.1.2       |
 | `htseq`              | -           | -           |
 | `markdown`           | -           | -           |
 | `pymdown-extensions` | -           | -           |

From f44228931a3342e700b6e900e7ddfdcda91fc5d4 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Mon, 20 Jun 2022 15:26:09 +0200
Subject: [PATCH 014/117] MERGE DEDUPLICATED AND UNMAPPED READS AFTER
 DEDUPLICATION

After deduplication the reads that remained unaligned to the
provided reference genome are merged with the set of deduplicated
reads to enable the use of the full spectrum of reads, independent
of potential reference bias. This behaviour can be deactivated by
setting --umi_merge_unmapped false
---
 conf/modules.config             |  2 +-
 modules/local/join_reads.nf     | 21 +++++++++++++++++++++
 subworkflows/local/umi_dedup.nf | 25 +++++++++++++++++++------
 workflows/smrnaseq.nf           |  4 +---
 4 files changed, 42 insertions(+), 10 deletions(-)
 create mode 100644 modules/local/join_reads.nf

diff --git a/conf/modules.config b/conf/modules.config
index 92832223..ed78630e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -196,7 +196,7 @@ if (params.with_umi) {
             ]
         }
 
-        withName: '.*:DEDUPLICATE_UMIS:BAM_STATS_SAMTOOLS:.*' {
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
             publishDir = [
                 path: { "${params.outdir}/umi_dedup/samtools_stats" },
                 mode: params.publish_dir_mode,
diff --git a/modules/local/join_reads.nf b/modules/local/join_reads.nf
new file mode 100644
index 00000000..9f68f062
--- /dev/null
+++ b/modules/local/join_reads.nf
@@ -0,0 +1,21 @@
+process JOIN_FASTQS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? 'bioconda::samtools=1.13' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' :
+        'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(unmapped_meta), path(unmapped_reads)
+
+    output:
+    tuple val(meta), path('*_merged.fq.gz'), emit: merged
+    script:
+    """
+    cat ${reads} ${unmapped_reads} > ${meta.id}_merged.fq.gz
+    """
+
+}
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 5886ed52..0e054d9b 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -3,10 +3,11 @@
 //
 
 include { INDEX_GENOME                        } from '../../modules/local/bowtie_genome'
-include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna'
+include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME    } from '../../modules/local/bowtie_map_mirna'
 include { BAM_SORT_SAMTOOLS                   } from '../../subworkflows/nf-core/bam_sort_samtools'
 include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/modules/umitools/dedup/main'
 include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/modules/samtools/bam2fq/main'
+include { JOIN_FASTQS                         } from '../../modules/local/join_reads'
 
 workflow DEDUPLICATE_UMIS {
     take:
@@ -31,10 +32,10 @@ workflow DEDUPLICATE_UMIS {
 
     if (bt_indices){
         
-        BOWTIE_MAP_GENOME ( reads, bt_indices.collect() )
-        ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
+        UMI_MAP_GENOME ( reads, bt_indices.collect() )
+        ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
-        BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() )
+        BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
         ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
 
         ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
@@ -44,12 +45,24 @@ workflow DEDUPLICATE_UMIS {
 
         SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false )
         ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions)
+
+        ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
+
+        if ( params.umi_merge_unmapped ) {
+    
+            JOIN_FASTQS ( 
+                SAMTOOLS_BAM2FQ.out.reads,
+                UMI_MAP_GENOME.out.unmapped 
+            )
+            ch_dedup_reads = JOIN_FASTQS.out.merged
+        }
     }
 
     emit:
-    reads    = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
+//    reads    = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
+    reads    = ch_dedup_reads
     indices  = bt_indices
-    stats    = ch_dedup_stats
+//    stats    = ch_dedup_stats
     versions = ch_versions
 }
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 27bf842a..54f0e505 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -144,9 +144,7 @@ workflow SMRNASEQ {
     ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions)
 
     reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
-
-    reads_for_mirna.view()
-
+    
     if (params.with_umi){
         if (fasta){
             fasta_ch = file(fasta)

From f9ca542f8d4710a3b779eda7e366b4ac70ffb2e7 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Mon, 20 Jun 2022 15:29:00 +0200
Subject: [PATCH 015/117] ADD MISSING OPTION

---
 nextflow.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nextflow.config b/nextflow.config
index 4d724120..78df9386 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -34,6 +34,7 @@ params {
     umitools_bc_pattern        = null
     umi_discard_read           = null
     save_umi_intermeds         = false
+    umi_merge_unmapped         = true
 
     // Trimming options
     clip_r1                    = 0

From b9747170589a60215f57d04bf39426bd5fca7868 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Mon, 20 Jun 2022 15:29:36 +0200
Subject: [PATCH 016/117] ADD NEWLINE

---
 modules/local/mirdeep2_run.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf
index 8ef6e804..5444ecfc 100644
--- a/modules/local/mirdeep2_run.nf
+++ b/modules/local/mirdeep2_run.nf
@@ -37,4 +37,3 @@ process MIRDEEP2_RUN {
     END_VERSIONS
     """
 }
-

From 4610be12d815689d0918f8c335b45dd6cfbd0107 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:20:00 +0200
Subject: [PATCH 017/117] CLEAN CODE

---
 subworkflows/local/umi_dedup.nf | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 0e054d9b..94341a96 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -59,18 +59,8 @@ workflow DEDUPLICATE_UMIS {
     }
 
     emit:
-//    reads    = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ reads ] ]
     reads    = ch_dedup_reads
     indices  = bt_indices
-//    stats    = ch_dedup_stats
+    stats    = ch_dedup_stats
     versions = ch_versions
 }
-
-
-def add_suffix(row, suffix) {
-    def meta = [:]
-    meta.id           = "${row[0].id}_${suffix}"
-    def array = []
-    array = [ meta, row[1] ]
-    return array
-}
\ No newline at end of file

From 67b2caceadeae51cdfc86e5a966dbeee66a4e69f Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:21:45 +0200
Subject: [PATCH 018/117] ADD DOCUMENTATION

---
 workflows/smrnaseq.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 54f0e505..51e412e8 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -144,7 +144,10 @@ workflow SMRNASEQ {
     ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions)
 
     reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
-    
+
+    //
+    // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome
+    //
     if (params.with_umi){
         if (fasta){
             fasta_ch = file(fasta)

From 23fc985ab537abe0aa55771a693e831e6eae9f92 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:25:34 +0200
Subject: [PATCH 019/117] ADD UMI_MERGE_UNMAPPED COMMAND

---
 nextflow_schema.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5019408f..4547be90 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -88,6 +88,11 @@
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
                     "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory."
+                },
+                "umi_merge_unmapped": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-save",
+                    "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias"
                 }
             },
             "fa_icon": "fas fa-barcode"

From be241eafd97c5943be3614a0b843e28fec6b5be3 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:31:32 +0200
Subject: [PATCH 020/117] FINALIZE DOCUMENTATION

Information on the new --umi_merge_unmapped parameter was added to
both the CHANGELOG and the output documentation (docs/output.md).
---
 CHANGELOG.md   | 1 +
 docs/output.md | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 32df9aaf..8fb364d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 |                      | `--umitools_bc_pattern`     |
 |                      | `--umi_discard_read`        |
 |                      | `--save_umi_intermeds`      |
+|                      | `--umi_merge_unmapped`      |
 
 > **NB:** Parameter has been **updated** if both old and new parameter information is present.
 > **NB:** Parameter has been **added** if just the new parameter information is present.
diff --git a/docs/output.md b/docs/output.md
index e0c2bd67..14cc4e6d 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -87,7 +87,7 @@ This is an example of the output we can get:
     - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication.
 </details>
 
-[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are converted into fastq format and then used in the pipeline.
+[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline.
 
 ## Bowtie
 

From 8b433f1876749f277cf946f84b52407434fdcbdc Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:37:09 +0200
Subject: [PATCH 021/117] UPDATE MAIL TEMPLATE

---
 assets/email_template.html | 142 +++++++++++--------------------------
 1 file changed, 42 insertions(+), 100 deletions(-)

diff --git a/assets/email_template.html b/assets/email_template.html
index 05d590d3..e75e86ac 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -1,111 +1,53 @@
 <html>
-  <head>
-    <meta charset="utf-8" />
-    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
 
-    <!-- prettier-ignore -->
-    <meta name="description" content="nf-core/smrnaseq: Small RNA-Seq Best Practice Analysis Pipeline." />
-    <title>nf-core/smrnaseq Pipeline Report</title>
-  </head>
-  <body>
-    <div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto">
-      <img src="cid:nfcorepipelinelogo" />
+  <meta name="description" content="nf-core/smrnaseq: Small RNA-Seq Best Practice Analysis Pipeline.">
+  <title>nf-core/smrnaseq Pipeline Report</title>
+</head>
+<body>
+<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
 
-      <h1>nf-core/smrnaseq v${version}</h1>
-      <h2>Run Name: $runName</h2>
+<img src="cid:nfcorepipelinelogo">
 
-      <% if (!success){ out << """
-      <div
-        style="
-          color: #a94442;
-          background-color: #f2dede;
-          border-color: #ebccd1;
-          padding: 15px;
-          margin-bottom: 20px;
-          border: 1px solid transparent;
-          border-radius: 4px;
-        "
-      >
-        <h4 style="margin-top: 0; color: inherit">nf-core/smrnaseq execution completed unsuccessfully!</h4>
+<h1>nf-core/smrnaseq v${version}</h1>
+<h2>Run Name: $runName</h2>
+
+<% if (!success){
+    out << """
+    <div style="color: #a94442; background-color: #f2dede; border-color: #ebccd1; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
+        <h4 style="margin-top:0; color: inherit;">nf-core/smrnaseq execution completed unsuccessfully!</h4>
         <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
         <p>The full error message was:</p>
-        <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0">${errorReport}</pre>
-      </div>
-      """ } else { out << """
-      <div
-        style="
-          color: #3c763d;
-          background-color: #dff0d8;
-          border-color: #d6e9c6;
-          padding: 15px;
-          margin-bottom: 20px;
-          border: 1px solid transparent;
-          border-radius: 4px;
-        "
-      >
+        <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">${errorReport}</pre>
+    </div>
+    """
+} else {
+    out << """
+    <div style="color: #3c763d; background-color: #dff0d8; border-color: #d6e9c6; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
         nf-core/smrnaseq execution completed successfully!
-      </div>
-      """ } %>
+    </div>
+    """
+}
+%>
 
-      <p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
-      <p>The command used to launch the workflow was as follows:</p>
-      <pre
-        style="
-          white-space: pre-wrap;
-          overflow: visible;
-          background-color: #ededed;
-          padding: 15px;
-          border-radius: 4px;
-          margin-bottom: 30px;
-        "
-      >
-$commandLine</pre
-      >
+<p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
+<p>The command used to launch the workflow was as follows:</p>
+<pre style="white-space: pre-wrap; overflow: visible; background-color: #ededed; padding: 15px; border-radius: 4px; margin-bottom:30px;">$commandLine</pre>
 
-      <h3>Pipeline Configuration:</h3>
-      <table
-        style="
-          width: 100%;
-          max-width: 100%;
-          border-spacing: 0;
-          border-collapse: collapse;
-          border: 0;
-          margin-bottom: 30px;
-        "
-      >
-        <tbody style="border-bottom: 1px solid #ddd">
-          <% out << summary.collect{ k,v -> "
-          <tr>
-            <th
-              style="
-                text-align: left;
-                padding: 8px 0;
-                line-height: 1.42857143;
-                vertical-align: top;
-                border-top: 1px solid #ddd;
-              "
-            >
-              $k
-            </th>
-            <td
-              style="
-                text-align: left;
-                padding: 8px;
-                line-height: 1.42857143;
-                vertical-align: top;
-                border-top: 1px solid #ddd;
-              "
-            >
-              <pre style="white-space: pre-wrap; overflow: visible">$v</pre>
-            </td>
-          </tr>
-          " }.join("\n") %>
-        </tbody>
-      </table>
+<h3>Pipeline Configuration:</h3>
+<table style="width:100%; max-width:100%; border-spacing: 0; border-collapse: collapse; border:0; margin-bottom: 30px;">
+    <tbody style="border-bottom: 1px solid #ddd;">
+        <% out << summary.collect{ k,v -> "<tr><th style='text-align:left; padding: 8px 0; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'>$k</th><td style='text-align:left; padding: 8px; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>$v</pre></td></tr>" }.join("\n") %>
+    </tbody>
+</table>
 
-      <p>nf-core/smrnaseq</p>
-      <p><a href="https://github.com/nf-core/smrnaseq">https://github.com/nf-core/smrnaseq</a></p>
-    </div>
-  </body>
+<p>nf-core/smrnaseq</p>
+<p><a href="https://github.com/nf-core/smrnaseq">https://github.com/nf-core/smrnaseq</a></p>
+
+</div>
+
+</body>
 </html>

From 0e732ede28b931b4e233d07cc092526753de95c1 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:43:38 +0200
Subject: [PATCH 022/117] CHANGE DAG OUTPUT TO HTML

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 78df9386..7e528b38 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -192,7 +192,7 @@ trace {
 }
 dag {
     enabled = true
-    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg"
+    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
 }
 
 manifest {

From 8f426b56ac14b90bef00aa63ff57ecdb6c843e1f Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 10:56:12 +0200
Subject: [PATCH 023/117] PLEASE PRETTIER

---
 assets/email_template.html | 141 ++++++++++++++++++++++++++-----------
 docs/output.md             |   5 +-
 2 files changed, 101 insertions(+), 45 deletions(-)

diff --git a/assets/email_template.html b/assets/email_template.html
index e75e86ac..d856192a 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -1,53 +1,110 @@
 <html>
-<head>
-  <meta charset="utf-8">
-  <meta http-equiv="X-UA-Compatible" content="IE=edge">
-  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <head>
+    <meta charset="utf-8" />
+    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
 
-  <meta name="description" content="nf-core/smrnaseq: Small RNA-Seq Best Practice Analysis Pipeline.">
-  <title>nf-core/smrnaseq Pipeline Report</title>
-</head>
-<body>
-<div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
+    <meta name="description" content="nf-core/smrnaseq: Small RNA-Seq Best Practice Analysis Pipeline." />
+    <title>nf-core/smrnaseq Pipeline Report</title>
+  </head>
+  <body>
+    <div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto">
+      <img src="cid:nfcorepipelinelogo" />
 
-<img src="cid:nfcorepipelinelogo">
+      <h1>nf-core/smrnaseq v${version}</h1>
+      <h2>Run Name: $runName</h2>
 
-<h1>nf-core/smrnaseq v${version}</h1>
-<h2>Run Name: $runName</h2>
-
-<% if (!success){
-    out << """
-    <div style="color: #a94442; background-color: #f2dede; border-color: #ebccd1; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
-        <h4 style="margin-top:0; color: inherit;">nf-core/smrnaseq execution completed unsuccessfully!</h4>
+      <% if (!success){ out << """
+      <div
+        style="
+          color: #a94442;
+          background-color: #f2dede;
+          border-color: #ebccd1;
+          padding: 15px;
+          margin-bottom: 20px;
+          border: 1px solid transparent;
+          border-radius: 4px;
+        "
+      >
+        <h4 style="margin-top: 0; color: inherit">nf-core/smrnaseq execution completed unsuccessfully!</h4>
         <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
         <p>The full error message was:</p>
-        <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">${errorReport}</pre>
-    </div>
-    """
-} else {
-    out << """
-    <div style="color: #3c763d; background-color: #dff0d8; border-color: #d6e9c6; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
+        <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0">${errorReport}</pre>
+      </div>
+      """ } else { out << """
+      <div
+        style="
+          color: #3c763d;
+          background-color: #dff0d8;
+          border-color: #d6e9c6;
+          padding: 15px;
+          margin-bottom: 20px;
+          border: 1px solid transparent;
+          border-radius: 4px;
+        "
+      >
         nf-core/smrnaseq execution completed successfully!
-    </div>
-    """
-}
-%>
+      </div>
+      """ } %>
 
-<p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
-<p>The command used to launch the workflow was as follows:</p>
-<pre style="white-space: pre-wrap; overflow: visible; background-color: #ededed; padding: 15px; border-radius: 4px; margin-bottom:30px;">$commandLine</pre>
+      <p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
+      <p>The command used to launch the workflow was as follows:</p>
+      <pre
+        style="
+          white-space: pre-wrap;
+          overflow: visible;
+          background-color: #ededed;
+          padding: 15px;
+          border-radius: 4px;
+          margin-bottom: 30px;
+        "
+      >
+$commandLine</pre
+      >
 
-<h3>Pipeline Configuration:</h3>
-<table style="width:100%; max-width:100%; border-spacing: 0; border-collapse: collapse; border:0; margin-bottom: 30px;">
-    <tbody style="border-bottom: 1px solid #ddd;">
-        <% out << summary.collect{ k,v -> "<tr><th style='text-align:left; padding: 8px 0; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'>$k</th><td style='text-align:left; padding: 8px; line-height: 1.42857143; vertical-align: top; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>$v</pre></td></tr>" }.join("\n") %>
-    </tbody>
-</table>
+      <h3>Pipeline Configuration:</h3>
+      <table
+        style="
+          width: 100%;
+          max-width: 100%;
+          border-spacing: 0;
+          border-collapse: collapse;
+          border: 0;
+          margin-bottom: 30px;
+        "
+      >
+        <tbody style="border-bottom: 1px solid #ddd">
+          <% out << summary.collect{ k,v -> "
+          <tr>
+            <th
+              style="
+                text-align: left;
+                padding: 8px 0;
+                line-height: 1.42857143;
+                vertical-align: top;
+                border-top: 1px solid #ddd;
+              "
+            >
+              $k
+            </th>
+            <td
+              style="
+                text-align: left;
+                padding: 8px;
+                line-height: 1.42857143;
+                vertical-align: top;
+                border-top: 1px solid #ddd;
+              "
+            >
+              <pre style="white-space: pre-wrap; overflow: visible">$v</pre>
+            </td>
+          </tr>
+          " }.join("\n") %>
+        </tbody>
+      </table>
 
-<p>nf-core/smrnaseq</p>
-<p><a href="https://github.com/nf-core/smrnaseq">https://github.com/nf-core/smrnaseq</a></p>
-
-</div>
-
-</body>
+      <p>nf-core/smrnaseq</p>
+      <p><a href="https://github.com/nf-core/smrnaseq">https://github.com/nf-core/smrnaseq</a></p>
+    </div>
+  </body>
 </html>
diff --git a/docs/output.md b/docs/output.md
index 14cc4e6d..c2b28d06 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -83,9 +83,8 @@ This is an example of the output we can get:
 - `umi_dedup/`
   - `*.tsv`: Results statistics files detailing the UMI deduplication results.
   - `*.bam`: If `--save_umi_intermeds` is specified, the deduplicated bam files **after** UMI deduplication will be placed in this directory. In addition the sorted and indexed files will be placed there as well.
-  - `samtools_stats/`
-    - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication.
-</details>
+  - `samtools_stats/` - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication.
+  </details>
 
 [UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline.
 

From 039843f42d61801163491d4518ec295203ba9a1f Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 13:06:41 +0200
Subject: [PATCH 024/117] FIX MERGE ERROR

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index f66c3a7c..043796d9 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -166,7 +166,7 @@ workflow SMRNASEQ {
         }
     }
 
-    reads_for_mirna = FASTQC_TRIMGALORE.out.reads
+    reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
     //
     // SUBWORKFLOW: remove contaminants from reads
     //

From 53c097c366ca885ab6b7145a33de48a5f1c7fff6 Mon Sep 17 00:00:00 2001
From: CKComputomics <christian.kubica@computomics.com>
Date: Tue, 21 Jun 2022 13:12:22 +0200
Subject: [PATCH 025/117] MAKE PRETTIER HAPPY

---
 CHANGELOG.md | 29 ++++++++++++++---------------
 README.md    |  8 ++++----
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03ae1048..cc0ba654 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,20 +16,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Parameters
 
-| Old parameter | New parameter            |
-| ------------- | ------------------------ |
-|               | `--mirGeneDB`            |
-|               | `--mirGeneDB_species`    |
-|               | `--mirGeneDB_gff`        |
-|               | `--mirGeneDB_mature`     |
-|               | `--mirGeneDB_hairpin`    |
-|               | `--contamination_filter` |
-|               | `--rrna`                 |
-|               | `--trna`                 |
-|               | `--cdna`                 |
-|               | `--ncrna`                |
-|               | `--pirna`                |
-|               | `--other_contamination`  |
+| Old parameter | New parameter               |
+| ------------- | --------------------------- |
+|               | `--mirGeneDB`               |
+|               | `--mirGeneDB_species`       |
+|               | `--mirGeneDB_gff`           |
+|               | `--mirGeneDB_mature`        |
+|               | `--mirGeneDB_hairpin`       |
+|               | `--contamination_filter`    |
+|               | `--rrna`                    |
+|               | `--trna`                    |
+|               | `--cdna`                    |
+|               | `--ncrna`                   |
+|               | `--pirna`                   |
+|               | `--other_contamination`     |
 |               | `--with_umi`                |
 |               | `--umitools_extract_method` |
 |               | `--umitools_bc_pattern`     |
@@ -37,7 +37,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 |               | `--save_umi_intermeds`      |
 |               | `--umi_merge_unmapped`      |
 
-
 ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua
 
 ### Major enhancements
diff --git a/README.md b/README.md
index 5177598a..f6198652 100644
--- a/README.md
+++ b/README.md
@@ -48,10 +48,10 @@ On release, automated continuous integration tests run the pipeline on a full-si
 9. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
    1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
 10. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
-   1. Mapping against reference genome with the mapper module
-   2. Known and novel miRNA discovery with the mirdeep2 module
-11. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
-12. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
+11. Mapping against reference genome with the mapper module
+12. Known and novel miRNA discovery with the mirdeep2 module
+13. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
+14. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
 
 ## Quick Start
 

From 608c4140a989b567dd472fb852c08984eac4e2d0 Mon Sep 17 00:00:00 2001
From: Christian Kubica <christian.kubica@computomics.com>
Date: Wed, 22 Jun 2022 15:26:29 +0200
Subject: [PATCH 026/117] ADD NF-CORE CAT

Includes the nf-core cat module to replace the custom concatenation
module.
---
 modules.json                             |  3 ++
 modules/nf-core/modules/cat/cat/main.nf  | 62 ++++++++++++++++++++++++
 modules/nf-core/modules/cat/cat/meta.yml | 37 ++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 modules/nf-core/modules/cat/cat/main.nf
 create mode 100644 modules/nf-core/modules/cat/cat/meta.yml

diff --git a/modules.json b/modules.json
index c328879e..c74fa935 100644
--- a/modules.json
+++ b/modules.json
@@ -3,6 +3,9 @@
     "homePage": "https://github.com/nf-core/smrnaseq",
     "repos": {
         "nf-core/modules": {
+            "cat/cat": {
+                "git_sha": "eeda4136c096688d04cc40bb3c70d948213ed641"
+            },
             "cat/fastq": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
diff --git a/modules/nf-core/modules/cat/cat/main.nf b/modules/nf-core/modules/cat/cat/main.nf
new file mode 100644
index 00000000..40e53f3e
--- /dev/null
+++ b/modules/nf-core/modules/cat/cat/main.nf
@@ -0,0 +1,62 @@
+process CAT_CAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'quay.io/biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default
+    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    out_zip  = prefix.endsWith('.gz')
+    in_zip   = file_list[0].endsWith('.gz')
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list = files_in.collect { it.toString() }
+    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/cat/cat/meta.yml b/modules/nf-core/modules/cat/cat/meta.yml
new file mode 100644
index 00000000..5eeff5a6
--- /dev/null
+++ b/modules/nf-core/modules/cat/cat/meta.yml
@@ -0,0 +1,37 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      homepage: None
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      tool_dev_url: None
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files_in:
+      type: file
+      description: List of compressed / uncompressed files
+      pattern: "*"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - file_out:
+      type: file
+      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+      pattern: "${file_out}"
+
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"

From 6d305c25ba933082b8a6003a8670fe62c032ad1b Mon Sep 17 00:00:00 2001
From: Christian Kubica <christian.kubica@computomics.com>
Date: Wed, 22 Jun 2022 15:27:32 +0200
Subject: [PATCH 027/117] REPLACE CUSTOM CAT WITH NF-CORE CAT

Implements the use of the nf-core cat module.
---
 subworkflows/local/umi_dedup.nf | 24 +++++++++++++-----------
 workflows/smrnaseq.nf           |  7 +++++--
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 94341a96..12033274 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -7,7 +7,7 @@ include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME    } from '../../modules/local/bowtie
 include { BAM_SORT_SAMTOOLS                   } from '../../subworkflows/nf-core/bam_sort_samtools'
 include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/modules/umitools/dedup/main'
 include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/modules/samtools/bam2fq/main'
-include { JOIN_FASTQS                         } from '../../modules/local/join_reads'
+include { CAT_CAT                             } from '../../modules/nf-core/modules/cat/cat/main'
 
 workflow DEDUPLICATE_UMIS {
     take:
@@ -22,17 +22,17 @@ workflow DEDUPLICATE_UMIS {
 
     if (!bt_index){
         INDEX_GENOME ( fasta )
-        bt_indices      = INDEX_GENOME.out.bt_indices
+        bt_index      = INDEX_GENOME.out.bowtie_indices
         fasta_formatted = INDEX_GENOME.out.fasta
         ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)
     } else {
-        bt_indices      = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" }
+        bt_index     = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" }
         fasta_formatted = fasta
     }
 
-    if (bt_indices){
+    if (bt_index){
         
-        UMI_MAP_GENOME ( reads, bt_indices.collect() )
+        UMI_MAP_GENOME ( reads, bt_index.collect() )
         ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
         BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
@@ -49,18 +49,20 @@ workflow DEDUPLICATE_UMIS {
         ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
 
         if ( params.umi_merge_unmapped ) {
+
+            SAMTOOLS_BAM2FQ.out.reads
+                .join(UMI_MAP_GENOME.out.unmapped)
+                .map { meta, file1, file2 -> [meta, [file1, file2]]}
+                .set { ch_cat }
     
-            JOIN_FASTQS ( 
-                SAMTOOLS_BAM2FQ.out.reads,
-                UMI_MAP_GENOME.out.unmapped 
-            )
-            ch_dedup_reads = JOIN_FASTQS.out.merged
+            CAT_CAT ( ch_cat )
+            ch_dedup_reads = CAT_CAT.out.file_out
         }
     }
 
     emit:
     reads    = ch_dedup_reads
-    indices  = bt_indices
+    indices  = bt_index
     stats    = ch_dedup_stats
     versions = ch_versions
 }
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 043796d9..dc30e875 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -160,13 +160,16 @@ workflow SMRNASEQ {
     if (params.with_umi){
         if (fasta){
             fasta_ch = file(fasta)
-            DEDUPLICATE_UMIS (fasta_ch, bt_index, FASTQC_UMITOOLS_TRIMGALORE.out.reads)
+            DEDUPLICATE_UMIS (
+                fasta_ch, 
+                bt_index, 
+                FASTQC_UMITOOLS_TRIMGALORE.out.reads
+            )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads
             ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
         }
     }
 
-    reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads
     //
     // SUBWORKFLOW: remove contaminants from reads
     //

From 57a8dbaab7f3614899952f4f880f6dcfa8c1c242 Mon Sep 17 00:00:00 2001
From: Christian Kubica <christian.kubica@computomics.com>
Date: Wed, 22 Jun 2022 15:30:58 +0200
Subject: [PATCH 028/117] REMOVE UNUSED MODULE

Deletes the now unused concatenation module.
---
 modules/local/join_reads.nf | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 modules/local/join_reads.nf

diff --git a/modules/local/join_reads.nf b/modules/local/join_reads.nf
deleted file mode 100644
index 9f68f062..00000000
--- a/modules/local/join_reads.nf
+++ /dev/null
@@ -1,21 +0,0 @@
-process JOIN_FASTQS {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda (params.enable_conda ? 'bioconda::samtools=1.13' : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' :
-        'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }"
-
-    input:
-    tuple val(meta), path(reads)
-    tuple val(unmapped_meta), path(unmapped_reads)
-
-    output:
-    tuple val(meta), path('*_merged.fq.gz'), emit: merged
-    script:
-    """
-    cat ${reads} ${unmapped_reads} > ${meta.id}_merged.fq.gz
-    """
-
-}

From 04b852cec33782bc51b05e5ed555cb93787d3871 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Thu, 12 Oct 2023 09:06:58 +0100
Subject: [PATCH 029/117] Re-organized code mirna_quant to make it simpler to
 read

---
 subworkflows/local/mirna_quant.nf | 37 +++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
index dfa16ab4..e26839bc 100644
--- a/subworkflows/local/mirna_quant.nf
+++ b/subworkflows/local/mirna_quant.nf
@@ -33,24 +33,17 @@ workflow MIRNA_QUANT {
     main:
     ch_versions = Channel.empty()
 
+
+
     PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed }
     ch_versions = ch_versions.mix(PARSE_MATURE.out.versions)
 
     FORMAT_MATURE ( mirna_parsed )
     ch_versions = ch_versions.mix(FORMAT_MATURE.out.versions)
 
-    PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed }
-    ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions)
-
-    FORMAT_HAIRPIN ( hairpin_parsed )
-    ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions)
-
     INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).index.set { mature_bowtie }
     ch_versions = ch_versions.mix(INDEX_MATURE.out.versions)
 
-    INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie }
-    ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions)
-
     reads
         .map { add_suffix(it, "mature") }
         .dump (tag:'msux')
@@ -64,15 +57,28 @@ workflow MIRNA_QUANT {
         .dump (tag:'hsux')
         .set { reads_hairpin }
 
-    BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() )
-    ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions)
-
     BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta )
     ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions)
 
+
+
+    PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed }
+    ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions)
+
+    FORMAT_HAIRPIN ( hairpin_parsed )
+    ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions)
+
+    INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie }
+    ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions)
+
+    BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() )
+    ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions)
+
     BAM_STATS_HAIRPIN ( BOWTIE_MAP_HAIRPIN.out.bam, FORMAT_HAIRPIN.out.formatted_fasta )
     ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions)
 
+
+
     BAM_STATS_MATURE.out.idxstats.collect{it[1]}
         .mix(BAM_STATS_HAIRPIN.out.idxstats.collect{it[1]})
         .dump(tag:'edger')
@@ -81,6 +87,8 @@ workflow MIRNA_QUANT {
         .set { edger_input }
     EDGER_QC ( edger_input )
 
+
+
     reads
         .map { add_suffix(it, "seqcluster") }
         .dump (tag:'ssux')
@@ -92,6 +100,9 @@ workflow MIRNA_QUANT {
     BOWTIE_MAP_SEQCLUSTER ( reads_collapsed, hairpin_bowtie.collect() )
     ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions)
 
+
+
+
     ch_mirtop_logs = Channel.empty()
     if (params.mirtrace_species){
         MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf )
@@ -106,6 +117,8 @@ workflow MIRNA_QUANT {
         .dump (tag:'gsux')
         .set { reads_genome }
 
+
+
     emit:
     fasta_mature        = FORMAT_MATURE.out.formatted_fasta
     fasta_hairpin       = FORMAT_HAIRPIN.out.formatted_fasta

From f285867b7a1e370148a0de57338fdd7c5a04af6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Thu, 12 Oct 2023 09:07:47 +0100
Subject: [PATCH 030/117] Switched to HTML less hairpin.fa and mature.fa

---
 conf/test.config     | 4 ++--
 nextflow.config      | 4 ++--
 nextflow_schema.json | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 1a81afee..450ef11d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -23,8 +23,8 @@ params {
 
     input            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
     fasta            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'
-    mature           = 'https://mirbase.org/download/CURRENT/mature.fa'
-    hairpin          = 'https://mirbase.org/download/CURRENT/hairpin.fa'
+    mature           = 'https://mirbase.org/download/mature.fa'
+    hairpin          = 'https://mirbase.org/download/hairpin.fa'
     mirna_gtf        = 'https://mirbase.org/download/hsa.gff3'
     mirtrace_species = 'hsa'
     protocol         = 'illumina'
diff --git a/nextflow.config b/nextflow.config
index 059feffe..f1a3dbcf 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,8 +20,8 @@ params {
     igenomes_base               = 's3://ngi-igenomes/igenomes'
     igenomes_ignore             = false
     mirna_gtf                   = null
-    mature                      = "https://mirbase.org/download/CURRENT/mature.fa"
-    hairpin                     = "https://mirbase.org/download/CURRENT/hairpin.fa"
+    mature                      = "https://mirbase.org/download/mature.fa"
+    hairpin                     = "https://mirbase.org/download/hairpin.fa"
     mirgenedb                   = false
     mirgenedb_mature            = null
     mirgenedb_hairpin           = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d0162e47..cc061902 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -102,7 +102,7 @@
                     "description": "Path to FASTA file with mature miRNAs.",
                     "fa_icon": "fas fa-wheelchair",
                     "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.",
-                    "default": "https://mirbase.org/download/CURRENT/mature.fa"
+                    "default": "https://mirbase.org/download/mature.fa"
                 },
                 "mirgenedb_mature": {
                     "type": "string",
@@ -114,7 +114,7 @@
                     "description": "Path to FASTA file with miRNAs precursors.",
                     "fa_icon": "fab fa-cuttlefish",
                     "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.",
-                    "default": "https://mirbase.org/download/CURRENT/hairpin.fa"
+                    "default": "https://mirbase.org/download/hairpin.fa"
                 },
                 "mirgenedb_hairpin": {
                     "type": "string",

From 6948bad72d1355c3704d17f4a2f0df8a305078f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 08:39:16 +0100
Subject: [PATCH 031/117] detect bowtie2 idx instead of bowtie1 idx

---
 modules/local/bowtie_map_contaminants.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf
index d10f13b5..e47ee7c7 100644
--- a/modules/local/bowtie_map_contaminants.nf
+++ b/modules/local/bowtie_map_contaminants.nf
@@ -22,7 +22,7 @@ process BOWTIE_MAP_CONTAMINANTS {
 
     script:
     """
-    INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'`
+    INDEX=`find -L ./ -name "*.3.bt2" | sed 's/.3.bt2//'`
     bowtie2 \\
         --threads ${task.cpus} \\
         --very-sensitive-local \\

From 16bb675f2bf2c5105cc9f956a8c6a93eea09523e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 10:22:09 +0100
Subject: [PATCH 032/117] print mirQuant inputs

---
 workflows/smrnaseq.nf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 049851d0..60c6106f 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -178,6 +178,11 @@ workflow SMRNASEQ {
 
     }
 
+    print reference_mature
+    print reference_hairpin
+    print mirna_gtf
+    print mirna_reads
+
     MIRNA_QUANT (
         [ [:], reference_mature],
         [ [:], reference_hairpin],

From 90527a2ab0bd1580d34f6ab39b643315746929d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 11:29:57 +0100
Subject: [PATCH 033/117] debug mirdeeprun

---
 subworkflows/local/mirdeep2.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index f8098ba5..484beabd 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -23,6 +23,13 @@ workflow MIRDEEP2 {
     MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index )
     ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first())
 
+
+    MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " }
+
+    print "DEBUG fasta:" + fasta
+    print "DEBUG hairpin:" + hairpin
+    print "DEBUG mature:" + mature
+
     MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
     ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first())
 

From 39f0597536c3308fbbdef7c0f778ec4d61a772e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 11:32:12 +0100
Subject: [PATCH 034/117] remove previous debug print

---
 workflows/smrnaseq.nf | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 60c6106f..049851d0 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -178,11 +178,6 @@ workflow SMRNASEQ {
 
     }
 
-    print reference_mature
-    print reference_hairpin
-    print mirna_gtf
-    print mirna_reads
-
     MIRNA_QUANT (
         [ [:], reference_mature],
         [ [:], reference_hairpin],

From c198eda989caaccd3c86fc5131b0787161dda0ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 12:16:07 +0100
Subject: [PATCH 035/117] add more debug messages

---
 subworkflows/local/mirdeep2.nf | 6 +++---
 workflows/smrnaseq.nf          | 6 ++++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index 484beabd..e7ca4789 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -26,9 +26,9 @@ workflow MIRDEEP2 {
 
     MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " }
 
-    print "DEBUG fasta:" + fasta
-    print "DEBUG hairpin:" + hairpin
-    print "DEBUG mature:" + mature
+    println "DEBUG fasta:" + fasta
+    println "DEBUG hairpin:" + hairpin
+    println "DEBUG mature:" + mature
 
     MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
     ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first())
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 049851d0..a463de5a 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -195,6 +195,12 @@ workflow SMRNASEQ {
         genome_stats = GENOME_QUANT.out.stats
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
 
+        FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" }
+        GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" }
+        GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" }
+        MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" }
+        MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" }
+
         if (!params.skip_mirdeep) {
             MIRDEEP2 (
                 FASTQC_FASTP.out.reads,

From 85cadb0d53e4eb422934d8e4f379a53a79f99822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 12:22:15 +0100
Subject: [PATCH 036/117] remove prints

---
 subworkflows/local/mirdeep2.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index e7ca4789..a08023a0 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -24,11 +24,11 @@ workflow MIRDEEP2 {
     ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first())
 
 
-    MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " }
+    // MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " }
 
-    println "DEBUG fasta:" + fasta
-    println "DEBUG hairpin:" + hairpin
-    println "DEBUG mature:" + mature
+    // println "DEBUG fasta:" + fasta
+    // println "DEBUG hairpin:" + hairpin
+    // println "DEBUG mature:" + mature
 
     MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
     ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first())

From 7c703c9b0a78781ed9d80fc5db42967e1376e7a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 12:34:32 +0100
Subject: [PATCH 037/117] remove meta from mature and hairpin

---
 workflows/smrnaseq.nf | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index a463de5a..be8ae374 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -195,11 +195,15 @@ workflow SMRNASEQ {
         genome_stats = GENOME_QUANT.out.stats
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
 
+        hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] }
+        mature_clean  = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] }
         FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" }
         GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" }
         GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" }
-        MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" }
-        MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" }
+        // MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" }
+        // MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" }
+        hairpin_clean.view { it -> println "DEBUG ==> hairpin_clean = $it" }
+        mature_clean.view { it -> println "DEBUG ==> mature_clean = $it" }
 
         if (!params.skip_mirdeep) {
             MIRDEEP2 (

From e8d04d21a4b6ce742389cfdf2fd73daca27523fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 12:37:32 +0100
Subject: [PATCH 038/117] connect clean hairpin / mature to mirdeep2

---
 workflows/smrnaseq.nf | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index be8ae374..f4415d12 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -210,8 +210,10 @@ workflow SMRNASEQ {
                 FASTQC_FASTP.out.reads,
                 GENOME_QUANT.out.fasta,
                 GENOME_QUANT.out.index.collect(),
-                MIRNA_QUANT.out.fasta_hairpin,
-                MIRNA_QUANT.out.fasta_mature
+                // MIRNA_QUANT.out.fasta_hairpin,
+                // MIRNA_QUANT.out.fasta_mature
+                hairpin_clean,
+                mature_clean
             )
             ch_versions = ch_versions.mix(MIRDEEP2.out.versions)
         }

From 9285dfbe175e269f9dbd6e051802e3c537b2798c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 12:41:06 +0100
Subject: [PATCH 039/117] remove meta from fasta in mirdeep2_run

---
 modules/local/mirdeep2_run.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf
index e4e2aaaf..b676aa10 100644
--- a/modules/local/mirdeep2_run.nf
+++ b/modules/local/mirdeep2_run.nf
@@ -10,7 +10,7 @@ process MIRDEEP2_RUN {
         'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }"
 
     input:
-    tuple val(meta2), path(fasta)
+    path(fasta)
     tuple path(reads), path(arf)
     path hairpin
     path mature

From 3a3f11bf057c9422d8e7eb40425b8e2c7f6ac704 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Mon, 16 Oct 2023 14:27:57 +0100
Subject: [PATCH 040/117] Clean debug prints

---
 subworkflows/local/mirdeep2.nf | 7 -------
 workflows/smrnaseq.nf          | 9 ---------
 2 files changed, 16 deletions(-)

diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf
index a08023a0..f8098ba5 100644
--- a/subworkflows/local/mirdeep2.nf
+++ b/subworkflows/local/mirdeep2.nf
@@ -23,13 +23,6 @@ workflow MIRDEEP2 {
     MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index )
     ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first())
 
-
-    // MIRDEEP2_MAPPER.out.mirdeep2_inputs.view{ it -> "DEBUG: MIRDEEP2_MAPPER.out.mirdeep2_inputs: " }
-
-    // println "DEBUG fasta:" + fasta
-    // println "DEBUG hairpin:" + hairpin
-    // println "DEBUG mature:" + mature
-
     MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
     ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first())
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index f4415d12..17eb985d 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -197,21 +197,12 @@ workflow SMRNASEQ {
 
         hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] }
         mature_clean  = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] }
-        FASTQC_FASTP.out.reads.view { it -> println "DEBUG ==> FASTQC_FASTP.out.reads = $it" }
-        GENOME_QUANT.out.fasta.view { it -> println "DEBUG ==> GENOME_QUANT.out.fasta = $it" }
-        GENOME_QUANT.out.index.view { it -> println "DEBUG ==> GENOME_QUANT.out.index = $it" }
-        // MIRNA_QUANT.out.fasta_hairpin.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_hairpin = $it" }
-        // MIRNA_QUANT.out.fasta_mature.view { it -> println "DEBUG ==> MIRNA_QUANT.out.fasta_mature = $it" }
-        hairpin_clean.view { it -> println "DEBUG ==> hairpin_clean = $it" }
-        mature_clean.view { it -> println "DEBUG ==> mature_clean = $it" }
 
         if (!params.skip_mirdeep) {
             MIRDEEP2 (
                 FASTQC_FASTP.out.reads,
                 GENOME_QUANT.out.fasta,
                 GENOME_QUANT.out.index.collect(),
-                // MIRNA_QUANT.out.fasta_hairpin,
-                // MIRNA_QUANT.out.fasta_mature
                 hairpin_clean,
                 mature_clean
             )

From c6efa86cf50dc2f359be18b309bea2bfe9590382 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 09:30:51 +0100
Subject: [PATCH 041/117] update bowtie map contaminant with right options

---
 modules/local/bowtie_map_contaminants.nf | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf
index e47ee7c7..10f5dadd 100644
--- a/modules/local/bowtie_map_contaminants.nf
+++ b/modules/local/bowtie_map_contaminants.nf
@@ -22,16 +22,17 @@ process BOWTIE_MAP_CONTAMINANTS {
 
     script:
     """
-    INDEX=`find -L ./ -name "*.3.bt2" | sed 's/.3.bt2//'`
+    INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"`
     bowtie2 \\
+        -x \$INDEX \\
+        -U ${reads} \\
         --threads ${task.cpus} \\
+        --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\
         --very-sensitive-local \\
         -k 1 \\
-        -x \$INDEX \\
-        --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\
-        ${reads} \\
+        -S ${meta.id}.filter.contaminant.sam \\
         ${args} \\
-        -S ${meta.id}.filter.contaminant.sam > ${meta.id}.contaminant_bowtie.log 2>&1
+        > ${meta.id}.contaminant_bowtie.log 2>&1
 
     # extracting number of reads from bowtie logs
     awk -v type=${contaminant_type} 'BEGIN{tot=0} {if(NR==4 || NR == 5){tot += \$1}} END {print "\\""type"\\": "tot }' ${meta.id}.contaminant_bowtie.log | tr -d , > filtered.${meta.id}_${contaminant_type}.stats

From 0be5a172404e76da496418afc23e7ff0c061f301 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 09:45:33 +0100
Subject: [PATCH 042/117] define args variable

---
 modules/local/bowtie_map_contaminants.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf
index 10f5dadd..d744b1fd 100644
--- a/modules/local/bowtie_map_contaminants.nf
+++ b/modules/local/bowtie_map_contaminants.nf
@@ -21,6 +21,8 @@ process BOWTIE_MAP_CONTAMINANTS {
     task.ext.when == null || task.ext.when
 
     script:
+    def args = task.ext.args ?: ""
+
     """
     INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"`
     bowtie2 \\

From 13fc7a47725374e1dded9a9f42ff5815657ec033 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 14:49:44 +0100
Subject: [PATCH 043/117] Fix? - include fastqc trim in multiQC

---
 workflows/smrnaseq.nf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 17eb985d..f73bc79c 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -230,7 +230,8 @@ workflow SMRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
         ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
         ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]))
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_trim_zip.collect{it[1]}.ifEmpty([]))
+        // ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))

From 71590dba904f39fa3d524f3eefee4c0df7d4f2f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 15:32:04 +0100
Subject: [PATCH 044/117] Refresh pipeline summary

---
 README.md | 55 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 44066051..dea262d8 100644
--- a/README.md
+++ b/README.md
@@ -26,29 +26,38 @@ You can find numerous talks on the nf-core events page from various topics inclu
 
 ## Pipeline summary
 
-1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/))
-   1. Insert Size calculation
-   2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads))
-3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml))
-4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-5. Alignment against miRBase hairpin
-   1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-   2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-6. Post-alignment processing of miRBase hairpin
-   1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
-   2. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html))
-      - TMM normalization and a table of top expression hairpin
-      - MDS plot clustering samples
-      - Heatmap of sample similarities
-   3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop))
-7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
-   1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
-8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation))
-   1. Mapping against reference genome with the mapper module
-   2. Known and novel miRNA discovery with the mirdeep2 module
-9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace))
-10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
+1. Quality check and triming
+    1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
+    2. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp))
+    3. Trim read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
+2. miRNA QC ([`miRTrace`](https://github.com/friedlanderlab/mirtrace))
+3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) (Optional)
+    1. rRNA filtration
+    2. tRNA filtration
+    3. cDNA filtration
+    4. ncRNA filtration
+    5. piRNA filtration
+    6. Others filtration
+4. miRNA quantification
+    - EdgeR
+        1. Reads alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+        2. Post-alignment processing of alignment against Mature miRNA ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
+        3. Unmapped reads (from reads vs mature miRNA) alignment against miRBase hairpin
+        4. Post-alignment processing of alignment against Hairpin ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
+        5. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html))
+            - TMM normalization and a table of top expression hairpin
+            - MDS plot clustering samples
+            - Heatmap of sample similarities
+    - Mirtop quantification
+        1. Read collapsing  ([`seqcluster`](https://github.com/lpantano/seqcluster))
+        2. miRNA and isomiR annotation ([`mirtop`](https://github.com/miRTop/mirtop))
+5. Genome Quantification (Optional)
+    1. Reads alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml))
+    2. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
+6. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) (Optional)
+    1. Mapping against reference genome with the mapper module
+    2. Known and novel miRNA discovery with the mirdeep2 module
+7. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/))
 
 ## Usage
 

From 892ab8924217539ce33679b89eaffa1ff3e2fe30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 18:33:33 +0100
Subject: [PATCH 045/117] Fix version: missing quotes and misaligned END_VER

---
 modules/local/blat_mirna.nf          | 2 +-
 modules/local/mirdeep2_prepare.nf    | 2 +-
 modules/local/mirdeep2_run.nf        | 2 +-
 modules/local/mirtop_quant.nf        | 2 +-
 modules/local/mirtrace.nf            | 2 +-
 modules/local/parse_fasta_mirna.nf   | 2 +-
 modules/local/seqcluster_collapse.nf | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/local/blat_mirna.nf b/modules/local/blat_mirna.nf
index 7f8a2324..aa0d3d51 100644
--- a/modules/local/blat_mirna.nf
+++ b/modules/local/blat_mirna.nf
@@ -53,7 +53,7 @@ process BLAT_MIRNA {
         blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt
         awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants  > filtered.fa
 
-cat <<-END_VERSIONS > versions.yml
+        cat <<-END_VERSIONS > versions.yml
         "${task.process}":
             blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }')
         END_VERSIONS
diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf
index 7e2f2437..124b5b63 100644
--- a/modules/local/mirdeep2_prepare.nf
+++ b/modules/local/mirdeep2_prepare.nf
@@ -23,7 +23,7 @@ process MIRDEEP2_PIGZ {
     pigz -f -d -p $task.cpus $reads
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
     END_VERSIONS
     """
diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf
index b676aa10..17a484c1 100644
--- a/modules/local/mirdeep2_run.nf
+++ b/modules/local/mirdeep2_run.nf
@@ -35,7 +35,7 @@ process MIRDEEP2_RUN {
         -z _${reads.simpleName}
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         mirdeep2: \$(echo "$VERSION")
     END_VERSIONS
     """
diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf
index e97d6a09..ab38c93d 100644
--- a/modules/local/mirtop_quant.nf
+++ b/modules/local/mirtop_quant.nf
@@ -34,7 +34,7 @@ process MIRTOP_QUANT {
     mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //')
     END_VERSIONS
     """
diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf
index f576ebc0..95989293 100644
--- a/modules/local/mirtrace.nf
+++ b/modules/local/mirtrace.nf
@@ -43,7 +43,7 @@ process MIRTRACE_RUN {
         --force
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         mirtrace: \$(echo \$(mirtrace -v 2>&1))
     END_VERSIONS
     """
diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf
index a0bbc75e..ad63401e 100644
--- a/modules/local/parse_fasta_mirna.nf
+++ b/modules/local/parse_fasta_mirna.nf
@@ -34,7 +34,7 @@ process PARSE_FASTA_MIRNA {
     seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
     END_VERSIONS
     """
diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf
index 39f6ce85..82470e5a 100644
--- a/modules/local/seqcluster_collapse.nf
+++ b/modules/local/seqcluster_collapse.nf
@@ -25,7 +25,7 @@ process SEQCLUSTER_SEQUENCES {
     mv collapsed/*.fastq.gz final/.
 
     cat <<-END_VERSIONS > versions.yml
-    ${task.process}":
+    "${task.process}":
         seqcluster: \$(echo \$(seqcluster --version 2>&1) | sed 's/^.*seqcluster //')
     END_VERSIONS
     """

From 05ae65ad2f2e9ad9c1117f55697298266389a0f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Tue, 17 Oct 2023 18:34:03 +0100
Subject: [PATCH 046/117] Update doc with HTML less fa

---
 docs/usage.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 9bfe5494..b0b7e647 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -27,10 +27,10 @@ It should point to the 3-letter species name used by [miRBase](https://www.mirba
 Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below.
 
 - `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3`
-- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa`
-- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa`
+- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/mature.fa`
+- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/hairpin.fa`
 
-If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below .
+If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below.
 
 - `mirgenedb_gff`: The data can not be downloaded automatically (URLs are created with short term tokens in it), thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `--mirgenedb_species`.
 - `mirgenedb_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`.

From e6a0b75b4ae05dc2d1203265b3ea53a8c883b203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Wed, 18 Oct 2023 08:18:57 +0100
Subject: [PATCH 047/117] update mirtrace output directory

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 556d8509..ba33f736 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -80,7 +80,7 @@ process {
 process {
     withName: 'MIRTRACE_RUN' {
         publishDir = [
-            path: { "${params.outdir}/mirtrace/${meta.id}" },
+            path: { "${params.outdir}/mirtrace" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]

From dd979483a127713a273590c93ed4c706bbfcc187 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Wed, 18 Oct 2023 10:05:37 +0100
Subject: [PATCH 048/117] update fastqc config

---
 conf/modules.config | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ba33f736..8620836e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -87,14 +87,6 @@ process {
     }
 }
 
-if (!(params.skip_fastqc)) {
-    process {
-        withName: '.*:FASTQC_FASTP:FASTQC_.*' {
-            ext.args = '--quiet'
-        }
-    }
-}
-
 if (!params.skip_fastp) {
     process {
         withName: 'FASTP' {
@@ -130,6 +122,14 @@ if (!params.skip_fastp) {
 
     if (!params.skip_fastqc) {
         process {
+            withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' {
+                ext.args = '--quiet'
+                publishDir = [
+                    path: { "${params.outdir}/fastqc/raw" },
+                    mode: params.publish_dir_mode,
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ]
+            }
             withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' {
                 ext.args = '--quiet'
                 publishDir = [

From b59a73f317442b7dceb6f8e2772a6bb0240eaa98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= <sguizard@ed.ac.uk>
Date: Wed, 18 Oct 2023 10:57:08 +0100
Subject: [PATCH 049/117] Update mirdeep2 publish_dir path

---
 conf/modules.config | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 8620836e..21b16110 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -236,7 +236,14 @@ if (!params.skip_mirdeep) {
     process {
         withName: 'MIRDEEP2_MAPPER' {
             publishDir = [
-                path: { "${params.outdir}/mirdeep" },
+                path: { "${params.outdir}/mirdeep2/mapper" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+        withName: 'MIRDEEP2_RUN' {
+            publishDir = [
+                path: { "${params.outdir}/mirdeep2/run" },
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]

From c835f592a7583222f77c9d23f9edf81c1067791e Mon Sep 17 00:00:00 2001
From: Fabian Hausmann <fabian.hausmann@zmnh.uni-hamburg.de>
Date: Tue, 28 Nov 2023 09:55:00 +0100
Subject: [PATCH 050/117] Fix stats not executed from genome

---
 subworkflows/local/genome_quant.nf | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf
index 967b2757..b310c76a 100644
--- a/subworkflows/local/genome_quant.nf
+++ b/subworkflows/local/genome_quant.nf
@@ -8,7 +8,7 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie
 
 workflow GENOME_QUANT {
     take:
-    fasta 
+    fasta
     index
     reads // channel: [ val(meta), [ reads ] ]
 
@@ -28,8 +28,10 @@ workflow GENOME_QUANT {
     if (bowtie_index){
         BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() )
         ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
-
-        BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty()  )
+        fasta_formatted
+            .map { file -> tuple(file.baseName, file) }
+            .set { sort_input }
+        BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam,  sort_input )
         ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
     }
 

From 7278013731567b2d516e9046438233c8f818c971 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Tue, 19 Dec 2023 18:36:51 +0000
Subject: [PATCH 051/117] Template update for nf-core/tools version 2.11

---
 .github/CONTRIBUTING.md                       |   3 +
 .github/PULL_REQUEST_TEMPLATE.md              |   1 +
 .github/workflows/ci.yml                      |   2 +-
 .github/workflows/fix-linting.yml             |   4 +-
 .github/workflows/linting.yml                 |  12 +-
 .gitpod.yml                                   |   4 +-
 CHANGELOG.md                                  |   2 +-
 README.md                                     |  17 +--
 assets/multiqc_config.yml                     |   6 +-
 assets/slackreport.json                       |   2 +-
 conf/modules.config                           |   2 +-
 docs/usage.md                                 |   4 +-
 lib/NfcoreTemplate.groovy                     |  32 ++---
 modules.json                                  |   6 +-
 .../dumpsoftwareversions/environment.yml      |   7 ++
 .../custom/dumpsoftwareversions/main.nf       |   6 +-
 .../custom/dumpsoftwareversions/meta.yml      |   7 +-
 .../dumpsoftwareversions/tests/main.nf.test   |  38 ++++++
 .../tests/main.nf.test.snap                   |  27 +++++
 .../dumpsoftwareversions/tests/tags.yml       |   2 +
 modules/nf-core/fastqc/environment.yml        |   7 ++
 modules/nf-core/fastqc/main.nf                |  10 +-
 modules/nf-core/fastqc/meta.yml               |   5 +
 modules/nf-core/fastqc/tests/main.nf.test     | 109 ++++++++++++++++++
 .../nf-core/fastqc/tests/main.nf.test.snap    |  10 ++
 modules/nf-core/fastqc/tests/tags.yml         |   2 +
 modules/nf-core/multiqc/environment.yml       |   7 ++
 modules/nf-core/multiqc/main.nf               |   8 +-
 modules/nf-core/multiqc/meta.yml              |  11 +-
 modules/nf-core/multiqc/tests/main.nf.test    |  63 ++++++++++
 modules/nf-core/multiqc/tests/tags.yml        |   2 +
 nextflow.config                               |  12 +-
 32 files changed, 361 insertions(+), 69 deletions(-)
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
 create mode 100644 modules/nf-core/fastqc/environment.yml
 create mode 100644 modules/nf-core/fastqc/tests/main.nf.test
 create mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/fastqc/tests/tags.yml
 create mode 100644 modules/nf-core/multiqc/environment.yml
 create mode 100644 modules/nf-core/multiqc/tests/main.nf.test
 create mode 100644 modules/nf-core/multiqc/tests/tags.yml

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 4d09370b..bd134c88 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from
 
 ## Tests
 
+You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
+receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.
+
 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index e278390b..3cdbf2b7 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn
 - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d40ffd75..60f93b48 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
           - "latest-everything"
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index ae6a0d7b..0cd44ed6 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       # Use the @nf-core-bot token to check out so we can push later
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           token: ${{ secrets.nf_core_bot_auth_token }}
 
@@ -24,7 +24,7 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
 
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
 
       - name: Install Prettier
         run: npm install -g prettier @prettier/plugin-php
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index b8bdd214..905c58e4 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -14,9 +14,9 @@ jobs:
   EditorConfig:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
 
       - name: Install editorconfig-checker
         run: npm install -g editorconfig-checker
@@ -27,9 +27,9 @@ jobs:
   Prettier:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
 
       - name: Install Prettier
         run: npm install -g prettier
@@ -40,7 +40,7 @@ jobs:
   PythonBlack:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Check code lints with Black
         uses: psf/black@stable
@@ -71,7 +71,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
diff --git a/.gitpod.yml b/.gitpod.yml
index 25488dcc..acf72695 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -4,7 +4,9 @@ tasks:
     command: |
       pre-commit install --install-hooks
       nextflow self-update
-
+  - name: unset JAVA_TOOL_OPTIONS
+    command: |
+      unset JAVA_TOOL_OPTIONS
 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
     - codezombiech.gitignore # Language support for .gitignore files
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6c4fb89..809d1ec2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v2.2.3 - [date]
+## v2.3dev - [date]
 
 Initial release of nf-core/smrnaseq, created with the [nf-core](https://nf-co.re/) template.
 
diff --git a/README.md b/README.md
index e5dc1d80..d0b5ad21 100644
--- a/README.md
+++ b/README.md
@@ -30,11 +30,8 @@
 
 ## Usage
 
-:::note
-If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
-to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
-with `-profile test` before running the workflow on actual data.
-:::
+> [!NOTE]
+> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
 
 <!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
      Explain what rows and columns represent. For instance (please edit as appropriate):
@@ -63,11 +60,9 @@ nextflow run nf-core/smrnaseq \
    --outdir <OUTDIR>
 ```
 
-:::warning
-Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
-provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
-:::
+> [!WARNING]
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
+> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
 
 For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters).
 
@@ -94,7 +89,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 ## Citations
 
 <!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
-<!-- If you use  nf-core/smrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
+<!-- If you use nf-core/smrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
 
 <!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
 
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 267c3cc9..b92cb9e7 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,9 +1,7 @@
 report_comment: >
-  
-  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/releases/tag/2.2.3" target="_blank">nf-core/smrnaseq</a>
+  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/tree/dev" target="_blank">nf-core/smrnaseq</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/smrnaseq/2.2.3/docs/output" target="_blank">documentation</a>.
-  
+  <a href="https://nf-co.re/smrnaseq/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-smrnaseq-methods-description":
     order: -1000
diff --git a/assets/slackreport.json b/assets/slackreport.json
index 214c7fa9..1884b21a 100644
--- a/assets/slackreport.json
+++ b/assets/slackreport.json
@@ -3,7 +3,7 @@
         {
             "fallback": "Plain-text summary of the attachment.",
             "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
-            "author_name": "nf-core/smrnaseq v${version} - ${runName}",
+            "author_name": "nf-core/smrnaseq ${version} - ${runName}",
             "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
             "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
             "fields": [
diff --git a/conf/modules.config b/conf/modules.config
index 39e81386..d91c6aba 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -39,7 +39,7 @@ process {
     }
 
     withName: 'MULTIQC' {
-        ext.args   = params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+        ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
         publishDir = [
             path: { "${params.outdir}/multiqc" },
             mode: params.publish_dir_mode,
diff --git a/docs/usage.md b/docs/usage.md
index 8878af27..f14fb92e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -20,7 +20,7 @@ You will need to create a samplesheet with information about the samples you wou
 
 The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
 
-```console
+```csv title="samplesheet.csv"
 sample,fastq_1,fastq_2
 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
 CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
@@ -33,7 +33,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th
 
 A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
 
-```console
+```csv title="samplesheet.csv"
 sample,fastq_1,fastq_2
 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
 CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 01b8653d..e248e4c3 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -4,6 +4,7 @@
 
 import org.yaml.snakeyaml.Yaml
 import groovy.json.JsonOutput
+import nextflow.extension.FilesEx
 
 class NfcoreTemplate {
 
@@ -141,12 +142,14 @@ class NfcoreTemplate {
             try {
                 if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
                 // Try to send HTML e-mail using sendmail
+                def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+                sendmail_tf.withWriter { w -> w << sendmail_html }
                 [ 'sendmail', '-t' ].execute() << sendmail_html
                 log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
             } catch (all) {
                 // Catch failures and try with plaintext
                 def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
-                if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
+                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
                     mail_cmd += [ '-A', mqc_report ]
                 }
                 mail_cmd.execute() << email_html
@@ -155,14 +158,16 @@ class NfcoreTemplate {
         }
 
         // Write summary e-mail HTML to a file
-        def output_d = new File("${params.outdir}/pipeline_info/")
-        if (!output_d.exists()) {
-            output_d.mkdirs()
-        }
-        def output_hf = new File(output_d, "pipeline_report.html")
+        def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
         output_hf.withWriter { w -> w << email_html }
-        def output_tf = new File(output_d, "pipeline_report.txt")
+        FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
+        output_hf.delete()
+
+        // Write summary e-mail TXT to a file
+        def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
         output_tf.withWriter { w -> w << email_txt }
+        FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
+        output_tf.delete()
     }
 
     //
@@ -227,15 +232,14 @@ class NfcoreTemplate {
     // Dump pipeline parameters in a json file
     //
     public static void dump_parameters(workflow, params) {
-        def output_d = new File("${params.outdir}/pipeline_info/")
-        if (!output_d.exists()) {
-            output_d.mkdirs()
-        }
-
         def timestamp  = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
-        def output_pf  = new File(output_d, "params_${timestamp}.json")
+        def filename   = "params_${timestamp}.json"
+        def temp_pf    = new File(workflow.launchDir.toString(), ".${filename}")
         def jsonStr    = JsonOutput.toJson(params)
-        output_pf.text = JsonOutput.prettyPrint(jsonStr)
+        temp_pf.text   = JsonOutput.prettyPrint(jsonStr)
+
+        FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json")
+        temp_pf.delete()
     }
 
     //
diff --git a/modules.json b/modules.json
index 98249a7b..0606570e 100644
--- a/modules.json
+++ b/modules.json
@@ -7,17 +7,17 @@
                 "nf-core": {
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
-                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
                         "installed_by": ["modules"]
                     },
                     "fastqc": {
                         "branch": "master",
-                        "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
                         "installed_by": ["modules"]
                     },
                     "multiqc": {
                         "branch": "master",
-                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "git_sha": "4ab13872435962dadc239979554d13709e20bf29",
                         "installed_by": ["modules"]
                     }
                 }
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
new file mode 100644
index 00000000..f0c63f69
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -0,0 +1,7 @@
+name: custom_dumpsoftwareversions
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::multiqc=1.17
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index ebc87273..7685b33c 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
     label 'process_single'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda "bioconda::multiqc=1.14"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.14--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.17--pyhdfd78af_0' }"
 
     input:
     path versions
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
index c32657de..5f15a5fd 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -1,4 +1,4 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: custom_dumpsoftwareversions
 description: Custom module used to dump software versions within the nf-core pipeline template
 keywords:
@@ -16,7 +16,6 @@ input:
       type: file
       description: YML file containing software versions
       pattern: "*.yml"
-
 output:
   - yml:
       type: file
@@ -30,7 +29,9 @@ output:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
-
 authors:
   - "@drpatelh"
   - "@grst"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
new file mode 100644
index 00000000..eec1db10
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
+    script "../main.nf"
+    process "CUSTOM_DUMPSOFTWAREVERSIONS"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "dumpsoftwareversions"
+    tag "custom/dumpsoftwareversions"
+
+    test("Should run without failures") {
+        when {
+            process {
+                """
+                def tool1_version = '''
+                TOOL1:
+                    tool1: 0.11.9
+                '''.stripIndent()
+
+                def tool2_version = '''
+                TOOL2:
+                    tool2: 1.9
+                '''.stripIndent()
+
+                input[0] = Channel.of(tool1_version, tool2_version).collectFile()
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
new file mode 100644
index 00000000..4274ed57
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
@@ -0,0 +1,27 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
+                ],
+                "1": [
+                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
+                ],
+                "2": [
+                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+                ],
+                "mqc_yml": [
+                    "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d"
+                ],
+                "versions": [
+                    "versions.yml:md5,3843ac526e762117eedf8825b40683df"
+                ],
+                "yml": [
+                    "software_versions.yml:md5,1c851188476409cda5752ce971b20b58"
+                ]
+            }
+        ],
+        "timestamp": "2023-11-03T14:43:22.157011"
+    }
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
new file mode 100644
index 00000000..405aa24a
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/dumpsoftwareversions:
+  - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 00000000..1787b38a
--- /dev/null
+++ b/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+name: fastqc
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 249f9064..9e19a74c 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,10 +2,10 @@ process FASTQC {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "bioconda::fastqc=0.11.9"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
-        'biocontainers/fastqc:0.11.9--0' }"
+        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(reads)
@@ -37,7 +37,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 
@@ -49,7 +49,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
index 4da5bb5a..ee5507e0 100644
--- a/modules/nf-core/fastqc/meta.yml
+++ b/modules/nf-core/fastqc/meta.yml
@@ -50,3 +50,8 @@ authors:
   - "@grst"
   - "@ewels"
   - "@FelixKrueger"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
new file mode 100644
index 00000000..b9e8f926
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -0,0 +1,109 @@
+nextflow_process {
+
+    name "Test Process FASTQC"
+    script "../main.nf"
+    process "FASTQC"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fastqc"
+
+    test("Single-Read") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:true ],
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
+            // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+            // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
+            { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" },
+            { assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert snapshot(process.out.versions).match("versions") },
+            { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" }
+            )
+        }
+    }
+// TODO
+// //
+// // Test with paired-end data
+// //
+// workflow test_fastqc_paired_end {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 [
+//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+//                 ]
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with interleaved data
+// //
+// workflow test_fastqc_interleaved {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with bam data
+// //
+// workflow test_fastqc_bam {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with multiple samples
+// //
+// workflow test_fastqc_multiple {
+//     input = [
+//                 [id: 'test', single_end: false], // meta map
+//                 [
+//                     file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+//                     file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+//                 ]
+//             ]
+
+//     FASTQC ( input )
+// }
+
+// //
+// // Test with custom prefix
+// //
+// workflow test_fastqc_custom_prefix {
+//     input = [
+//                 [ id:'mysample', single_end:true ], // meta map
+//                 file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+//             ]
+
+//     FASTQC ( input )
+// }
+}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
new file mode 100644
index 00000000..636a32ce
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+            ]
+        ],
+        "timestamp": "2023-10-09T23:40:54+0000"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml
new file mode 100644
index 00000000..7834294b
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/tags.yml
@@ -0,0 +1,2 @@
+fastqc:
+  - modules/nf-core/fastqc/**
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
new file mode 100644
index 00000000..bc0bdb5b
--- /dev/null
+++ b/modules/nf-core/multiqc/environment.yml
@@ -0,0 +1,7 @@
+name: multiqc
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::multiqc=1.18
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 1fc387be..00cc48d2 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,10 +1,10 @@
 process MULTIQC {
     label 'process_single'
 
-    conda "bioconda::multiqc=1.14"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.14--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.18--pyhdfd78af_0' }"
 
     input:
     path  multiqc_files, stageAs: "?/*"
@@ -25,12 +25,14 @@ process MULTIQC {
     def args = task.ext.args ?: ''
     def config = multiqc_config ? "--config $multiqc_config" : ''
     def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+    def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
     """
     multiqc \\
         --force \\
         $args \\
         $config \\
         $extra_config \\
+        $logo \\
         .
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index f93b5ee5..f1aa660e 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -1,5 +1,5 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
-name: MultiQC
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: multiqc
 description: Aggregate results from bioinformatics analyses across many samples into a single report
 keywords:
   - QC
@@ -13,7 +13,6 @@ tools:
       homepage: https://multiqc.info/
       documentation: https://multiqc.info/docs/
       licence: ["GPL-3.0-or-later"]
-
 input:
   - multiqc_files:
       type: file
@@ -31,7 +30,6 @@ input:
       type: file
       description: Optional logo file for MultiQC
       pattern: "*.{png}"
-
 output:
   - report:
       type: file
@@ -54,3 +52,8 @@ authors:
   - "@bunop"
   - "@drpatelh"
   - "@jfy133"
+maintainers:
+  - "@abhi18av"
+  - "@bunop"
+  - "@drpatelh"
+  - "@jfy133"
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
new file mode 100644
index 00000000..c2dad217
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -0,0 +1,63 @@
+nextflow_process {
+
+    name "Test Process MULTIQC"
+    script "../main.nf"
+    process "MULTIQC"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "multiqc"
+
+    test("MULTIQC: FASTQC") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.report.get(0)).exists() },
+                { assert path(process.out.data.get(0)).exists() },
+                { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+            )
+        }
+
+    }
+
+    test("MULTIQC: FASTQC and a config file") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true))
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.report.get(0)).exists() },
+                { assert path(process.out.data.get(0)).exists() },
+                { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
new file mode 100644
index 00000000..bea6c0d3
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/tags.yml
@@ -0,0 +1,2 @@
+multiqc:
+  - modules/nf-core/multiqc/**
diff --git a/nextflow.config b/nextflow.config
index 79a64b2a..b01c59fa 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,7 +14,7 @@ params {
     input                      = null
     // References
     genome                     = null
-    igenomes_base              = 's3://ngi-igenomes/igenomes'
+    igenomes_base              = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore            = false
     
 
@@ -82,6 +82,7 @@ profiles {
         dumpHashes             = true
         process.beforeScript   = 'echo $HOSTNAME'
         cleanup                = false
+        nextflow.enable.configProcessNamesValidation = true
     }
     conda {
         conda.enabled          = true
@@ -104,13 +105,13 @@ profiles {
     }
     docker {
         docker.enabled         = true
-        docker.userEmulation   = true
         conda.enabled          = false
         singularity.enabled    = false
         podman.enabled         = false
         shifter.enabled        = false
         charliecloud.enabled   = false
         apptainer.enabled      = false
+        runOptions             = '-u $(id -u):$(id -g)'
     }
     arm {
         docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
@@ -181,7 +182,7 @@ singularity.registry = 'quay.io'
 
 // Nextflow plugins
 plugins {
-    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 // Load igenomes.config if required
@@ -204,6 +205,9 @@ env {
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
+// Disable process selector warnings by default. Use debug profile to enable warnings.
+nextflow.enable.configProcessNamesValidation = false
+
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled = true
@@ -229,7 +233,7 @@ manifest {
     description     = """Small RNA-Seq Best Practice Analysis Pipeline."""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '2.2.3'
+    version         = '2.3dev'
     doi             = ''
 }
 

From ec3d44615456f2ce58094c587b4353c502072d6c Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Wed, 20 Dec 2023 17:05:35 +0000
Subject: [PATCH 052/117] Template update for nf-core/tools version 2.11.1

---
 .../{release-announcments.yml => release-announcements.yml}   | 0
 nextflow.config                                               | 4 ++--
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename .github/workflows/{release-announcments.yml => release-announcements.yml} (100%)

diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml
similarity index 100%
rename from .github/workflows/release-announcments.yml
rename to .github/workflows/release-announcements.yml
diff --git a/nextflow.config b/nextflow.config
index b01c59fa..a473a3b6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -111,10 +111,10 @@ profiles {
         shifter.enabled        = false
         charliecloud.enabled   = false
         apptainer.enabled      = false
-        runOptions             = '-u $(id -u):$(id -g)'
+        docker.runOptions      = '-u $(id -u):$(id -g)'
     }
     arm {
-        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
+        docker.runOptions      = '-u $(id -u):$(id -g) --platform=linux/amd64'
     }
     singularity {
         singularity.enabled    = true

From ce28d5a06fdf9e891c51d717893b4869f3b44a4d Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 13:01:04 +0000
Subject: [PATCH 053/117] First batch of updates to 2024

---
 modules.json                                  |  33 ++-
 modules/nf-core/fastp/environment.yml         |   1 +
 modules/nf-core/fastp/main.nf                 |  26 +-
 modules/nf-core/fastp/tests/main.nf.test      | 241 ++++++++++++++++++
 modules/nf-core/fastp/tests/main.nf.test.snap |  55 ++++
 modules/nf-core/modules/cat/cat/main.nf       |  62 -----
 modules/nf-core/modules/cat/cat/meta.yml      |  37 ---
 .../nf-core/modules/samtools/bam2fq/main.nf   |  56 ----
 .../nf-core/modules/samtools/bam2fq/meta.yml  |  55 ----
 .../nf-core/modules/umitools/dedup/main.nf    |  41 ---
 .../nf-core/modules/umitools/dedup/meta.yml   |  59 -----
 .../nf-core/modules/umitools/extract/main.nf  |  55 ----
 .../nf-core/modules/umitools/extract/meta.yml |  47 ----
 modules/nf-core/multiqc/environment.yml       |   3 +-
 modules/nf-core/multiqc/main.nf               |  10 +-
 modules/nf-core/multiqc/meta.yml              |   3 +-
 16 files changed, 357 insertions(+), 427 deletions(-)
 delete mode 100644 modules/nf-core/modules/cat/cat/main.nf
 delete mode 100644 modules/nf-core/modules/cat/cat/meta.yml
 delete mode 100644 modules/nf-core/modules/samtools/bam2fq/main.nf
 delete mode 100644 modules/nf-core/modules/samtools/bam2fq/meta.yml
 delete mode 100644 modules/nf-core/modules/umitools/dedup/main.nf
 delete mode 100644 modules/nf-core/modules/umitools/dedup/meta.yml
 delete mode 100644 modules/nf-core/modules/umitools/extract/main.nf
 delete mode 100644 modules/nf-core/modules/umitools/extract/meta.yml

diff --git a/modules.json b/modules.json
index 1250670b..4a236584 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+                        "installed_by": ["modules"]
+                    },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
@@ -17,17 +22,22 @@
                     },
                     "fastp": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules"]
+                        "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520",
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules"]
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     },
                     "multiqc": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
+                        "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
+                        "installed_by": ["modules"]
+                    },
+                    "samtools/bam2fq": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                         "installed_by": ["modules"]
                     },
                     "samtools/flagstat": {
@@ -54,6 +64,16 @@
                         "branch": "master",
                         "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
                         "installed_by": ["modules", "bam_stats_samtools"]
+                    },
+                    "umitools/dedup": {
+                        "branch": "master",
+                        "git_sha": "9d489648a7425fa18d8bed18935442c104d8733c",
+                        "installed_by": ["modules"]
+                    },
+                    "umitools/extract": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     }
                 }
             },
@@ -68,6 +88,11 @@
                         "branch": "master",
                         "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
                         "installed_by": ["subworkflows", "bam_sort_stats_samtools"]
+                    },
+                    "fastq_fastqc_umitools_fastp": {
+                        "branch": "master",
+                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
index 19ccec25..70389e66 100644
--- a/modules/nf-core/fastp/environment.yml
+++ b/modules/nf-core/fastp/environment.yml
@@ -1,3 +1,4 @@
+name: fastp
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index ca5f100f..2a3b679e 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -2,7 +2,7 @@ process FASTP {
     tag "$meta.id"
     label 'process_medium'
 
-    conda 'modules/nf-core/fastp/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
         'biocontainers/fastp:0.23.4--h5f740d0_0' }"
@@ -45,7 +45,7 @@ process FASTP {
             $adapter_list \\
             $fail_fastq \\
             $args \\
-            2> ${prefix}.fastp.log \\
+            2> >(tee ${prefix}.fastp.log >&2) \\
         | gzip -c > ${prefix}.fastp.fastq.gz
 
         cat <<-END_VERSIONS > versions.yml
@@ -66,7 +66,7 @@ process FASTP {
             $adapter_list \\
             $fail_fastq \\
             $args \\
-            2> ${prefix}.fastp.log
+            2> >(tee ${prefix}.fastp.log >&2)
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
@@ -91,7 +91,7 @@ process FASTP {
             --thread $task.cpus \\
             --detect_adapter_for_pe \\
             $args \\
-            2> ${prefix}.fastp.log
+            2> >(tee ${prefix}.fastp.log >&2)
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
@@ -99,4 +99,22 @@ process FASTP {
         END_VERSIONS
         """
     }
+
+    stub:
+    def prefix              = task.ext.prefix ?: "${meta.id}"
+    def is_single_output    = task.ext.args?.contains('--interleaved_in') || meta.single_end
+    def touch_reads         = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+    def touch_merged        = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
+    """
+    touch $touch_reads
+    touch "${prefix}.fastp.json"
+    touch "${prefix}.fastp.html"
+    touch "${prefix}.fastp.log"
+    $touch_merged
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
index f610b735..17dce8ac 100644
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -57,6 +57,67 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_single_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_single_end-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_single_end-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -127,6 +188,67 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_paired_end-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -181,6 +303,66 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_interleaved-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("fastp test_fastp_interleaved-stub") {
+
+        options '-stub'
+
+        config './nextflow.config'
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = false
+
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+                            [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+                        ]
+
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { file(it[1]).getName() } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_interleaved-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -399,6 +581,65 @@ nextflow_process {
                     { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
                     }
                 },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end_merged-for_stub_match")
+                },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("test_fastp_paired_end_merged-stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                adapter_fasta     = []
+                save_trimmed_fail = false
+                save_merged       = true
+
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+                           ]
+                input[1] = adapter_fasta
+                input[2] = save_trimmed_fail
+                input[3] = save_merged
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert snapshot(
+                        (
+                            [process.out.reads[0][0].toString()] + // meta
+                            process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+                            process.out.json.collect { file(it[1]).getName() } +
+                            process.out.html.collect { file(it[1]).getName() } +
+                            process.out.log.collect { file(it[1]).getName() } +
+                            process.out.reads_fail.collect { file(it[1]).getName() } +
+                            process.out.reads_merged.collect { file(it[1]).getName() }
+                        ).sort()
+                    ).match("test_fastp_paired_end_merged-for_stub_match")
+                },
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
index 0fa68c7d..1b7d2419 100644
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -1,4 +1,19 @@
 {
+    "test_fastp_paired_end-for_stub_match": {
+        "content": [
+            [
+                [
+                    "test_1.fastp.fastq.gz",
+                    "test_2.fastp.fastq.gz"
+                ],
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=false}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:44:37.202512"
+    },
     "fastp test_fastp_interleaved_json": {
         "content": [
             [
@@ -13,6 +28,22 @@
         ],
         "timestamp": "2023-10-17T11:04:45.794175881"
     },
+    "test_fastp_paired_end_merged-for_stub_match": {
+        "content": [
+            [
+                [
+                    "test_1.fastp.fastq.gz",
+                    "test_2.fastp.fastq.gz"
+                ],
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "test.merged.fastq.gz",
+                "{id=test, single_end=false}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:53:45.237014"
+    },
     "test_fastp_single_end_json": {
         "content": [
             [
@@ -35,6 +66,30 @@
         ],
         "timestamp": "2023-10-17T11:04:10.582076024"
     },
+    "test_fastp_interleaved-for_stub_match": {
+        "content": [
+            [
+                "test.fastp.fastq.gz",
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=true}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:48:43.148485"
+    },
+    "test_fastp_single_end-for_stub_match": {
+        "content": [
+            [
+                "test.fastp.fastq.gz",
+                "test.fastp.html",
+                "test.fastp.json",
+                "test.fastp.log",
+                "{id=test, single_end=true}"
+            ]
+        ],
+        "timestamp": "2023-12-21T09:20:07.254788"
+    },
     "test_fastp_single_end_trim_fail_json": {
         "content": [
             [
diff --git a/modules/nf-core/modules/cat/cat/main.nf b/modules/nf-core/modules/cat/cat/main.nf
deleted file mode 100644
index 40e53f3e..00000000
--- a/modules/nf-core/modules/cat/cat/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process CAT_CAT {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
-        'quay.io/biocontainers/pigz:2.3.4' }"
-
-    input:
-    tuple val(meta), path(files_in)
-
-    output:
-    tuple val(meta), path("${prefix}"), emit: file_out
-    path "versions.yml"               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
-    def file_list = files_in.collect { it.toString() }
-
-    // | input     | output     | command1 | command2 |
-    // |-----------|------------|----------|----------|
-    // | gzipped   | gzipped    | cat      |          |
-    // | ungzipped | ungzipped  | cat      |          |
-    // | gzipped   | ungzipped  | zcat     |          |
-    // | ungzipped | gzipped    | cat      | pigz     |
-
-    // Use input file ending as default
-    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
-    out_zip  = prefix.endsWith('.gz')
-    in_zip   = file_list[0].endsWith('.gz')
-    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
-    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
-    """
-    $command1 \\
-        $args \\
-        ${file_list.join(' ')} \\
-        $command2 \\
-        > ${prefix}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-    END_VERSIONS
-    """
-
-    stub:
-    def file_list = files_in.collect { it.toString() }
-    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
-    """
-    touch $prefix
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/modules/cat/cat/meta.yml b/modules/nf-core/modules/cat/cat/meta.yml
deleted file mode 100644
index 5eeff5a6..00000000
--- a/modules/nf-core/modules/cat/cat/meta.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: cat_cat
-description: A module for concatenation of gzipped or uncompressed files
-keywords:
-  - concatenate
-  - gzip
-  - cat
-tools:
-  - cat:
-      description: Just concatenation
-      homepage: None
-      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
-      tool_dev_url: None
-      licence: ["GPL-3.0-or-later"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - files_in:
-      type: file
-      description: List of compressed / uncompressed files
-      pattern: "*"
-
-output:
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - file_out:
-      type: file
-      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
-      pattern: "${file_out}"
-
-authors:
-  - "@erikrikarddaniel"
-  - "@FriederikeHanssen"
diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/modules/samtools/bam2fq/main.nf
deleted file mode 100644
index 9301d1d3..00000000
--- a/modules/nf-core/modules/samtools/bam2fq/main.nf
+++ /dev/null
@@ -1,56 +0,0 @@
-process SAMTOOLS_BAM2FQ {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
-        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
-
-    input:
-    tuple val(meta), path(inputbam)
-    val split
-
-    output:
-    tuple val(meta), path("*.fq.gz"), emit: reads
-    path "versions.yml"             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-
-    if (split){
-        """
-        samtools \\
-            bam2fq \\
-            $args \\
-            -@ $task.cpus \\
-            -1 ${prefix}_1.fq.gz \\
-            -2 ${prefix}_2.fq.gz \\
-            -0 ${prefix}_other.fq.gz \\
-            -s ${prefix}_singleton.fq.gz \\
-            $inputbam
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-        END_VERSIONS
-        """
-    } else {
-        """
-        samtools \\
-            bam2fq \\
-            $args \\
-            -@ $task.cpus \\
-            $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-        END_VERSIONS
-        """
-    }
-}
diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml
deleted file mode 100644
index 319a60cf..00000000
--- a/modules/nf-core/modules/samtools/bam2fq/meta.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: samtools_bam2fq
-description: |
-  The module uses bam2fq method from samtools to
-  convert a SAM, BAM or CRAM file to FASTQ format
-keywords:
-  - bam2fq
-  - samtools
-  - fastq
-tools:
-  - samtools:
-      description: Tools for dealing with SAM, BAM and CRAM files
-      homepage: None
-      documentation: http://www.htslib.org/doc/1.1/samtools.html
-      tool_dev_url: None
-      doi: ""
-      licence: ["MIT"]
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - inputbam:
-      type: file
-      description: BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
-  - split:
-      type: boolean
-      description: |
-        TRUE/FALSE value to indicate if reads should be separated into
-        /1, /2 and if present other, or singleton.
-        Note: choosing TRUE will generate 4 different files.
-        Choosing FALSE will produce a single file, which will be interleaved in case
-        the input contains paired reads.
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - reads:
-      type: file
-      description: |
-        FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
-        or a single interleaved .fq.gz file if the user chooses not to split the reads.
-      pattern: "*.fq.gz"
-
-authors:
-  - "@lescai"
diff --git a/modules/nf-core/modules/umitools/dedup/main.nf b/modules/nf-core/modules/umitools/dedup/main.nf
deleted file mode 100644
index dfcbcf2f..00000000
--- a/modules/nf-core/modules/umitools/dedup/main.nf
+++ /dev/null
@@ -1,41 +0,0 @@
-process UMITOOLS_DEDUP {
-    tag "$meta.id"
-    label "process_medium"
-
-    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
-        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-
-    output:
-    tuple val(meta), path("*.bam")             , emit: bam
-    tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance
-    tuple val(meta), path("*per_umi.tsv")      , emit: tsv_per_umi
-    tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position
-    path  "versions.yml"                       , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def paired = meta.single_end ? "" : "--paired"
-    """
-    umi_tools \\
-        dedup \\
-        -I $bam \\
-        -S ${prefix}.bam \\
-        --output-stats $prefix \\
-        $paired \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/modules/umitools/dedup/meta.yml b/modules/nf-core/modules/umitools/dedup/meta.yml
deleted file mode 100644
index eee8952f..00000000
--- a/modules/nf-core/modules/umitools/dedup/meta.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: umitools_dedup
-description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords:
-  - umitools
-  - deduplication
-tools:
-  - umi_tools:
-    description: >
-      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-      and single cell RNA-Seq cell barcodes
-    documentation: https://umi-tools.readthedocs.io/en/latest/
-    license: ["MIT"]
-input:
-  - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-    type: file
-    description: |
-      BAM file containing reads to be deduplicated via UMIs.
-    pattern: "*.{bam}"
-  - bai:
-    type: file
-    description: |
-      BAM index files corresponding to the input BAM file.
-    pattern: "*.{bai}"
-output:
-  - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-    type: file
-    description: BAM file with deduplicated UMIs.
-    pattern: "*.{bam}"
-  - tsv_edit_distance:
-    type: file
-    description: Reports the (binned) average edit distance between the UMIs at each position.
-    pattern: "*edit_distance.tsv"
-  - tsv_per_umi:
-    type: file
-    description: UMI-level summary statistics.
-    pattern: "*per_umi.tsv"
-  - tsv_umi_per_position:
-    type: file
-    description: Tabulates the counts for unique combinations of UMI and position.
-    pattern: "*per_position.tsv"
-  - versions:
-    type: file
-    description: File containing software versions
-    pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
-  - "@klkeys"
diff --git a/modules/nf-core/modules/umitools/extract/main.nf b/modules/nf-core/modules/umitools/extract/main.nf
deleted file mode 100644
index 22a405b9..00000000
--- a/modules/nf-core/modules/umitools/extract/main.nf
+++ /dev/null
@@ -1,55 +0,0 @@
-process UMITOOLS_EXTRACT {
-    tag "$meta.id"
-    label "process_low"
-
-    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
-        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
-
-    input:
-    tuple val(meta), path(reads)
-
-    output:
-    tuple val(meta), path("*.fastq.gz"), emit: reads
-    tuple val(meta), path("*.log")     , emit: log
-    path  "versions.yml"               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    if (meta.single_end) {
-        """
-        umi_tools \\
-            extract \\
-            -I $reads \\
-            -S ${prefix}.umi_extract.fastq.gz \\
-            $args \\
-            > ${prefix}.umi_extract.log
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-        END_VERSIONS
-        """
-    }  else {
-        """
-        umi_tools \\
-            extract \\
-            -I ${reads[0]} \\
-            --read2-in=${reads[1]} \\
-            -S ${prefix}.umi_extract_1.fastq.gz \\
-            --read2-out=${prefix}.umi_extract_2.fastq.gz \\
-            $args \\
-            > ${prefix}.umi_extract.log
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-        END_VERSIONS
-        """
-    }
-}
diff --git a/modules/nf-core/modules/umitools/extract/meta.yml b/modules/nf-core/modules/umitools/extract/meta.yml
deleted file mode 100644
index 7fc23f72..00000000
--- a/modules/nf-core/modules/umitools/extract/meta.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: umitools_extract
-description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place
-keywords:
-  - umitools
-  - extract
-tools:
-  - umi_tools:
-    description: >
-      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-      and single cell RNA-Seq cell barcodes
-    documentation: https://umi-tools.readthedocs.io/en/latest/
-    license: ["MIT"]
-input:
-  - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-    type: list
-    description: |
-      List of input FASTQ files whose UMIs will be extracted.
-output:
-  - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-    type: file
-    description: >
-      Extracted FASTQ files. |
-      For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
-        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
-    pattern: "*.{fastq.gz}"
-  - log:
-    type: file
-    description: Logfile for umi_tools
-    pattern: "*.{log}"
-  - versions:
-    type: file
-    description: File containing software versions
-    pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index 9d0e6b20..7625b752 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -1,6 +1,7 @@
+name: multiqc
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::multiqc=1.17
+  - bioconda::multiqc=1.19
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 485b3ba8..1b9f7c43 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,10 +1,10 @@
 process MULTIQC {
     label 'process_single'
 
-    conda 'modules/nf-core/multiqc/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.17--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
 
     input:
     path  multiqc_files, stageAs: "?/*"
@@ -25,12 +25,14 @@ process MULTIQC {
     def args = task.ext.args ?: ''
     def config = multiqc_config ? "--config $multiqc_config" : ''
     def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+    def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
     """
     multiqc \\
         --force \\
         $args \\
         $config \\
         $extra_config \\
+        $logo \\
         .
 
     cat <<-END_VERSIONS > versions.yml
@@ -41,7 +43,7 @@ process MULTIQC {
 
     stub:
     """
-    touch multiqc_data
+    mkdir multiqc_data
     touch multiqc_plots
     touch multiqc_report.html
 
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index a61223ed..45a9bc35 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -1,5 +1,4 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
-name: MultiQC
+name: multiqc
 description: Aggregate results from bioinformatics analyses across many samples into a single report
 keywords:
   - QC

From fed23d1644862c285001275cc421071c4a0911ec Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 13:02:09 +0000
Subject: [PATCH 054/117] Add missing modules / subworkflows

---
 modules/nf-core/cat/cat/environment.yml       |   7 +
 modules/nf-core/cat/cat/main.nf               |  70 +++++++
 modules/nf-core/cat/cat/meta.yml              |  36 ++++
 modules/nf-core/cat/cat/tests/main.nf.test    | 179 ++++++++++++++++++
 .../nf-core/cat/cat/tests/main.nf.test.snap   | 121 ++++++++++++
 .../cat/tests/nextflow_unzipped_zipped.config |   6 +
 .../cat/tests/nextflow_zipped_unzipped.config |   8 +
 modules/nf-core/cat/cat/tests/tags.yml        |   2 +
 modules/nf-core/multiqc/tests/main.nf.test    |  83 ++++++++
 .../nf-core/multiqc/tests/main.nf.test.snap   |  21 ++
 modules/nf-core/multiqc/tests/tags.yml        |   2 +
 modules/nf-core/samtools/bam2fq/main.nf       |  56 ++++++
 modules/nf-core/samtools/bam2fq/meta.yml      |  55 ++++++
 modules/nf-core/umitools/dedup/main.nf        |  41 ++++
 modules/nf-core/umitools/dedup/meta.yml       |  59 ++++++
 modules/nf-core/umitools/extract/main.nf      |  55 ++++++
 modules/nf-core/umitools/extract/meta.yml     |  47 +++++
 .../fastq_fastqc_umitools_fastp/main.nf       | 140 ++++++++++++++
 .../fastq_fastqc_umitools_fastp/meta.yml      | 128 +++++++++++++
 .../tests/main.nf.test                        |  60 ++++++
 .../tests/main.nf.test.snap                   |  81 ++++++++
 .../tests/tags.yml                            |   2 +
 22 files changed, 1259 insertions(+)
 create mode 100644 modules/nf-core/cat/cat/environment.yml
 create mode 100644 modules/nf-core/cat/cat/main.nf
 create mode 100644 modules/nf-core/cat/cat/meta.yml
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/tags.yml
 create mode 100644 modules/nf-core/multiqc/tests/main.nf.test
 create mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/multiqc/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/bam2fq/main.nf
 create mode 100644 modules/nf-core/samtools/bam2fq/meta.yml
 create mode 100644 modules/nf-core/umitools/dedup/main.nf
 create mode 100644 modules/nf-core/umitools/dedup/meta.yml
 create mode 100644 modules/nf-core/umitools/extract/main.nf
 create mode 100644 modules/nf-core/umitools/extract/meta.yml
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
 create mode 100644 subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml

diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml
new file mode 100644
index 00000000..17a04ef2
--- /dev/null
+++ b/modules/nf-core/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+name: cat_cat # conda environment referenced by the CAT_CAT process via "${moduleDir}/environment.yml"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::pigz=2.3.4 # same pigz version (2.3.4) as the container tags in main.nf
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
new file mode 100644
index 00000000..970ab760
--- /dev/null
+++ b/modules/nf-core/cat/cat/main.nf
@@ -0,0 +1,70 @@
+process CAT_CAT { // Concatenates files_in into one file named by `prefix`, using zcat/pigz when input and output compression differ
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in) // meta map plus the file(s) to concatenate
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out // the file named by `prefix`, which script:/stub: assign below
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''   // extra options for command1 (cat/zcat)
+    def args2 = task.ext.args2 ?: '' // extra options for pigz in command2
+    def file_list = files_in.collect { it.toString() } // staged input names as plain strings
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default; a first input without any '.' gets no suffix (substring(-1) would throw)
+    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].contains('.') ? file_list[0].substring(file_list[0].lastIndexOf('.')) : ''}"
+    out_zip  = prefix.endsWith('.gz')       // output counts as gzipped when its name ends in .gz
+    in_zip   = file_list[0].endsWith('.gz') // only the first input decides whether the inputs count as gzipped
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' // decompress only for gzipped -> ungzipped
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' // compress only for ungzipped -> gzipped
+    if(file_list.contains(prefix.trim())) { // the output name must differ from every staged input name
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list   = files_in.collect { it.toString() } // staged input names as plain strings
+    prefix          = task.ext.prefix ?: "${meta.id}${file_list[0].contains('.') ? file_list[0].substring(file_list[0].lastIndexOf('.')) : ''}" // same default name as script:, empty suffix when the first input has no '.'
+    if(file_list.contains(prefix.trim())) { // same name-conflict guard as in script:
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
new file mode 100644
index 00000000..00a8db0b
--- /dev/null
+++ b/modules/nf-core/cat/cat/meta.yml
@@ -0,0 +1,36 @@
+name: cat_cat # module metadata for the CAT_CAT process in main.nf
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files_in: # main.nf joins these names with spaces, so they are concatenated in list order
+      type: file
+      description: List of compressed / uncompressed files
+      pattern: "*"
+output: # main.nf also emits the meta map together with file_out (tuple val(meta), path(prefix))
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - file_out: # NOTE(review): main.nf names this file via `prefix` (task.ext.prefix, else meta.id + the first input's extension)
+      type: file
+      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+      pattern: "${file_out}"
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
new file mode 100644
index 00000000..ed5a4f12
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -0,0 +1,179 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_name_conflict") { // Negative test: CAT_CAT must abort with its name-conflict error
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        } // NOTE(review): id 'genome' presumably makes the default output name equal the genome_fasta file name - confirm against the test data
+        then {
+            assertAll(
+                { assert !process.success }, // the run has to fail
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } // and stdout has to carry the module's error text
+            )
+        }
+    }
+
+    test("test_cat_unzipped_unzipped") { // genome_fasta + genome_sizes inputs, no extra config; the full output is snapshotted
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // unnamed snapshot of every output channel
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") { // two *_gz test files in, default prefix; output content is checked line-wise
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip // lines of the first file_out entry; linesGzip presumably gunzips it - see nf-test docs
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, // first six lines
+                { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} // total line count
+            )
+        }
+    }
+
+    test("test_cat_zipped_unzipped") { // two *_gz test files in; the full output is snapshotted
+        config './nextflow_zipped_unzipped.config' // NOTE(review): presumably sets an uncompressed ext.prefix (the snapshot records cat.txt) - confirm in the config
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // unnamed snapshot of every output channel
+            )
+        }
+
+    }
+
+    test("test_cat_unzipped_zipped") { // genome_fasta + genome_sizes inputs; output is read back through linesGzip
+        config './nextflow_unzipped_zipped.config' // NOTE(review): presumably sets a .gz ext.prefix so pigz is used - confirm in the config
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip // lines of the first file_out entry
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, // first six lines
+                { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} // total line count
+            )
+        }
+    }
+
+    test("test_cat_one_file_unzipped_zipped") { // same config as test_cat_unzipped_zipped, but with a single input file
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip // lines of the first file_out entry
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, // first six lines
+                { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} // total line count
+            )
+        }
+    }
+}
+
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
new file mode 100644
index 00000000..423571ba
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap
@@ -0,0 +1,121 @@
+{
+    "test_cat_unzipped_zipped_size": {
+        "content": [
+            375
+        ],
+        "timestamp": "2023-10-16T14:33:08.049445686"
+    },
+    "test_cat_unzipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-16T14:32:18.500464399"
+    },
+    "test_cat_zipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "timestamp": "2023-10-16T14:32:49.642741302"
+    },
+    "test_cat_zipped_zipped_lines": {
+        "content": [
+            [
+                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
+                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
+                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
+                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:32:33.629048645"
+    },
+    "test_cat_unzipped_zipped_lines": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:33:08.038830506"
+    },
+    "test_cat_one_file_unzipped_zipped_lines": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ]
+        ],
+        "timestamp": "2023-10-16T14:33:21.39642399"
+    },
+    "test_cat_zipped_zipped_size": {
+        "content": [
+            78
+        ],
+        "timestamp": "2023-10-16T14:32:33.641869244"
+    },
+    "test_cat_one_file_unzipped_zipped_size": {
+        "content": [
+            374
+        ],
+        "timestamp": "2023-10-16T14:33:21.4094373"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
new file mode 100644
index 00000000..ec26b0fd
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
@@ -0,0 +1,6 @@
+
+process {
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt.gz'
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
new file mode 100644
index 00000000..fbc79783
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
@@ -0,0 +1,8 @@
+
+process {
+
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt'
+    }
+
+}
diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml
new file mode 100644
index 00000000..37b578f5
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/tags.yml
@@ -0,0 +1,2 @@
+cat/cat:
+  - modules/nf-core/cat/cat/**
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
new file mode 100644
index 00000000..d0438eda
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -0,0 +1,83 @@
+nextflow_process {
+
+    name "Test Process MULTIQC"
+    script "../main.nf"
+    process "MULTIQC"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "multiqc"
+
+    test("sarscov2 single-end [fastqc]") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+                { assert process.out.data[0] ==~ ".*/multiqc_data" },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("sarscov2 single-end [fastqc] [config]") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true))
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+                { assert process.out.data[0] ==~ ".*/multiqc_data" },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 single-end [fastqc] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.report.collect { file(it).getName() } +
+                                process.out.data.collect { file(it).getName() } +
+                                process.out.plots.collect { file(it).getName() } +
+                                process.out.versions ).match() }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
new file mode 100644
index 00000000..d37e7304
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -0,0 +1,21 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d"
+            ]
+        ],
+        "timestamp": "2024-01-09T23:02:49.911994"
+    },
+    "sarscov2 single-end [fastqc] - stub": {
+        "content": [
+            [
+                "multiqc_report.html",
+                "multiqc_data",
+                "multiqc_plots",
+                "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d"
+            ]
+        ],
+        "timestamp": "2024-01-09T23:03:14.524346"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
new file mode 100644
index 00000000..bea6c0d3
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/tags.yml
@@ -0,0 +1,2 @@
+multiqc:
+  - modules/nf-core/multiqc/**
diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf
new file mode 100644
index 00000000..9301d1d3
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/main.nf
@@ -0,0 +1,56 @@
+process SAMTOOLS_BAM2FQ {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
+        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
+
+    input:
+    tuple val(meta), path(inputbam)
+    val split
+
+    output:
+    tuple val(meta), path("*.fq.gz"), emit: reads
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (split){
+        """
+        samtools \\
+            bam2fq \\
+            $args \\
+            -@ $task.cpus \\
+            -1 ${prefix}_1.fq.gz \\
+            -2 ${prefix}_2.fq.gz \\
+            -0 ${prefix}_other.fq.gz \\
+            -s ${prefix}_singleton.fq.gz \\
+            $inputbam
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        samtools \\
+            bam2fq \\
+            $args \\
+            -@ $task.cpus \\
+            $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml
new file mode 100644
index 00000000..319a60cf
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/meta.yml
@@ -0,0 +1,55 @@
+name: samtools_bam2fq
+description: |
+  The module uses bam2fq method from samtools to
+  convert a SAM, BAM or CRAM file to FASTQ format
+keywords:
+  - bam2fq
+  - samtools
+  - fastq
+tools:
+  - samtools:
+      description: Tools for dealing with SAM, BAM and CRAM files
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      tool_dev_url: https://github.com/samtools/samtools
+      doi: "10.1093/bioinformatics/btp352"
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - inputbam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - split:
+      type: boolean
+      description: |
+        TRUE/FALSE value to indicate if reads should be separated into
+        /1, /2 and if present other, or singleton.
+        Note: choosing TRUE will generate 4 different files.
+        Choosing FALSE will produce a single file, which will be interleaved in case
+        the input contains paired reads.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: |
+        FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
+        or a single interleaved .fq.gz file if the user chooses not to split the reads.
+      pattern: "*.fq.gz"
+
+authors:
+  - "@lescai"
diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf
new file mode 100644
index 00000000..dfcbcf2f
--- /dev/null
+++ b/modules/nf-core/umitools/dedup/main.nf
@@ -0,0 +1,41 @@
+process UMITOOLS_DEDUP {
+    tag "$meta.id"
+    label "process_medium"
+
+    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
+        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.bam")             , emit: bam
+    tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance
+    tuple val(meta), path("*per_umi.tsv")      , emit: tsv_per_umi
+    tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position
+    path  "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def paired = meta.single_end ? "" : "--paired"
+    """
+    umi_tools \\
+        dedup \\
+        -I $bam \\
+        -S ${prefix}.bam \\
+        --output-stats $prefix \\
+        $paired \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml
new file mode 100644
index 00000000..eee8952f
--- /dev/null
+++ b/modules/nf-core/umitools/dedup/meta.yml
@@ -0,0 +1,59 @@
+name: umitools_dedup
+description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords:
+  - umitools
+  - deduplication
+tools:
+  - umi_tools:
+      description: >
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
+        and single cell RNA-Seq cell barcodes
+      documentation: https://umi-tools.readthedocs.io/en/latest/
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file containing reads to be deduplicated via UMIs.
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: |
+        BAM index files corresponding to the input BAM file.
+      pattern: "*.{bai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file with deduplicated UMIs.
+      pattern: "*.{bam}"
+  - tsv_edit_distance:
+      type: file
+      description: Reports the (binned) average edit distance between the UMIs at each position.
+      pattern: "*edit_distance.tsv"
+  - tsv_per_umi:
+      type: file
+      description: UMI-level summary statistics.
+      pattern: "*per_umi.tsv"
+  - tsv_umi_per_position:
+      type: file
+      description: Tabulates the counts for unique combinations of UMI and position.
+      pattern: "*per_position.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"
+  - "@klkeys"
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
new file mode 100644
index 00000000..22a405b9
--- /dev/null
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -0,0 +1,55 @@
+process UMITOOLS_EXTRACT {
+    tag "$meta.id"
+    label "process_low"
+
+    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
+        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.fastq.gz"), emit: reads
+    tuple val(meta), path("*.log")     , emit: log
+    path  "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if (meta.single_end) {
+        """
+        umi_tools \\
+            extract \\
+            -I $reads \\
+            -S ${prefix}.umi_extract.fastq.gz \\
+            $args \\
+            > ${prefix}.umi_extract.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+        END_VERSIONS
+        """
+    }  else {
+        """
+        umi_tools \\
+            extract \\
+            -I ${reads[0]} \\
+            --read2-in=${reads[1]} \\
+            -S ${prefix}.umi_extract_1.fastq.gz \\
+            --read2-out=${prefix}.umi_extract_2.fastq.gz \\
+            $args \\
+            > ${prefix}.umi_extract.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml
new file mode 100644
index 00000000..7fc23f72
--- /dev/null
+++ b/modules/nf-core/umitools/extract/meta.yml
@@ -0,0 +1,47 @@
+name: umitools_extract
+description: Extracts the UMI barcode from a read and adds it to the read name, leaving any sample barcode in place
+keywords:
+  - umitools
+  - extract
+tools:
+  - umi_tools:
+      description: >
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
+        and single cell RNA-Seq cell barcodes
+      documentation: https://umi-tools.readthedocs.io/en/latest/
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: list
+      description: |
+        List of input FASTQ files whose UMIs will be extracted.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        Extracted FASTQ files.
+        For single-end reads, pattern is ${prefix}.umi_extract.fastq.gz.
+        For paired-end reads, pattern is ${prefix}.umi_extract_{1,2}.fastq.gz.
+      pattern: "*.{fastq.gz}"
+  - log:
+      type: file
+      description: Logfile for umi_tools
+      pattern: "*.{log}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
new file mode 100644
index 00000000..3dbb27ea
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -0,0 +1,140 @@
+//
+// Read QC, UMI extraction and trimming
+//
+
+include { FASTQC as FASTQC_RAW  } from '../../../modules/nf-core/fastqc/main'
+include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main'
+include { UMITOOLS_EXTRACT      } from '../../../modules/nf-core/umitools/extract/main'
+include { FASTP                 } from '../../../modules/nf-core/fastp/main'
+
+//
+// Function that parses fastp json output file to get total number of reads after trimming
+//
+import groovy.json.JsonSlurper
+
+def getFastpReadsAfterFiltering(json_file) {
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
+    return json['after_filtering']['total_reads'].toLong()
+}
+
+workflow FASTQ_FASTQC_UMITOOLS_FASTP {
+    take:
+    reads             // channel: [ val(meta), [ reads ] ]
+    skip_fastqc       // boolean: true/false
+    with_umi          // boolean: true/false
+    skip_umi_extract  // boolean: true/false
+    umi_discard_read  // integer: 0, 1 or 2
+    skip_trimming     // boolean: true/false
+    adapter_fasta     //    file: adapter.fasta
+    save_trimmed_fail // boolean: true/false
+    save_merged       // boolean: true/false
+    min_trimmed_reads // integer: > 0
+
+    main:
+    ch_versions = Channel.empty()
+    fastqc_raw_html = Channel.empty()
+    fastqc_raw_zip  = Channel.empty()
+    if (!skip_fastqc) {
+        FASTQC_RAW (
+            reads
+        )
+        fastqc_raw_html = FASTQC_RAW.out.html
+        fastqc_raw_zip  = FASTQC_RAW.out.zip
+        ch_versions     = ch_versions.mix(FASTQC_RAW.out.versions.first())
+    }
+
+    umi_reads = reads
+    umi_log   = Channel.empty()
+    if (with_umi && !skip_umi_extract) {
+        UMITOOLS_EXTRACT (
+            reads
+        )
+        umi_reads   = UMITOOLS_EXTRACT.out.reads
+        umi_log     = UMITOOLS_EXTRACT.out.log
+        ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())
+
+        // Discard R1 / R2 if required
+        if (umi_discard_read in [1,2]) {
+            UMITOOLS_EXTRACT
+                .out
+                .reads
+                .map {
+                    meta, reads ->
+                        meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ]
+                }
+                .set { umi_reads }
+        }
+    }
+
+    trim_reads        = umi_reads
+    trim_json         = Channel.empty()
+    trim_html         = Channel.empty()
+    trim_log          = Channel.empty()
+    trim_reads_fail   = Channel.empty()
+    trim_reads_merged = Channel.empty()
+    fastqc_trim_html  = Channel.empty()
+    fastqc_trim_zip   = Channel.empty()
+    trim_read_count   = Channel.empty()
+    if (!skip_trimming) {
+        FASTP (
+            umi_reads,
+            adapter_fasta,
+            save_trimmed_fail,
+            save_merged
+        )
+        trim_json         = FASTP.out.json
+        trim_html         = FASTP.out.html
+        trim_log          = FASTP.out.log
+        trim_reads_fail   = FASTP.out.reads_fail
+        trim_reads_merged = FASTP.out.reads_merged
+        ch_versions       = ch_versions.mix(FASTP.out.versions.first())
+
+        //
+        // Filter FastQ files based on minimum trimmed read count after adapter trimming
+        //
+        FASTP
+            .out
+            .reads
+            .join(trim_json)
+            .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] }
+            .set { ch_num_trimmed_reads }
+
+        ch_num_trimmed_reads
+            .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() }
+            .map { meta, reads, num_reads -> [ meta, reads ] }
+            .set { trim_reads }
+
+        ch_num_trimmed_reads
+            .map { meta, reads, num_reads -> [ meta, num_reads ] }
+            .set { trim_read_count }
+
+        if (!skip_fastqc) {
+            FASTQC_TRIM (
+                trim_reads
+            )
+            fastqc_trim_html = FASTQC_TRIM.out.html
+            fastqc_trim_zip  = FASTQC_TRIM.out.zip
+            ch_versions      = ch_versions.mix(FASTQC_TRIM.out.versions.first())
+        }
+    }
+
+    emit:
+    reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+    fastqc_raw_html    // channel: [ val(meta), [ html ] ]
+    fastqc_raw_zip     // channel: [ val(meta), [ zip ] ]
+
+    umi_log            // channel: [ val(meta), [ log ] ]
+
+    trim_json          // channel: [ val(meta), [ json ] ]
+    trim_html          // channel: [ val(meta), [ html ] ]
+    trim_log           // channel: [ val(meta), [ log ] ]
+    trim_reads_fail    // channel: [ val(meta), [ fastq.gz ] ]
+    trim_reads_merged  // channel: [ val(meta), [ fastq.gz ] ]
+    trim_read_count    // channel: [ val(meta), val(count) ]
+
+    fastqc_trim_html   // channel: [ val(meta), [ html ] ]
+    fastqc_trim_zip    // channel: [ val(meta), [ zip ] ]
+
+    versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
new file mode 100644
index 00000000..220e8db1
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -0,0 +1,128 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+# yaml-language-server: $schema=yaml-schema.json
+name: "fastq_fastqc_umitools_fastp"
+description: Read QC, UMI extraction and trimming
+keywords:
+  - fastq
+  - fastqc
+  - qc
+  - UMI
+  - trimming
+  - fastp
+components:
+  - fastqc
+  - umitools/extract
+  - fastp
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - skip_fastqc:
+      type: boolean
+      description: |
+        Skip fastqc process
+  - with_umi:
+      type: boolean
+      description: |
+        With or without umi detection
+  - skip_umi_extract:
+      type: boolean
+      description: |
+        Skip UMI extraction (only relevant when with_umi is true)
+  - umi_discard_read:
+      type: integer
+      description: |
+        Discard R1 / R2 if required
+  - skip_trimming:
+      type: boolean
+      description: |
+        Skip adapter trimming with FastP
+  - adapter_fasta:
+      type: file
+      description: |
+        Fasta file of adapter sequences
+  - save_trimmed_fail:
+      type: boolean
+      description: |
+        Save trimmed fastqs of failed samples
+  - save_merged:
+      type: boolean
+      description: |
+        Save merged fastqs
+  - min_trimmed_reads:
+      type: integer
+      description: |
+        Inputs with fewer than this reads will be filtered out of the "reads" output channel
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - reads:
+      type: file
+      description: |
+        FASTQ files after UMI extraction and adapter trimming. Samples with
+        fewer than min_trimmed_reads reads after trimming are dropped.
+        When trimming is skipped, the (UMI-extracted) input reads are
+        passed through unchanged. FastP output is named like
+        test_1.fastp.fastq.gz / test_2.fastp.fastq.gz for paired-end data.
+      pattern: "*.{fastq.gz}"
+  - fastqc_raw_html:
+      type: file
+      description: FastQC report of the raw reads
+      pattern: "*_{fastqc.html}"
+  - fastqc_raw_zip:
+      type: file
+      description: FastQC report archive of the raw reads
+      pattern: "*_{fastqc.zip}"
+  - umi_log:
+      type: file
+      description: Logfile for umi_tools
+      pattern: "*.{log}"
+  - trim_json:
+      type: file
+      description: FastP Trimming report
+      pattern: "*.{fastp.json}"
+  - trim_html:
+      type: file
+      description: FastP Trimming report
+      pattern: "*.{fastp.html}"
+  - trim_log:
+      type: file
+      description: Logfile FastP
+      pattern: "*.{fastp.log}"
+  - trim_reads_fail:
+      type: file
+      description: Trimmed fastq files failing QC
+      pattern: "*.{fastq.gz}"
+  - trim_reads_merged:
+      type: file
+      description: Trimmed and merged fastq files
+      pattern: "*.{fastq.gz}"
+  - trim_read_count:
+      type: integer
+      description: Number of reads after trimming
+  - fastqc_trim_html:
+      type: file
+      description: FastQC report of the trimmed reads
+      pattern: "*_{fastqc.html}"
+  - fastqc_trim_zip:
+      type: file
+      description: FastQC report archive of the trimmed reads
+      pattern: "*_{fastqc.zip}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@robsyme"
+maintainers:
+  - "@robsyme"
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
new file mode 100644
index 00000000..cdd73984
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_workflow {
+
+    name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP"
+    script "../main.nf"
+    workflow "FASTQ_FASTQC_UMITOOLS_FASTP"
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_fastqc_umitools_fastp"
+    tag "fastq_fastqc_umitools_fastp"
+    tag "fastqc"
+    tag "umitools/extract"
+    tag "fastp"
+
+
+    test("sarscov2 paired-end [fastq]") {
+
+        when {
+            workflow {
+                """
+                input[0] = [
+                            [ id:'test', single_end:false ], // meta map
+                            [
+                             file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                             file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                            ]
+                ]
+                input[1] = false // skip_fastqc
+                input[2] = false // with_umi
+                input[3] = false // skip_umi_extract
+                input[4] = 1     // umi_discard_read
+                input[5] = false // skip_trimming
+                input[6] = []    // adapter_fasta
+                input[7] = false // save_trimmed_fail
+                input[8] = false // save_merged
+                input[9] = 1     // min_trimmed_reads
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out.reads).match("reads") },
+                { assert snapshot(workflow.out.umi_log).match("umi_log") },
+                { assert snapshot(workflow.out.trim_json).match("trim_json") },
+                { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
+                { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
+                { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
+                { assert snapshot(workflow.out.versions).match("versions") },
+
+                { assert workflow.out.fastqc_raw_html },
+                { assert workflow.out.fastqc_raw_zip },
+                { assert workflow.out.trim_html },
+                { assert workflow.out.trim_log },
+                { assert workflow.out.fastqc_trim_html },
+                { assert workflow.out.fastqc_trim_zip }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
new file mode 100644
index 00000000..38a65aeb
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -0,0 +1,81 @@
+{
+    "trim_reads_merged": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.26920982"
+    },
+    "trim_reads_fail": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.25861515"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+                "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+                "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.30891403"
+    },
+    "trim_json": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.24768259"
+    },
+    "reads": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+                        "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+                    ]
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:30:32.061644815"
+    },
+    "umi_log": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.238536"
+    },
+    "trim_read_count": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    198
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-26T02:28:26.27984169"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
new file mode 100644
index 00000000..84a4b567
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_fastqc_umitools_fastp:
+  - subworkflows/nf-core/fastq_fastqc_umitools_fastp/**

From be1590056b38ad9744bd73d38f1fc45413693afd Mon Sep 17 00:00:00 2001
From: Fabian Hausmann <fabian.hausmann@zmnh.uni-hamburg.de>
Date: Thu, 11 Jan 2024 14:06:44 +0100
Subject: [PATCH 055/117] Add issue to changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47cb7b72..a966783d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [dev](https://github.com/nf-core/smrnaseq/branch/dev)
 
-- _nothing yet done_
+- [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`)
 
 ## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03
 

From 1d4fd076d9a2e1382e847e42156196f6044f20fc Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 13:07:25 +0000
Subject: [PATCH 056/117] Update nf-core modules, subworkflows and template to 2024 versions

---
 .github/CONTRIBUTING.md                       |   3 +
 .github/PULL_REQUEST_TEMPLATE.md              |   1 +
 .github/workflows/linting.yml                 |  12 +-
 assets/multiqc_config.yml                     |   4 +-
 lib/NfcoreTemplate.groovy                     |  32 +-
 modules.json                                  |  38 +--
 modules/nf-core/cat/fastq/environment.yml     |   1 +
 modules/nf-core/cat/fastq/main.nf             |   2 +-
 .../dumpsoftwareversions/environment.yml      |   3 +-
 .../custom/dumpsoftwareversions/main.nf       |   6 +-
 .../custom/dumpsoftwareversions/meta.yml      |   2 +-
 .../dumpsoftwareversions/tests/main.nf.test   |   7 +-
 .../tests/main.nf.test.snap                   |  50 +--
 modules/nf-core/fastqc/environment.yml        |   1 +
 modules/nf-core/fastqc/main.nf                |   6 +-
 modules/nf-core/fastqc/tests/main.nf.test     | 203 +++++++++++-
 .../nf-core/fastqc/tests/main.nf.test.snap    |  12 +-
 .../nf-core/samtools/bam2fq/environment.yml   |   7 +
 modules/nf-core/samtools/bam2fq/main.nf       |   6 +-
 modules/nf-core/samtools/bam2fq/meta.yml      |   8 +-
 .../samtools/bam2fq/tests/main.nf.test        |  71 ++++
 .../samtools/bam2fq/tests/main.nf.test.snap   |  49 +++
 .../samtools/bam2fq/tests/nextflow.config     |   3 +
 .../nf-core/samtools/bam2fq/tests/tags.yml    |   2 +
 .../nf-core/samtools/flagstat/environment.yml |   3 +-
 modules/nf-core/samtools/flagstat/main.nf     |   6 +-
 .../samtools/flagstat/tests/main.nf.test      |  36 +++
 .../samtools/flagstat/tests/main.nf.test.snap |  16 +
 .../nf-core/samtools/flagstat/tests/tags.yml  |   2 +
 .../nf-core/samtools/idxstats/environment.yml |   3 +-
 modules/nf-core/samtools/idxstats/main.nf     |   6 +-
 .../samtools/idxstats/tests/main.nf.test      |  36 +++
 .../samtools/idxstats/tests/main.nf.test.snap |  16 +
 .../nf-core/samtools/idxstats/tests/tags.yml  |   2 +
 .../nf-core/samtools/index/environment.yml    |   3 +-
 modules/nf-core/samtools/index/main.nf        |   6 +-
 .../samtools/index/tests/csi.nextflow.config  |   7 +
 .../nf-core/samtools/index/tests/main.nf.test |  87 +++++
 .../samtools/index/tests/main.nf.test.snap    |  28 ++
 modules/nf-core/samtools/index/tests/tags.yml |   2 +
 modules/nf-core/samtools/sort/environment.yml |   3 +-
 modules/nf-core/samtools/sort/main.nf         |   6 +-
 .../nf-core/samtools/sort/tests/main.nf.test  |   5 +-
 .../samtools/sort/tests/main.nf.test.snap     |  19 +-
 .../nf-core/samtools/stats/environment.yml    |   3 +-
 modules/nf-core/samtools/stats/main.nf        |   6 +-
 .../nf-core/samtools/stats/tests/main.nf.test |   2 +-
 .../samtools/stats/tests/main.nf.test.snap    |  20 +-
 .../nf-core/umitools/dedup/environment.yml    |   7 +
 modules/nf-core/umitools/dedup/main.nf        |  43 ++-
 modules/nf-core/umitools/dedup/meta.yml       |  82 +++--
 .../nf-core/umitools/extract/environment.yml  |   7 +
 modules/nf-core/umitools/extract/main.nf      |  13 +-
 modules/nf-core/umitools/extract/meta.yml     |  57 ++--
 .../umitools/extract/tests/main.nf.test       |  35 ++
 .../umitools/extract/tests/main.nf.test.snap  |  10 +
 .../umitools/extract/tests/nextflow.config    |   9 +
 .../nf-core/umitools/extract/tests/tags.yml   |   2 +
 .../tests/main.nf.test                        |  14 +-
 .../tests/main.nf.test.snap                   | 302 +++++-------------
 .../bam_sort_stats_samtools/tests/tags.yml    |   2 +-
 .../bam_stats_samtools/tests/main.nf.test     | 102 ++++++
 .../tests/main.nf.test.snap                   | 128 ++++++++
 .../nf-core/bam_stats_samtools/tests/tags.yml |   2 +
 64 files changed, 1225 insertions(+), 442 deletions(-)
 create mode 100644 modules/nf-core/samtools/bam2fq/environment.yml
 create mode 100644 modules/nf-core/samtools/bam2fq/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/bam2fq/tests/nextflow.config
 create mode 100644 modules/nf-core/samtools/bam2fq/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/flagstat/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/idxstats/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/idxstats/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/idxstats/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/index/tests/csi.nextflow.config
 create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/index/tests/tags.yml
 create mode 100644 modules/nf-core/umitools/dedup/environment.yml
 create mode 100644 modules/nf-core/umitools/extract/environment.yml
 create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test
 create mode 100644 modules/nf-core/umitools/extract/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/umitools/extract/tests/nextflow.config
 create mode 100644 modules/nf-core/umitools/extract/tests/tags.yml
 create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap
 create mode 100644 subworkflows/nf-core/bam_stats_samtools/tests/tags.yml

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 4d09370b..bd134c88 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from
 
 ## Tests
 
+You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
+receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.
+
 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index e278390b..3cdbf2b7 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn
 - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index b8bdd214..905c58e4 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -14,9 +14,9 @@ jobs:
   EditorConfig:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
 
       - name: Install editorconfig-checker
         run: npm install -g editorconfig-checker
@@ -27,9 +27,9 @@ jobs:
   Prettier:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
 
       - name: Install Prettier
         run: npm install -g prettier
@@ -40,7 +40,7 @@ jobs:
   PythonBlack:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Check code lints with Black
         uses: psf/black@stable
@@ -71,7 +71,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 74c7de72..e953a059 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,5 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/releases/tag/dev" target="_blank">nf-core/smrnaseq</a>
-  analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/smrnaseq/dev/docs/output" target="_blank">documentation</a>.
+  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/tree/dev" target="_blank">nf-core/smrnaseq</a> analysis pipeline. For information about how to interpret these results, please see the <a href="https://nf-co.re/smrnaseq/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-smrnaseq-methods-description":
     order: -1000
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 01b8653d..e248e4c3 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -4,6 +4,7 @@
 
 import org.yaml.snakeyaml.Yaml
 import groovy.json.JsonOutput
+import nextflow.extension.FilesEx
 
 class NfcoreTemplate {
 
@@ -141,12 +142,14 @@ class NfcoreTemplate {
             try {
                 if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
                 // Try to send HTML e-mail using sendmail
+                def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+                sendmail_tf.withWriter { w -> w << sendmail_html }
                 [ 'sendmail', '-t' ].execute() << sendmail_html
                 log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
             } catch (all) {
                 // Catch failures and try with plaintext
                 def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
-                if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
+                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
                     mail_cmd += [ '-A', mqc_report ]
                 }
                 mail_cmd.execute() << email_html
@@ -155,14 +158,16 @@ class NfcoreTemplate {
         }
 
         // Write summary e-mail HTML to a file
-        def output_d = new File("${params.outdir}/pipeline_info/")
-        if (!output_d.exists()) {
-            output_d.mkdirs()
-        }
-        def output_hf = new File(output_d, "pipeline_report.html")
+        def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
         output_hf.withWriter { w -> w << email_html }
-        def output_tf = new File(output_d, "pipeline_report.txt")
+        FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
+        output_hf.delete()
+
+        // Write summary e-mail TXT to a file
+        def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
         output_tf.withWriter { w -> w << email_txt }
+        FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
+        output_tf.delete()
     }
 
     //
@@ -227,15 +232,14 @@ class NfcoreTemplate {
     // Dump pipeline parameters in a json file
     //
     public static void dump_parameters(workflow, params) {
-        def output_d = new File("${params.outdir}/pipeline_info/")
-        if (!output_d.exists()) {
-            output_d.mkdirs()
-        }
-
         def timestamp  = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
-        def output_pf  = new File(output_d, "params_${timestamp}.json")
+        def filename   = "params_${timestamp}.json"
+        def temp_pf    = new File(workflow.launchDir.toString(), ".${filename}")
         def jsonStr    = JsonOutput.toJson(params)
-        output_pf.text = JsonOutput.prettyPrint(jsonStr)
+        temp_pf.text   = JsonOutput.prettyPrint(jsonStr)
+
+        FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json")
+        temp_pf.delete()
     }
 
     //
diff --git a/modules.json b/modules.json
index 4a236584..56255242 100644
--- a/modules.json
+++ b/modules.json
@@ -12,12 +12,12 @@
                     },
                     "cat/fastq": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
                         "installed_by": ["modules"]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
+                        "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
                         "installed_by": ["modules"]
                     },
                     "fastp": {
@@ -27,7 +27,7 @@
                     },
                     "fastqc": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
+                        "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
                         "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     },
                     "multiqc": {
@@ -37,42 +37,42 @@
                     },
                     "samtools/bam2fq": {
                         "branch": "master",
-                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
                         "installed_by": ["modules"]
                     },
                     "samtools/flagstat": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules", "bam_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "samtools/idxstats": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules", "bam_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "samtools/index": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules", "bam_sort_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_sort_stats_samtools", "modules"]
                     },
                     "samtools/sort": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules", "bam_sort_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_sort_stats_samtools", "modules"]
                     },
                     "samtools/stats": {
                         "branch": "master",
-                        "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
-                        "installed_by": ["modules", "bam_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "umitools/dedup": {
                         "branch": "master",
-                        "git_sha": "9d489648a7425fa18d8bed18935442c104d8733c",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
                         "installed_by": ["modules"]
                     },
                     "umitools/extract": {
                         "branch": "master",
-                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
                         "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     }
                 }
@@ -81,13 +81,13 @@
                 "nf-core": {
                     "bam_sort_stats_samtools": {
                         "branch": "master",
-                        "git_sha": "7c8eeb2b37a6c6d3ffba0aef55ff60c8718c0ba6",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
                         "installed_by": ["subworkflows"]
                     },
                     "bam_stats_samtools": {
                         "branch": "master",
-                        "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-                        "installed_by": ["subworkflows", "bam_sort_stats_samtools"]
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["bam_sort_stats_samtools", "subworkflows"]
                     },
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml
index 222b301f..bff93add 100644
--- a/modules/nf-core/cat/fastq/environment.yml
+++ b/modules/nf-core/cat/fastq/environment.yml
@@ -1,3 +1,4 @@
+name: cat_fastq
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index b75a2e73..3d963784 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -2,7 +2,7 @@ process CAT_FASTQ {
     tag "$meta.id"
     label 'process_single'
 
-    conda 'modules/nf-core/cat/fastq/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
index 7ca22161..9b3272bc 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -1,6 +1,7 @@
+name: custom_dumpsoftwareversions
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::multiqc=1.15
+  - bioconda::multiqc=1.19
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index 60a19e0e..f2187611 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
     label 'process_single'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.15--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
 
     input:
     path versions
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
index 9414c32d..5f15a5fd 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -1,4 +1,4 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: custom_dumpsoftwareversions
 description: Custom module used to dump software versions within the nf-core pipeline template
 keywords:
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
index eec1db10..b1e1630b 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
@@ -31,7 +31,12 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out).match() }
+                { assert snapshot(
+                    process.out.versions,
+                    file(process.out.mqc_yml[0]).readLines()[0..10],
+                    file(process.out.yml[0]).readLines()[0..7]
+                    ).match()
+                }
             )
         }
     }
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
index 8713b921..5f59a936 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
@@ -1,27 +1,33 @@
 {
     "Should run without failures": {
         "content": [
-            {
-                "0": [
-                    "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37"
-                ],
-                "1": [
-                    "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c"
-                ],
-                "2": [
-                    "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3"
-                ],
-                "mqc_yml": [
-                    "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c"
-                ],
-                "versions": [
-                    "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3"
-                ],
-                "yml": [
-                    "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37"
-                ]
-            }
+            [
+                "versions.yml:md5,76d454d92244589d32455833f7c1ba6d"
+            ],
+            [
+                "data: \"<style>\\n#nf-core-versions tbody:nth-child(even) {\\n    background-color: #f2f2f2;\\n\\",
+                "  }\\n</style>\\n<table class=\\\"table\\\" style=\\\"width:100%\\\" id=\\\"nf-core-versions\\\"\\",
+                "  >\\n    <thead>\\n        <tr>\\n            <th> Process Name </th>\\n            <th>\\",
+                "  \\ Software </th>\\n            <th> Version  </th>\\n        </tr>\\n    </thead>\\n\\",
+                "  \\n<tbody>\\n<tr>\\n    <td><samp>CUSTOM_DUMPSOFTWAREVERSIONS</samp></td>\\n    <td><samp>python</samp></td>\\n\\",
+                "  \\    <td><samp>3.11.7</samp></td>\\n</tr>\\n\\n<tr>\\n    <td><samp></samp></td>\\n \\",
+                "  \\   <td><samp>yaml</samp></td>\\n    <td><samp>5.4.1</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
+                "  <tbody>\\n<tr>\\n    <td><samp>TOOL1</samp></td>\\n    <td><samp>tool1</samp></td>\\n\\",
+                "  \\    <td><samp>0.11.9</samp></td>\\n</tr>\\n\\n</tbody>\\n<tbody>\\n<tr>\\n    <td><samp>TOOL2</samp></td>\\n\\",
+                "  \\    <td><samp>tool2</samp></td>\\n    <td><samp>1.9</samp></td>\\n</tr>\\n\\n</tbody>\\n\\",
+                "  <tbody>\\n<tr>\\n    <td><samp>Workflow</samp></td>\\n    <td><samp>Nextflow</samp></td>\\n\\"
+            ],
+            [
+                "CUSTOM_DUMPSOFTWAREVERSIONS:",
+                "  python: 3.11.7",
+                "  yaml: 5.4.1",
+                "TOOL1:",
+                "  tool1: 0.11.9",
+                "TOOL2:",
+                "  tool2: '1.9'",
+                "Workflow:"
+            ]
         ],
-        "timestamp": "2023-10-11T17:10:02.930699"
+        "timestamp": "2024-01-09T23:01:18.710682"
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
index f52a53a0..1787b38a 100644
--- a/modules/nf-core/fastqc/environment.yml
+++ b/modules/nf-core/fastqc/environment.yml
@@ -1,3 +1,4 @@
+name: fastqc
 channels:
   - conda-forge
   - bioconda
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 5def8818..9e19a74c 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,7 +2,7 @@ process FASTQC {
     tag "$meta.id"
     label 'process_medium'
 
-    conda 'modules/nf-core/fastqc/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
         'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
@@ -37,7 +37,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 
@@ -49,7 +49,7 @@ process FASTQC {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index 6437a144..ad9bc54f 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -3,23 +3,21 @@ nextflow_process {
     name "Test Process FASTQC"
     script "../main.nf"
     process "FASTQC"
+
     tag "modules"
     tag "modules_nfcore"
     tag "fastqc"
 
-    test("Single-Read") {
+    test("sarscov2 single-end [fastq]") {
 
         when {
-            params {
-                outdir   = "$outputDir"
-            }
             process {
                 """
                 input[0] = [
-                    [ id: 'test', single_end:true ],
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-                    ]
+                            [ id: 'test', single_end:true ],
+                            [
+                                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                            ]
                 ]
                 """
             }
@@ -28,14 +26,195 @@ nextflow_process {
         then {
             assertAll (
             { assert process.success },
+
             // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
             // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
             // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
-            { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" },
-            { assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert snapshot(process.out.versions).match("versions") },
-            { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" }
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 paired-end [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        [
+                            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                        ]
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 interleaved [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 paired-end [bam]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
             )
         }
     }
+
+    test("sarscov2 multiple [fastq]") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [id: 'test', single_end: false], // meta map
+                        [
+                            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+                        ]
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+            { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+            { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+            { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+            { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][2]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+            { assert path(process.out.html[0][1][3]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 custom_prefix") {
+
+        when {
+            process {
+            """
+            input[0] = [
+                        [ id:'mysample', single_end:true ], // meta map
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+            ]
+            """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+
+            { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+            { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 single-end [fastq] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id: 'test', single_end:true ],
+                            [
+                                file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                            ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
+                                process.out.zip.collect { file(it[1]).getName() } +
+                                process.out.versions ).match() }
+            )
+        }
+    }
+
 }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
index 636a32ce..5ef5afbd 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -1,10 +1,20 @@
 {
+    "sarscov2 single-end [fastq] - stub": {
+        "content": [
+            [
+                "test.html",
+                "test.zip",
+                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+            ]
+        ],
+        "timestamp": "2023-12-29T02:48:05.126117287"
+    },
     "versions": {
         "content": [
             [
                 "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
             ]
         ],
-        "timestamp": "2023-10-09T23:40:54+0000"
+        "timestamp": "2023-12-29T02:46:49.507942667"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/samtools/bam2fq/environment.yml b/modules/nf-core/samtools/bam2fq/environment.yml
new file mode 100644
index 00000000..5297496f
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/environment.yml
@@ -0,0 +1,7 @@
+name: samtools_bam2fq
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf
index 9301d1d3..55ffd0cf 100644
--- a/modules/nf-core/samtools/bam2fq/main.nf
+++ b/modules/nf-core/samtools/bam2fq/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_BAM2FQ {
     tag "$meta.id"
     label 'process_low'
 
-    conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null)
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' :
-        'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(inputbam)
diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml
index 319a60cf..7769046b 100644
--- a/modules/nf-core/samtools/bam2fq/meta.yml
+++ b/modules/nf-core/samtools/bam2fq/meta.yml
@@ -9,12 +9,8 @@ keywords:
 tools:
   - samtools:
       description: Tools for dealing with SAM, BAM and CRAM files
-      homepage: None
       documentation: http://www.htslib.org/doc/1.1/samtools.html
-      tool_dev_url: None
-      doi: ""
       licence: ["MIT"]
-
 input:
   - meta:
       type: map
@@ -33,7 +29,6 @@ input:
         Note: choosing TRUE will generate 4 different files.
         Choosing FALSE will produce a single file, which will be interleaved in case
         the input contains paired reads.
-
 output:
   - meta:
       type: map
@@ -50,6 +45,7 @@ output:
         FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton)
         or a single interleaved .fq.gz file if the user chooses not to split the reads.
       pattern: "*.fq.gz"
-
 authors:
   - "@lescai"
+maintainers:
+  - "@lescai"
diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test b/modules/nf-core/samtools/bam2fq/tests/main.nf.test
new file mode 100644
index 00000000..cd65abbe
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test
@@ -0,0 +1,71 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_BAM2FQ"
+    script "../main.nf"
+    process "SAMTOOLS_BAM2FQ"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/bam2fq"
+
+    config "./nextflow.config"
+
+    test("homo_sapiens - bam, false") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true)
+                ]
+                input[1] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.reads[0][1]).linesGzip[0..6],
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - bam, true") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true)
+                ]
+                input[1] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.reads[0][1].collect{
+                        if(it ==~ /.*(other|singleton)\.fq\.gz$/) {
+                            return file(it).name
+                        }
+                        return path(it).linesGzip[0..6]
+                    },
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap
new file mode 100644
index 00000000..1f824503
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap
@@ -0,0 +1,49 @@
+{
+    "homo_sapiens - bam, false": {
+        "content": [
+            [
+                "@922332/1\tRX:Z:ATTTCAG-TATTATT",
+                "GAGAGGATCTCGTGTAGAAATTGCTTTGAGCTGTTCTTTGTCATTTTCCCTTAATTCATTGTCTCTAGCTAGTCTGTTACTCTGTAAAATAAAATAATAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTTAAGGTCAGTG",
+                "+",
+                "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE<EEEEEEEEEEEAEAEE6AAEEEEE/EAAAA<AEEEEAAEEAAAA<EEE/",
+                "@922332/2\tRX:Z:ATTTCAG-TATTATT",
+                "TATTTTACAGAGTAACAGACTAGCTAGAGACAATGAATTAAGGGAAAATGACAAAGAACAGCTCAAAGCAATTTCTACACGAGATCCTCTCTCTGAAATAGATCGGAAGAGCACACGTCTGAACTCCAGTCACGAACCGCGAT",
+                "+"
+            ],
+            [
+                "versions.yml:md5,0b88ba6888f9730c167242c0c4b3d04b"
+            ]
+        ],
+        "timestamp": "2023-12-04T13:40:52.124056991"
+    },
+    "homo_sapiens - bam, true": {
+        "content": [
+            [
+                [
+                    "@922332\tRX:Z:ATTTCAG-TATTATT",
+                    "GAGAGGATCTCGTGTAGAAATTGCTTTGAGCTGTTCTTTGTCATTTTCCCTTAATTCATTGTCTCTAGCTAGTCTGTTACTCTGTAAAATAAAATAATAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTTAAGGTCAGTG",
+                    "+",
+                    "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE<EEEEEEEEEEEAEAEE6AAEEEEE/EAAAA<AEEEEAAEEAAAA<EEE/",
+                    "@928177\tRX:Z:ACATAAA-TGAGATT",
+                    "AAAAGTATATAAGTAATACATATTTATAAATCTATTAAGAAAGCAAGTAATATGTACCTTAAGAATTTAATGGGAAAATAATTAGACTTACTTTAAATGCCAAAAGAAAAAGTGCCCAATCCTTTGATTAGTCAATGCTTTCT",
+                    "+"
+                ],
+                [
+                    "@922332\tRX:Z:ATTTCAG-TATTATT",
+                    "TATTTTACAGAGTAACAGACTAGCTAGAGACAATGAATTAAGGGAAAATGACAAAGAACAGCTCAAAGCAATTTCTACACGAGATCCTCTCTCTGAAATAGATCGGAAGAGCACACGTCTGAACTCCAGTCACGAACCGCGAT",
+                    "+",
+                    "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEA<AEEE<<<<AAEEEEEEEEEEA<EEEAEAE//A<AAE<6",
+                    "@928177\tRX:Z:ACATAAA-TGAGATT",
+                    "TTACTGAAGAAAGCATTGACTAATCAAAGGATTGGGCACTTTTTCTTTTGGCATTTAAAGTAAGTCTAATTATTTTCCCATTAAATTCTTAAGGTACATATTACTTGCTTTCTTAATAGATTTATAAATATGTATTACTTATA",
+                    "+"
+                ],
+                "test_other.fq.gz",
+                "test_singleton.fq.gz"
+            ],
+            [
+                "versions.yml:md5,0b88ba6888f9730c167242c0c4b3d04b"
+            ]
+        ],
+        "timestamp": "2023-12-04T13:45:59.949456034"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/bam2fq/tests/nextflow.config b/modules/nf-core/samtools/bam2fq/tests/nextflow.config
new file mode 100644
index 00000000..dae4e48b
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/tests/nextflow.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = '-T RX'
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/bam2fq/tests/tags.yml b/modules/nf-core/samtools/bam2fq/tests/tags.yml
new file mode 100644
index 00000000..2fbf4a1f
--- /dev/null
+++ b/modules/nf-core/samtools/bam2fq/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/bam2fq:
+  - "modules/nf-core/samtools/bam2fq/**"
diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml
index 04c82f14..5efae053 100644
--- a/modules/nf-core/samtools/flagstat/environment.yml
+++ b/modules/nf-core/samtools/flagstat/environment.yml
@@ -1,6 +1,7 @@
+name: samtools_flagstat
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf
index b289d151..f1893d7c 100644
--- a/modules/nf-core/samtools/flagstat/main.nf
+++ b/modules/nf-core/samtools/flagstat/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT {
     tag "$meta.id"
     label 'process_single'
 
-    conda 'modules/nf-core/samtools/flagstat/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(bam), path(bai)
diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test
new file mode 100644
index 00000000..c8dd8dc9
--- /dev/null
+++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_FLAGSTAT"
+    script "../main.nf"
+    process "SAMTOOLS_FLAGSTAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/flagstat"
+
+    test("BAM") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.flagstat).match() },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap
new file mode 100644
index 00000000..880019f2
--- /dev/null
+++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap
@@ -0,0 +1,16 @@
+{
+    "BAM": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-14T15:49:22.577133"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml
new file mode 100644
index 00000000..2d2b7255
--- /dev/null
+++ b/modules/nf-core/samtools/flagstat/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/flagstat:
+  - modules/nf-core/samtools/flagstat/**
diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml
index 04c82f14..2401db0f 100644
--- a/modules/nf-core/samtools/idxstats/environment.yml
+++ b/modules/nf-core/samtools/idxstats/environment.yml
@@ -1,6 +1,7 @@
+name: samtools_idxstats
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf
index 97217419..00d916bb 100644
--- a/modules/nf-core/samtools/idxstats/main.nf
+++ b/modules/nf-core/samtools/idxstats/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS {
     tag "$meta.id"
     label 'process_single'
 
-    conda 'modules/nf-core/samtools/idxstats/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(bam), path(bai)
diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test
new file mode 100644
index 00000000..f6c92150
--- /dev/null
+++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_IDXSTATS"
+    script "../main.nf"
+    process "SAMTOOLS_IDXSTATS"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/idxstats"
+
+    test("BAM") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.idxstats).match() },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap
new file mode 100644
index 00000000..4c6c12bd
--- /dev/null
+++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap
@@ -0,0 +1,16 @@
+{
+    "BAM": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-14T15:52:19.875194"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml
new file mode 100644
index 00000000..d3057c61
--- /dev/null
+++ b/modules/nf-core/samtools/idxstats/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/idxstats:
+  - modules/nf-core/samtools/idxstats/**
diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml
index 04c82f14..296ed99e 100644
--- a/modules/nf-core/samtools/index/environment.yml
+++ b/modules/nf-core/samtools/index/environment.yml
@@ -1,6 +1,7 @@
+name: samtools_index
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
index af3dbc4c..8ad18fdc 100644
--- a/modules/nf-core/samtools/index/main.nf
+++ b/modules/nf-core/samtools/index/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_INDEX {
     tag "$meta.id"
     label 'process_low'
 
-    conda 'modules/nf-core/samtools/index/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(input)
diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config
new file mode 100644
index 00000000..0ed260ef
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+    withName: SAMTOOLS_INDEX {
+        ext.args = '-c'
+    }
+
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test
new file mode 100644
index 00000000..c76a9169
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test
@@ -0,0 +1,87 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_INDEX"
+    script "../main.nf"
+    process "SAMTOOLS_INDEX"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/index"
+
+    test("sarscov2 [BAI]") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.bai).match("bai") },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+
+    test("homo_sapiens [CRAI]") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.crai).match("crai") },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+
+    test("homo_sapiens [CSI]") {
+
+        config "./csi.nextflow.config"
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert path(process.out.csi.get(0).get(1)).exists() },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap
new file mode 100644
index 00000000..b3baee7f
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap
@@ -0,0 +1,28 @@
+{
+    "crai": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-15T15:17:37.30801"
+    },
+    "bai": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-15T15:17:30.869234"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml
new file mode 100644
index 00000000..e0f58a7a
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/index:
+  - modules/nf-core/samtools/index/**
diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml
index 04c82f14..cd50868c 100644
--- a/modules/nf-core/samtools/sort/environment.yml
+++ b/modules/nf-core/samtools/sort/environment.yml
@@ -1,6 +1,7 @@
+name: samtools_sort
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf
index 77256702..4a666d42 100644
--- a/modules/nf-core/samtools/sort/main.nf
+++ b/modules/nf-core/samtools/sort/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_SORT {
     tag "$meta.id"
     label 'process_medium'
 
-    conda 'modules/nf-core/samtools/sort/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(bam)
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test
index 1f72f3b9..abb80978 100644
--- a/modules/nf-core/samtools/sort/tests/main.nf.test
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test
@@ -61,7 +61,10 @@ nextflow_process {
         then {
             assertAll (
                 { assert process.success },
-                { assert snapshot(process.out).match() }
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.versions
+                ).match() }
             )
         }
 
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
index a43566da..ff722259 100644
--- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
@@ -8,14 +8,14 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7"
+                        "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06"
                     ]
                 ],
                 "1": [
                     
                 ],
                 "2": [
-                    "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8"
+                    "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80"
                 ],
                 "bam": [
                     [
@@ -23,17 +23,26 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7"
+                        "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06"
                     ]
                 ],
                 "csi": [
                     
                 ],
                 "versions": [
-                    "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8"
+                    "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80"
                 ]
             }
         ],
-        "timestamp": "2023-10-17T17:21:46.5427968"
+        "timestamp": "2023-12-04T11:11:22.005628301"
+    },
+    "test_samtools_sort_stub": {
+        "content": [
+            "test.sorted.bam",
+            [
+                "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80"
+            ]
+        ],
+        "timestamp": "2023-12-04T17:47:22.314445935"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml
index 04c82f14..b89ce647 100644
--- a/modules/nf-core/samtools/stats/environment.yml
+++ b/modules/nf-core/samtools/stats/environment.yml
@@ -1,6 +1,7 @@
+name: samtools_stats
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::samtools=1.17
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf
index fe30bf89..7539140a 100644
--- a/modules/nf-core/samtools/stats/main.nf
+++ b/modules/nf-core/samtools/stats/main.nf
@@ -2,10 +2,10 @@ process SAMTOOLS_STATS {
     tag "$meta.id"
     label 'process_single'
 
-    conda 'modules/nf-core/samtools/stats/environment.yml'
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
 
     input:
     tuple val(meta), path(input), path(input_index)
diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test
index e037132c..20c3efe1 100644
--- a/modules/nf-core/samtools/stats/tests/main.nf.test
+++ b/modules/nf-core/samtools/stats/tests/main.nf.test
@@ -4,7 +4,7 @@ nextflow_process {
     script "../main.nf"
     process "SAMTOOLS_STATS"
     tag "modules"
-    tag "modules/nf-core"
+    tag "modules_nfcore"
     tag "samtools"
     tag "samtools/stats"
 
diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap
index 516b2b01..025c83a5 100644
--- a/modules/nf-core/samtools/stats/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap
@@ -8,11 +8,11 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.stats:md5,6e768486d5df0257351c5419a79f9c9b"
+                        "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def"
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,08035f3409d934d47a416150884bb0df"
+                    "versions.yml:md5,650a365c6635001436008350ae83337c"
                 ],
                 "stats": [
                     [
@@ -20,15 +20,15 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.stats:md5,6e768486d5df0257351c5419a79f9c9b"
+                        "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def"
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,08035f3409d934d47a416150884bb0df"
+                    "versions.yml:md5,650a365c6635001436008350ae83337c"
                 ]
             }
         ],
-        "timestamp": "2023-10-18T12:12:42.998746"
+        "timestamp": "2023-12-04T11:07:28.26821485"
     },
     "SAMTOOLS CRAM Should run without failures": {
         "content": [
@@ -39,11 +39,11 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a"
+                        "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43"
                     ]
                 ],
                 "1": [
-                    "versions.yml:md5,08035f3409d934d47a416150884bb0df"
+                    "versions.yml:md5,650a365c6635001436008350ae83337c"
                 ],
                 "stats": [
                     [
@@ -51,14 +51,14 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a"
+                        "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43"
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,08035f3409d934d47a416150884bb0df"
+                    "versions.yml:md5,650a365c6635001436008350ae83337c"
                 ]
             }
         ],
-        "timestamp": "2023-10-18T12:13:30.747222"
+        "timestamp": "2023-12-04T11:07:50.356233402"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/umitools/dedup/environment.yml b/modules/nf-core/umitools/dedup/environment.yml
new file mode 100644
index 00000000..f443735f
--- /dev/null
+++ b/modules/nf-core/umitools/dedup/environment.yml
@@ -0,0 +1,7 @@
+name: umitools_dedup
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::umi_tools=1.1.4
diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf
index dfcbcf2f..64ab8f98 100644
--- a/modules/nf-core/umitools/dedup/main.nf
+++ b/modules/nf-core/umitools/dedup/main.nf
@@ -2,19 +2,21 @@ process UMITOOLS_DEDUP {
     tag "$meta.id"
     label "process_medium"
 
-    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
-        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
+        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
 
     input:
     tuple val(meta), path(bam), path(bai)
+    val get_output_stats
 
     output:
-    tuple val(meta), path("*.bam")             , emit: bam
-    tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance
-    tuple val(meta), path("*per_umi.tsv")      , emit: tsv_per_umi
-    tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position
+    tuple val(meta), path("${prefix}.bam")     , emit: bam
+    tuple val(meta), path("*.log")             , emit: log
+    tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance
+    tuple val(meta), path("*per_umi.tsv")      , optional:true, emit: tsv_per_umi
+    tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position
     path  "versions.yml"                       , emit: versions
 
     when:
@@ -22,20 +24,39 @@ process UMITOOLS_DEDUP {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
     def paired = meta.single_end ? "" : "--paired"
+    stats = get_output_stats ? "--output-stats ${prefix}" : ""
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+
+    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
     """
-    umi_tools \\
+    PYTHONHASHSEED=0 umi_tools \\
         dedup \\
         -I $bam \\
         -S ${prefix}.bam \\
-        --output-stats $prefix \\
+        -L ${prefix}.log \\
+        $stats \\
         $paired \\
         $args
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+        umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${prefix}.bam
+    touch ${prefix}.log
+    touch ${prefix}_edit_distance.tsv
+    touch ${prefix}_per_umi.tsv
+    touch ${prefix}_per_position.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml
index eee8952f..38d3fd46 100644
--- a/modules/nf-core/umitools/dedup/meta.yml
+++ b/modules/nf-core/umitools/dedup/meta.yml
@@ -3,57 +3,69 @@ description: Deduplicate reads based on the mapping co-ordinate and the UMI atta
 keywords:
   - umitools
   - deduplication
+  - dedup
 tools:
   - umi_tools:
-    description: >
-      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-      and single cell RNA-Seq cell barcodes
-    documentation: https://umi-tools.readthedocs.io/en/latest/
-    license: ["MIT"]
+      description: >
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
+
+      documentation: https://umi-tools.readthedocs.io/en/latest/
+      license: ["MIT"]
 input:
   - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
+      type: map
+      description: |
+        Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - bam:
-    type: file
-    description: |
-      BAM file containing reads to be deduplicated via UMIs.
-    pattern: "*.{bam}"
+      type: file
+      description: |
+        BAM file containing reads to be deduplicated via UMIs.
+      pattern: "*.{bam}"
   - bai:
-    type: file
-    description: |
-      BAM index files corresponding to the input BAM file.
-    pattern: "*.{bai}"
+      type: file
+      description: |
+        BAM index files corresponding to the input BAM file.
+      pattern: "*.{bai}"
+  - get_output_stats:
+      type: boolean
+      description: |
+        Whether or not to generate output stats.
 output:
   - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
+      type: map
+      description: |
+        Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - bam:
-    type: file
-    description: BAM file with deduplicated UMIs.
-    pattern: "*.{bam}"
+      type: file
+      description: BAM file with deduplicated UMIs.
+      pattern: "*.{bam}"
+  - log:
+      type: file
+      description: File with logging information
+      pattern: "*.{log}"
   - tsv_edit_distance:
-    type: file
-    description: Reports the (binned) average edit distance between the UMIs at each position.
-    pattern: "*edit_distance.tsv"
+      type: file
+      description: Reports the (binned) average edit distance between the UMIs at each position.
+      pattern: "*edit_distance.tsv"
   - tsv_per_umi:
-    type: file
-    description: UMI-level summary statistics.
-    pattern: "*per_umi.tsv"
+      type: file
+      description: UMI-level summary statistics.
+      pattern: "*per_umi.tsv"
   - tsv_umi_per_position:
-    type: file
-    description: Tabulates the counts for unique combinations of UMI and position.
-    pattern: "*per_position.tsv"
+      type: file
+      description: Tabulates the counts for unique combinations of UMI and position.
+      pattern: "*per_position.tsv"
   - versions:
-    type: file
-    description: File containing software versions
-    pattern: "versions.yml"
-
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
 authors:
   - "@drpatelh"
   - "@grst"
   - "@klkeys"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
+  - "@klkeys"
diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml
new file mode 100644
index 00000000..7d08ac0e
--- /dev/null
+++ b/modules/nf-core/umitools/extract/environment.yml
@@ -0,0 +1,7 @@
+name: umitools_extract
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::umi_tools=1.1.4
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
index 22a405b9..4bd79e79 100644
--- a/modules/nf-core/umitools/extract/main.nf
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -1,11 +1,12 @@
 process UMITOOLS_EXTRACT {
     tag "$meta.id"
-    label "process_low"
+    label "process_single"
+    label "process_long"
 
-    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
-        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
+        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
+        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
 
     input:
     tuple val(meta), path(reads)
@@ -32,7 +33,7 @@ process UMITOOLS_EXTRACT {
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+            umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
         END_VERSIONS
         """
     }  else {
@@ -48,7 +49,7 @@ process UMITOOLS_EXTRACT {
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
+            umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
         END_VERSIONS
         """
     }
diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml
index 7fc23f72..7695b271 100644
--- a/modules/nf-core/umitools/extract/meta.yml
+++ b/modules/nf-core/umitools/extract/meta.yml
@@ -1,47 +1,48 @@
 name: umitools_extract
 description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place
 keywords:
-  - umitools
+  - UMI
+  - barcode
   - extract
+  - umitools
 tools:
   - umi_tools:
-    description: >
-      UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-      and single cell RNA-Seq cell barcodes
-    documentation: https://umi-tools.readthedocs.io/en/latest/
-    license: ["MIT"]
+      description: >
+        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
+      documentation: https://umi-tools.readthedocs.io/en/latest/
+      license: "MIT"
 input:
   - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
+      type: map
+      description: |
+        Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - reads:
-    type: list
-    description: |
-      List of input FASTQ files whose UMIs will be extracted.
+      type: list
+      description: |
+        List of input FASTQ files whose UMIs will be extracted.
 output:
   - meta:
-    type: map
-    description: |
-      Groovy Map containing sample information
+      type: map
+      description: |
+        Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - reads:
-    type: file
-    description: >
-      Extracted FASTQ files. |
-      For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
-        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
-    pattern: "*.{fastq.gz}"
+      type: file
+      description: >
+        Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+      pattern: "*.{fastq.gz}"
   - log:
-    type: file
-    description: Logfile for umi_tools
-    pattern: "*.{log}"
+      type: file
+      description: Logfile for umi_tools
+      pattern: "*.{log}"
   - versions:
-    type: file
-    description: File containing software versions
-    pattern: "versions.yml"
-
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
 authors:
   - "@drpatelh"
   - "@grst"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test
new file mode 100644
index 00000000..22242d1d
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process {
+
+    name "Test Process UMITOOLS_EXTRACT"
+    script "../main.nf"
+    process "UMITOOLS_EXTRACT"
+    config "./nextflow.config"
+    tag "modules_nfcore"
+    tag "modules"
+    tag "umitools"
+    tag "umitools/extract"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+            { assert process.success },
+            { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
new file mode 100644
index 00000000..6d5944f1
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -0,0 +1,10 @@
+{
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb"
+            ]
+        ],
+        "timestamp": "2023-12-08T09:41:43.540658352"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
new file mode 100644
index 00000000..c866f5a0
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN"'
+    }
+
+}
diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml
new file mode 100644
index 00000000..c3fb23de
--- /dev/null
+++ b/modules/nf-core/umitools/extract/tests/tags.yml
@@ -0,0 +1,2 @@
+umitools/extract:
+  - modules/nf-core/umitools/extract/**
diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test
index a8a13f2a..59b749d8 100644
--- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test
+++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test
@@ -5,7 +5,9 @@ nextflow_workflow {
     workflow "BAM_SORT_STATS_SAMTOOLS"
     tag "subworkflows"
     tag "subworkflows_nfcore"
+    tag "subworkflows/bam_sort_stats_samtools"
     tag "bam_sort_stats_samtools"
+    tag "subworkflows/bam_stats_samtools"
     tag "bam_stats_samtools"
     tag "samtools"
     tag "samtools/index"
@@ -35,7 +37,11 @@ nextflow_workflow {
         then {
             assertAll(
                 { assert workflow.success},
-                { assert snapshot(workflow.out).match()}
+                { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"},
+                { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"},
+                { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") },
+                { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") },
+                { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") }
             )
         }
     }
@@ -61,7 +67,11 @@ nextflow_workflow {
         then {
             assertAll(
                 { assert workflow.success},
-                { assert snapshot(workflow.out).match()}
+                { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"},
+                { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"},
+                { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") },
+                { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") },
+                { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") }
             )
         }
     }
diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap
index 50ffde60..77afbf17 100644
--- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap
@@ -1,236 +1,86 @@
 {
-    "test_bam_sort_stats_samtools_single_end": {
+    "test_bam_sort_stats_samtools_paired_end_flagstats": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam.bai:md5,002488588110dcee464e65f68c4726e8"
-                    ]
-                ],
-                "2": [
-                    
-                ],
-                "3": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.stats:md5,796f45f791f06291b76329528fae0a54"
-                    ]
-                ],
-                "4": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
-                    ]
-                ],
-                "5": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
-                    ]
-                ],
-                "6": [
-                    "versions.yml:md5,176f12ceae81f76341e481988c799c15",
-                    "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f",
-                    "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52",
-                    "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c",
-                    "versions.yml:md5,f2eb7aba102adae159006c9a443c301b"
-                ],
-                "bai": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam.bai:md5,002488588110dcee464e65f68c4726e8"
-                    ]
-                ],
-                "bam": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc"
-                    ]
-                ],
-                "csi": [
-                    
-                ],
-                "flagstat": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
-                    ]
-                ],
-                "idxstats": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
-                    ]
-                ],
-                "stats": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.stats:md5,796f45f791f06291b76329528fae0a54"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,176f12ceae81f76341e481988c799c15",
-                    "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f",
-                    "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52",
-                    "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c",
-                    "versions.yml:md5,f2eb7aba102adae159006c9a443c301b"
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
                 ]
-            }
+            ]
         ],
-        "timestamp": "2023-10-18T09:34:31.989804787"
+        "timestamp": "2023-10-22T20:25:03.687121177"
     },
-    "test_bam_sort_stats_samtools_paired_end": {
+    "test_bam_sort_stats_samtools_paired_end_idxstats": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam:md5,81adec7882577c0ad17962599acf7745"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d"
-                    ]
-                ],
-                "2": [
-                    
-                ],
-                "3": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.stats:md5,f3f0e5aad236aae678ac5361b529a664"
-                    ]
-                ],
-                "4": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
-                    ]
-                ],
-                "5": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
-                    ]
-                ],
-                "6": [
-                    "versions.yml:md5,176f12ceae81f76341e481988c799c15",
-                    "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f",
-                    "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52",
-                    "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c",
-                    "versions.yml:md5,f2eb7aba102adae159006c9a443c301b"
-                ],
-                "bai": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d"
-                    ]
-                ],
-                "bam": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.bam:md5,81adec7882577c0ad17962599acf7745"
-                    ]
-                ],
-                "csi": [
-                    
-                ],
-                "flagstat": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
-                    ]
-                ],
-                "idxstats": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
-                    ]
-                ],
-                "stats": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.stats:md5,f3f0e5aad236aae678ac5361b529a664"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,176f12ceae81f76341e481988c799c15",
-                    "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f",
-                    "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52",
-                    "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c",
-                    "versions.yml:md5,f2eb7aba102adae159006c9a443c301b"
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
                 ]
-            }
+            ]
         ],
-        "timestamp": "2023-10-18T09:34:57.682759147"
+        "timestamp": "2023-10-22T20:25:03.709648916"
+    },
+    "test_bam_sort_stats_samtools_single_end_stats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.stats:md5,f281507081517414eb1a04b2d9c855b2"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:06:50.951881479"
+    },
+    "test_bam_sort_stats_samtools_paired_end_stats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.stats:md5,e32e7e49dce1fbe327a89e0fb7bc01b1"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:06:59.253905951"
+    },
+    "test_bam_sort_stats_samtools_single_end_idxstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-22T20:25:58.451364604"
+    },
+    "test_bam_sort_stats_samtools_single_end_flagstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+                ]
+            ]
+        ],
+        "timestamp": "2023-10-22T20:25:58.416859285"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml
index a8274109..30b69d6a 100644
--- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml
+++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml
@@ -1,2 +1,2 @@
-bam_sort_stats_samtools:
+subworkflows/bam_sort_stats_samtools:
   - subworkflows/nf-core/bam_sort_stats_samtools/**
diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test
new file mode 100644
index 00000000..97210890
--- /dev/null
+++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test
@@ -0,0 +1,102 @@
+nextflow_workflow {
+
+    name "Test Workflow BAM_STATS_SAMTOOLS"
+    script "../main.nf"
+    workflow "BAM_STATS_SAMTOOLS"
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "bam_stats_samtools"
+    tag "subworkflows/bam_stats_samtools"
+    tag "samtools"
+    tag "samtools/flagstat"
+    tag "samtools/idxstats"
+    tag "samtools/stats"
+
+    test("test_bam_stats_samtools_single_end") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            workflow {
+                """
+                    input[0] = [ [ id:'test', single_end:true ], // meta map
+                                file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true),
+                                file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true)
+                        ]
+                    input[1] = [ [ id:'genome' ],
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") },
+                { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") },
+                { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") }
+            )
+        }
+    }
+
+    test("test_bam_stats_samtools_paired_end") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            workflow {
+                """
+                    input[0] = [ [ id:'test', single_end:true ], // meta map
+                                file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                                file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+                        ]
+                    input[1] = [ [ id:'genome' ],
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") },
+                { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") },
+                { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") }
+            )
+        }
+    }
+
+    test("test_bam_stats_samtools_paired_end_cram") {
+
+        when {
+            params {
+                outdir   = "$outputDir"
+            }
+            workflow {
+                """
+                     input[0] = [ [ id:'test', single_end:false ], // meta map
+                                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+                                ]
+                    input[1] = [ [ id:'genome' ],
+                            file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") },
+                { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") },
+                { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap
new file mode 100644
index 00000000..d3af1376
--- /dev/null
+++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap
@@ -0,0 +1,128 @@
+{
+    "test_bam_stats_samtools_paired_end_cram_flagstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:31:26.194017574"
+    },
+    "test_bam_stats_samtools_paired_end_stats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.stats:md5,49e2b43344ff92bc4c02463a58f7ba4a"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:07:13.965061942"
+    },
+    "test_bam_stats_samtools_paired_end_flagstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:31:11.668517251"
+    },
+    "test_bam_stats_samtools_single_end_flagstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:26:10.340046381"
+    },
+    "test_bam_stats_samtools_paired_end_cram_idxstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:31:26.207052003"
+    },
+    "test_bam_stats_samtools_single_end_stats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.stats:md5,5a6667d97806e5002731e9cf23674fad"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:07:06.676820877"
+    },
+    "test_bam_stats_samtools_paired_end_idxstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:31:11.68246157"
+    },
+    "test_bam_stats_samtools_single_end_idxstats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-06T09:26:10.349439801"
+    },
+    "test_bam_stats_samtools_paired_end_cram_stats": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.stats:md5,2cf2fe93596ee3d74f946097b204a629"
+                ]
+            ]
+        ],
+        "timestamp": "2023-12-04T11:07:22.30295557"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml
new file mode 100644
index 00000000..ec2f2d68
--- /dev/null
+++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/bam_stats_samtools:
+  - subworkflows/nf-core/bam_stats_samtools/**

From 285cfd0d12a9f02286052ffbe36e933060a1747f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 13:30:57 +0000
Subject: [PATCH 057/117] Fixing linting errors

---
 assets/multiqc_config.yml                  |  5 +--
 modules/nf-core/multiqc/environment.yml    |  2 +-
 modules/nf-core/multiqc/meta.yml           |  1 -
 modules/nf-core/multiqc/tests/main.nf.test | 48 +++++++++++++++-------
 4 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 71a77e0b..11c1b997 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,8 +1,7 @@
 report_comment: >
-
-  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/releases/tag/2.2.3" target="_blank">nf-core/smrnaseq</a>
+  This report has been generated by the <a href="https://github.com/nf-core/smrnaseq/tree/dev" target="_blank">nf-core/smrnaseq</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/smrnaseq/2.2.3/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/smrnaseq/dev/docs/output" target="_blank">documentation</a>.
 
 report_section_order:
   "nf-core-smrnaseq-methods-description":
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index bc0bdb5b..7625b752 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::multiqc=1.18
+  - bioconda::multiqc=1.19
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index f1aa660e..45a9bc35 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -1,4 +1,3 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: multiqc
 description: Aggregate results from bioinformatics analyses across many samples into a single report
 keywords:
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
index c2dad217..d0438eda 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -7,12 +7,9 @@ nextflow_process {
     tag "modules_nfcore"
     tag "multiqc"
 
-    test("MULTIQC: FASTQC") {
+    test("sarscov2 single-end [fastqc]") {
 
         when {
-            params {
-                outdir = "$outputDir"
-            }
             process {
                 """
                 input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
@@ -26,20 +23,17 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert path(process.out.report.get(0)).exists() },
-                { assert path(process.out.data.get(0)).exists() },
-                { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+                { assert process.out.data[0] ==~ ".*/multiqc_data" },
+                { assert snapshot(process.out.versions).match("versions") }
             )
         }
 
     }
 
-    test("MULTIQC: FASTQC and a config file") {
+    test("sarscov2 single-end [fastqc] [config]") {
 
         when {
-            params {
-                outdir = "$outputDir"
-            }
             process {
                 """
                 input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
@@ -53,9 +47,35 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert path(process.out.report.get(0)).exists() },
-                { assert path(process.out.data.get(0)).exists() },
-                { assert path(process.out.versions.get(0)).getText().contains("multiqc") }
+                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
+                { assert process.out.data[0] ==~ ".*/multiqc_data" },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+    }
+
+    test("sarscov2 single-end [fastqc] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)])
+                input[1] = []
+                input[2] = []
+                input[3] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.report.collect { file(it).getName() } +
+                                process.out.data.collect { file(it).getName() } +
+                                process.out.plots.collect { file(it).getName() } +
+                                process.out.versions ).match() }
             )
         }
 

From e1b232f42c6db1dc701233ab2f7befe31c62aff7 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 13:54:21 +0000
Subject: [PATCH 058/117] Follow the lead, Timmy

---
 subworkflows/local/umi_dedup.nf               |  8 +-
 .../nf-core/fastqc_umitools_trimgalore.nf     | 78 -------------------
 workflows/smrnaseq.nf                         | 40 ++++------
 3 files changed, 19 insertions(+), 107 deletions(-)
 delete mode 100644 subworkflows/nf-core/fastqc_umitools_trimgalore.nf

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 12033274..8712f526 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -1,10 +1,10 @@
-// 
+//
 // Deduplicate the UMI reads by mapping them to the complete genome.
 //
 
 include { INDEX_GENOME                        } from '../../modules/local/bowtie_genome'
 include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME    } from '../../modules/local/bowtie_map_mirna'
-include { BAM_SORT_SAMTOOLS                   } from '../../subworkflows/nf-core/bam_sort_samtools'
+include { BAM_SORT_STATS_SAMTOOLS             } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
 include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/modules/umitools/dedup/main'
 include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/modules/samtools/bam2fq/main'
 include { CAT_CAT                             } from '../../modules/nf-core/modules/cat/cat/main'
@@ -31,7 +31,7 @@ workflow DEDUPLICATE_UMIS {
     }
 
     if (bt_index){
-        
+
         UMI_MAP_GENOME ( reads, bt_index.collect() )
         ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
@@ -54,7 +54,7 @@ workflow DEDUPLICATE_UMIS {
                 .join(UMI_MAP_GENOME.out.unmapped)
                 .map { meta, file1, file2 -> [meta, [file1, file2]]}
                 .set { ch_cat }
-    
+
             CAT_CAT ( ch_cat )
             ch_dedup_reads = CAT_CAT.out.file_out
         }
diff --git a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf b/subworkflows/nf-core/fastqc_umitools_trimgalore.nf
deleted file mode 100644
index ca158e7a..00000000
--- a/subworkflows/nf-core/fastqc_umitools_trimgalore.nf
+++ /dev/null
@@ -1,78 +0,0 @@
-//
-// Read QC, UMI extraction and trimming
-//
-
-nextflow.enable.dsl=2
-
-include { FASTQC           } from '../../modules/nf-core/modules/fastqc/main'
-include { UMITOOLS_EXTRACT } from '../../modules/nf-core/modules/umitools/extract/main'
-include { TRIMGALORE       } from '../../modules/nf-core/modules/trimgalore/main'
-
-workflow FASTQC_UMITOOLS_TRIMGALORE {
-    take:
-    reads            // channel: [ val(meta), [ reads ] ]
-    skip_fastqc      // boolean: true/false
-    with_umi         // boolean: true/false
-    skip_trimming    // boolean: true/false
-    umi_discard_read // integer: 0, 1 or 2
-
-    main:
-
-    ch_versions = Channel.empty()
-    fastqc_html = Channel.empty()
-    fastqc_zip  = Channel.empty()
-    if (!skip_fastqc) {
-        FASTQC ( reads ).html.set { fastqc_html }
-        fastqc_zip  = FASTQC.out.zip
-        ch_versions = ch_versions.mix(FASTQC.out.versions.first())
-    }
-
-    umi_reads = reads
-    umi_log   = Channel.empty()
-    if (with_umi) {
-        UMITOOLS_EXTRACT ( reads ).reads.set { umi_reads }
-        umi_log     = UMITOOLS_EXTRACT.out.log
-        ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())
-
-        // Discard R1 / R2 if required
-        if (umi_discard_read in [1,2]) {
-            UMITOOLS_EXTRACT
-                .out
-                .reads
-                .map { meta, reads ->
-                    if (!meta.single_end) {
-                        meta['single_end'] = true
-                        reads = reads[umi_discard_read % 2]
-                    }
-                    return [ meta, reads ]
-                }
-                .set { umi_reads }
-        }
-    }
-
-    trim_reads = umi_reads
-    trim_html  = Channel.empty()
-    trim_zip   = Channel.empty()
-    trim_log   = Channel.empty()
-    if (!skip_trimming) {
-        TRIMGALORE ( umi_reads ).reads.set { trim_reads }
-        trim_html   = TRIMGALORE.out.html
-        trim_zip    = TRIMGALORE.out.zip
-        trim_log    = TRIMGALORE.out.log
-        ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first())
-    }
-
-    emit:
-    reads = trim_reads // channel: [ val(meta), [ reads ] ]
-
-    fastqc_html        // channel: [ val(meta), [ html ] ]
-    fastqc_zip         // channel: [ val(meta), [ zip ] ]
-
-    umi_log            // channel: [ val(meta), [ log ] ]
-
-    trim_html          // channel: [ val(meta), [ html ] ]
-    trim_zip           // channel: [ val(meta), [ zip ] ]
-    trim_log           // channel: [ val(meta), [ txt ] ]
-
-    versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
-}
\ No newline at end of file
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 55f7614b..04529c63 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -64,14 +64,14 @@ if (!params.mirgenedb) {
     if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"}
 }
 
-include { INPUT_CHECK        } from '../subworkflows/local/input_check'
-include { FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastqc_umitools_trimgalore'
-include { DEDUPLICATE_UMIS           } from '../subworkflows/local/umi_dedup'
-include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter'
-include { MIRNA_QUANT        } from '../subworkflows/local/mirna_quant'
-include { GENOME_QUANT       } from '../subworkflows/local/genome_quant'
-include { MIRTRACE           } from '../subworkflows/local/mirtrace'
-include { MIRDEEP2           } from '../subworkflows/local/mirdeep2'
+include { INPUT_CHECK           } from '../subworkflows/local/input_check'
+include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
+include { DEDUPLICATE_UMIS      } from '../subworkflows/local/umi_dedup'
+include { CONTAMINANT_FILTER    } from '../subworkflows/local/contaminant_filter'
+include { MIRNA_QUANT           } from '../subworkflows/local/mirna_quant'
+include { GENOME_QUANT          } from '../subworkflows/local/genome_quant'
+include { MIRTRACE              } from '../subworkflows/local/mirtrace'
+include { MIRDEEP2              } from '../subworkflows/local/mirdeep2'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -132,22 +132,19 @@ workflow SMRNASEQ {
     ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
 
     //
-    // SUBWORKFLOW: Read QC and trim adapters
+    // SUBWORKFLOW: Read QC, extract UMI and trim adapters & dedup UMIs if necessary / desired by the user
     //
 
-    //
-    // SUBWORKFLOW: Read QC, extract UMI and trim adapters
-    //
-    FASTQC_UMITOOLS_FASTP (
+    FASTQ_FASTQC_UMITOOLS_FASTP (
         ch_cat_fastq,
         params.skip_fastqc || params.skip_qc,
         params.with_umi,
         params.skip_trimming,
         params.umi_discard_read
     )
-    ch_versions = ch_versions.mix(FASTQC_UMITOOLS_FASTP.out.versions)
+    ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    reads_for_mirna = FASTQC_UMITOOLS_FASTP.out.reads
+    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     //
     // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome
@@ -156,8 +153,8 @@ workflow SMRNASEQ {
         if (fasta){
             fasta_ch = file(fasta)
             DEDUPLICATE_UMIS (
-                fasta_ch, 
-                bt_index, 
+                fasta_ch,
+                bt_index,
                 FASTQC_UMITOOLS_FASTP.out.reads
             )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads
@@ -165,18 +162,11 @@ workflow SMRNASEQ {
         }
     }
 
-    FASTQC_UMITOOLS_FASTP (
-        ch_cat_fastq,
-        ch_fastp_adapters,
-        false,
-        false
-    )
-    ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions)
 
     //
     // SUBWORKFLOW: mirtrace QC
     //
-    FASTQC_FASTP.out.adapterseq
+    FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq
     .join( FASTQC_FASTP.out.reads )
     .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] }
     .groupTuple()

From 2828bd2a05dc1d6282dfaa5f4d7a5be96482ae18 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 11 Jan 2024 14:16:18 +0000
Subject: [PATCH 059/117] Improvements to channel handling

---
 subworkflows/local/umi_dedup.nf |  7 ++++---
 workflows/smrnaseq.nf           | 17 ++++++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 8712f526..5a5a393f 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -5,9 +5,10 @@
 include { INDEX_GENOME                        } from '../../modules/local/bowtie_genome'
 include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME    } from '../../modules/local/bowtie_map_mirna'
 include { BAM_SORT_STATS_SAMTOOLS             } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
-include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/modules/umitools/dedup/main'
-include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/modules/samtools/bam2fq/main'
-include { CAT_CAT                             } from '../../modules/nf-core/modules/cat/cat/main'
+include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/umitools/dedup/main'
+include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/samtools/bam2fq/main'
+include { CAT_CAT                             } from '../../modules/nf-core/cat/cat/main'
+
 
 workflow DEDUPLICATE_UMIS {
     take:
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 04529c63..250f1991 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -137,14 +137,21 @@ workflow SMRNASEQ {
 
     FASTQ_FASTQC_UMITOOLS_FASTP (
         ch_cat_fastq,
-        params.skip_fastqc || params.skip_qc,
+        params.skip_fastqc,
         params.with_umi,
+        params.skip_umi_extract,
+        params.umi_discard_read,
         params.skip_trimming,
-        params.umi_discard_read
+        params.umi_discard_read,
+        params.skip_trimming,
+        params.adapter_fasta,
+        params.save_trimmed_fail,
+        params.save_merged,
+        params.min_trimmed_reads
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads
 
     //
     // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome
@@ -155,7 +162,7 @@ workflow SMRNASEQ {
             DEDUPLICATE_UMIS (
                 fasta_ch,
                 bt_index,
-                FASTQC_UMITOOLS_FASTP.out.reads
+                FASTQC_UMITOOLS_FASTP.out.trim_reads
             )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads
             ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
@@ -167,7 +174,7 @@ workflow SMRNASEQ {
     // SUBWORKFLOW: mirtrace QC
     //
     FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq
-    .join( FASTQC_FASTP.out.reads )
+    .join( FASTQ_FASTQC_UMITOOLS_FASTP.out.reads )
     .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] }
     .groupTuple()
     .set { ch_mirtrace_inputs }

From ff703f10d4bd122210ac0dbe5e1eac55d0e3cf04 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 08:59:12 +0000
Subject: [PATCH 060/117] Update to latest subworkflow version

---
 modules.json                                  |  2 +-
 .../fastq_fastqc_umitools_fastp/main.nf       | 14 +++++++++-
 .../fastq_fastqc_umitools_fastp/meta.yml      | 11 ++++----
 .../tests/main.nf.test                        |  1 +
 .../tests/main.nf.test.snap                   | 28 ++++++++++++++-----
 workflows/smrnaseq.nf                         |  4 +--
 6 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/modules.json b/modules.json
index 56255242..40eda6cc 100644
--- a/modules.json
+++ b/modules.json
@@ -91,7 +91,7 @@
                     },
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
-                        "git_sha": "3e8b0c1144ccf60b7848efbdc2be285ff20b49ee",
+                        "git_sha": "668185ddcd2d9084c819691c99020360e0f029a0",
                         "installed_by": ["subworkflows"]
                     }
                 }
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 3dbb27ea..49570521 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -17,6 +17,11 @@ def getFastpReadsAfterFiltering(json_file) {
     return json['after_filtering']['total_reads'].toLong()
 }
 
+def getFastpAdapterSequence(json_file){
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('adapter_cutting')
+    return json['read1_adapter_sequence']
+}
+
 workflow FASTQ_FASTQC_UMITOOLS_FASTP {
     take:
     reads             // channel: [ val(meta), [ reads ] ]
@@ -25,7 +30,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
     skip_umi_extract  // boolean: true/false
     umi_discard_read  // integer: 0, 1 or 2
     skip_trimming     // boolean: true/false
-    adapter_fasta     //    file: adapter.fasta
+    adapter_fasta     // file: adapter.fasta
     save_trimmed_fail // boolean: true/false
     save_merged       // boolean: true/false
     min_trimmed_reads // integer: > 0
@@ -75,6 +80,8 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
     fastqc_trim_html  = Channel.empty()
     fastqc_trim_zip   = Channel.empty()
     trim_read_count   = Channel.empty()
+    adapter_seq       = Channel.empty()
+
     if (!skip_trimming) {
         FASTP (
             umi_reads,
@@ -108,6 +115,10 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
             .map { meta, reads, num_reads -> [ meta, num_reads ] }
             .set { trim_read_count }
 
+        trim_json
+            .map { meta, json -> [meta, getFastpAdapterSequence(json)] }
+            .set { adapter_seq }
+
         if (!skip_fastqc) {
             FASTQC_TRIM (
                 trim_reads
@@ -125,6 +136,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
     fastqc_raw_zip     // channel: [ val(meta), [ zip ] ]
 
     umi_log            // channel: [ val(meta), [ log ] ]
+    adapter_seq        // channel: [ val(meta), [ adapter_seq] ]
 
     trim_json          // channel: [ val(meta), [ json ] ]
     trim_html          // channel: [ val(meta), [ html ] ]
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
index 220e8db1..9308fe9b 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -43,7 +43,7 @@ input:
   - skip_trimming:
       type: boolean
       description: |
-        Allows to skip trimgalore execution
+        Allows to skip FastP execution
   - adapter_fasta:
       type: file
       description: |
@@ -70,10 +70,7 @@ output:
       type: file
       description: >
         Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
-
-
-
-          For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+        For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
       pattern: "*.{fastq.gz}"
   - fastqc_html:
       type: file
@@ -118,6 +115,10 @@ output:
       type: file
       description: FastQC report archive
       pattern: "*_{fastqc.zip}"
+  - adapter_seq:
+      type: string
+      description: |
+        Adapter Sequence found in read1
   - versions:
       type: file
       description: File containing software versions
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
index cdd73984..91dec8c5 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -45,6 +45,7 @@ nextflow_workflow {
                 { assert snapshot(workflow.out.trim_json).match("trim_json") },
                 { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
                 { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
+                { assert snapshot(workflow.out.adapter_seq).match("adapter_seq") },
                 { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
                 { assert snapshot(workflow.out.versions).match("versions") },
 
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
index 38a65aeb..973746a3 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -5,7 +5,7 @@
                 
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.26920982"
+        "timestamp": "2024-01-12T08:38:50.041635573"
     },
     "trim_reads_fail": {
         "content": [
@@ -13,7 +13,7 @@
                 
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.25861515"
+        "timestamp": "2024-01-12T08:38:50.033284693"
     },
     "versions": {
         "content": [
@@ -23,7 +23,7 @@
                 "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.30891403"
+        "timestamp": "2024-01-12T08:38:50.121510557"
     },
     "trim_json": {
         "content": [
@@ -37,7 +37,21 @@
                 ]
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.24768259"
+        "timestamp": "2024-01-12T08:38:50.024410724"
+    },
+    "adapter_seq": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "unspecified"
+                ]
+            ]
+        ],
+        "timestamp": "2024-01-12T08:38:50.08674429"
     },
     "reads": {
         "content": [
@@ -54,7 +68,7 @@
                 ]
             ]
         ],
-        "timestamp": "2023-12-04T11:30:32.061644815"
+        "timestamp": "2024-01-12T08:38:49.994419936"
     },
     "umi_log": {
         "content": [
@@ -62,7 +76,7 @@
                 
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.238536"
+        "timestamp": "2024-01-12T08:38:50.017720214"
     },
     "trim_read_count": {
         "content": [
@@ -76,6 +90,6 @@
                 ]
             ]
         ],
-        "timestamp": "2023-11-26T02:28:26.27984169"
+        "timestamp": "2024-01-12T08:38:50.102326089"
     }
 }
\ No newline at end of file
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 250f1991..5ea0df5e 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -173,9 +173,9 @@ workflow SMRNASEQ {
     //
     // SUBWORKFLOW: mirtrace QC
     //
-    FASTQ_FASTQC_UMITOOLS_FASTP.out.adapterseq
+    FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq
     .join( FASTQ_FASTQC_UMITOOLS_FASTP.out.reads )
-    .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] }
+    .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] }
     .groupTuple()
     .set { ch_mirtrace_inputs }
 

From 313e7d3f899b4e400912480f50c20c40b4bd7d34 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 09:19:36 +0000
Subject: [PATCH 061/117] Clean up schema defaults and FASTQ_FASTQC_UMITOOLS_FASTP call arguments

---
 nextflow.config       |  1 +
 nextflow_schema.json  | 11 ++---------
 workflows/smrnaseq.nf |  6 ++----
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index a223bc26..886011cd 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -47,6 +47,7 @@ params {
     fastp_min_length            = 17
     fastp_known_mirna_adapters  = "$projectDir/assets/known_adapters.fa"
     save_trimmed_fail           = false
+    save_merged                 = false
     skip_fastqc                 = false
     skip_multiqc                = false
     skip_mirdeep                = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 33b150b4..d736635e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -92,7 +92,8 @@
                 "umi_merge_unmapped": {
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
-                    "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias"
+                    "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias",
+                    "default": true
                 }
             },
             "fa_icon": "fas fa-barcode"
@@ -209,7 +210,6 @@
                 },
                 "three_prime_adapter": {
                     "type": "string",
-                    "default": "TGGAATTCTCGGGTGCCAAGG",
                     "fa_icon": "fas fa-text-width",
                     "description": "Sequencing adapter sequence to use for trimming."
                 },
@@ -408,14 +408,12 @@
                     "type": "boolean",
                     "description": "Display help text.",
                     "fa_icon": "fas fa-question-circle",
-                    "default": false,
                     "hidden": true
                 },
                 "version": {
                     "type": "boolean",
                     "description": "Display version and exit.",
                     "fa_icon": "fas fa-question-circle",
-                    "default": false,
                     "hidden": true
                 },
                 "publish_dir_mode": {
@@ -439,7 +437,6 @@
                     "type": "boolean",
                     "description": "Send plain-text email instead of HTML.",
                     "fa_icon": "fas fa-remove-format",
-                    "default": false,
                     "hidden": true
                 },
                 "max_multiqc_email_size": {
@@ -454,7 +451,6 @@
                     "type": "boolean",
                     "description": "Do not use coloured log outputs.",
                     "fa_icon": "fas fa-palette",
-                    "default": false,
                     "hidden": true
                 },
                 "hook_url": {
@@ -493,7 +489,6 @@
                     "type": "boolean",
                     "fa_icon": "far fa-eye-slash",
                     "description": "Show all params when using `--help`",
-                    "default": false,
                     "hidden": true,
                     "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
                 },
@@ -501,7 +496,6 @@
                     "type": "boolean",
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters fails when an unrecognised parameter is found.",
-                    "default": false,
                     "hidden": true,
                     "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
                 },
@@ -509,7 +503,6 @@
                     "type": "boolean",
                     "fa_icon": "far fa-check-circle",
                     "description": "Validation of parameters in lenient more.",
-                    "default": false,
                     "hidden": true,
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
                 }
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 5ea0df5e..11fb44e5 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -142,16 +142,14 @@ workflow SMRNASEQ {
         params.skip_umi_extract,
         params.umi_discard_read,
         params.skip_trimming,
-        params.umi_discard_read,
-        params.skip_trimming,
-        params.adapter_fasta,
+        params.fastp_known_mirna_adapters,
         params.save_trimmed_fail,
         params.save_merged,
         params.min_trimmed_reads
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads
+    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     //
     // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome

From 5e3f0824f619aa7d94b2e2a25a6b190e6265b32b Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 09:30:47 +0000
Subject: [PATCH 062/117] Add missing params and fix stale subworkflow references

---
 nextflow.config       |  3 ++-
 nextflow_schema.json  |  3 +++
 workflows/smrnaseq.nf | 12 ++++++------
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 886011cd..e476d297 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -54,7 +54,8 @@ params {
     skip_fastp                  = false
     save_reference              = false
     fastp_max_length            = 40
-
+    min_trimmed_reads           = 10
+    
     // Contamination filtering
     filter_contamination        = false
     rrna                        = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d736635e..a846609e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -505,6 +505,9 @@
                     "description": "Validation of parameters in lenient more.",
                     "hidden": true,
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+                },
+                "save_merged": {
+                    "type": "boolean"
                 }
             }
         }
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 11fb44e5..bf9c3ac8 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -141,7 +141,7 @@ workflow SMRNASEQ {
         params.with_umi,
         params.skip_umi_extract,
         params.umi_discard_read,
-        params.skip_trimming,
+        params.skip_fastp,
         params.fastp_known_mirna_adapters,
         params.save_trimmed_fail,
         params.save_merged,
@@ -185,7 +185,7 @@ workflow SMRNASEQ {
     // SUBWORKFLOW: remove contaminants from reads
     //
     contamination_stats = Channel.empty()
-    mirna_reads = FASTQC_FASTP.out.reads
+    mirna_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
     if (params.filter_contamination){
         CONTAMINANT_FILTER (
             reference_hairpin,
@@ -195,7 +195,7 @@ workflow SMRNASEQ {
             params.ncrna,
             params.pirna,
             params.other_contamination,
-            FASTQC_FASTP.out.reads
+            FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
         )
 
         contamination_stats = CONTAMINANT_FILTER.out.filter_stats
@@ -223,7 +223,7 @@ workflow SMRNASEQ {
 
         if (!params.skip_mirdeep) {
             MIRDEEP2 (
-                FASTQC_UMITOOLS_FASTP.out.reads,
+                FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
                 GENOME_QUANT.out.fasta,
                 GENOME_QUANT.out.index.collect(),
                 MIRNA_QUANT.out.fasta_hairpin,
@@ -252,8 +252,8 @@ workflow SMRNASEQ {
         ch_multiqc_files = Channel.empty()
         ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
         ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]))
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}.ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))

From 6240ec0ac8d10372491a3400b6881540a62674f6 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 09:31:48 +0000
Subject: [PATCH 063/117] Clean up whitespace in nextflow.config

---
 nextflow.config | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index e476d297..ef869a71 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,7 +55,7 @@ params {
     save_reference              = false
     fastp_max_length            = 40
     min_trimmed_reads           = 10
-    
+
     // Contamination filtering
     filter_contamination        = false
     rrna                        = null
@@ -65,7 +65,6 @@ params {
     pirna                       = null
     other_contamination         = null
 
-
     // MultiQC options
     multiqc_config              = null
     multiqc_title               = null

From bfcf4863db204e94c604bf0c4b0ac0285105ec76 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 09:55:58 +0000
Subject: [PATCH 064/117] Improved error handling

---
 subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 49570521..764ce013 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -18,8 +18,13 @@ def getFastpReadsAfterFiltering(json_file) {
 }
 
 def getFastpAdapterSequence(json_file){
-    def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('adapter_cutting')
-    return json['read1_adapter_sequence']
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text)
+    try{
+        adapter = json['adapter_cutting']['read1_adapter_sequence']
+    } catch(Exception ex){
+        adapter = ""
+    }
+    return adapter
 }
 
 workflow FASTQ_FASTQC_UMITOOLS_FASTP {

From d5d1bfcd31c209a8ea39b5fe3901d726c06e9d0d Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <apeltzer@users.noreply.github.com>
Date: Fri, 12 Jan 2024 11:07:48 +0100
Subject: [PATCH 065/117] Add min_trimmed_reads to nextflow_schema.json

---
 nextflow_schema.json | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index a846609e..b1106dcd 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -241,6 +241,12 @@
                     "default": "${projectDir}/assets/known_adapters.fa",
                     "description": "FastA with known miRNA adapter sequences for adapter trimming",
                     "fa_icon": "far fa-question-circle"
+                },
+                "min_trimmed_reads": {
+                    "type": "integer",
+                    "default": 10,
+                    "fa_icon": "far fa-window-minimize",
+                    "description": "Minimum number of reads required in input file to use it"
                 }
             }
         },

From 22f07dead81b26b3664c054b88e43b5e4610b2fe Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 10:48:46 +0000
Subject: [PATCH 066/117] Check params.fasta before UMI deduplication

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 9caf0d5e..314c8d75 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -155,7 +155,7 @@ workflow SMRNASEQ {
     // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome
     //
     if (params.with_umi){
-        if (fasta){
+        if (params.fasta){
             fasta_ch = file(fasta)
             DEDUPLICATE_UMIS (
                 fasta_ch,

From 8fb0ae060ca6596d9245fcbff6df4db2bf17513a Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 10:54:45 +0000
Subject: [PATCH 067/117] Use params.fasta when creating fasta_ch

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 314c8d75..ee62cb26 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -156,7 +156,7 @@ workflow SMRNASEQ {
     //
     if (params.with_umi){
         if (params.fasta){
-            fasta_ch = file(fasta)
+            fasta_ch = file(params.fasta)
             DEDUPLICATE_UMIS (
                 fasta_ch,
                 bt_index,

From 227af283d143ca3325344ed4c76abc674ab5e5fe Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 11:09:07 +0000
Subject: [PATCH 068/117] Update tool versions and index genome before UMI dedup

---
 modules/local/edger_qc.nf         | 2 +-
 modules/local/mirdeep2_prepare.nf | 6 +++---
 workflows/smrnaseq.nf             | 9 ++++++++-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf
index 729d5eed..5560de23 100644
--- a/modules/local/edger_qc.nf
+++ b/modules/local/edger_qc.nf
@@ -1,7 +1,7 @@
 process EDGER_QC {
     label 'process_medium'
 
-    conda 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36'
+    conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' :
         'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }"
diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf
index 124b5b63..ce66b9f1 100644
--- a/modules/local/mirdeep2_prepare.nf
+++ b/modules/local/mirdeep2_prepare.nf
@@ -3,10 +3,10 @@ process MIRDEEP2_PIGZ {
     tag "$meta.id"
 
     // TODO maybe create a mulled container and uncompress within mirdeep2_mapper?
-    conda 'bioconda::bioconvert=0.4.3'
+    conda 'bioconda::bioconvert=1.1.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bioconvert:0.4.3--py_0' :
-        'biocontainers/bioconvert:0.4.3--py_0' }"
+        'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' :
+        'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index ee62cb26..95e5b810 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -72,6 +72,7 @@ include { MIRNA_QUANT           } from '../subworkflows/local/mirna_quant'
 include { GENOME_QUANT          } from '../subworkflows/local/genome_quant'
 include { MIRTRACE              } from '../subworkflows/local/mirtrace'
 include { MIRDEEP2              } from '../subworkflows/local/mirdeep2'
+include { INDEX_GENOME          } from '../../modules/local/bowtie_genome'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -157,9 +158,15 @@ workflow SMRNASEQ {
     if (params.with_umi){
         if (params.fasta){
             fasta_ch = file(params.fasta)
+
+            //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs.
+            INDEX_GENOME ( fasta )
+
+            ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
+
             DEDUPLICATE_UMIS (
                 fasta_ch,
-                bt_index,
+                INDEX_GENOME.out.bowtie_indices,
                 FASTQC_UMITOOLS_FASTP.out.trim_reads
             )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads

From eed08955d5fd4c018d216d772e653df0f209307a Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 11:11:05 +0000
Subject: [PATCH 069/117] Fixing some lint warnings

---
 modules/local/bowtie_contaminants.nf     | 2 +-
 modules/local/bowtie_genome.nf           | 2 +-
 modules/local/bowtie_map_contaminants.nf | 2 +-
 modules/local/bowtie_mirna.nf            | 2 +-
 modules/local/filter_stats.nf            | 2 +-
 modules/local/seqcluster_collapse.nf     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/local/bowtie_contaminants.nf b/modules/local/bowtie_contaminants.nf
index e6a594a7..cf02de31 100644
--- a/modules/local/bowtie_contaminants.nf
+++ b/modules/local/bowtie_contaminants.nf
@@ -4,7 +4,7 @@ process INDEX_CONTAMINANTS {
     conda 'bowtie2=2.4.5'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' :
-        'biocontainers/bowtie2:2.4.5--py36hfca12d5_2'}"
+        'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2'}"
 
     input:
     path fasta
diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf
index 91a6cd53..4e1ea7ca 100644
--- a/modules/local/bowtie_genome.nf
+++ b/modules/local/bowtie_genome.nf
@@ -4,7 +4,7 @@ process INDEX_GENOME {
 
     conda 'bioconda::bowtie=1.3.1-4'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' :
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' :
         'biocontainers/bowtie:1.3.1--py310h4070885_4' }"
 
     input:
diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf
index d744b1fd..c9863ab3 100644
--- a/modules/local/bowtie_map_contaminants.nf
+++ b/modules/local/bowtie_map_contaminants.nf
@@ -4,7 +4,7 @@ process BOWTIE_MAP_CONTAMINANTS {
     conda 'bowtie2=2.4.5'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' :
-        'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }"
+        'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf
index 2be45bb8..cb7f0d0f 100644
--- a/modules/local/bowtie_mirna.nf
+++ b/modules/local/bowtie_mirna.nf
@@ -3,7 +3,7 @@ process INDEX_MIRNA {
 
     conda 'bioconda::bowtie=1.3.0-2'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' :
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' :
         'biocontainers/bowtie:1.3.1--py310h4070885_4' }"
 
     input:
diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf
index 18e7016b..1e5494cd 100644
--- a/modules/local/filter_stats.nf
+++ b/modules/local/filter_stats.nf
@@ -4,7 +4,7 @@ process FILTER_STATS {
     conda 'bowtie2=2.4.5'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' :
-        'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }"
+        'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }"
 
     input:
     tuple val(meta), path(reads)
diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf
index 82470e5a..826f8cef 100644
--- a/modules/local/seqcluster_collapse.nf
+++ b/modules/local/seqcluster_collapse.nf
@@ -5,7 +5,7 @@ process SEQCLUSTER_SEQUENCES {
     conda 'bioconda::seqcluster=1.2.9-0'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' :
-        'biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }"
+        'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }"
 
     input:
     tuple val(meta), path(reads)

From 2cc2a0df4df8462b0ba6c658801afb19988bd898 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 11:18:23 +0000
Subject: [PATCH 070/117] Adding in versions for filter stats

---
 modules/local/filter_stats.nf            |   8 ++
 subworkflows/local/contaminant_filter.nf |   4 +-
 subworkflows/local/fastqc_fastp.nf       | 116 -----------------------
 3 files changed, 10 insertions(+), 118 deletions(-)
 delete mode 100644 subworkflows/local/fastqc_fastp.nf

diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf
index 1e5494cd..4c46f51d 100644
--- a/modules/local/filter_stats.nf
+++ b/modules/local/filter_stats.nf
@@ -13,6 +13,7 @@ process FILTER_STATS {
     output:
     path "*_mqc.yaml"                           , emit: stats
     tuple val(meta), path('*.filtered.fastq.gz'), emit: reads
+    path "versions.yml"                         , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -24,5 +25,12 @@ process FILTER_STATS {
     tr '\n' ', ' | \\
     awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: \\"bargraph\\"\\npconfig:\\n  id: \\"contamination_filter_plot\\"\\n  title: \\"Contamination Plot\\"\\n  ylab: \\"Number of reads\\"\\ndata:\\n    "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml
     gzip -c ${reads} > ${meta.id}.filtered.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cat:  \$(cat --version | grep 'cat ' |sed 's/cat (GNU coreutils) //')
+        gzip: \$(gzip --version | grep "gzip" | sed 's/gzip //')
+        tr:  \$(tr --version | grep 'tr ' |sed 's/tr (GNU coreutils) //')
+    END_VERSIONS
     """
 }
diff --git a/subworkflows/local/contaminant_filter.nf b/subworkflows/local/contaminant_filter.nf
index 383c85ad..dede115d 100644
--- a/subworkflows/local/contaminant_filter.nf
+++ b/subworkflows/local/contaminant_filter.nf
@@ -123,6 +123,6 @@ workflow CONTAMINANT_FILTER {
 
     emit:
     filtered_reads = FILTER_STATS.out.reads
-    versions = ch_versions
+    versions = ch_versions.mix(FILTER_STATS.out.versions)
     filter_stats = FILTER_STATS.out.stats
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/fastqc_fastp.nf b/subworkflows/local/fastqc_fastp.nf
deleted file mode 100644
index 9e4d952e..00000000
--- a/subworkflows/local/fastqc_fastp.nf
+++ /dev/null
@@ -1,116 +0,0 @@
-//
-// Read QC and trimming
-//
-
-include { FASTQC as FASTQC_RAW  } from '../../modules/nf-core/fastqc/main'
-include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main'
-include { FASTP                 } from '../../modules/nf-core/fastp/main'
-
-//
-// Function that parses fastp json output file to get total number of reads after trimming
-//
-import groovy.json.JsonSlurper
-
-def getFastpReadsAfterFiltering(json_file) {
-    return new JsonSlurper().parseText(json_file.text)
-    ?.get('summary')
-    ?.get('after_filtering')
-    ?.get('total_reads')
-    ?.toInteger()
-}
-
-String getFastpAdapterSequence(json_file){
-    return new JsonSlurper().parseText(json_file.text)
-    ?.get('adapter_cutting')
-    ?.get('read1_adapter_sequence')
-}
-
-workflow FASTQC_FASTP {
-    take:
-    reads             // channel: [ val(meta), [ reads ] ]
-    adapter_list      // channel: [ path/to/adapters.fa ]
-    save_trimmed_fail //   value: boolean
-    save_merged       //   value: boolean
-
-
-    main:
-
-    ch_versions     = Channel.empty()
-    fastqc_raw_html = Channel.empty()
-    fastqc_raw_zip  = Channel.empty()
-    adapterseq      = reads.map { meta, _ -> [meta, null] }
-    if (!params.skip_fastqc) {
-        FASTQC_RAW (
-            reads
-        )
-        fastqc_raw_html = FASTQC_RAW.out.html
-        fastqc_raw_zip  = FASTQC_RAW.out.zip
-        ch_versions     = ch_versions.mix(FASTQC_RAW.out.versions.first())
-    }
-
-    trim_reads        = reads
-    trim_json         = Channel.empty()
-    trim_html         = Channel.empty()
-    trim_log          = Channel.empty()
-    trim_reads_fail   = Channel.empty()
-    trim_reads_merged = Channel.empty()
-    fastqc_trim_html  = Channel.empty()
-    fastqc_trim_zip   = Channel.empty()
-    if (!params.skip_fastp) {
-        FASTP (
-            reads,
-            adapter_list,
-            save_trimmed_fail,
-            save_merged
-        )
-        trim_reads        = FASTP.out.reads
-        trim_json         = FASTP.out.json
-        trim_html         = FASTP.out.html
-        trim_log          = FASTP.out.log
-        trim_reads_fail   = FASTP.out.reads_fail
-        trim_reads_merged = FASTP.out.reads_merged
-        ch_versions       = ch_versions.mix(FASTP.out.versions.first())
-
-        //
-        // Filter empty FastQ files after adapter trimming so FastQC doesn't fail
-        //
-        trim_reads
-            .join(trim_json)
-            .map {
-                meta, reads, json ->
-                    if (getFastpReadsAfterFiltering(json) > 0) {
-                        [ meta, reads ]
-                    }
-            }
-            .set { trim_reads }
-
-        trim_json
-            .map { meta, json -> [meta, getFastpAdapterSequence(json)] }
-            .set { adapterseq }
-
-        if (!params.skip_fastqc) {
-            FASTQC_TRIM (
-                trim_reads
-            )
-            fastqc_trim_html = FASTQC_TRIM.out.html
-            fastqc_trim_zip  = FASTQC_TRIM.out.zip
-            ch_versions      = ch_versions.mix(FASTQC_TRIM.out.versions.first())
-        }
-    }
-
-    emit:
-    reads = trim_reads // channel: [ val(meta), [ reads ] ]
-    trim_json          // channel: [ val(meta), [ json ] ]
-    trim_html          // channel: [ val(meta), [ html ] ]
-    trim_log           // channel: [ val(meta), [ log ] ]
-    trim_reads_fail    // channel: [ val(meta), [ fastq.gz ] ]
-    trim_reads_merged  // channel: [ val(meta), [ fastq.gz ] ]
-    adapterseq         // channel: [ val(meta), [ adapterseq ] ]
-
-    fastqc_raw_html    // channel: [ val(meta), [ html ] ]
-    fastqc_raw_zip     // channel: [ val(meta), [ zip ] ]
-    fastqc_trim_html   // channel: [ val(meta), [ html ] ]
-    fastqc_trim_zip    // channel: [ val(meta), [ zip ] ]
-
-    versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
-}

From bf6e8bde711df521183e67a7ed3ef6364185bacb Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 11:21:49 +0000
Subject: [PATCH 071/117] Fix bowtie conda pins and container tags

---
 modules/local/bowtie_genome.nf | 6 +++---
 modules/local/bowtie_mirna.nf  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf
index 4e1ea7ca..5ccb0afa 100644
--- a/modules/local/bowtie_genome.nf
+++ b/modules/local/bowtie_genome.nf
@@ -2,10 +2,10 @@ process INDEX_GENOME {
     tag "$fasta"
     label 'process_medium'
 
-    conda 'bioconda::bowtie=1.3.1-4'
+    conda 'bioconda::bowtie=1.3.1-6'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' :
-        'biocontainers/bowtie:1.3.1--py310h4070885_4' }"
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' :
+        'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
 
     input:
     tuple val(meta2), path(fasta)
diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf
index cb7f0d0f..08521095 100644
--- a/modules/local/bowtie_mirna.nf
+++ b/modules/local/bowtie_mirna.nf
@@ -1,10 +1,10 @@
 process INDEX_MIRNA {
     label 'process_medium'
 
-    conda 'bioconda::bowtie=1.3.0-2'
+    conda 'bioconda::bowtie=1.3.1-6'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h4070885_4' :
-        'biocontainers/bowtie:1.3.1--py310h4070885_4' }"
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' :
+        'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
 
     input:
     tuple val(meta2), path(fasta)

From 454504d1b415bb2b88dd5d7bda25d5599d3e0145 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 11:28:40 +0000
Subject: [PATCH 072/117] More updates of modules

---
 modules/local/bowtie_genome.nf       | 4 ++--
 modules/local/bowtie_mirna.nf        | 4 ++--
 modules/local/format_fasta_mirna.nf  | 6 +++---
 modules/local/mirdeep2_mapper.nf     | 2 +-
 modules/local/mirdeep2_run.nf        | 2 +-
 modules/local/parse_fasta_mirna.nf   | 6 +++---
 modules/local/seqcluster_collapse.nf | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf
index 5ccb0afa..60f33f1a 100644
--- a/modules/local/bowtie_genome.nf
+++ b/modules/local/bowtie_genome.nf
@@ -2,9 +2,9 @@ process INDEX_GENOME {
     tag "$fasta"
     label 'process_medium'
 
-    conda 'bioconda::bowtie=1.3.1-6'
+    conda 'bioconda::bowtie=1.3.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' :
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' :
         'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
 
     input:
diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf
index 08521095..2fbf1588 100644
--- a/modules/local/bowtie_mirna.nf
+++ b/modules/local/bowtie_mirna.nf
@@ -1,9 +1,9 @@
 process INDEX_MIRNA {
     label 'process_medium'
 
-    conda 'bioconda::bowtie=1.3.1-6'
+    conda 'bioconda::bowtie=1.3.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1-py310h30d4ff4_6' :
+        'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' :
         'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
 
     input:
diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf
index 489879a5..67461d64 100644
--- a/modules/local/format_fasta_mirna.nf
+++ b/modules/local/format_fasta_mirna.nf
@@ -4,10 +4,10 @@ process FORMAT_FASTA_MIRNA {
     tag "$fasta"
     label 'process_medium'
 
-    conda 'bioconda::fastx_toolkit=0.0.14-9'
+    conda 'bioconda::fastx_toolkit=0.0.14'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--he1b5a44_8' :
-        'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }"
+        'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--hdbdd923_11' :
+        'biocontainers/fastx_toolkit:0.0.14--hdbdd923_11' }"
 
     input:
     tuple val(meta2), path(fasta)
diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf
index 842af6e6..19a9c5dc 100644
--- a/modules/local/mirdeep2_mapper.nf
+++ b/modules/local/mirdeep2_mapper.nf
@@ -4,7 +4,7 @@ process MIRDEEP2_MAPPER {
     label 'process_medium'
     tag "$meta.id"
 
-    conda 'bioconda::mirdeep2=2.0.1'
+    conda 'bioconda::mirdeep2=2.0.1.3'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' :
         'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }"
diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf
index 9b18ed7d..442f26f3 100644
--- a/modules/local/mirdeep2_run.nf
+++ b/modules/local/mirdeep2_run.nf
@@ -4,7 +4,7 @@ process MIRDEEP2_RUN {
     label 'process_medium'
     errorStrategy 'ignore'
 
-    conda 'bioconda::mirdeep2=2.0.1'
+    conda 'bioconda::mirdeep2=2.0.1.3'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' :
         'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }"
diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf
index ad63401e..60665251 100644
--- a/modules/local/parse_fasta_mirna.nf
+++ b/modules/local/parse_fasta_mirna.nf
@@ -1,10 +1,10 @@
 process PARSE_FASTA_MIRNA {
     label 'process_medium'
 
-    conda 'bioconda::seqkit=2.3.1'
+    conda 'bioconda::seqkit=2.6.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0' :
-        'biocontainers/seqkit:2.3.1--h9ee0642_0' }"
+        'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0' :
+        'biocontainers/seqkit:2.6.1--h9ee0642_0' }"
 
     input:
     tuple val(meta2), path(fasta)
diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf
index 826f8cef..4379654c 100644
--- a/modules/local/seqcluster_collapse.nf
+++ b/modules/local/seqcluster_collapse.nf
@@ -2,7 +2,7 @@ process SEQCLUSTER_SEQUENCES {
     label 'process_medium'
     tag "$meta.id"
 
-    conda 'bioconda::seqcluster=1.2.9-0'
+    conda 'bioconda::seqcluster=1.2.9'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' :
         'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }"

From 7eefccaa5d22d14ab9f9b4a5b6bd58a0d7300c8a Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 12:03:37 +0000
Subject: [PATCH 073/117] Fix

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 95e5b810..51a4a2cd 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -72,7 +72,7 @@ include { MIRNA_QUANT           } from '../subworkflows/local/mirna_quant'
 include { GENOME_QUANT          } from '../subworkflows/local/genome_quant'
 include { MIRTRACE              } from '../subworkflows/local/mirtrace'
 include { MIRDEEP2              } from '../subworkflows/local/mirdeep2'
-include { INDEX_GENOME          } from '../../modules/local/bowtie_genome'
+include { INDEX_GENOME          } from '../modules/local/bowtie_genome'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From f118456bbfaa4e92a02cfc44f5bd109ca5e932b2 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 12:07:00 +0000
Subject: [PATCH 074/117] Fix

---
 modules/local/bowtie_genome.nf | 2 +-
 modules/local/bowtie_mirna.nf  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf
index 60f33f1a..17ea9253 100644
--- a/modules/local/bowtie_genome.nf
+++ b/modules/local/bowtie_genome.nf
@@ -5,7 +5,7 @@ process INDEX_GENOME {
     conda 'bioconda::bowtie=1.3.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' :
-        'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
+        'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }"
 
     input:
     tuple val(meta2), path(fasta)
diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf
index 2fbf1588..06756bef 100644
--- a/modules/local/bowtie_mirna.nf
+++ b/modules/local/bowtie_mirna.nf
@@ -4,7 +4,7 @@ process INDEX_MIRNA {
     conda 'bioconda::bowtie=1.3.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' :
-        'biocontainers/bowtie:1.3.1-py310h30d4ff4_6' }"
+        'biocontainers/bowtie:1.3.1--py310h30d4ff4_6' }"
 
     input:
     tuple val(meta2), path(fasta)

From cbbc6d8a307e4dea4c7c62f477250dce9467bbbb Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 12:16:56 +0000
Subject: [PATCH 075/117] Another fix

---
 modules/local/bowtie_mirna.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf
index 06756bef..733d816e 100644
--- a/modules/local/bowtie_mirna.nf
+++ b/modules/local/bowtie_mirna.nf
@@ -4,7 +4,7 @@ process INDEX_MIRNA {
     conda 'bioconda::bowtie=1.3.1'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' :
-        'biocontainers/bowtie:1.3.1--py310h30d4ff4_6' }"
+        'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }"
 
     input:
     tuple val(meta2), path(fasta)

From ea6d05eebd83cef1220f774b982f5e8a7236838f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 12:39:04 +0000
Subject: [PATCH 076/117] Yeah fasta

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 51a4a2cd..b3de8acc 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -160,7 +160,7 @@ workflow SMRNASEQ {
             fasta_ch = file(params.fasta)
 
             //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs.
-            INDEX_GENOME ( fasta )
+            INDEX_GENOME ( fasta_ch )
 
             ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
 

From 6df081f83eadc4e48fe062ed6a802d683b6aa8ab Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 12:47:28 +0000
Subject: [PATCH 077/117] Another one in the umi workflow

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index b3de8acc..a9f34706 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -167,7 +167,7 @@ workflow SMRNASEQ {
             DEDUPLICATE_UMIS (
                 fasta_ch,
                 INDEX_GENOME.out.bowtie_indices,
-                FASTQC_UMITOOLS_FASTP.out.trim_reads
+                FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads
             )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads
             ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)

From 43d3aba11c6489baa9df94e1b0017338ea30ce6f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Fri, 12 Jan 2024 20:39:33 +0000
Subject: [PATCH 078/117] Update edger_qc

---
 modules/local/edger_qc.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf
index 5560de23..8c311457 100644
--- a/modules/local/edger_qc.nf
+++ b/modules/local/edger_qc.nf
@@ -3,8 +3,8 @@ process EDGER_QC {
 
     conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' :
-        'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' :
+        'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' }"
 
     input:
     path input_files

From 2059ed4cf72950cfb1e30000412b0e9aaf19163f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 08:08:29 +0000
Subject: [PATCH 079/117] Shorten test.config

---
 conf/test.config | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 450ef11d..e699776b 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -23,9 +23,7 @@ params {
 
     input            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
     fasta            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'
-    mature           = 'https://mirbase.org/download/mature.fa'
-    hairpin          = 'https://mirbase.org/download/hairpin.fa'
-    mirna_gtf        = 'https://mirbase.org/download/hsa.gff3'
+
     mirtrace_species = 'hsa'
     protocol         = 'illumina'
     skip_mirdeep     = true

From 945c3182d27150a0dcabd63b581d5573ac4fa595 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 08:10:00 +0000
Subject: [PATCH 080/117] Adding in UMI tests

---
 .github/workflows/ci.yml |  1 +
 conf/test_umi.config     | 31 +++++++++++++++++++++++++++++++
 nextflow.config          |  1 +
 3 files changed, 33 insertions(+)
 create mode 100644 conf/test_umi.config

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 885ae9b6..51c3ee37 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,6 +29,7 @@ jobs:
         profile:
           - "test"
           - "test_no_genome"
+          - "test_umi"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v4
diff --git a/conf/test_umi.config b/conf/test_umi.config
new file mode 100644
index 00000000..a50ddb7a
--- /dev/null
+++ b/conf/test_umi.config
@@ -0,0 +1,31 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/smrnaseq -profile test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+
+    input            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_umi.csv'
+    fasta            = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'
+
+    mirtrace_species = 'hsa'
+    protocol         = 'illumina'
+    skip_mirdeep     = true
+
+}
diff --git a/nextflow.config b/nextflow.config
index 4143e3a0..b32d5ecf 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -230,6 +230,7 @@ profiles {
 
     }
     test           { includeConfig 'conf/test.config' }
+    test_umi       { includeConfig 'conf/test_umi.config' }
     test_no_genome { includeConfig 'conf/test_no_genome.config' }
     test_full      { includeConfig 'conf/test_full.config' }
 }

From e43e4cfcdf103c49588276c031d91a15bc9b132e Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 08:19:40 +0000
Subject: [PATCH 081/117] Addressing some reviews

---
 CHANGELOG.md                    |  1 +
 conf/modules.config             |  6 ++---
 subworkflows/local/umi_dedup.nf | 39 ++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ebcea04..611c5338 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`)
 - [[#164]](https://github.com/nf-core/smrnaseq/pull/164) - UMI Handling Feature implemented in the pipeline
 - [[#302]](https://github.com/nf-core/smrnaseq/pull/302) - Merged in nf-core template v2.11.1
+- [[#294]](https://github.com/nf-core/smrnaseq/pull/294) - Fixed contamination screening issues
 
 ### Parameters
 
diff --git a/conf/modules.config b/conf/modules.config
index b54a3534..769f62f8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -131,7 +131,7 @@ if (!params.skip_fastp) {
 
     if (!params.skip_fastqc) {
         process {
-            withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' {
+            withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_RAW' {
                 ext.args = '--quiet'
                 publishDir = [
                     path: { "${params.outdir}/fastqc/raw" },
@@ -139,7 +139,7 @@ if (!params.skip_fastp) {
                     saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
                 ]
             }
-            withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' {
+            withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' {
                 ext.args = '--quiet'
                 publishDir = [
                     path: { "${params.outdir}/fastqc/trim" },
@@ -153,7 +153,7 @@ if (!params.skip_fastp) {
 
 if (params.with_umi && !params.skip_umi_extract) {
     process {
-        withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:UMITOOLS_EXTRACT' {
+        withName: '.*:FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' {
             ext.args   = [
                     params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '',
                     params.umitools_bc_pattern     ? "--bc-pattern='${params.umitools_bc_pattern}'" : '',
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 5a5a393f..db9065ea 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -31,36 +31,35 @@ workflow DEDUPLICATE_UMIS {
         fasta_formatted = fasta
     }
 
-    if (bt_index){
 
-        UMI_MAP_GENOME ( reads, bt_index.collect() )
-        ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
+    UMI_MAP_GENOME ( reads, bt_index.collect() )
+    ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
-        BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
-        ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
+    BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
+    ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
 
-        ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
-        UMITOOLS_DEDUP ( ch_umi_dedup )
-        ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
-        ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
+    ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
+    UMITOOLS_DEDUP ( ch_umi_dedup )
+    ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
+    ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
 
-        SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false )
-        ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions)
+    SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false )
+    ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions)
 
-        ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
+    ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
 
-        if ( params.umi_merge_unmapped ) {
+    if ( params.umi_merge_unmapped ) {
 
-            SAMTOOLS_BAM2FQ.out.reads
-                .join(UMI_MAP_GENOME.out.unmapped)
-                .map { meta, file1, file2 -> [meta, [file1, file2]]}
-                .set { ch_cat }
+        SAMTOOLS_BAM2FQ.out.reads
+            .join(UMI_MAP_GENOME.out.unmapped)
+            .map { meta, file1, file2 -> [meta, [file1, file2]]}
+            .set { ch_cat }
 
-            CAT_CAT ( ch_cat )
-            ch_dedup_reads = CAT_CAT.out.file_out
-        }
+        CAT_CAT ( ch_cat )
+        ch_dedup_reads = CAT_CAT.out.file_out
     }
 
+
     emit:
     reads    = ch_dedup_reads
     indices  = bt_index

From 3b21fe1a504dc7229d2f49d33bc7d570351bf617 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 08:39:29 +0000
Subject: [PATCH 082/117] Add UMI regex

---
 conf/test_umi.config | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/conf/test_umi.config b/conf/test_umi.config
index a50ddb7a..a7a59e75 100644
--- a/conf/test_umi.config
+++ b/conf/test_umi.config
@@ -28,4 +28,10 @@ params {
     protocol         = 'illumina'
     skip_mirdeep     = true
 
+    //UMI Specific testcase
+    with_umi = true
+    umitools_extract_method = 'regex'
+    umitools_bc_pattern '.+AACTGTAGGCACCATCAAT{s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)'
+    save_umi_intermeds = true
+
 }

From 8e21df8efa67c7e103fc34d5239c6e9a1a716892 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 08:57:18 +0000
Subject: [PATCH 083/117] Some more adjustments to make nf-core modules work

---
 conf/test_umi.config            | 2 +-
 nextflow.config                 | 1 +
 nextflow_schema.json            | 5 +++++
 subworkflows/local/umi_dedup.nf | 9 +++++----
 workflows/smrnaseq.nf           | 3 ++-
 5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/conf/test_umi.config b/conf/test_umi.config
index a7a59e75..53a58f41 100644
--- a/conf/test_umi.config
+++ b/conf/test_umi.config
@@ -31,7 +31,7 @@ params {
     //UMI Specific testcase
     with_umi = true
     umitools_extract_method = 'regex'
-    umitools_bc_pattern '.+AACTGTAGGCACCATCAAT{s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)'
+    umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)'
     save_umi_intermeds = true
 
 }
diff --git a/nextflow.config b/nextflow.config
index b32d5ecf..0a6d0945 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,6 +38,7 @@ params {
     umi_discard_read           = null
     save_umi_intermeds         = false
     umi_merge_unmapped         = true
+    umi_stats                  = true
 
     // Trimming options
     clip_r1                     = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a8eebb26..cd8fa7a5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -94,6 +94,11 @@
                     "fa_icon": "fas fa-save",
                     "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias",
                     "default": true
+                },
+                "umi_stats": {
+                    "type": "boolean",
+                    "default": true,
+                    "description": "Compute UMI statistics for MultiQC"
                 }
             },
             "fa_icon": "fas fa-barcode"
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index db9065ea..1c88cde0 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -15,6 +15,7 @@ workflow DEDUPLICATE_UMIS {
     fasta
     bt_index
     reads      // channel: [ val(meta), [ reads ] ]
+    val_get_dedup_stats //boolean true/false
 
     main:
 
@@ -35,11 +36,11 @@ workflow DEDUPLICATE_UMIS {
     UMI_MAP_GENOME ( reads, bt_index.collect() )
     ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
-    BAM_SORT_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
-    ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions)
+    BAM_SORT_STATS_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
+    ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
-    ch_umi_dedup = BAM_SORT_SAMTOOLS.out.bam.join(BAM_SORT_SAMTOOLS.out.bai)
-    UMITOOLS_DEDUP ( ch_umi_dedup )
+    //ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai)
+    UMITOOLS_DEDUP ( BAM_SORT_STATS_SAMTOOLS.out.bam, val_get_dedup_stats)
     ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
     ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index a9f34706..cb46b44d 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -167,7 +167,8 @@ workflow SMRNASEQ {
             DEDUPLICATE_UMIS (
                 fasta_ch,
                 INDEX_GENOME.out.bowtie_indices,
-                FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_reads
+                FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
+                params.umi_stats
             )
             reads_for_mirna = DEDUPLICATE_UMIS.out.reads
             ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)

From fc0119558de7a6a73f85241000fda4fa368a828b Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 09:00:44 +0000
Subject: [PATCH 084/117] Fix meta2

---
 subworkflows/local/umi_dedup.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 1c88cde0..3d12fd99 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -23,7 +23,7 @@ workflow DEDUPLICATE_UMIS {
     ch_dedup_stats = Channel.empty()
 
     if (!bt_index){
-        INDEX_GENOME ( fasta )
+        INDEX_GENOME ( [ [:], fasta ] )
         bt_index      = INDEX_GENOME.out.bowtie_indices
         fasta_formatted = INDEX_GENOME.out.fasta
         ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)

From 14109235504f62bd912373d785af21027c96a4ca Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 09:05:31 +0000
Subject: [PATCH 085/117] Passing on meta2

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index cb46b44d..ad291593 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -160,7 +160,7 @@ workflow SMRNASEQ {
             fasta_ch = file(params.fasta)
 
             //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs.
-            INDEX_GENOME ( fasta_ch )
+            INDEX_GENOME ( [ [:], fasta ] )
 
             ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
 

From 12be1865b66035915f7b1086a723817b60bf5d2b Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 09:05:49 +0000
Subject: [PATCH 086/117] use the channels, luke

---
 workflows/smrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index ad291593..872de5f3 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -160,7 +160,7 @@ workflow SMRNASEQ {
             fasta_ch = file(params.fasta)
 
             //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs.
-            INDEX_GENOME ( [ [:], fasta ] )
+            INDEX_GENOME ( [ [:], fasta_ch ] )
 
             ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
 

From 96adf135bedb6ed20394537b9915ca38cb89af01 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 20:42:21 +0000
Subject: [PATCH 087/117] Add better changelog with deps

---
 CHANGELOG.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 611c5338..fc286f02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 |               | `--save_umi_intermeds`      |
 |               | `--umi_merge_unmapped`      |
 
+### Software dependencies
+
+| Dependency   | Old version | New version |
+| ------------ | ----------- | ----------- |
+| `multiqc`    | 1.15        | 1.19        |
+| `edgeR`      | 3.36.0      | 4.0.2       |
+| `limma`      | 3.50.0      | 3.58.1      |
+| `bioconvert` | 0.4.3       | 1.1.1       |
+| `mirdeep`    | 2.0.1       | 2.0.1.3     |
+| `seqkit`     | 2.3.1       | 2.6.1       |
+| `fastqc`     | 0.11.4      | 0.12.1      |
+| `samtools`   | 1.17        | 1.18        |
+| `umitools`   | <none>      | 1.1.4       |
+
 ## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03
 
 - Update template to 2.10
@@ -119,7 +133,6 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi
 | `seqkit`             | 0.16.0      | 2.0.0       |
 | `trim-galore`        | 0.6.6       | 0.6.7       |
 | `bioconvert`         | -           | 0.4.3       |
-| `umi_tools`          | -           | 1.1.2       |
 | `htseq`              | -           | -           |
 | `markdown`           | -           | -           |
 | `pymdown-extensions` | -           | -           |

From 5c7227a30de536d24228ff3b516213f852a05065 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 20:42:44 +0000
Subject: [PATCH 088/117] Add updated modules

---
 modules.json                                  |   8 +-
 modules/nf-core/cat/cat/main.nf               |  11 +-
 modules/nf-core/cat/cat/tests/main.nf.test    |   6 +-
 .../nf-core/cat/cat/tests/main.nf.test.snap   |  92 +++++----
 modules/nf-core/cat/fastq/tests/main.nf.test  |  63 +++---
 .../nf-core/cat/fastq/tests/main.nf.test.snap | 185 +++++++++++++-----
 modules/nf-core/fastp/tests/main.nf.test      |  95 +++++----
 modules/nf-core/fastp/tests/main.nf.test.snap |  16 +-
 8 files changed, 295 insertions(+), 181 deletions(-)

diff --git a/modules.json b/modules.json
index 40eda6cc..f11503fc 100644
--- a/modules.json
+++ b/modules.json
@@ -7,12 +7,12 @@
                 "nf-core": {
                     "cat/cat": {
                         "branch": "master",
-                        "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2",
+                        "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2",
                         "installed_by": ["modules"]
                     },
                     "cat/fastq": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
                         "installed_by": ["modules"]
                     },
                     "custom/dumpsoftwareversions": {
@@ -22,13 +22,13 @@
                     },
                     "fastp": {
                         "branch": "master",
-                        "git_sha": "d086322563bdbb08c94bf15a7db58a39ccdb1520",
+                        "git_sha": "1799e452de650f6fb8890d25829bca23014b0728",
                         "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
-                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+                        "installed_by": ["fastq_fastqc_umitools_fastp"]
                     },
                     "multiqc": {
                         "branch": "master",
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
index 970ab760..adbdbd7b 100644
--- a/modules/nf-core/cat/cat/main.nf
+++ b/modules/nf-core/cat/cat/main.nf
@@ -22,6 +22,8 @@ process CAT_CAT {
     def args2 = task.ext.args2 ?: ''
     def file_list = files_in.collect { it.toString() }
 
+    // choose appropriate concatenation tool depending on input and output format
+
     // | input     | output     | command1 | command2 |
     // |-----------|------------|----------|----------|
     // | gzipped   | gzipped    | cat      |          |
@@ -30,7 +32,7 @@ process CAT_CAT {
     // | ungzipped | gzipped    | cat      | pigz     |
 
     // Use input file ending as default
-    prefix   = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
     out_zip  = prefix.endsWith('.gz')
     in_zip   = file_list[0].endsWith('.gz')
     command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
@@ -68,3 +70,10 @@ process CAT_CAT {
     END_VERSIONS
     """
 }
+
+// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    return match ? match[0][1] : filename.substring(filename.lastIndexOf('.'))
+}
+
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
index ed5a4f12..aaae04f9 100644
--- a/modules/nf-core/cat/cat/tests/main.nf.test
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -83,8 +83,7 @@ nextflow_process {
             def lines = path(process.out.file_out.get(0).get(1)).linesGzip
             assertAll(
                 { assert process.success },
-                { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") },
-                { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")}
+                { assert snapshot(process.out).match() }
             )
         }
     }
@@ -142,8 +141,7 @@ nextflow_process {
             def lines = path(process.out.file_out.get(0).get(1)).linesGzip
             assertAll(
                 { assert process.success },
-                { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") },
-                { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")}
+                { assert snapshot(process.out).match() }
             )
         }
     }
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
index 423571ba..0c9bfe8d 100644
--- a/modules/nf-core/cat/cat/tests/main.nf.test.snap
+++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap
@@ -1,10 +1,4 @@
 {
-    "test_cat_unzipped_zipped_size": {
-        "content": [
-            375
-        ],
-        "timestamp": "2023-10-16T14:33:08.049445686"
-    },
     "test_cat_unzipped_unzipped": {
         "content": [
             {
@@ -67,31 +61,36 @@
         ],
         "timestamp": "2023-10-16T14:32:49.642741302"
     },
-    "test_cat_zipped_zipped_lines": {
-        "content": [
-            [
-                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
-                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
-                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
-                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
-                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
-                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
-            ]
-        ],
-        "timestamp": "2023-10-16T14:32:33.629048645"
-    },
-    "test_cat_unzipped_zipped_lines": {
+    "test_cat_zipped_zipped": {
         "content": [
-            [
-                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
-                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
-                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
-                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
-                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
-                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
-            ]
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
         ],
-        "timestamp": "2023-10-16T14:33:08.038830506"
+        "timestamp": "2024-01-12T14:02:02.999254641"
     },
     "test_cat_one_file_unzipped_zipped_lines": {
         "content": [
@@ -106,16 +105,41 @@
         ],
         "timestamp": "2023-10-16T14:33:21.39642399"
     },
-    "test_cat_zipped_zipped_size": {
+    "test_cat_unzipped_zipped": {
         "content": [
-            78
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
         ],
-        "timestamp": "2023-10-16T14:32:33.641869244"
+        "timestamp": "2024-01-12T14:08:26.948048418"
     },
     "test_cat_one_file_unzipped_zipped_size": {
         "content": [
             374
         ],
-        "timestamp": "2023-10-16T14:33:21.4094373"
+        "timestamp": "2024-01-12T14:10:22.445700266"
     }
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test
index f5f94182..dab2e14c 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test
@@ -16,11 +16,11 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = [
-                            [ id:'test', single_end:true ], // meta map
-                            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                            file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ]
-                        ]
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+                ])
                 """
             }
         }
@@ -28,8 +28,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match() },
-                { assert path(process.out.versions.get(0)).getText().contains("cat") }
+                { assert snapshot(process.out).match() }
             )
         }
     }
@@ -42,13 +41,13 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:false ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ]
-                ]
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)]
+                ])
                 """
             }
         }
@@ -56,8 +55,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match() },
-                { assert path(process.out.versions.get(0)).getText().contains("cat") }
+                { assert snapshot(process.out).match() }
             )
         }
     }
@@ -70,11 +68,11 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:true ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
-                ]
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+                ])
                 """
             }
         }
@@ -82,8 +80,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match() },
-                { assert path(process.out.versions.get(0)).getText().contains("cat") }
+                { assert snapshot(process.out).match() }
             )
         }
     }
@@ -96,13 +93,13 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:false ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                    file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                ]
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+                ])
                 """
             }
         }
@@ -110,8 +107,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match() },
-                { assert path(process.out.versions.get(0)).getText().contains("cat") }
+                { assert snapshot(process.out).match() }
             )
         }
     }
@@ -124,10 +120,10 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:true ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)]
-                ]
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+                ])
                 """
             }
         }
@@ -135,8 +131,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.reads).match() },
-                { assert path(process.out.versions.get(0)).getText().contains("cat") }
+                { assert snapshot(process.out).match() }
             )
         }
     }
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
index ec2342e5..43dfe28f 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
@@ -1,78 +1,169 @@
 {
     "test_cat_fastq_single_end": {
         "content": [
-            [
-                [
-                    {
-                        "id": "test",
-                        "single_end": true
-                    },
-                    "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d"
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
                 ]
-            ]
+            }
         ],
-        "timestamp": "2023-10-17T23:19:12.990284837"
+        "timestamp": "2024-01-17T17:30:39.816981"
     },
     "test_cat_fastq_single_end_same_name": {
         "content": [
-            [
-                [
-                    {
-                        "id": "test",
-                        "single_end": true
-                    },
-                    "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66"
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
                 ]
-            ]
+            }
         ],
-        "timestamp": "2023-10-17T23:19:31.554568147"
+        "timestamp": "2024-01-17T17:32:35.229332"
     },
     "test_cat_fastq_single_end_single_file": {
         "content": [
-            [
-                [
-                    {
-                        "id": "test",
-                        "single_end": true
-                    },
-                    "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af"
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
                 ]
-            ]
+            }
         ],
-        "timestamp": "2023-10-17T23:19:49.629360033"
+        "timestamp": "2024-01-17T17:34:00.058829"
     },
     "test_cat_fastq_paired_end_same_name": {
         "content": [
-            [
-                [
-                    {
-                        "id": "test",
-                        "single_end": false
-                    },
+            {
+                "0": [
                     [
-                        "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66",
-                        "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e"
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+                            "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+                        ]
                     ]
+                ],
+                "1": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+                            "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
                 ]
-            ]
+            }
         ],
-        "timestamp": "2023-10-17T23:19:40.711617539"
+        "timestamp": "2024-01-17T17:33:33.031555"
     },
     "test_cat_fastq_paired_end": {
         "content": [
-            [
-                [
-                    {
-                        "id": "test",
-                        "single_end": false
-                    },
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+                            "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+                ],
+                "reads": [
                     [
-                        "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d",
-                        "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda"
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+                            "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+                        ]
                     ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
                 ]
-            ]
+            }
         ],
-        "timestamp": "2023-10-18T07:53:20.923560211"
+        "timestamp": "2024-01-17T17:32:02.270935"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
index 17dce8ac..dcf4fd6f 100644
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -19,11 +19,10 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:true ],
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
-                ]
-
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -89,11 +88,10 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:true ],
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
-                ]
-
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -135,12 +133,11 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:false ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                ]
-
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -220,12 +217,11 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:false ], // meta map
-                    [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                ]
-
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -266,10 +262,10 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [ [ id:'test', single_end:true ], // meta map
-                            [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
-                        ]
-
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -336,10 +332,10 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = false
 
-                input[0] = [ [ id:'test', single_end:true ], // meta map
-                            [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
-                        ]
-
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -380,9 +376,10 @@ nextflow_process {
                 save_trimmed_fail = true
                 save_merged       = false
 
-                input[0] = [ [ id:'test', single_end:true ], // meta map
-                            [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
-                        ]
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -440,13 +437,11 @@ nextflow_process {
                 save_trimmed_fail = true
                 save_merged       = false
 
-                input[0] = [
+                input[0] = Channel.of([
                     [ id:'test', single_end:false ], // meta map
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
-                    ]
-                ]
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -519,11 +514,11 @@ nextflow_process {
                 adapter_fasta     = []
                 save_trimmed_fail = false
                 save_merged       = true
-
-                input[0] = [ [ id:'test', single_end:false ], // meta map
-                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                           ]
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -613,10 +608,11 @@ nextflow_process {
                 save_trimmed_fail = false
                 save_merged       = true
 
-                input[0] = [ [ id:'test', single_end:false ], // meta map
-                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                           ]
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
@@ -653,14 +649,15 @@ nextflow_process {
             }
             process {
                 """
-                adapter_fasta     = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true)
+                adapter_fasta     = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
                 save_trimmed_fail = false
                 save_merged       = true
 
-                input[0] = [ [ id:'test', single_end:false ], // meta map
-                             [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                               file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
-                           ]
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
                 input[1] = adapter_fasta
                 input[2] = save_trimmed_fail
                 input[3] = save_merged
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
index 1b7d2419..6a71b680 100644
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -12,7 +12,7 @@
                 "{id=test, single_end=false}"
             ]
         ],
-        "timestamp": "2023-12-21T09:44:37.202512"
+        "timestamp": "2024-01-17T18:07:15.398827"
     },
     "fastp test_fastp_interleaved_json": {
         "content": [
@@ -26,7 +26,7 @@
                 ]
             ]
         ],
-        "timestamp": "2023-10-17T11:04:45.794175881"
+        "timestamp": "2024-01-17T18:08:06.123035"
     },
     "test_fastp_paired_end_merged-for_stub_match": {
         "content": [
@@ -42,7 +42,7 @@
                 "{id=test, single_end=false}"
             ]
         ],
-        "timestamp": "2023-12-21T09:53:45.237014"
+        "timestamp": "2024-01-17T18:10:13.467574"
     },
     "test_fastp_single_end_json": {
         "content": [
@@ -56,7 +56,7 @@
                 ]
             ]
         ],
-        "timestamp": "2023-10-17T11:04:10.566343705"
+        "timestamp": "2024-01-17T18:06:00.223817"
     },
     "versions": {
         "content": [
@@ -64,7 +64,7 @@
                 "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
             ]
         ],
-        "timestamp": "2023-10-17T11:04:10.582076024"
+        "timestamp": "2024-01-17T18:06:00.248422"
     },
     "test_fastp_interleaved-for_stub_match": {
         "content": [
@@ -76,7 +76,7 @@
                 "{id=test, single_end=true}"
             ]
         ],
-        "timestamp": "2023-12-21T09:48:43.148485"
+        "timestamp": "2024-01-17T18:08:06.127974"
     },
     "test_fastp_single_end-for_stub_match": {
         "content": [
@@ -88,7 +88,7 @@
                 "{id=test, single_end=true}"
             ]
         ],
-        "timestamp": "2023-12-21T09:20:07.254788"
+        "timestamp": "2024-01-17T18:06:00.244202"
     },
     "test_fastp_single_end_trim_fail_json": {
         "content": [
@@ -102,6 +102,6 @@
                 ]
             ]
         ],
-        "timestamp": "2023-10-17T11:05:00.379878948"
+        "timestamp": "2024-01-17T18:08:41.942317"
     }
 }
\ No newline at end of file

From 8d19dbf47604d5cd2acd898586b23d093102766f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 20:43:07 +0000
Subject: [PATCH 089/117] Add updated modules.json

---
 modules.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index f11503fc..9d68299f 100644
--- a/modules.json
+++ b/modules.json
@@ -91,7 +91,7 @@
                     },
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
-                        "git_sha": "668185ddcd2d9084c819691c99020360e0f029a0",
+                        "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c",
                         "installed_by": ["subworkflows"]
                     }
                 }

From 84da917bc464f02147a40515ebad4942880a4d2c Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 17 Jan 2024 20:53:29 +0000
Subject: [PATCH 090/117] Fixing the error :)

---
 subworkflows/local/umi_dedup.nf | 3 +--
 workflows/smrnaseq.nf           | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 3d12fd99..4a7f80f9 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -24,11 +24,10 @@ workflow DEDUPLICATE_UMIS {
 
     if (!bt_index){
         INDEX_GENOME ( [ [:], fasta ] )
-        bt_index      = INDEX_GENOME.out.bowtie_indices
+        bt_index      = INDEX_GENOME.out.index
         fasta_formatted = INDEX_GENOME.out.fasta
         ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)
     } else {
-        bt_index     = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" }
         fasta_formatted = fasta
     }
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 872de5f3..02501098 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -166,7 +166,7 @@ workflow SMRNASEQ {
 
             DEDUPLICATE_UMIS (
                 fasta_ch,
-                INDEX_GENOME.out.bowtie_indices,
+                INDEX_GENOME.out.index,
                 FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
                 params.umi_stats
             )

From e95f075895e77325244073017c20b474bb547730 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Thu, 18 Jan 2024 07:58:51 +0000
Subject: [PATCH 091/117] [automated] Fix linting with Prettier

---
 .devcontainer/devcontainer.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4ecfbfe3..4a9bc5c7 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -18,11 +18,11 @@
                 "python.linting.flake8Path": "/opt/conda/bin/flake8",
                 "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
                 "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
-                "python.linting.pylintPath": "/opt/conda/bin/pylint"
+                "python.linting.pylintPath": "/opt/conda/bin/pylint",
             },
 
             // Add the IDs of extensions you want installed when the container is created.
-            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
-        }
-    }
+            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
+        },
+    },
 }

From c51f54d491fbcb192c270ebad97da5b5a0d8c5b1 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 18 Jan 2024 08:21:09 +0000
Subject: [PATCH 092/117] This should fix umitools extract

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 769f62f8..6e6820f5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -153,7 +153,7 @@ if (!params.skip_fastp) {
 
 if (params.with_umi && !params.skip_umi_extract) {
     process {
-        withName: '.*:FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' {
+        withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' {
             ext.args   = [
                     params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '',
                     params.umitools_bc_pattern     ? "--bc-pattern='${params.umitools_bc_pattern}'" : '',

From 3776e1834a37c5a3d32df3ebbceb5f89d1312bc4 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 18 Jan 2024 08:29:52 +0000
Subject: [PATCH 093/117] Improve more modules

---
 conf/modules.config | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 6e6820f5..23406e58 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -201,7 +201,7 @@ if (params.with_umi) {
             ]
         }
 
-        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' {
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
             ext.prefix = { "${meta.id}.sorted" }
             publishDir = [
                 path: { "${params.outdir}/umi_dedup" },
@@ -213,7 +213,7 @@ if (params.with_umi) {
             ]
         }
 
-        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' {
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' {
             ext.prefix = { "${meta.id}.sorted" }
             publishDir = [
                 path: { "${params.outdir}/umi_dedup" },
@@ -225,7 +225,7 @@ if (params.with_umi) {
             ]
         }
 
-        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' {
+        withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:.*' {
             publishDir = [
                 path: { "${params.outdir}/umi_dedup/samtools_stats" },
                 mode: params.publish_dir_mode,

From a5daf6b74ffe4649403b6921e2c6aae88aec357d Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 18 Jan 2024 08:30:52 +0000
Subject: [PATCH 094/117] Less lines

---
 subworkflows/local/mirna_quant.nf | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
index e26839bc..fc6942fe 100644
--- a/subworkflows/local/mirna_quant.nf
+++ b/subworkflows/local/mirna_quant.nf
@@ -33,8 +33,6 @@ workflow MIRNA_QUANT {
     main:
     ch_versions = Channel.empty()
 
-
-
     PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed }
     ch_versions = ch_versions.mix(PARSE_MATURE.out.versions)
 
@@ -78,7 +76,6 @@ workflow MIRNA_QUANT {
     ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions)
 
 
-
     BAM_STATS_MATURE.out.idxstats.collect{it[1]}
         .mix(BAM_STATS_HAIRPIN.out.idxstats.collect{it[1]})
         .dump(tag:'edger')
@@ -87,8 +84,6 @@ workflow MIRNA_QUANT {
         .set { edger_input }
     EDGER_QC ( edger_input )
 
-
-
     reads
         .map { add_suffix(it, "seqcluster") }
         .dump (tag:'ssux')
@@ -100,9 +95,6 @@ workflow MIRNA_QUANT {
     BOWTIE_MAP_SEQCLUSTER ( reads_collapsed, hairpin_bowtie.collect() )
     ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions)
 
-
-
-
     ch_mirtop_logs = Channel.empty()
     if (params.mirtrace_species){
         MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf )
@@ -117,8 +109,6 @@ workflow MIRNA_QUANT {
         .dump (tag:'gsux')
         .set { reads_genome }
 
-
-
     emit:
     fasta_mature        = FORMAT_MATURE.out.formatted_fasta
     fasta_hairpin       = FORMAT_HAIRPIN.out.formatted_fasta

From 4baf752461389e77f0bc7b9c036ebd40283596e2 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <apeltzer@users.noreply.github.com>
Date: Thu, 18 Jan 2024 09:31:57 +0100
Subject: [PATCH 095/117] Apply suggestions from code review

Co-authored-by: Maxime U Garcia <max.u.garcia@gmail.com>
---
 nextflow.config       |  8 ++++----
 workflows/smrnaseq.nf | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 0a6d0945..4d7eb423 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -247,8 +247,6 @@ singularity.registry = 'quay.io'
 // Nextflow plugins
 plugins {
 
-    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
-
     id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
@@ -276,8 +274,10 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 // Set default registry for Docker and Podman independent of -profile
 // Will not be used unless Docker / Podman are enabled
 // Set to your registry if you have a mirror of containers
-docker.registry = 'quay.io'
-podman.registry = 'quay.io'
+apptainer.registry   = 'quay.io'
+docker.registry      = 'quay.io'
+podman.registry      = 'quay.io'
+singularity.registry = 'quay.io'
 
 // Disable process selector warnings by default. Use debug profile to enable warnings.
 nextflow.enable.configProcessNamesValidation = false
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 02501098..4b81796c 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -64,15 +64,15 @@ if (!params.mirgenedb) {
     if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"}
 }
 
-include { INPUT_CHECK           } from '../subworkflows/local/input_check'
+include { INPUT_CHECK                 } from '../subworkflows/local/input_check'
 include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
-include { DEDUPLICATE_UMIS      } from '../subworkflows/local/umi_dedup'
-include { CONTAMINANT_FILTER    } from '../subworkflows/local/contaminant_filter'
-include { MIRNA_QUANT           } from '../subworkflows/local/mirna_quant'
-include { GENOME_QUANT          } from '../subworkflows/local/genome_quant'
-include { MIRTRACE              } from '../subworkflows/local/mirtrace'
-include { MIRDEEP2              } from '../subworkflows/local/mirdeep2'
-include { INDEX_GENOME          } from '../modules/local/bowtie_genome'
+include { DEDUPLICATE_UMIS            } from '../subworkflows/local/umi_dedup'
+include { CONTAMINANT_FILTER          } from '../subworkflows/local/contaminant_filter'
+include { MIRNA_QUANT                 } from '../subworkflows/local/mirna_quant'
+include { GENOME_QUANT                } from '../subworkflows/local/genome_quant'
+include { MIRTRACE                    } from '../subworkflows/local/mirtrace'
+include { MIRDEEP2                    } from '../subworkflows/local/mirdeep2'
+include { INDEX_GENOME                } from '../modules/local/bowtie_genome'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 533a0f35e6e78d41b496f12af4f6004263a3b20f Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 10:16:06 +0100
Subject: [PATCH 096/117] Fix input cardinality

---
 subworkflows/local/umi_dedup.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 4a7f80f9..f8981758 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -38,8 +38,8 @@ workflow DEDUPLICATE_UMIS {
     BAM_SORT_STATS_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() )
     ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
-    //ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai)
-    UMITOOLS_DEDUP ( BAM_SORT_STATS_SAMTOOLS.out.bam, val_get_dedup_stats)
+    ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai)
+    UMITOOLS_DEDUP ( ch_umi_dedup, val_get_dedup_stats)
     ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
     ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
 

From 60052b62d0bc07f6b31e1f68e0859c77713e88bf Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 10:37:26 +0100
Subject: [PATCH 097/117] Compute index only once

---
 subworkflows/local/genome_quant.nf | 27 ++++++---------------
 subworkflows/local/umi_dedup.nf    | 11 ---------
 workflows/smrnaseq.nf              | 39 ++++++++++++++++--------------
 3 files changed, 28 insertions(+), 49 deletions(-)

diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf
index b310c76a..0f97f16b 100644
--- a/subworkflows/local/genome_quant.nf
+++ b/subworkflows/local/genome_quant.nf
@@ -8,32 +8,19 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie
 
 workflow GENOME_QUANT {
     take:
-    fasta
-    index
+    bowtie_index
+    fasta_formatted // fasta as generated by bowtie index step
     reads // channel: [ val(meta), [ reads ] ]
 
     main:
     ch_versions = Channel.empty()
 
-    if (!index){
-        INDEX_GENOME ( [ [:], fasta ] )
-        bowtie_index    = INDEX_GENOME.out.index
-        fasta_formatted = INDEX_GENOME.out.fasta
-        ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)
-    } else {
-        bowtie_index    = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${index}" }
-        fasta_formatted = fasta
-    }
+    BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() )
+    ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
 
-    if (bowtie_index){
-        BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() )
-        ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)
-        fasta_formatted
-            .map { file -> tuple(file.baseName, file) }
-            .set { sort_input }
-        BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam,  sort_input )
-        ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
-    }
+    ch_fasta_formatted_for_sort = fasta_formatted .map { file -> tuple(file.baseName, file) }
+    BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam,  ch_fasta_formatted_for_sort )
+    ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
     emit:
     fasta    = fasta_formatted
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index f8981758..2eda1b60 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -12,7 +12,6 @@ include { CAT_CAT                             } from '../../modules/nf-core/cat/
 
 workflow DEDUPLICATE_UMIS {
     take:
-    fasta
     bt_index
     reads      // channel: [ val(meta), [ reads ] ]
     val_get_dedup_stats //boolean true/false
@@ -22,16 +21,6 @@ workflow DEDUPLICATE_UMIS {
     ch_versions = Channel.empty()
     ch_dedup_stats = Channel.empty()
 
-    if (!bt_index){
-        INDEX_GENOME ( [ [:], fasta ] )
-        bt_index      = INDEX_GENOME.out.index
-        fasta_formatted = INDEX_GENOME.out.fasta
-        ch_versions     = ch_versions.mix(INDEX_GENOME.out.versions)
-    } else {
-        fasta_formatted = fasta
-    }
-
-
     UMI_MAP_GENOME ( reads, bt_index.collect() )
     ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 4b81796c..56ab5a05 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -150,29 +150,32 @@ workflow SMRNASEQ {
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
+    ch_fasta = file(params.fasta)
     reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
+    //Prepare bowtie index, unless specified
+    //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT
+    if(params.bowtie_index) {
+        ch_bowtie_index  = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" }
+        ch_fasta_formatted = ch_fasta
+    } else {
+        INDEX_GENOME ( [ [:], ch_fasta ] )
+        ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
+        ch_bowtie_index = INDEX_GENOME.out.index
+        ch_fasta_formatted = INDEX_GENOME.out.fasta
+    }
+
     //
     // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome
     //
     if (params.with_umi){
-        if (params.fasta){
-            fasta_ch = file(params.fasta)
-
-            //This needs to be done here as GENOME_QUANT should not run prior to the deduplication of UMIs.
-            INDEX_GENOME ( [ [:], fasta_ch ] )
-
-            ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
-
-            DEDUPLICATE_UMIS (
-                fasta_ch,
-                INDEX_GENOME.out.index,
-                FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
-                params.umi_stats
-            )
-            reads_for_mirna = DEDUPLICATE_UMIS.out.reads
-            ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
-        }
+        DEDUPLICATE_UMIS (
+            ch_bowtie_index,
+            FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
+            params.umi_stats
+        )
+        reads_for_mirna = DEDUPLICATE_UMIS.out.reads
+        ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
     }
 
 
@@ -225,7 +228,7 @@ workflow SMRNASEQ {
     //
     genome_stats = Channel.empty()
     if (params.fasta){
-        GENOME_QUANT ( file(params.fasta), params.bowtie_index, MIRNA_QUANT.out.unmapped )
+        GENOME_QUANT ( ch_bowtie_index, ch_fasta_formatted, MIRNA_QUANT.out.unmapped )
         genome_stats = GENOME_QUANT.out.stats
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
 

From f04ef2e5ae0faf05cc87202c9cc2fe4df9a8dd6e Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 10:47:32 +0100
Subject: [PATCH 098/117] Check that fasta is provided when UMIs are used

---
 workflows/smrnaseq.nf | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 56ab5a05..0fd20636 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -150,19 +150,22 @@ workflow SMRNASEQ {
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    ch_fasta = file(params.fasta)
+    if(params.with_umi && !params.fasta) {
+        error "Specifying a genome fasta is required for UMI deduplication"
+    }
+    ch_fasta = params.fasta ? file(params.fasta): []
     reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     //Prepare bowtie index, unless specified
     //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT
     if(params.bowtie_index) {
         ch_bowtie_index  = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" }
-        ch_fasta_formatted = ch_fasta
     } else {
         INDEX_GENOME ( [ [:], ch_fasta ] )
         ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
         ch_bowtie_index = INDEX_GENOME.out.index
-        ch_fasta_formatted = INDEX_GENOME.out.fasta
+        // set to reformatted fasta as generated by `bowtie index`
+        ch_fasta = INDEX_GENOME.out.fasta
     }
 
     //
@@ -228,7 +231,7 @@ workflow SMRNASEQ {
     //
     genome_stats = Channel.empty()
     if (params.fasta){
-        GENOME_QUANT ( ch_bowtie_index, ch_fasta_formatted, MIRNA_QUANT.out.unmapped )
+        GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped )
         genome_stats = GENOME_QUANT.out.stats
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
 

From 41bd0d06decdc62bb208e8ddffea48b29d33bb51 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 10:50:19 +0100
Subject: [PATCH 099/117] Actually use deduplicated reads

---
 workflows/smrnaseq.nf | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 0fd20636..f740ab8d 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -154,7 +154,7 @@ workflow SMRNASEQ {
         error "Specifying a genome fasta is required for UMI deduplication"
     }
     ch_fasta = params.fasta ? file(params.fasta): []
-    reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+    ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     //Prepare bowtie index, unless specified
     //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT
@@ -174,10 +174,10 @@ workflow SMRNASEQ {
     if (params.with_umi){
         DEDUPLICATE_UMIS (
             ch_bowtie_index,
-            FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
+            ch_reads_for_mirna,
             params.umi_stats
         )
-        reads_for_mirna = DEDUPLICATE_UMIS.out.reads
+        ch_reads_for_mirna = DEDUPLICATE_UMIS.out.reads
         ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)
     }
 
@@ -186,7 +186,7 @@ workflow SMRNASEQ {
     // SUBWORKFLOW: mirtrace QC
     //
     FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq
-    .join( FASTQ_FASTQC_UMITOOLS_FASTP.out.reads )
+    .join( ch_reads_for_mirna )
     .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] }
     .groupTuple()
     .set { ch_mirtrace_inputs }
@@ -199,7 +199,6 @@ workflow SMRNASEQ {
     // SUBWORKFLOW: remove contaminants from reads
     //
     contamination_stats = Channel.empty()
-    mirna_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
     if (params.filter_contamination){
         CONTAMINANT_FILTER (
             reference_hairpin,
@@ -209,7 +208,7 @@ workflow SMRNASEQ {
             params.ncrna,
             params.pirna,
             params.other_contamination,
-            FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+            ch_reads_for_mirna
         )
 
         contamination_stats = CONTAMINANT_FILTER.out.filter_stats
@@ -240,7 +239,7 @@ workflow SMRNASEQ {
 
         if (!params.skip_mirdeep) {
             MIRDEEP2 (
-                FASTQ_FASTQC_UMITOOLS_FASTP.out.reads,
+                ch_reads_for_mirna,
                 GENOME_QUANT.out.fasta,
                 GENOME_QUANT.out.index.collect(),
                 hairpin_clean,

From 6213f5f06245917975a13f0d07fd19aebba36bee Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 11:01:49 +0100
Subject: [PATCH 100/117] Use contaminant-filtered reads downstream

---
 modules/local/mirtrace.nf | 2 +-
 workflows/smrnaseq.nf     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf
index 95989293..500de058 100644
--- a/modules/local/mirtrace.nf
+++ b/modules/local/mirtrace.nf
@@ -44,7 +44,7 @@ process MIRTRACE_RUN {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        mirtrace: \$(echo \$(mirtrace -v 2>&1))
+        mirtrace: \$(echo \$(mirtrace -v))
     END_VERSIONS
     """
 
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index f740ab8d..29010308 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -213,7 +213,7 @@ workflow SMRNASEQ {
 
         contamination_stats = CONTAMINANT_FILTER.out.filter_stats
         ch_versions = ch_versions.mix(CONTAMINANT_FILTER.out.versions)
-        mirna_reads = CONTAMINANT_FILTER.out.filtered_reads
+        ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads
 
     }
 
@@ -221,7 +221,7 @@ workflow SMRNASEQ {
         [ [:], reference_mature],
         [ [:], reference_hairpin],
         mirna_gtf,
-        mirna_reads
+        ch_reads_for_mirna
     )
     ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions.ifEmpty(null))
 

From ed8ad08092229f322c0f1ddb4d6dc515bc5049fe Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 11:19:13 +0100
Subject: [PATCH 101/117] Ensure fasta is optional

---
 workflows/smrnaseq.nf | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 29010308..7c1103d4 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -156,16 +156,20 @@ workflow SMRNASEQ {
     ch_fasta = params.fasta ? file(params.fasta): []
     ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
-    //Prepare bowtie index, unless specified
-    //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT
-    if(params.bowtie_index) {
-        ch_bowtie_index  = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" }
-    } else {
-        INDEX_GENOME ( [ [:], ch_fasta ] )
-        ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
-        ch_bowtie_index = INDEX_GENOME.out.index
-        // set to reformatted fasta as generated by `bowtie index`
-        ch_fasta = INDEX_GENOME.out.fasta
+    // even if bowtie index is specified, there still needs to be a fasta.
+    // without fasta, no genome analysis.
+    if(params.fasta) {
+        //Prepare bowtie index, unless specified
+        //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT
+        if(params.bowtie_index) {
+            ch_bowtie_index  = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" }
+        } else {
+            INDEX_GENOME ( [ [:], ch_fasta ] )
+            ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
+            ch_bowtie_index = INDEX_GENOME.out.index
+            // set to reformatted fasta as generated by `bowtie index`
+            ch_fasta = INDEX_GENOME.out.fasta
+        }
     }
 
     //

From ed99b7ea082557c8969117a373d2ba7eb2205502 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Thu, 18 Jan 2024 11:41:23 +0100
Subject: [PATCH 102/117] Attempt to disable umi_stats on CI

---
 conf/test_umi.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/test_umi.config b/conf/test_umi.config
index 53a58f41..7d9e6e53 100644
--- a/conf/test_umi.config
+++ b/conf/test_umi.config
@@ -33,5 +33,6 @@ params {
     umitools_extract_method = 'regex'
     umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)'
     save_umi_intermeds = true
+    umi_stats = false // takes too much memory for CI
 
 }

From 1c278fe0e755a102de7d2c299bed02925699a2d2 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Fri, 19 Jan 2024 09:32:03 +0100
Subject: [PATCH 103/117] increase limits for umitools dedup

---
 conf/modules.config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 23406e58..b64c6691 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -199,6 +199,8 @@ if (params.with_umi) {
                     )
                 ]
             ]
+            time = { check_max( 120.h , 'time' ) }
+            memory = { check_max( 72.GB * task.attempt, 'memory' ) }
         }
 
         withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {

From 91e67f2520715b5e12c14e9fb213905b49df85c3 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Mon, 22 Jan 2024 12:28:34 +0000
Subject: [PATCH 104/117] Adding in possibility to use different method for UMI
 grouping

---
 conf/modules.config  | 2 +-
 nextflow.config      | 1 +
 nextflow_schema.json | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index b64c6691..28790647 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) {
 if (params.with_umi) {
     process {
         withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' {
-            ext.args = { meta.single_end ? '' : '--unpaired-reads=discard --chimeric-pairs=discard' }
+            ext.args = { meta.single_end ? '--method $params.method' : '--method $params.method --unpaired-reads=discard --chimeric-pairs=discard' }
             ext.prefix = { "${meta.id}.umi_dedup.sorted" }
             publishDir = [
                 [
diff --git a/nextflow.config b/nextflow.config
index 4d7eb423..82058ae6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -36,6 +36,7 @@ params {
     umitools_extract_method    = 'string'
     umitools_bc_pattern        = null
     umi_discard_read           = null
+    umitools_method            = 'directional'
     save_umi_intermeds         = false
     umi_merge_unmapped         = true
     umi_stats                  = true
diff --git a/nextflow_schema.json b/nextflow_schema.json
index cd8fa7a5..a5434a37 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -68,6 +68,13 @@
                     "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.",
                     "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n"
                 },
+                "umitools_method": {
+                    "type": "string",
+                    "default": "directional",
+                    "description": "UMI grouping method",
+                    "fa_icon": "fas fa-layer-group",
+                    "help_text": "Available options are unique, percentile, cluster, adjacency or directional."
+                },
                 "skip_umi_extract": {
                     "type": "boolean",
                     "fa_icon": "fas fa-compress-alt",

From cfac917f8f03e74001bc518aa38041d5d147928f Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 13:41:22 +0100
Subject: [PATCH 105/117] Install umicollapse instead of umitools dedup

---
 modules.json                                  | 82 ++++++++++++++-----
 .../dedup => umicollapse}/environment.yml     |  4 +-
 modules/nf-core/umicollapse/main.nf           | 50 +++++++++++
 modules/nf-core/umicollapse/meta.yml          | 51 ++++++++++++
 modules/nf-core/umitools/dedup/main.nf        | 62 --------------
 modules/nf-core/umitools/dedup/meta.yml       | 71 ----------------
 6 files changed, 165 insertions(+), 155 deletions(-)
 rename modules/nf-core/{umitools/dedup => umicollapse}/environment.yml (56%)
 create mode 100644 modules/nf-core/umicollapse/main.nf
 create mode 100644 modules/nf-core/umicollapse/meta.yml
 delete mode 100644 modules/nf-core/umitools/dedup/main.nf
 delete mode 100644 modules/nf-core/umitools/dedup/meta.yml

diff --git a/modules.json b/modules.json
index 9d68299f..53f34207 100644
--- a/modules.json
+++ b/modules.json
@@ -8,72 +8,107 @@
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "fastp": {
                         "branch": "master",
                         "git_sha": "1799e452de650f6fb8890d25829bca23014b0728",
-                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp",
+                            "modules"
+                        ]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
-                        "installed_by": ["fastq_fastqc_umitools_fastp"]
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp"
+                        ]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/bam2fq": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/flagstat": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_stats_samtools", "modules"]
+                        "installed_by": [
+                            "bam_stats_samtools",
+                            "modules"
+                        ]
                     },
                     "samtools/idxstats": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_stats_samtools", "modules"]
+                        "installed_by": [
+                            "bam_stats_samtools",
+                            "modules"
+                        ]
                     },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_sort_stats_samtools", "modules"]
+                        "installed_by": [
+                            "bam_sort_stats_samtools",
+                            "modules"
+                        ]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_sort_stats_samtools", "modules"]
+                        "installed_by": [
+                            "bam_sort_stats_samtools",
+                            "modules"
+                        ]
                     },
                     "samtools/stats": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_stats_samtools", "modules"]
+                        "installed_by": [
+                            "bam_stats_samtools",
+                            "modules"
+                        ]
                     },
-                    "umitools/dedup": {
+                    "umicollapse": {
                         "branch": "master",
-                        "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": ["modules"]
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "umitools/extract": {
                         "branch": "master",
                         "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
+                        "installed_by": [
+                            "fastq_fastqc_umitools_fastp",
+                            "modules"
+                        ]
                     }
                 }
             },
@@ -82,20 +117,27 @@
                     "bam_sort_stats_samtools": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "bam_stats_samtools": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": ["bam_sort_stats_samtools", "subworkflows"]
+                        "installed_by": [
+                            "bam_sort_stats_samtools",
+                            "subworkflows"
+                        ]
                     },
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
                         "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/dedup/environment.yml b/modules/nf-core/umicollapse/environment.yml
similarity index 56%
rename from modules/nf-core/umitools/dedup/environment.yml
rename to modules/nf-core/umicollapse/environment.yml
index f443735f..8dbc65dc 100644
--- a/modules/nf-core/umitools/dedup/environment.yml
+++ b/modules/nf-core/umicollapse/environment.yml
@@ -1,7 +1,7 @@
-name: umitools_dedup
+name: umicollapse
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::umi_tools=1.1.4
+  - bioconda::umicollapse=1.0.0
diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
new file mode 100644
index 00000000..01ad2541
--- /dev/null
+++ b/modules/nf-core/umicollapse/main.nf
@@ -0,0 +1,57 @@
+// Runs `umicollapse bam` on a single BAM file and emits the resulting BAM,
+// the captured task log and a versions.yml file.
+process UMICOLLAPSE {
+    tag "$meta.id"
+    label "process_high_memory"
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/umicollapse:1.0.0--hdfd78af_1' :
+        'biocontainers/umicollapse:1.0.0--hdfd78af_1' }"
+
+    input:
+    // bai is staged alongside the BAM but is not referenced on the command line
+    tuple val(meta), path(bam), path(bai)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path("*.log"), emit: log
+    path  "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // .command.log is renamed after the run so that it is picked up by the `log` output
+    """
+    umicollapse \\
+        bam \\
+        -i $bam \\
+        -o ${prefix}.bam \\
+        $args
+
+    mv .command.log ${prefix}_UMICollapse.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umicollapse: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Variables declared in `script:` are not visible here, so VERSION is declared again; keep both in sync.
+    def VERSION = '1'
+    """
+    touch ${prefix}.dedup.bam
+    touch ${prefix}_UMICollapse.log
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umicollapse: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml
new file mode 100644
index 00000000..528bc0ee
--- /dev/null
+++ b/modules/nf-core/umicollapse/meta.yml
@@ -0,0 +1,55 @@
+---
+name: "umicollapse"
+description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords:
+  - umicollapse
+  - deduplication
+  - genomics
+tools:
+  - "umicollapse":
+      description: "UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)."
+      homepage: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse"
+      documentation: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse"
+      tool_dev_url: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse"
+      doi: "10.7717/peerj.8275"
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file containing reads to be deduplicated via UMIs.
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: |
+        BAM index files corresponding to the input BAM file.
+      pattern: "*.{bai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file with deduplicated UMIs.
+      pattern: "*.{bam}"
+  - log:
+      type: file
+      description: File with logging information
+      pattern: "*.{log}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@CharlotteAnne"
+  - "@chris-cheshire"
+maintainers:
+  - "@CharlotteAnne"
+  - "@chris-cheshire"
diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf
deleted file mode 100644
index 64ab8f98..00000000
--- a/modules/nf-core/umitools/dedup/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-process UMITOOLS_DEDUP {
-    tag "$meta.id"
-    label "process_medium"
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
-        'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-    val get_output_stats
-
-    output:
-    tuple val(meta), path("${prefix}.bam")     , emit: bam
-    tuple val(meta), path("*.log")             , emit: log
-    tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance
-    tuple val(meta), path("*per_umi.tsv")      , optional:true, emit: tsv_per_umi
-    tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position
-    path  "versions.yml"                       , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    prefix = task.ext.prefix ?: "${meta.id}"
-    def paired = meta.single_end ? "" : "--paired"
-    stats = get_output_stats ? "--output-stats ${prefix}" : ""
-    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
-
-    if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"}
-    """
-    PYTHONHASHSEED=0 umi_tools \\
-        dedup \\
-        -I $bam \\
-        -S ${prefix}.bam \\
-        -L ${prefix}.log \\
-        $stats \\
-        $paired \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
-    END_VERSIONS
-    """
-
-    stub:
-    """
-    touch ${prefix}.bam
-    touch ${prefix}.log
-    touch ${prefix}_edit_distance.tsv
-    touch ${prefix}_per_umi.tsv
-    touch ${prefix}_per_position.tsv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml
deleted file mode 100644
index 38d3fd46..00000000
--- a/modules/nf-core/umitools/dedup/meta.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: umitools_dedup
-description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords:
-  - umitools
-  - deduplication
-  - dedup
-tools:
-  - umi_tools:
-      description: >
-        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes
-
-      documentation: https://umi-tools.readthedocs.io/en/latest/
-      license: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: |
-        BAM file containing reads to be deduplicated via UMIs.
-      pattern: "*.{bam}"
-  - bai:
-      type: file
-      description: |
-        BAM index files corresponding to the input BAM file.
-      pattern: "*.{bai}"
-  - get_output_stats:
-      type: boolean
-      description: |
-        Whether or not to generate output stats.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: BAM file with deduplicated UMIs.
-      pattern: "*.{bam}"
-  - log:
-      type: file
-      description: File with logging information
-      pattern: "*.{log}"
-  - tsv_edit_distance:
-      type: file
-      description: Reports the (binned) average edit distance between the UMIs at each position.
-      pattern: "*edit_distance.tsv"
-  - tsv_per_umi:
-      type: file
-      description: UMI-level summary statistics.
-      pattern: "*per_umi.tsv"
-  - tsv_umi_per_position:
-      type: file
-      description: Tabulates the counts for unique combinations of UMI and position.
-      pattern: "*per_position.tsv"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@drpatelh"
-  - "@grst"
-  - "@klkeys"
-maintainers:
-  - "@drpatelh"
-  - "@grst"
-  - "@klkeys"

From b9208eace2d4c052a9496084002057878cd504c2 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 13:55:37 +0100
Subject: [PATCH 106/117] Switch to umicollapse

---
 conf/modules.config             | 23 ++++++++---------------
 nextflow.config                 |  3 +--
 nextflow_schema.json            |  9 ++-------
 subworkflows/local/umi_dedup.nf | 11 ++++-------
 4 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 28790647..9fe19d49 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -181,23 +181,16 @@ if (params.with_umi && !params.skip_umi_extract) {
 
 if (params.with_umi) {
     process {
-        withName: '.*:DEDUPLICATE_UMIS:UMITOOLS_DEDUP' {
-            ext.args = { meta.single_end ? '--method $params.method' : '--method $params.method --unpaired-reads=discard --chimeric-pairs=discard' }
+        withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' {
+            ext.args = { meta.single_end ? '--algo ${params.umitools_method}' : '--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric' }
             ext.prefix = { "${meta.id}.umi_dedup.sorted" }
             publishDir = [
-                [
-                    path: { "${params.outdir}/umi_dedup/umitools" },
-                    mode: params.publish_dir_mode,
-                    pattern: '*.tsv'
-                ],
-                [
-                    path: { "${params.outdir}/umi_dedup" },
-                    mode: params.publish_dir_mode,
-                    pattern: '*.bam',
-                    enabled: (
-                        params.save_umi_intermeds
-                    )
-                ]
+                path: { "${params.outdir}/umi_dedup" },
+                mode: params.publish_dir_mode,
+                pattern: '*.bam',
+                enabled: (
+                    params.save_umi_intermeds
+                )
             ]
             time = { check_max( 120.h , 'time' ) }
             memory = { check_max( 72.GB * task.attempt, 'memory' ) }
diff --git a/nextflow.config b/nextflow.config
index 82058ae6..0c3d8b45 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -36,10 +36,9 @@ params {
     umitools_extract_method    = 'string'
     umitools_bc_pattern        = null
     umi_discard_read           = null
-    umitools_method            = 'directional'
+    umitools_method            = 'dir'
     save_umi_intermeds         = false
     umi_merge_unmapped         = true
-    umi_stats                  = true
 
     // Trimming options
     clip_r1                     = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a5434a37..c4d62d96 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -70,10 +70,10 @@
                 },
                 "umitools_method": {
                     "type": "string",
-                    "default": "directional",
+                    "default": "dir",
                     "description": "UMI grouping method",
                     "fa_icon": "fas fa-layer-group",
-                    "help_text": "Available options are unique, percentile, cluster, adjacency or directional."
+                    "help_text": "Available options are dir, cc, adj"
                 },
                 "skip_umi_extract": {
                     "type": "boolean",
@@ -101,11 +101,6 @@
                     "fa_icon": "fas fa-save",
                     "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias",
                     "default": true
-                },
-                "umi_stats": {
-                    "type": "boolean",
-                    "default": true,
-                    "description": "Compute UMI statistics for MultiQC"
                 }
             },
             "fa_icon": "fas fa-barcode"
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 2eda1b60..01fe678c 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -5,7 +5,7 @@
 include { INDEX_GENOME                        } from '../../modules/local/bowtie_genome'
 include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME    } from '../../modules/local/bowtie_map_mirna'
 include { BAM_SORT_STATS_SAMTOOLS             } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
-include { UMITOOLS_DEDUP                      } from '../../modules/nf-core/umitools/dedup/main'
+include { UMICOLLAPSE                         } from '../../modules/nf-core/umicollapse/main'
 include { SAMTOOLS_BAM2FQ                     } from '../../modules/nf-core/samtools/bam2fq/main'
 include { CAT_CAT                             } from '../../modules/nf-core/cat/cat/main'
 
@@ -19,7 +19,6 @@ workflow DEDUPLICATE_UMIS {
     main:
 
     ch_versions = Channel.empty()
-    ch_dedup_stats = Channel.empty()
 
     UMI_MAP_GENOME ( reads, bt_index.collect() )
     ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions)
@@ -28,11 +27,10 @@ workflow DEDUPLICATE_UMIS {
     ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
     ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai)
-    UMITOOLS_DEDUP ( ch_umi_dedup, val_get_dedup_stats)
-    ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions)
-    ch_dedup_stats = ch_dedup_stats.mix(UMITOOLS_DEDUP.out.tsv_edit_distance).join(UMITOOLS_DEDUP.out.tsv_per_umi).join(UMITOOLS_DEDUP.out.tsv_umi_per_position)
+    UMICOLLAPSE ( ch_umi_dedup, val_get_dedup_stats)
+    ch_versions = ch_versions.mix(UMICOLLAPSE.out.versions)
 
-    SAMTOOLS_BAM2FQ ( UMITOOLS_DEDUP.out.bam, false )
+    SAMTOOLS_BAM2FQ ( UMICOLLAPSE.out.bam, false )
     ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions)
 
     ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
@@ -52,6 +50,5 @@ workflow DEDUPLICATE_UMIS {
     emit:
     reads    = ch_dedup_reads
     indices  = bt_index
-    stats    = ch_dedup_stats
     versions = ch_versions
 }

From 65f7f0e0cc812c9921129a0200ee457189fa75d2 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 14:15:05 +0100
Subject: [PATCH 107/117] Switch to umicollapse

---
 conf/modules.config             | 4 +---
 conf/test_umi.config            | 2 --
 subworkflows/local/umi_dedup.nf | 3 +--
 workflows/smrnaseq.nf           | 1 -
 4 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 9fe19d49..a5f7ea37 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) {
 if (params.with_umi) {
     process {
         withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' {
-            ext.args = { meta.single_end ? '--algo ${params.umitools_method}' : '--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric' }
+            ext.args = { meta.single_end ? "--algo ${params.umitools_method}" : "--algo ${params.umitools_method} --paired --remove-unpaired --remove-chimeric" } // UMICollapse selects its algorithm with --algo for both layouts
             ext.prefix = { "${meta.id}.umi_dedup.sorted" }
             publishDir = [
                 path: { "${params.outdir}/umi_dedup" },
@@ -192,8 +192,6 @@ if (params.with_umi) {
                     params.save_umi_intermeds
                 )
             ]
-            time = { check_max( 120.h , 'time' ) }
-            memory = { check_max( 72.GB * task.attempt, 'memory' ) }
         }
 
         withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' {
diff --git a/conf/test_umi.config b/conf/test_umi.config
index 7d9e6e53..48f61181 100644
--- a/conf/test_umi.config
+++ b/conf/test_umi.config
@@ -33,6 +33,4 @@ params {
     umitools_extract_method = 'regex'
     umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P<umi_1>.{12})(?P<discard_2>.*)'
     save_umi_intermeds = true
-    umi_stats = false // takes too much memory for CI
-
 }
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 01fe678c..5ef4b908 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -14,7 +14,6 @@ workflow DEDUPLICATE_UMIS {
     take:
     bt_index
     reads      // channel: [ val(meta), [ reads ] ]
-    val_get_dedup_stats //boolean true/false
 
     main:
 
@@ -27,7 +26,7 @@ workflow DEDUPLICATE_UMIS {
     ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
     ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai)
-    UMICOLLAPSE ( ch_umi_dedup, val_get_dedup_stats)
+    UMICOLLAPSE(ch_umi_dedup)
     ch_versions = ch_versions.mix(UMICOLLAPSE.out.versions)
 
     SAMTOOLS_BAM2FQ ( UMICOLLAPSE.out.bam, false )
diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
index 7c1103d4..58a4e8ad 100644
--- a/workflows/smrnaseq.nf
+++ b/workflows/smrnaseq.nf
@@ -179,7 +179,6 @@ workflow SMRNASEQ {
         DEDUPLICATE_UMIS (
             ch_bowtie_index,
             ch_reads_for_mirna,
-            params.umi_stats
         )
         ch_reads_for_mirna = DEDUPLICATE_UMIS.out.reads
         ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions)

From 82436515987080d4cf132cfbc1b31c7c2ade9d8f Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Mon, 22 Jan 2024 13:16:26 +0000
Subject: [PATCH 108/117] [automated] Fix linting with Prettier

---
 modules.json | 78 ++++++++++++----------------------------------------
 1 file changed, 18 insertions(+), 60 deletions(-)

diff --git a/modules.json b/modules.json
index 53f34207..03697a05 100644
--- a/modules.json
+++ b/modules.json
@@ -8,107 +8,72 @@
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "fastp": {
                         "branch": "master",
                         "git_sha": "1799e452de650f6fb8890d25829bca23014b0728",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp",
-                            "modules"
-                        ]
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "617777a807a1770f73deb38c80004bac06807eef",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp"
-                        ]
+                        "installed_by": ["fastq_fastqc_umitools_fastp"]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/bam2fq": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/flagstat": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_stats_samtools",
-                            "modules"
-                        ]
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "samtools/idxstats": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_stats_samtools",
-                            "modules"
-                        ]
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "samtools/index": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_sort_stats_samtools",
-                            "modules"
-                        ]
+                        "installed_by": ["bam_sort_stats_samtools", "modules"]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_sort_stats_samtools",
-                            "modules"
-                        ]
+                        "installed_by": ["bam_sort_stats_samtools", "modules"]
                     },
                     "samtools/stats": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_stats_samtools",
-                            "modules"
-                        ]
+                        "installed_by": ["bam_stats_samtools", "modules"]
                     },
                     "umicollapse": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "umitools/extract": {
                         "branch": "master",
                         "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53",
-                        "installed_by": [
-                            "fastq_fastqc_umitools_fastp",
-                            "modules"
-                        ]
+                        "installed_by": ["fastq_fastqc_umitools_fastp", "modules"]
                     }
                 }
             },
@@ -117,27 +82,20 @@
                     "bam_sort_stats_samtools": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "bam_stats_samtools": {
                         "branch": "master",
                         "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
-                        "installed_by": [
-                            "bam_sort_stats_samtools",
-                            "subworkflows"
-                        ]
+                        "installed_by": ["bam_sort_stats_samtools", "subworkflows"]
                     },
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
                         "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}

From ea68c8e6864ecfffeee684915ecc93fca2cd184c Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 15:03:13 +0100
Subject: [PATCH 109/117] Don't merge deduplicated fastqs with unmapped fastqs

---
 CHANGELOG.md                    |  3 ++-
 docs/output.md                  |  2 +-
 nextflow.config                 |  1 -
 nextflow_schema.json            |  6 ------
 subworkflows/local/umi_dedup.nf | 12 ------------
 5 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc286f02..10fc5720 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,10 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | ------------- | --------------------------- |
 |               | `--with_umi`                |
 |               | `--umitools_extract_method` |
+|               | `--umitools_method`         |
+|               | `--skip_umi_extract`        |
 |               | `--umitools_bc_pattern`     |
 |               | `--umi_discard_read`        |
 |               | `--save_umi_intermeds`      |
-|               | `--umi_merge_unmapped`      |
 
 ### Software dependencies
 
diff --git a/docs/output.md b/docs/output.md
index fc9d14ef..537f17c5 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -83,7 +83,7 @@ FastP can automatically detect adapter sequences when not specified directly by
   - `samtools_stats/` - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication.
   </details>
 
-[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format and merged with the reads that remained unmapped in order to reduce potential reference bias. This behavior can be stopped by setting `--umi_merge_unmapped false`. The resulting fastq files are used in the remaining steps of the pipeline.
+[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into FASTQ format. The resulting FASTQ files are used in the remaining steps of the pipeline.
 
 ## Bowtie2
 
diff --git a/nextflow.config b/nextflow.config
index 0c3d8b45..0ea71080 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,7 +38,6 @@ params {
     umi_discard_read           = null
     umitools_method            = 'dir'
     save_umi_intermeds         = false
-    umi_merge_unmapped         = true
 
     // Trimming options
     clip_r1                     = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c4d62d96..204376ab 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -95,12 +95,6 @@
                     "type": "boolean",
                     "fa_icon": "fas fa-save",
                     "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory."
-                },
-                "umi_merge_unmapped": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-save",
-                    "description": "Unless deactivated the deduplicated reads are merged with the reads that could not be placed to reduce the potential reference bias",
-                    "default": true
                 }
             },
             "fa_icon": "fas fa-barcode"
diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf
index 5ef4b908..9f65fa8e 100644
--- a/subworkflows/local/umi_dedup.nf
+++ b/subworkflows/local/umi_dedup.nf
@@ -34,18 +34,6 @@ workflow DEDUPLICATE_UMIS {
 
     ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads
 
-    if ( params.umi_merge_unmapped ) {
-
-        SAMTOOLS_BAM2FQ.out.reads
-            .join(UMI_MAP_GENOME.out.unmapped)
-            .map { meta, file1, file2 -> [meta, [file1, file2]]}
-            .set { ch_cat }
-
-        CAT_CAT ( ch_cat )
-        ch_dedup_reads = CAT_CAT.out.file_out
-    }
-
-
     emit:
     reads    = ch_dedup_reads
     indices  = bt_index

From d493ff7b51adb5184c80d0e89cfce2d5482aa0bb Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 16:07:39 +0100
Subject: [PATCH 110/117] Try to set heap size

---
 modules/nf-core/umicollapse/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 01ad2541..593463f8 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -25,6 +25,7 @@ process UMICOLLAPSE {
 
     """
     umicollapse \\
+        -Xmx${task.memory.toMega() * 0.8}M \\
         bam \\
         -i $bam \\
         -o ${prefix}.bam \\

From 6fe662941fa3edfa409d98f40e3041a6d22c6392 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Mon, 22 Jan 2024 16:13:20 +0100
Subject: [PATCH 111/117] cast heap size to int

---
 modules/nf-core/umicollapse/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 593463f8..314e0df1 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -25,7 +25,7 @@ process UMICOLLAPSE {
 
     """
     umicollapse \\
-        -Xmx${task.memory.toMega() * 0.8}M \\
+        -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\
         bam \\
         -i $bam \\
         -o ${prefix}.bam \\

From 58c5b1a5c78c5618d6a7e1d5b4fe3f557c162c9c Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 23 Jan 2024 09:19:42 +0100
Subject: [PATCH 112/117] Increase java stack size

---
 modules/nf-core/umicollapse/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 314e0df1..fdecd6d9 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -26,6 +26,7 @@ process UMICOLLAPSE {
     """
     umicollapse \\
         -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\
+        -Xss99M \\
         bam \\
         -i $bam \\
         -o ${prefix}.bam \\

From 53c6ffa861eb9c8894e3859d0fb1ddf673eb9fd2 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 23 Jan 2024 09:37:39 +0100
Subject: [PATCH 113/117] Set jvm options via JAVA_TOOL_OPTIONS

---
 modules/nf-core/umicollapse/main.nf | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index fdecd6d9..90224df0 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -24,9 +24,8 @@ process UMICOLLAPSE {
     def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
-    umicollapse \\
-        -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\
-        -Xss99M \\
+    JAVA_TOOL_OPTIONS="-Xmx${(task.memory.toMega() * 0.8).intValue()}M -Xss99M" \\
+      umicollapse \\
         bam \\
         -i $bam \\
         -o ${prefix}.bam \\

From d5192d48a3491504b41fe1af51860d1d9773c632 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 23 Jan 2024 09:52:51 +0100
Subject: [PATCH 114/117] Full manual mode

---
 modules/nf-core/umicollapse/main.nf | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 90224df0..788f4d1e 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -21,15 +21,17 @@ process UMICOLLAPSE {
     script:
     def args   = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
-    JAVA_TOOL_OPTIONS="-Xmx${(task.memory.toMega() * 0.8).intValue()}M -Xss99M" \\
-      umicollapse \\
-        bam \\
-        -i $bam \\
-        -o ${prefix}.bam \\
-        $args
+    java \\
+      -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\
+      -Xss99M \\
+      -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\
+      bam \\
+      -i $bam \\
+      -o ${prefix}.bam \\
+      $args
 
     mv .command.log ${prefix}_UMICollapse.log
 

From 3d8219ead8e53ee0c5ec8b1895defaaca119d1a8 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 23 Jan 2024 12:58:11 +0100
Subject: [PATCH 115/117] Use two-pass mode for umicollapse

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index a5f7ea37..127e0a34 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -182,7 +182,7 @@ if (params.with_umi && !params.skip_umi_extract) {
 if (params.with_umi) {
     process {
         withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' {
-            ext.args = { meta.single_end ? "--algo ${params.umitools_method}" : "--method ${params.umitools_method} --paired --remove-unpaired --remove-chimeric" }
+            ext.args = { meta.single_end ? "--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" }
             ext.prefix = { "${meta.id}.umi_dedup.sorted" }
             publishDir = [
                 path: { "${params.outdir}/umi_dedup" },

From 4fb25232db7bed454daac683b0b3dcb77daa4d1f Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 23 Jan 2024 13:24:39 +0100
Subject: [PATCH 116/117] Increase stack size even further

---
 modules/nf-core/umicollapse/main.nf | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 788f4d1e..0ae78118 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -23,10 +23,11 @@ process UMICOLLAPSE {
     def prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
+    // Things I tried...
     """
     java \\
-      -Xmx${(task.memory.toMega() * 0.8).intValue()}M \\
-      -Xss99M \\
+      -Xmx${(task.memory.toGiga() - 1).intValue()}G \\
+      -Xss1G \\
       -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\
       bam \\
       -i $bam \\

From 2dd1e24ff2c1cc643e71b5e6a3cfdf3dbd47bd33 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 24 Jan 2024 15:24:55 +0000
Subject: [PATCH 117/117] Updated umicollapse to modules provided version :)

---
 modules.json                                  |   2 +-
 modules/nf-core/umicollapse/main.nf           |  31 ++--
 modules/nf-core/umicollapse/meta.yml          |   4 +
 .../nf-core/umicollapse/tests/main.nf.test    | 153 ++++++++++++++++++
 .../umicollapse/tests/main.nf.test.snap       |  36 +++++
 .../nf-core/umicollapse/tests/nextflow.config |   8 +
 .../umicollapse/tests/nextflow_PE.config      |  10 ++
 .../umicollapse/tests/nextflow_SE.config      |  10 ++
 modules/nf-core/umicollapse/tests/tags.yml    |   2 +
 9 files changed, 243 insertions(+), 13 deletions(-)
 create mode 100644 modules/nf-core/umicollapse/tests/main.nf.test
 create mode 100644 modules/nf-core/umicollapse/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/umicollapse/tests/nextflow.config
 create mode 100644 modules/nf-core/umicollapse/tests/nextflow_PE.config
 create mode 100644 modules/nf-core/umicollapse/tests/nextflow_SE.config
 create mode 100644 modules/nf-core/umicollapse/tests/tags.yml

diff --git a/modules.json b/modules.json
index 03697a05..8236fcfd 100644
--- a/modules.json
+++ b/modules.json
@@ -67,7 +67,7 @@
                     },
                     "umicollapse": {
                         "branch": "master",
-                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "git_sha": "b71a681e9ec41cefd330e12b8566b5f5aff4941c",
                         "installed_by": ["modules"]
                     },
                     "umitools/extract": {
diff --git a/modules/nf-core/umicollapse/main.nf b/modules/nf-core/umicollapse/main.nf
index 0ae78118..291d3273 100644
--- a/modules/nf-core/umicollapse/main.nf
+++ b/modules/nf-core/umicollapse/main.nf
@@ -1,5 +1,6 @@
 process UMICOLLAPSE {
     tag "$meta.id"
+    label "process_high"
     label "process_high_memory"
 
     conda "${moduleDir}/environment.yml"
@@ -12,7 +13,7 @@ process UMICOLLAPSE {
 
     output:
     tuple val(meta), path("*.bam"), emit: bam
-    tuple val(meta), path("*.log"), emit: log
+    tuple val(meta), path("*_UMICollapse.log"), emit: log
     path  "versions.yml"          , emit: versions
 
     when:
@@ -22,19 +23,25 @@ process UMICOLLAPSE {
     def args   = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def VERSION = '1.0.0-1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-
-    // Things I tried...
+    // Memory allocation: We need to make sure that both heap and stack size is sufficiently large for
+    // umicollapse. We set the stack size to 5% of the available memory, the heap size to 90%
+    // which leaves 5% for stuff happening outside of java without the scheduler killing the process.
+    def max_heap_size_mega = (task.memory.toMega() * 0.9).intValue()
+    def max_stack_size_mega = (task.memory.toMega() * 0.05).intValue()
     """
+    # Getting the umicollapse jar file like this because `umicollapse` is a Python wrapper script generated
+    # by conda that allows to set the heap size (Xmx), but not the stack size (Xss).
+    # `which` allows us to get the directory that contains `umicollapse`, independent of whether we
+    # are in a container or conda environment.
+    UMICOLLAPSE_JAR=\$(dirname \$(which umicollapse))/../share/umicollapse-${VERSION}/umicollapse.jar
     java \\
-      -Xmx${(task.memory.toGiga() - 1).intValue()}G \\
-      -Xss1G \\
-      -jar /usr/local/share/umicollapse-${VERSION}/umicollapse.jar \\
-      bam \\
-      -i $bam \\
-      -o ${prefix}.bam \\
-      $args
-
-    mv .command.log ${prefix}_UMICollapse.log
+        -Xmx${max_heap_size_mega}M \\
+        -Xss${max_stack_size_mega}M \\
+        -jar \$UMICOLLAPSE_JAR \\
+        bam \\
+        -i $bam \\
+        -o ${prefix}.bam \\
+        $args | tee ${prefix}_UMICollapse.log
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml
index 528bc0ee..c45d6932 100644
--- a/modules/nf-core/umicollapse/meta.yml
+++ b/modules/nf-core/umicollapse/meta.yml
@@ -39,6 +39,10 @@ output:
       type: file
       description: BAM file with deduplicated UMIs.
       pattern: "*.{bam}"
+  - log:
+      type: file
+      description: A log file with the deduplication statistics.
+      pattern: "*_{UMICollapse.log}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test
new file mode 100644
index 00000000..1b8bf7e9
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/main.nf.test
@@ -0,0 +1,153 @@
+nextflow_process {
+
+    name "Test Process UMICOLLAPSE"
+    script "../main.nf"
+    process "UMICOLLAPSE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "umicollapse"
+    tag "umitools/extract"
+    tag "samtools/index"
+    tag "bwa/index"
+    tag "bwa/mem"
+
+    test("umicollapse single end test") {
+        setup{
+            run("UMITOOLS_EXTRACT"){
+                script "../../umitools/extract/main.nf"
+                config "./nextflow_SE.config"
+                process{
+                    """
+                    input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                    ]
+                    """
+                }
+            }
+
+            run("BWA_INDEX"){
+                script "../../bwa/index/main.nf"
+                process{
+                    """
+                    input[0] = [
+                            [ id:'sarscov2'],
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                    """
+                }
+            }
+            run("BWA_MEM"){
+                script "../../bwa/mem/main.nf"
+                process{
+                    """
+                    input[0] = UMITOOLS_EXTRACT.out.reads
+                    input[1] = BWA_INDEX.out.index
+                    input[2] = true
+                    """
+                }
+            }
+            run("SAMTOOLS_INDEX"){
+                script "../../samtools/index/main.nf"
+                process{
+                    """
+                    input[0] = BWA_MEM.out.bam
+                    """
+                }
+            }
+        }
+
+        when {
+            config "./nextflow_SE.config"
+            process {
+                """
+                input[0] =  BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.bam,
+                    process.out.versions).match() }
+            )
+        }
+
+    }
+
+    test("umicollapse paired tests") {
+        setup{
+            run("UMITOOLS_EXTRACT"){
+                script "../../umitools/extract/main.nf"
+                config "./nextflow_PE.config"
+                process{
+                    """
+                    input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                    ]
+                    """
+                }
+            }
+
+            run("BWA_INDEX"){
+                script "../../bwa/index/main.nf"
+                process{
+                    """
+                    input[0] = [
+                            [ id:'sarscov2'],
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                    """
+                }
+            }
+            run("BWA_MEM"){
+                script "../../bwa/mem/main.nf"
+                process{
+                    """
+                    input[0] = UMITOOLS_EXTRACT.out.reads
+                    input[1] = BWA_INDEX.out.index
+                    input[2] = true
+                    """
+                }
+            }
+            run("SAMTOOLS_INDEX"){
+                script "../../samtools/index/main.nf"
+                process{
+                    """
+                    input[0] = BWA_MEM.out.bam
+                    """
+                }
+            }
+        }
+
+        when {
+            config "./nextflow_PE.config"
+            process {
+                """
+                input[0] =  BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.bam,
+                    process.out.versions).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap
new file mode 100644
index 00000000..60250530
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap
@@ -0,0 +1,36 @@
+{
+    "umicollapse single end test": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.dedup.bam:md5,4e86d60aa82242889ab5f9031418ab2e"
+                ]
+            ],
+            [
+                "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996"
+            ]
+        ],
+        "timestamp": "2024-01-24T13:57:02.801573999"
+    },
+    "umicollapse paired tests": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.dedup.bam:md5,54be836ec246073e60212445b4369a91"
+                ]
+            ],
+            [
+                "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996"
+            ]
+        ],
+        "timestamp": "2024-01-24T13:57:24.797928099"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config
new file mode 100644
index 00000000..844edbdc
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/nextflow.config
@@ -0,0 +1,8 @@
+process {
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN"'
+    }
+    withName: UMICOLLAPSE {
+        ext.prefix = { "${meta.id}.dedup" }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config
new file mode 100644
index 00000000..ae4c9632
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/nextflow_PE.config
@@ -0,0 +1,10 @@
+process {
+
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"'
+    }
+
+    withName: UMICOLLAPSE {
+        ext.prefix = { "${meta.id}.dedup" }
+    }
+}
diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config
new file mode 100644
index 00000000..d4b94436
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/nextflow_SE.config
@@ -0,0 +1,10 @@
+process {
+
+    withName: UMITOOLS_EXTRACT {
+        ext.args = '--bc-pattern="NNNN"'
+    }
+
+    withName: UMICOLLAPSE {
+        ext.prefix = { "${meta.id}.dedup" }
+    }
+}
diff --git a/modules/nf-core/umicollapse/tests/tags.yml b/modules/nf-core/umicollapse/tests/tags.yml
new file mode 100644
index 00000000..912879c4
--- /dev/null
+++ b/modules/nf-core/umicollapse/tests/tags.yml
@@ -0,0 +1,2 @@
+umicollapse:
+  - "modules/nf-core/umicollapse/**"