Reorder rnaseq preprocessing, fix minor issues, test sortmerna (#4982)

* Trimming should come first in preprocessing
* Update tests to run sortmerna
* sortmerna working in subworkflow
* Don't need test data updates
* Appease eclint
nf-core · Feb 24, 2024 · 53a9794 · 53a9794
1 parent 483e483
commit 53a9794
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 43 deletions.
diff --git a/subworkflows/nf-core/preprocess_rnaseq/main.nf b/subworkflows/nf-core/preprocess_rnaseq/main.nf
@@ -88,26 +88,6 @@ workflow PREPROCESS_RNASEQ {
 
     ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
 
-    //
-    // MODULE: Remove ribosomal RNA reads
-    //
-    if (remove_ribo_rna) {
-        ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
-            .map { row -> file(row, checkIfExists: true) }
-            .collect()
-
-        SORTMERNA (
-            ch_filtered_reads,
-            ch_sortmerna_fastas
-        )
-        .reads
-        .set { ch_filtered_reads }
-
-        ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.map{ it[1] })
-
-        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
-    }
-
     //
     // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
     //
@@ -128,7 +108,6 @@ workflow PREPROCESS_RNASEQ {
         ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip
             .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip)
             .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log)
-            .map{ it[1] }
             .mix(ch_multiqc_files)
     }
 
@@ -155,7 +134,6 @@ workflow PREPROCESS_RNASEQ {
         ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip
             .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip)
             .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.map{tuple(it[0], [it[1]])})
-            .map{ it[1] }
             .mix(ch_multiqc_files)
     }
 
@@ -196,11 +174,35 @@ workflow PREPROCESS_RNASEQ {
             [ [], [] ],
             false
         )
-        .primary_fastq
-        .set { ch_filtered_reads }
+
+        BBMAP_BBSPLIT.out.primary_fastq
+            .set { ch_filtered_reads }
+
         ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
     }
 
+    //
+    // MODULE: Remove ribosomal RNA reads
+    //
+    if (remove_ribo_rna) {
+        ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
+            .map { row -> file(row, checkIfExists: true) }
+            .collect()
+
+        SORTMERNA (
+            ch_filtered_reads,
+            ch_sortmerna_fastas
+        )
+
+        SORTMERNA.out.reads
+            .set { ch_filtered_reads }
+
+        ch_multiqc_files = ch_multiqc_files
+            .mix(SORTMERNA.out.log)
+
+        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+    }
+
     // Branch FastQ channels if 'auto' specified to infer strandedness
     ch_filtered_reads
         .branch {
@@ -248,7 +250,7 @@ workflow PREPROCESS_RNASEQ {
     reads           = ch_strand_inferred_fastq
     trim_read_count = ch_trim_read_count
 
-    multiqc_files   = ch_multiqc_files
+    multiqc_files   = ch_multiqc_files.transpose().map{it[1]}
     versions        = ch_versions                     // channel: [ versions.yml ]
 }
 

diff --git a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test
@@ -18,24 +18,31 @@ nextflow_workflow {
     tag "subworkflows/fastq_fastqc_umitools_fastp"
     tag "subworkflows/fastq_subsample_fq_salmon"
 
+
+
     test("homo_sapiens paired-end [fastq] fastp") {
 
         when {
             workflow {
                 """
-                input[0] = Channel.of([
+                ch_reads = Channel.of([
                     [ id:'test', single_end:false, strandedness:'auto' ], // meta map
                     [
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
                     ]
-                ]) // ch_reads
+                ])
+
+                ch_ribo_db = file('ribo_db.txt')
+                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
+
+                input[0] = ch_reads
                 input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
                 input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
                 input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
                 input[4] = []              // ch_salmon_index
                 input[5] = []              // ch_bbsplit_index
-                input[6] = []              // ch_ribo_db
+                input[6] = ch_ribo_db      // ch_ribo_db
                 input[7] = true            // skip_bbsplit
                 input[8] = false           // skip_fastqc
                 input[9] = false           // skip_trimming
@@ -44,7 +51,7 @@ nextflow_workflow {
                 input[12] = 'fastp'        // trimmer
                 input[13] = 10             // min_trimmed_reads
                 input[14] = true           // save_trimmed
-                input[15] = false          // remove_ribo_rna
+                input[15] = true           // remove_ribo_rna
                 input[16] = false          // with_umi
                 input[17] = 0              // umi_discard_read
                 """
@@ -72,19 +79,24 @@ nextflow_workflow {
         when {
             workflow {
                 """
-                input[0] = Channel.of([
+                ch_reads = Channel.of([
                     [ id:'test', single_end:false, strandedness:'auto' ], // meta map
                     [
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
                     ]
-                ]) // ch_reads
+                ])
+
+                ch_ribo_db = file('ribo_db.txt')
+                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
+
+                input[0] = ch_reads
                 input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
                 input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
                 input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
                 input[4] = []              // ch_salmon_index
                 input[5] = []              // ch_bbsplit_index
-                input[6] = []              // ch_ribo_db
+                input[6] = ch_ribo_db      // ch_ribo_db
                 input[7] = true            // skip_bbsplit
                 input[8] = false           // skip_fastqc
                 input[9] = false           // skip_trimming
@@ -93,7 +105,7 @@ nextflow_workflow {
                 input[12] = 'fastp'        // trimmer
                 input[13] = 10             // min_trimmed_reads
                 input[14] = true           // save_trimmed
-                input[15] = false          // remove_ribo_rna
+                input[15] = true           // remove_ribo_rna
                 input[16] = false          // with_umi
                 input[17] = 0              // umi_discard_read
                 """

diff --git a/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap b/subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap