Skip to content

Commit

Permalink
Reorder rnaseq preprocessing, fix minor issues, test sortmerna (#4982)
Browse files: browse the repository at this point in the history
* Trimming should come first in preprocessing

* Update tests to run sortmerna

* sortmerna working in subworkflow

* Don't need test data updates

* Appease eclint
  • Loading branch information
pinin4fjords authored Feb 24, 2024
1 parent 483e483 commit 53a9794
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 43 deletions.
52 changes: 27 additions & 25 deletions subworkflows/nf-core/preprocess_rnaseq/main.nf
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,26 +88,6 @@ workflow PREPROCESS_RNASEQ {

ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))

//
// MODULE: Remove ribosomal RNA reads
//
if (remove_ribo_rna) {
ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
.map { row -> file(row, checkIfExists: true) }
.collect()

SORTMERNA (
ch_filtered_reads,
ch_sortmerna_fastas
)
.reads
.set { ch_filtered_reads }

ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.map{ it[1] })

ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
}

//
// SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
//
Expand All @@ -128,7 +108,6 @@ workflow PREPROCESS_RNASEQ {
ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip
.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip)
.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log)
.map{ it[1] }
.mix(ch_multiqc_files)
}

Expand All @@ -155,7 +134,6 @@ workflow PREPROCESS_RNASEQ {
ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip
.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip)
.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.map{tuple(it[0], [it[1]])})
.map{ it[1] }
.mix(ch_multiqc_files)
}

Expand Down Expand Up @@ -196,11 +174,35 @@ workflow PREPROCESS_RNASEQ {
[ [], [] ],
false
)
.primary_fastq
.set { ch_filtered_reads }

BBMAP_BBSPLIT.out.primary_fastq
.set { ch_filtered_reads }

ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
}

//
// MODULE: Remove ribosomal RNA reads
//
if (remove_ribo_rna) {
ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
.map { row -> file(row, checkIfExists: true) }
.collect()

SORTMERNA (
ch_filtered_reads,
ch_sortmerna_fastas
)

SORTMERNA.out.reads
.set { ch_filtered_reads }

ch_multiqc_files = ch_multiqc_files
.mix(SORTMERNA.out.log)

ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
}

// Branch FastQ channels if 'auto' specified to infer strandedness
ch_filtered_reads
.branch {
Expand Down Expand Up @@ -248,7 +250,7 @@ workflow PREPROCESS_RNASEQ {
reads = ch_strand_inferred_fastq
trim_read_count = ch_trim_read_count

multiqc_files = ch_multiqc_files
multiqc_files = ch_multiqc_files.transpose().map{it[1]}
versions = ch_versions // channel: [ versions.yml ]
}

Expand Down
28 changes: 20 additions & 8 deletions subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,24 +18,31 @@ nextflow_workflow {
tag "subworkflows/fastq_fastqc_umitools_fastp"
tag "subworkflows/fastq_subsample_fq_salmon"



test("homo_sapiens paired-end [fastq] fastp") {

when {
workflow {
"""
input[0] = Channel.of([
ch_reads = Channel.of([
[ id:'test', single_end:false, strandedness:'auto' ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
]) // ch_reads
])
ch_ribo_db = file('ribo_db.txt')
ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
input[0] = ch_reads
input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
input[4] = [] // ch_salmon_index
input[5] = [] // ch_bbsplit_index
input[6] = [] // ch_ribo_db
input[6] = ch_ribo_db // ch_ribo_db
input[7] = true // skip_bbsplit
input[8] = false // skip_fastqc
input[9] = false // skip_trimming
Expand All @@ -44,7 +51,7 @@ nextflow_workflow {
input[12] = 'fastp' // trimmer
input[13] = 10 // min_trimmed_reads
input[14] = true // save_trimmed
input[15] = false // remove_ribo_rna
input[15] = true // remove_ribo_rna
input[16] = false // with_umi
input[17] = 0 // umi_discard_read
"""
Expand Down Expand Up @@ -72,19 +79,24 @@ nextflow_workflow {
when {
workflow {
"""
input[0] = Channel.of([
ch_reads = Channel.of([
[ id:'test', single_end:false, strandedness:'auto' ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
]) // ch_reads
])
ch_ribo_db = file('ribo_db.txt')
ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
input[0] = ch_reads
input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
input[4] = [] // ch_salmon_index
input[5] = [] // ch_bbsplit_index
input[6] = [] // ch_ribo_db
input[6] = ch_ribo_db // ch_ribo_db
input[7] = true // skip_bbsplit
input[8] = false // skip_fastqc
input[9] = false // skip_trimming
Expand All @@ -93,7 +105,7 @@ nextflow_workflow {
input[12] = 'fastp' // trimmer
input[13] = 10 // min_trimmed_reads
input[14] = true // save_trimmed
input[15] = false // remove_ribo_rna
input[15] = true // remove_ribo_rna
input[16] = false // with_umi
input[17] = 0 // umi_discard_read
"""
Expand Down
20 changes: 10 additions & 10 deletions subworkflows/nf-core/preprocess_rnaseq/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 53a9794

Please sign in to comment.