From 09628ac5cd127779886f52845f86d8318f2c275c Mon Sep 17 00:00:00 2001
From: jemten
Date: Mon, 30 Oct 2023 16:32:23 +0100
Subject: [PATCH] trying to fix the concatenation of fastq files

---
 subworkflows/local/alignment.nf | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/alignment.nf b/subworkflows/local/alignment.nf
index b0fca701..54a6bda0 100644
--- a/subworkflows/local/alignment.nf
+++ b/subworkflows/local/alignment.nf
@@ -28,9 +28,13 @@ workflow ALIGNMENT {
     main:
         ch_versions = Channel.empty()
 
-        CAT_FASTQ(reads)
+        ch_fastq = branchFastqToSingleAndMulti(reads)
 
-        FASTP(CAT_FASTQ.out.reads,[],false,false)
+        CAT_FASTQ(ch_fastq.multiple_fq)
+            .reads.mix(ch_fastq.single_fq)
+            .set { ch_cat_fastq }
+
+        FASTP(ch_cat_fastq, [], false, false)
 
         STAR_ALIGN(FASTP.out.reads, star_index, gtf, false, 'illumina', false)
 
@@ -86,3 +90,38 @@ workflow ALIGNMENT {
         salmon_info = SALMON_QUANT.out.json_info
         versions = ch_versions
 }
+
+
+// Custom functions
+
+/**
+* Branch the read channel into different channels,
+* depending on whether the sample has multiple fastq files or not.
+* The resulting channels get the original sample id in meta, i.e. meta.id
+* with everything from its last '_T' onwards removed. An id without
+* '_T' is kept unchanged.
+*
+* @param ch_reads Channel containing meta and fastq reads
+* @return Channel containing meta with original id and branched on number of fastq files
+*         (single_fq: one grouped fastq entry per id, multiple_fq: more than one)
+*/
+def branchFastqToSingleAndMulti(ch_reads) {
+
+    return ch_reads
+        .map {
+            meta, fastq ->
+                // Declare locals with def: undeclared variables inside a closure are
+                // script-global and can be overwritten by concurrent channel items.
+                def suffix_start = meta.id.lastIndexOf('_T')
+                def original_id  = suffix_start >= 0 ? meta.id.substring(0, suffix_start) : meta.id
+                [ meta + [id: original_id], fastq ]
+        }
+        .groupTuple()
+        .branch {
+            meta, fastq ->
+                single_fq: fastq.size() == 1
+                    return [ meta, fastq.flatten() ]
+                multiple_fq: fastq.size() > 1
+                    return [ meta, fastq.flatten() ]
+        }
+}