From 09628ac5cd127779886f52845f86d8318f2c275c Mon Sep 17 00:00:00 2001
From: jemten <jemten@users.noreply.github.com>
Date: Mon, 30 Oct 2023 16:32:23 +0100
Subject: [PATCH] trying to fix the concatenation of fastq files

---
 subworkflows/local/alignment.nf | 37 +++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/alignment.nf b/subworkflows/local/alignment.nf
index b0fca701..54a6bda0 100644
--- a/subworkflows/local/alignment.nf
+++ b/subworkflows/local/alignment.nf
@@ -28,9 +28,13 @@ workflow ALIGNMENT {
     main:
         ch_versions = Channel.empty()
 
-        CAT_FASTQ(reads)
+        ch_fastq = branchFastqToSingleAndMulti(reads)
 
-        FASTP(CAT_FASTQ.out.reads,[],false,false)
+        CAT_FASTQ(ch_fastq.multiple_fq)
+        .reads.mix(ch_fastq.single_fq)
+        .set { ch_cat_fastq }
+
+        FASTP(ch_cat_fastq, [], false, false)
 
         STAR_ALIGN(FASTP.out.reads, star_index, gtf, false, 'illumina', false)
 
@@ -86,3 +90,32 @@ workflow ALIGNMENT {
         salmon_info     = SALMON_QUANT.out.json_info
         versions        = ch_versions
 }
+
+
+// Custom functions
+
+/**
+* Branch the read channel into different channels,
+* depending on whether the sample has multiple fastq files or not.
+* meta.id is cut at its last '_T' (assumes every id contains one) so entries of one sample share a meta.
+*
+* @param ch_reads Channel containing meta and fastq reads
+* @return Branched channel with outputs single_fq and multiple_fq, each emitting [ meta, flattened fastq list ]
+*/
+def branchFastqToSingleAndMulti(ch_reads) {
+    // 'def' keeps original_id local to the closure; an undeclared variable lands in the shared script binding.
+    return ch_reads
+        .map {
+            meta, fastq ->
+                def original_id = meta.id.split('_T')[0..-2].join('_T') // re-join on '_T' so ids containing '_T' stay intact
+                [ meta + [id: original_id], fastq ]
+        }
+        .groupTuple()
+        .branch {
+            meta, fastq ->
+                single_fq: fastq.size() == 1
+                    return [ meta, fastq.flatten() ]
+                multiple_fq: fastq.size() > 1
+                    return [ meta, fastq.flatten() ]
+        }
+}