nf-core · maxulysse · May 22, 2024 · Nov 7, 2023 · Nov 7, 2023 · Nov 7, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -33,7 +33,7 @@ jobs:
         NXF_VER:
           - "23.04.0"
           - "latest-everything"
-        profile: ["alevin", "cellranger", "kallisto", "star"]
+        profile: ["alevin", "cellranger", "cellrangermulti", "kallisto", "star"]
 
     steps:
       - name: Disk space cleanup

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - The universc protocol is now specified via the `--protocol` flag
   - Any protocol specified is now passed to the respective aligner
   - Added a section to the documentation
+- Add cellranger multi subworkflow ([#247](https://github.com/nf-core/scrnaseq/issues/247))
 
 ## v2.4.1 - 2023-09-28
 

diff --git a/assets/EMPTY b/assets/EMPTY
@@ -0,0 +1 @@
+
diff --git a/assets/cellrangermulti_samplesheet.csv b/assets/cellrangermulti_samplesheet.csv
@@ -0,0 +1,9 @@
+sample,fastq_1,fastq_2,feature_type,expected_cells
+PBMC_10K,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R2_001.fastq.gz,gex,1000
+PBMC_10K,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R2_001.fastq.gz,vdj,1000
+PBMC_10K,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R2_001.fastq.gz,ab,1000
+PBMC_10K_CMO,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz,gex,1000
+PBMC_10K_CMO,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R2_001.fastq.gz,cmo,1000
+PBMC_10K_CMV,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz,gex,1000
+PBMC_10K_CMV,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R2_001.fastq.gz,ab,1000
+PBMC_10K_CMV,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R1_001.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R2_001.fastq.gz,vdj,1000
diff --git a/assets/cmo_barcodes.csv b/assets/cmo_barcodes.csv
@@ -0,0 +1,3 @@
+sample_id,cmo_ids,description
+PBMCs_human_1,CMO301,PBMCs_human_1
+PBMCs_human_2,CMO302,PBMCs_human_2
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -40,6 +40,11 @@
                 "type": "string",
                 "enum": ["atac", "gex"],
                 "meta": ["sample_type"]
+            },
+            "feature_type": {
+                "type": "string",
+                "enum": ["gex", "vdj", "ab", "beam", "crispr", "cmo"],
+                "meta": ["feature_type"]
             }
         },
         "required": ["sample", "fastq_1", "fastq_2"]

diff --git a/conf/modules.config b/conf/modules.config
@@ -229,3 +229,56 @@ if (params.aligner == 'kallisto') {
         }
     }
 }
+
+if (params.aligner == 'cellrangermulti') { // process overrides applied only when the cellrangermulti aligner is selected
+    process {
+        withName: FASTQC { ext.prefix = { "${meta.id}_${meta.feature_type}" } } // suffix the prefix with the feature type so data types can be distinguished after renaming
+        withName: 'NFCORE_SCRNASEQ:SCRNASEQ:CELLRANGER_MULTI_ALIGN:CELLRANGER_MULTI' {
+            ext.prefix = null // explicitly forced to null: the module was otherwise reading a wrong prefix (root cause unknown)
+            publishDir = [
+                path: "${params.outdir}/${params.aligner}/count",
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // returns null for versions.yml so that file is not published
+            ]
+        }
+        withName: 'GUNZIP*' { // NOTE(review): selectors are regular expressions - confirm this pattern matches every intended GUNZIP process name
+            publishDir = [
+                enabled: false // publishing is switched off for the matching processes
+            ]
+        }
+        withName: CELLRANGER_MKGTF {
+            publishDir = [
+                path: "${params.outdir}/${params.aligner}/mkgtf",
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // returns null for versions.yml so that file is not published
+            ]
+        }
+        withName: CELLRANGER_MKREF { // no saveAs filter here: every output file is published under mkref
+            publishDir = [
+                path: "${params.outdir}/${params.aligner}/mkref",
+                mode: params.publish_dir_mode
+            ]
+        }
+        withName: CELLRANGER_MKVDJREF { // no saveAs filter here: every output file is published under mkvdjref
+            publishDir = [
+                path: "${params.outdir}/${params.aligner}/mkvdjref",
+                mode: params.publish_dir_mode
+            ]
+        }
+    }
+}
+
+
+//
+// QUICK FIX FOR A PROBLEM WITH THE MULTIQC CONTAINER IMAGE
+// TODO: REMOVE ONCE THIS IS FIXED IN THE NF-CORE MODULE
+//
+process {
+    withName: 'MULTIQC|CUSTOM_DUMPSOFTWAREVERSIONS' { // the same pinned multiqc 1.20 image is applied to both processes
+        container = { // Galaxy depot image for Singularity unless ext.singularity_pull_docker_container is set; biocontainers image otherwise
+            "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+            'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_2' :
+            'biocontainers/multiqc:1.20--pyhdfd78af_2' }"
+        }
+    }
+}
diff --git a/conf/test_cellranger_multi.config b/conf/test_cellranger_multi.config
@@ -0,0 +1,38 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests with cellranger multi
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/scrnaseq -profile test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+// shared across profiles
+params {
+    config_profile_name        = 'Test profile (Cellranger Multi)'
+    config_profile_description = 'Minimal test dataset to check pipeline function using cellranger multi'
+
+    // Resource limits for the test case
+    max_cpus   = 10
+    max_memory = '50.GB'
+    max_time   = '6.h'
+
+    // Input data (both files are bundled in the pipeline's assets directory)
+    input           = "${projectDir}/assets/cellrangermulti_samplesheet.csv"
+    cmo_barcode_csv = "${projectDir}/assets/cmo_barcodes.csv" // bundled asset, resolved like `input`; a feature-branch URL breaks once the branch is deleted
+    skip_emptydrops = true // not enough data in small test
+
+    // Genome references (Ensembl release 110, GRCh38)
+    fasta           = 'https://ftp.ensembl.org/pub/release-110/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz'
+    gtf             = 'https://ftp.ensembl.org/pub/release-110/gtf/homo_sapiens/Homo_sapiens.GRCh38.110.gtf.gz'
+
+    // Aligner selection
+    aligner         = 'cellrangermulti'
+    protocol        = 'auto'
+
+    // Other: exclude the 'genomes' parameter from schema validation
+    validationSchemaIgnoreParams = 'genomes'
+}
diff --git a/docs/images/nf-core-scrnaseq_logo_dark.png b/docs/images/nf-core-scrnaseq_logo_dark.png
diff --git a/docs/images/nf-core-scrnaseq_logo_light.png b/docs/images/nf-core-scrnaseq_logo_light.png
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
@@ -9,6 +9,7 @@ class WorkflowScrnaseq {
         def jsonSlurper = new JsonSlurper()
         def json = new File("${workflow.projectDir}/assets/protocols.json").text
         def protocols = jsonSlurper.parseText(json)
+        aligner = (aligner == 'cellrangermulti') ? 'cellranger' : aligner
         def aligner_map = protocols[aligner]
         if(aligner_map.containsKey(protocol)) {
             return aligner_map[protocol]

diff --git a/modules.json b/modules.json
@@ -7,19 +7,29 @@
                 "nf-core": {
                     "cellranger/count": {
                         "branch": "master",
-                        "git_sha": "92ca535c5a8c0fe89eb71e649ee536bd355ce4fc",
+                        "git_sha": "1774f7876ee03f65ccf49ca2e6bdef7c2356ebca",
                         "installed_by": ["modules"]
                     },
                     "cellranger/mkgtf": {
                         "branch": "master",
-                        "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
                         "installed_by": ["modules"]
                     },
                     "cellranger/mkref": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
                         "installed_by": ["modules"]
                     },
+                    "cellranger/mkvdjref": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
+                    "cellranger/multi": {
+                        "branch": "master",
+                        "git_sha": "a03357ba56317686b6f65102415211616cd38672",
+                        "installed_by": ["modules"]
+                    },
                     "cellrangerarc/count": {
                         "branch": "master",
                         "git_sha": "18e53e27cfeca5dbbfbeee675c05438dec68245f",
@@ -52,7 +62,7 @@
                     },
                     "kallistobustools/count": {
                         "branch": "master",
-                        "git_sha": "9d3e489286eead7dfe1010fd324904d8b698eca7",
+                        "git_sha": "53c2b466994f07def210b7f4cc866bb5a8a2cb92",
                         "installed_by": ["modules"]
                     },
                     "kallistobustools/ref": {

diff --git a/modules/local/emptydrops.nf b/modules/local/emptydrops.nf
@@ -19,7 +19,7 @@ process EMPTYDROPS_CELL_CALLING {
     task.ext.when == null || task.ext.when
 
     script:
-    if (params.aligner == "cellranger") {
+    if (params.aligner in ["cellranger", "cellrangermulti"]) {
 
         matrix   = "matrix.mtx.gz"
         barcodes = "barcodes.tsv.gz"

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
@@ -27,12 +27,12 @@ process MTX_TO_H5AD {
 
     // check input type of inputs
     input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
-    if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, raw here means, the base tool output
+    if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, 'raw' here means, the base tool output
     if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' }
 
     // def file paths for aligners. Cellranger is normally converted with the .h5 files
     // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing
-    if (params.aligner in [ 'cellranger', 'cellrangerarc' ] && input_type == 'custom_emptydrops_filter') {
+    if (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ] && input_type == 'custom_emptydrops_filter') {
 
         aligner      = 'cellranger'
         txp2gene     = ''
@@ -89,12 +89,12 @@ process MTX_TO_H5AD {
     //
     // run script
     //
-    if (params.aligner in [ 'cellranger', 'cellrangerarc' ] && input_type != 'custom_emptydrops_filter')
+    if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"] && input_type != 'custom_emptydrops_filter')
     """
     # convert file types
     mtx_to_h5ad.py \\
         --aligner cellranger \\
-        --input ${input_type}_feature_bc_matrix.h5 \\
+        --input *${input_type}_feature_bc_matrix.h5 \\
         --sample ${meta.id} \\
         --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
     """

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
@@ -35,7 +35,7 @@ process MTX_TO_SEURAT {
 
     // def file paths for aligners. Cellranger is normally converted with the .h5 files
     // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing
-    if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) {
+    if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"]) {
 
         mtx_dir  = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered/' : ''
         matrix   = "${mtx_dir}matrix.mtx*"

diff --git a/modules/nf-core/cellranger/count/main.nf b/modules/nf-core/cellranger/count/main.nf
diff --git a/modules/nf-core/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml
diff --git a/modules/nf-core/cellranger/count/tests/main.nf.test.snap b/modules/nf-core/cellranger/count/tests/main.nf.test.snap
diff --git a/modules/nf-core/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml
diff --git a/modules/nf-core/cellranger/mkvdjref/environment.yml b/modules/nf-core/cellranger/mkvdjref/environment.yml