diff --git a/conf/modules.config b/conf/modules.config index 81395a1d..ed0c1b92 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,25 +32,35 @@ process { } if (!params.skip_emptydrops) { - withName: EMPTYDROPS_CELL_CALLING { + withName: 'CELLBENDER_REMOVEBACKGROUND' { publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, + path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'ADATA_BARCODES' { + ext.prefix = { "${meta.id}_${meta.input_type}_matrix" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> - if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}" - else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}" - else "${meta.id}/${filename}" - } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } - withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' { + withName: 'MTX_TO_H5AD|CONCAT_H5AD|ANNDATAR_CONVERT' { publishDir = [ path: { "${params.outdir}/${params.aligner}/mtx_conversions" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> /* versions.yml must be tested first: it never contains 'combined_', so the per-sample branch would otherwise publish it */ + if (filename.equals('versions.yml')) { null } + else if (!filename.contains('combined_')) { "${meta.id}/${filename}" } + else filename + } ] } + withName: 'GTF_GENE_FILTER' { publishDir = [ path: { "${params.outdir}/gtf_filter" }, @@ -74,13 +84,15 @@ if(params.aligner == "cellranger") { withName: CELLRANGER_MKREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkref", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } withName: CELLRANGER_COUNT { publishDir = [ path: "${params.outdir}/${params.aligner}/count", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = {"--chemistry ${meta.chemistry} --create-bam true " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')} time = { check_max( 240.h * task.attempt, 'time' ) } @@ -161,8 +173,9 @@ if (params.aligner == "alevin") { } withName: 'SIMPLEAF_QUANT' { publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/${params.aligner}/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = "-r cr-like" } @@ -176,20 +189,20 @@ if (params.aligner == "alevin") { if (params.aligner == "star") { process { - withName: STAR_ALIGN { - ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate" - } withName: STAR_GENOMEGENERATE { publishDir = [ path: { "${params.outdir}/${params.aligner}/genome_generate" }, mode: params.publish_dir_mode, - enabled: params.save_reference + enabled: params.save_reference, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: STAR_ALIGN { + ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate" publishDir = [ path: { "${params.outdir}/${params.aligner}/${meta.id}" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } } @@ -201,14 +214,16 @@ if (params.aligner == 'kallisto') { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, - enabled: params.save_reference + enabled: params.save_reference, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: KALLISTOBUSTOOLS_COUNT { def kb_filter = (params.kb_filter) ? '--filter' : '' publishDir = [ path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = "--workflow ${params.kb_workflow} ${kb_filter}" } @@ -247,7 +262,8 @@ if (params.aligner == 'cellrangermulti') { withName: CELLRANGER_MKVDJREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkvdjref", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } diff --git a/modules.json b/modules.json index aa186d98..b9680cfe 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cellbender/removebackground": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "cellranger/count": { "branch": "master", "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837", diff --git a/modules/local/emptydrops.nf b/modules/local/BKP/emptydrops.nf similarity index 100% rename from modules/local/emptydrops.nf rename to modules/local/BKP/emptydrops.nf diff --git a/modules/local/BKP/mtx_to_h5ad.nf b/modules/local/BKP/mtx_to_h5ad.nf new file mode 100644 index 00000000..61e06e91 --- /dev/null +++ b/modules/local/BKP/mtx_to_h5ad.nf @@ -0,0 +1,139 @@ +process MTX_TO_H5AD { + tag "$meta.id" + label 'process_medium' + + conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : + 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + + input: + // inputs from cellranger nf-core module does not come in a single sample dir + // for each sample, the sub-folders and files come directly in array. + tuple val(meta), path(inputs) + path txp2gene + path star_index + + output: + tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Get a file to check input type. Some aligners bring arrays instead of a single file. + def input_to_check = (inputs instanceof String) ? inputs : inputs[0] + + // check input type of inputs + input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' + if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, 'raw' here means, the base tool output + if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' } + + // def file paths for aligners. Cellranger is normally converted with the .h5 files + // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing + if (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ] && input_type == 'custom_emptydrops_filter') { + + aligner = 'cellranger' + txp2gene = '' + star_index = '' + mtx_matrix = "emptydrops_filtered/matrix.mtx" + barcodes_tsv = "emptydrops_filtered/barcodes.tsv" + features_tsv = "emptydrops_filtered/features.tsv" + + } else if (params.aligner == 'kallisto') { + + kb_pattern = (input_type == 'raw') ? 'un' : '' + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 
'emptydrops_filtered' : "counts_${kb_pattern}filtered" + if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows + mtx_matrix = "${mtx_dir}/*.mtx" + barcodes_tsv = "${mtx_dir}/*.barcodes.txt" + features_tsv = "${mtx_dir}/*.genes.names.txt" + + // kallisto allows the following workflows: ["standard", "lamanno", "nac"] + // lamanno creates "spliced" and "unspliced" + // nac creates "nascent", "ambiguous" "mature" + // also, lamanno produces a barcodes and genes file for both spliced and unspliced + // while nac keep only one for all the different .mtx files produced + kb_non_standard_files = "" + if (params.kb_workflow == "lamanno") { + kb_non_standard_files = "spliced unspliced" + matrix = "${mtx_dir}/\${input_type}.mtx" + barcodes_tsv = "${mtx_dir}/\${input_type}.barcodes.txt" + features_tsv = "${mtx_dir}/\${input_type}.genes.txt" + } + if (params.kb_workflow == "nac") { + kb_non_standard_files = "nascent ambiguous mature" + matrix = "${mtx_dir}/*\${input_type}.mtx" + features_tsv = "${mtx_dir}/*.genes.txt" + } // barcodes tsv has same pattern as standard workflow + + } else if (params.aligner == 'alevin') { + + // alevin does not have filtered/unfiltered results + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' + mtx_matrix = "${mtx_dir}/quants_mat.mtx" + barcodes_tsv = "${mtx_dir}/quants_mat_rows.txt" + features_tsv = "${mtx_dir}/quants_mat_cols.txt" + + } else if (params.aligner == 'star') { + + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" + suffix = (input_type == 'custom_emptydrops_filter') ? 
'' : '.gz' + mtx_matrix = "${mtx_dir}/matrix.mtx${suffix}" + barcodes_tsv = "${mtx_dir}/barcodes.tsv${suffix}" + features_tsv = "${mtx_dir}/features.tsv${suffix}" + + } + + // + // run script + // + if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"] && input_type != 'custom_emptydrops_filter') + """ + # convert file types + mtx_to_h5ad.py \\ + --aligner cellranger \\ + --input *${input_type}_feature_bc_matrix.h5 \\ + --sample ${meta.id} \\ + --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad + """ + + else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') + """ + # convert file types + for input_type in ${kb_non_standard_files} ; do + mtx_to_h5ad.py \\ + --aligner ${params.aligner} \\ + --sample ${meta.id} \\ + --input ${matrix} \\ + --barcode ${barcodes_tsv} \\ + --feature ${features_tsv} \\ + --txp2gene ${txp2gene} \\ + --star_index ${star_index} \\ + --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; + done + """ + + else + """ + # convert file types + mtx_to_h5ad.py \\ + --task_process ${task.process} \\ + --aligner ${params.aligner} \\ + --sample ${meta.id} \\ + --input $mtx_matrix \\ + --barcode $barcodes_tsv \\ + --feature $features_tsv \\ + --txp2gene ${txp2gene} \\ + --star_index ${star_index} \\ + --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad + """ + + stub: + """ + mkdir ${meta.id} + touch ${meta.id}/${meta.id}_matrix.h5ad + touch versions.yml + """ +} diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/BKP/mtx_to_seurat.nf similarity index 100% rename from modules/local/mtx_to_seurat.nf rename to modules/local/BKP/mtx_to_seurat.nf diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf new file mode 100644 index 00000000..630d90ae --- /dev/null +++ b/modules/local/adata_barcodes.nf @@ -0,0 +1,29 @@ +process ADATA_BARCODES { + + // + // Module from nf-core/scdownstream. 
+ // This module performs the subset of the h5ad file to only contain barcodes that passed emptydrops filter with cellbender + // + + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8': + 'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }" + + input: + tuple val(meta), path(h5ad), path(barcodes_csv) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'barcodes.py' +} diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 9000d79e..777a1371 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -1,4 +1,9 @@ process ALEVINQC { + + // + // This module executes alevinfry QC reporting tool on alevin results + // + tag "$meta.id" label 'process_low' diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf new file mode 100644 index 00000000..8e4c242e --- /dev/null +++ b/modules/local/anndatar_convert.nf @@ -0,0 +1,29 @@ +process ANNDATAR_CONVERT { + + // + // This module uses the anndata R package to convert h5ad files in different formats + // + + tag "${meta.id}" + + label 'process_medium' + + container "docker.io/fmalmeida/anndatar:dev" // TODO: Fix + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${meta.id}_${meta.input_type}_matrix.Rds"), emit: rds + + when: + task.ext.when == null || task.ext.when + + script: + template 'anndatar_convert.R' + + stub: /* the stub must create exactly the file declared in the rds output above */ + """ + touch ${meta.id}_${meta.input_type}_matrix.Rds + """ +} diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index cd08cbbe..c875ba3c 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,28 +1,29 @@ process 
CONCAT_H5AD { + + // + // This module concatenates all h5ad, per type (raw, filtered, etc.) files generated during pipeline execution + // + + + tag "${meta.id}" + label 'process_medium' - conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" input: - tuple val(input_type), path(h5ad) + tuple val(meta), path(h5ad) path samplesheet output: - path "*.h5ad", emit: h5ad + tuple val(meta), path("*.h5ad"), emit: h5ad when: task.ext.when == null || task.ext.when script: - """ - concat_h5ad.py \\ - --input $samplesheet \\ - --out combined_${input_type}_matrix.h5ad \\ - --suffix "_matrix.h5ad" - """ + template 'concat_h5ad.py' stub: """ diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf index ab573b07..671b6726 100644 --- a/modules/local/gffread_transcriptome.nf +++ b/modules/local/gffread_transcriptome.nf @@ -1,4 +1,9 @@ process GFFREAD_TRANSCRIPTOME { + + // + // This module uses gffread to filter input to generate a transcripts fasta + // + tag "${genome_fasta}" label 'process_low' diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index 063bd228..10af352b 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -1,4 +1,9 @@ process GTF_GENE_FILTER { + + // + // This module executes a custom script to filter input gtf to contain only annotations present in input genome + // + tag "$fasta" label 'process_low' diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 61e06e91..f54318f1 100644 --- a/modules/local/mtx_to_h5ad.nf +++ 
b/modules/local/mtx_to_h5ad.nf @@ -1,11 +1,14 @@ process MTX_TO_H5AD { + + // + // This module executes different conversion template scripts (per aligner) for converting output mtx files into h5ad files + // + tag "$meta.id" label 'process_medium' - conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -15,125 +18,20 @@ process MTX_TO_H5AD { path star_index output: - tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad - path "versions.yml" , emit: versions + tuple val(meta), path("${meta.id}_${meta.input_type}_matrix.h5ad"), emit: h5ad + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - // Get a file to check input type. Some aligners bring arrays instead of a single file. - def input_to_check = (inputs instanceof String) ? inputs : inputs[0] - - // check input type of inputs - input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' - if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, 'raw' here means, the base tool output - if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' } - - // def file paths for aligners. 
Cellranger is normally converted with the .h5 files - // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing - if (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ] && input_type == 'custom_emptydrops_filter') { - - aligner = 'cellranger' - txp2gene = '' - star_index = '' - mtx_matrix = "emptydrops_filtered/matrix.mtx" - barcodes_tsv = "emptydrops_filtered/barcodes.tsv" - features_tsv = "emptydrops_filtered/features.tsv" - - } else if (params.aligner == 'kallisto') { - - kb_pattern = (input_type == 'raw') ? 'un' : '' - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "counts_${kb_pattern}filtered" - if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows - mtx_matrix = "${mtx_dir}/*.mtx" - barcodes_tsv = "${mtx_dir}/*.barcodes.txt" - features_tsv = "${mtx_dir}/*.genes.names.txt" - - // kallisto allows the following workflows: ["standard", "lamanno", "nac"] - // lamanno creates "spliced" and "unspliced" - // nac creates "nascent", "ambiguous" "mature" - // also, lamanno produces a barcodes and genes file for both spliced and unspliced - // while nac keep only one for all the different .mtx files produced - kb_non_standard_files = "" - if (params.kb_workflow == "lamanno") { - kb_non_standard_files = "spliced unspliced" - matrix = "${mtx_dir}/\${input_type}.mtx" - barcodes_tsv = "${mtx_dir}/\${input_type}.barcodes.txt" - features_tsv = "${mtx_dir}/\${input_type}.genes.txt" - } - if (params.kb_workflow == "nac") { - kb_non_standard_files = "nascent ambiguous mature" - matrix = "${mtx_dir}/*\${input_type}.mtx" - features_tsv = "${mtx_dir}/*.genes.txt" - } // barcodes tsv has same pattern as standard workflow - - } else if (params.aligner == 'alevin') { - - // alevin does not have filtered/unfiltered results - mtx_dir = (input_type == 
'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' - mtx_matrix = "${mtx_dir}/quants_mat.mtx" - barcodes_tsv = "${mtx_dir}/quants_mat_rows.txt" - features_tsv = "${mtx_dir}/quants_mat_cols.txt" - - } else if (params.aligner == 'star') { + def aligner = (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ]) ? 'cellranger' : params.aligner - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" - suffix = (input_type == 'custom_emptydrops_filter') ? '' : '.gz' - mtx_matrix = "${mtx_dir}/matrix.mtx${suffix}" - barcodes_tsv = "${mtx_dir}/barcodes.tsv${suffix}" - features_tsv = "${mtx_dir}/features.tsv${suffix}" - - } - - // - // run script - // - if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"] && input_type != 'custom_emptydrops_filter') - """ - # convert file types - mtx_to_h5ad.py \\ - --aligner cellranger \\ - --input *${input_type}_feature_bc_matrix.h5 \\ - --sample ${meta.id} \\ - --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad - """ - - else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') - """ - # convert file types - for input_type in ${kb_non_standard_files} ; do - mtx_to_h5ad.py \\ - --aligner ${params.aligner} \\ - --sample ${meta.id} \\ - --input ${matrix} \\ - --barcode ${barcodes_tsv} \\ - --feature ${features_tsv} \\ - --txp2gene ${txp2gene} \\ - --star_index ${star_index} \\ - --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; - done - """ - - else - """ - # convert file types - mtx_to_h5ad.py \\ - --task_process ${task.process} \\ - --aligner ${params.aligner} \\ - --sample ${meta.id} \\ - --input $mtx_matrix \\ - --barcode $barcodes_tsv \\ - --feature $features_tsv \\ - --txp2gene ${txp2gene} \\ - --star_index ${star_index} \\ - --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad - """ + template "mtx_to_h5ad_${aligner}.py" stub: """ - mkdir ${meta.id} - touch ${meta.id}/${meta.id}_matrix.h5ad + 
touch ${meta.id}_${meta.input_type}_matrix.h5ad touch versions.yml """ } diff --git a/modules/local/parse_cellrangermulti_samplesheet.nf b/modules/local/parse_cellrangermulti_samplesheet.nf index df616995..e8f56b67 100644 --- a/modules/local/parse_cellrangermulti_samplesheet.nf +++ b/modules/local/parse_cellrangermulti_samplesheet.nf @@ -1,4 +1,9 @@ process PARSE_CELLRANGERMULTI_SAMPLESHEET { + + // + // This module contains a custom script for checking special cellranger multi samplesheet + // + label 'process_low' publishDir = [ enabled: false ] diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 8e8bd519..5c362c99 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -1,4 +1,9 @@ process SIMPLEAF_INDEX { + + // + // This module executes simpleaf to generate alevin genome index + // + tag "$transcript_gtf" label "process_medium" diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index abb58404..9241b210 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -1,4 +1,9 @@ process SIMPLEAF_QUANT { + + // + // This module executes simpleaf to perform quantification with alevin + // + tag "$meta.id" label 'process_high' diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index 4b3df1e1..70d6770c 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -1,4 +1,9 @@ process STAR_ALIGN { + + // + // This module executes STAR align quantification + // + tag "$meta.id" label 'process_high' diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R new file mode 100755 index 00000000..5be46163 --- /dev/null +++ b/modules/local/templates/anndatar_convert.R @@ -0,0 +1,16 @@ +#!/usr/bin/env Rscript + +# to use nf variables: "${meta.id}" + +# load libraries +library(anndataR) + +# read input +adata <- read_h5ad("${h5ad}") + +# convert to Rds +obj <- adata\$to_Seurat() + +# save files 
+dir.create(file.path("$meta.id"), showWarnings = FALSE) +saveRDS(obj, file = "${meta.id}_${meta.input_type}_matrix.Rds") diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py new file mode 100644 index 00000000..8a9b10a7 --- /dev/null +++ b/modules/local/templates/barcodes.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import platform +import anndata as ad +import pandas as pd + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +df = pd.read_csv("${barcodes_csv}", header=None) +adata = ad.read_h5ad("${h5ad}") + +adata = adata[df[0].values] + +adata.write_h5ad("${prefix}.h5ad") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "anndata": ad.__version__, + "pandas": pd.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/bin/concat_h5ad.py b/modules/local/templates/concat_h5ad.py similarity index 53% rename from bin/concat_h5ad.py rename to modules/local/templates/concat_h5ad.py index 43ea071a..087f7fde 100755 --- a/bin/concat_h5ad.py +++ b/modules/local/templates/concat_h5ad.py @@ -7,7 +7,6 @@ import scanpy as sc, anndata as ad, pandas as pd from pathlib import Path -import argparse def read_samplesheet(samplesheet): @@ -17,36 +16,24 @@ def read_samplesheet(samplesheet): # samplesheet may contain replicates, when it has, # group information from replicates and collapse with commas # only keep unique values using set() - df = df.groupby(["sample"]).agg(lambda column: 
",".join(set(column))) + df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str)))) return df if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") - - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument( - "-s", - "--suffix", - dest="suffix", - help="Suffix of matrices to remove and get sample name", - ) - - args = vars(parser.parse_args()) # Open samplesheet as dataframe - df_samplesheet = read_samplesheet(args["input"]) + df_samplesheet = read_samplesheet("${samplesheet}") # find all h5ad and append to dict - dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} + dict_of_h5ad = {str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} # concat h5ad files adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") # merge with data.frame, on sample information - adata.obs = adata.obs.join(df_samplesheet, on="sample") - adata.write_h5ad(args["out"], compression="gzip") + adata.obs = adata.obs.join(df_samplesheet, on="sample", how="left").astype(str) + adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad") - print("Wrote h5ad file to {}".format(args["out"])) + print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad")) diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py new file mode 100755 index 00000000..d54a1667 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_alevin.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ +import scanpy as sc +import pandas as pd +import argparse +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + + adata = sc.read_mtx(f"{input}/quants_mat.mtx") + adata.obs_names = pd.read_csv(f"{input}/quants_mat_rows.txt", header=None, sep="\\t")[0].values + adata.var_names = pd.read_csv(f"{input}/quants_mat_cols.txt", header=None, sep="\\t")[0].values + adata.obs["sample"] = sample + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__ + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + # TODO: how to get gene_symbols for alevin? 
+ adata.var['gene_versions'] = adata.var.index + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +input_to_adata( + input_data="${meta.id}_alevin_results/af_quant/alevin/", + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) + +# dump versions +dump_versions() diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py new file mode 100755 index 00000000..ecc5c077 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_cellranger.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." + +import scanpy as sc +import pandas as pd +import argparse +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + + adata = sc.read_10x_h5(input) + adata.var["gene_symbols"] = adata.var_names + adata.var.set_index("gene_ids", inplace=True) + adata.obs["sample"] = sample + + # reorder columns for 10x mtx files + adata.var = adata.var[["gene_symbols", "feature_types", "genome"]] + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__ + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + adata.var['gene_versions'] = adata.var.index + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + + # dump versions + dump_versions() + + return adata + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +adata = input_to_adata( + input_data="${meta.input_type}_feature_bc_matrix.h5", + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py new file mode 100755 index 00000000..8d0f0909 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_kallisto.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ +import scanpy as sc +import pandas as pd +import argparse +from anndata import AnnData +import platform +import glob + +def _mtx_to_adata( + matrix: str, + barcodes: str, + features: str, + t2g: str, + sample: str +): + + adata = sc.read_mtx(matrix) + adata.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values + adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values + adata.obs["sample"] = sample + + txp2gene = pd.read_table(f"{t2g}", header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2]) + txp2gene = txp2gene.drop_duplicates(subset="gene_id").set_index("gene_id") + adata.var = adata.var.join(txp2gene, how="left") + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__ + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + matrix: str, + barcodes: str, + features: str, + t2g: str, + output: str, + sample: str, +): + print(f"Reading in {matrix}") + + # open main data + adata = _mtx_to_adata(matrix=matrix, barcodes=barcodes, features=features, sample=sample, t2g=t2g) + + # standard format + # index are gene IDs and symbols are a column + adata.var['gene_versions'] = adata.var.index + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote 
h5ad file to {output}") + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +if "${params.kb_workflow}" == "standard": + input_to_adata( + matrix=glob.glob("${inputs}/*.mtx")[0], + barcodes=glob.glob("${inputs}/*.barcodes.txt")[0], + features=glob.glob("${inputs}/*.genes.txt")[0], + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}", + t2g="${txp2gene}" + ) + +else: + for type in ['spliced', 'unspliced']: + input_to_adata( + matrix=glob.glob("${inputs}/" + f"{type}*.mtx")[0], + barcodes=glob.glob("${inputs}/" + f"{type}*.barcodes.txt")[0], + features=glob.glob("${inputs}/" + f"{type}*.genes.txt")[0], + output="${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad", + sample="${meta.id}", + t2g="${txp2gene}" + ) + +# dump versions +dump_versions() diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py new file mode 100755 index 00000000..e44d2478 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_star.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." + +import scanpy as sc +import pandas as pd +import argparse +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + adata = sc.read_10x_mtx(input) + adata.obs["sample"] = sample + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__ + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + adata.var["gene_symbol"] = adata.var.index + adata.var['gene_versions'] = adata.var["gene_ids"] + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +input_to_adata( + input_data="${meta.input_type}", + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) + +# dump versions +dump_versions() diff --git a/modules/nf-core/cellbender/removebackground/environment.yml b/modules/nf-core/cellbender/removebackground/environment.yml new file mode 100644 index 00000000..a157c522 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cellbender=0.3.0 diff --git a/modules/nf-core/cellbender/removebackground/main.nf b/modules/nf-core/cellbender/removebackground/main.nf new file mode 100644 index 00000000..f3cfd1ff --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/main.nf @@ -0,0 +1,65 @@ +process CELLBENDER_REMOVEBACKGROUND { + tag "$meta.id" 
+ label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/cellbender:0.3.0--c4addb97ab2d83fe': + 'community.wave.seqera.io/library/cellbender:0.3.0--41318a055fc3aacb' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${prefix}.h5") , emit: h5 + tuple val(meta), path("${prefix}_filtered.h5") , emit: filtered_h5 + tuple val(meta), path("${prefix}_posterior.h5") , emit: posterior_h5 + tuple val(meta), path("${prefix}_cell_barcodes.csv"), emit: barcodes + tuple val(meta), path("${prefix}_metrics.csv") , emit: metrics + tuple val(meta), path("${prefix}_report.html") , emit: report + tuple val(meta), path("${prefix}.pdf") , emit: pdf + tuple val(meta), path("${prefix}.log") , emit: log + tuple val(meta), path("ckpt.tar.gz") , emit: checkpoint + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + args = task.ext.args ?: "" + use_gpu = task.ext.use_gpu ? "--cuda" : "" + """ + TMPDIR=. 
cellbender remove-background \ + ${args} \ + --cpu-threads ${task.cpus} \ + ${use_gpu} \ + --input ${h5ad} \ + --output ${prefix}.h5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.h5" + touch "${prefix}_filtered.h5" + touch "${prefix}_posterior.h5" + touch "${prefix}_cell_barcodes.csv" + touch "${prefix}_metrics.csv" + touch "${prefix}_report.html" + touch "${prefix}.pdf" + touch "${prefix}.log" + touch "ckpt.tar.gz" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellbender/removebackground/meta.yml b/modules/nf-core/cellbender/removebackground/meta.yml new file mode 100644 index 00000000..d70fa3fd --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/meta.yml @@ -0,0 +1,75 @@ +name: cellbender_removebackground +description: Module to use CellBender to estimate ambient RNA from single-cell RNA-seq data +keywords: + - single-cell + - scRNA-seq + - ambient RNA removal +tools: + - cellbender: + description: CellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data. + documentation: https://cellbender.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/broadinstitute/CellBender + licence: ["BSD-3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - h5ad: + type: file + description: AnnData file containing unfiltered data (with empty droplets) + pattern: "*.h5ad" +output: + - h5: + type: file + description: Full count matrix as an h5 file, with background RNA removed. This file contains all the original droplet barcodes. + pattern: "*.h5" + - filtered_h5: + type: file + description: | + Full count matrix as an h5 file, with background RNA removed. 
This file contains only the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. + pattern: "*.h5" + - posterior_h5: + type: file + description: | + The full posterior probability of noise counts. This is not normally used downstream. + pattern: "*.h5" + - barcodes: + type: file + description: | + CSV file containing all the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. | + Barcodes are written in plain text. This information is also contained in each of the above outputs, | + but is included as a separate output for convenient use in certain downstream applications. + pattern: "*.csv" + - metrics: + type: file + description: | + Metrics describing the run, potentially to be used to flag problematic runs | + when using CellBender as part of a large-scale automated pipeline. + pattern: "*.csv" + - report: + type: file + description: | + HTML report including plots and commentary, along with any warnings or suggestions for improved parameter settings. + pattern: "*.html" + - pdf: + type: file + description: PDF file that provides a standard graphical summary of the inference procedure. + pattern: "*.pdf" + - log: + type: file + description: Log file produced by the cellbender remove-background run. + pattern: "*.log" + - checkpoint: + type: file + description: Checkpoint file which contains the trained model and the full posterior. 
+ pattern: "*.ckpt" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@nictru" +maintainers: + - "@nictru" diff --git a/modules/nf-core/cellbender/removebackground/tests/epochs.config b/modules/nf-core/cellbender/removebackground/tests/epochs.config new file mode 100644 index 00000000..96282b07 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/epochs.config @@ -0,0 +1,6 @@ + +process { + withName: CELLBENDER_REMOVEBACKGROUND { + ext.args = '--epochs 20' + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test b/modules/nf-core/cellbender/removebackground/tests/main.nf.test new file mode 100644 index 00000000..1afa6f3b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + name 'Test Process CELLBENDER_REMOVEBACKGROUND' + script '../main.nf' + process 'CELLBENDER_REMOVEBACKGROUND' + + tag "modules" + tag "modules_nfcore" + tag "cellbender/removebackground" + tag "cellbender" + + test("test_cellbender_removebackground") { + config './epochs.config' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert file(process.out.h5.get(0).get(1)).exists()}, + {assert file(process.out.filtered_h5.get(0).get(1)).exists()}, + {assert file(process.out.posterior_h5.get(0).get(1)).exists()}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics")}, + {assert file(process.out.report.get(0).get(1)).exists()}, + {assert file(process.out.pdf.get(0).get(1)).exists()}, + {assert file(process.out.log.get(0).get(1)).exists()}, + {assert 
snapshot(process.out.versions).match("cellbender_removebackground_versions")} + ) + } + } + + test("test_cellbender_removebackground - stub") { + options '-stub' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.h5).match("cellbender_removebackground_h5_stub")}, + {assert snapshot(process.out.filtered_h5).match("cellbender_removebackground_filtered_h5_stub")}, + {assert snapshot(process.out.posterior_h5).match("cellbender_removebackground_posterior_h5_stub")}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes_stub")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics_stub")}, + {assert snapshot(process.out.report).match("cellbender_removebackground_report_stub")}, + {assert snapshot(process.out.pdf).match("cellbender_removebackground_pdf_stub")}, + {assert snapshot(process.out.log).match("cellbender_removebackground_log_stub")}, + {assert snapshot(process.out.versions).match("cellbender_removebackground_versions_stub")} + ) + } + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap new file mode 100644 index 00000000..fdb51d66 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "cellbender_removebackground_versions": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.33127881" + }, + "cellbender_removebackground_filtered_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_filtered.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + 
"meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.833598082" + }, + "cellbender_removebackground_pdf_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.891829278" + }, + "cellbender_removebackground_metrics": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,88272bde1c157528b0b0ab2abe5ad26f" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.327155805" + }, + "cellbender_removebackground_versions_stub": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.904614838" + }, + "cellbender_removebackground_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.829304361" + }, + "cellbender_removebackground_metrics_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.870469733" + }, + "cellbender_removebackground_log_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.899293304" + }, + "cellbender_removebackground_barcodes": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,c8e8df9d0f9aea976d6f6aa36d329429" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.316098811" 
+ }, + "cellbender_removebackground_report_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.885307244" + }, + "cellbender_removebackground_posterior_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_posterior.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.838032754" + }, + "cellbender_removebackground_barcodes_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.861284979" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellbender/removebackground/tests/tags.yml b/modules/nf-core/cellbender/removebackground/tests/tags.yml new file mode 100644 index 00000000..d935083b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/tags.yml @@ -0,0 +1,2 @@ +cellbender/removebackground: + - modules/nf-core/cellbender/removebackground/** diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 764c08f8..ae98cc85 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -1,11 +1,11 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' -include { ALEVINQC } from '../../modules/local/alevinqc' -include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf_index' -include { SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' +include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' +include { ALEVINQC } from '../../modules/local/alevinqc' +include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf_index' +include { 
SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ -include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/gffread/main' def multiqc_report = [] @@ -44,8 +44,6 @@ workflow SCRNASEQ_ALEVIN { } } - - /* * Perform quantification with salmon alevin */ @@ -66,6 +64,6 @@ workflow SCRNASEQ_ALEVIN { emit: ch_versions - alevin_results = SIMPLEAF_QUANT.out.alevin_results - alevinqc = ALEVINQC.out.report + alevin_results = SIMPLEAF_QUANT.out.alevin_results.map{ meta, files -> [meta + [input_type: 'raw'], files] } + alevinqc = ALEVINQC.out.report } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 2461373b..d787e0f0 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -2,9 +2,9 @@ * Alignment with Cellranger */ -include {CELLRANGER_MKGTF} from "../../modules/nf-core/cellranger/mkgtf/main.nf" -include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf" -include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf" +include { CELLRANGER_MKGTF } from "../../modules/nf-core/cellranger/mkgtf/main.nf" +include { CELLRANGER_MKREF } from "../../modules/nf-core/cellranger/mkref/main.nf" +include { CELLRANGER_COUNT } from "../../modules/nf-core/cellranger/count/main.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGER_ALIGN { @@ -49,7 +49,7 @@ workflow CELLRANGER_ALIGN { mtx_files.each{ if ( it.toString().contains("raw_feature_bc_matrix") ) { desired_files.add( it ) } } - [ meta, desired_files ] + [ meta + [input_type: 'raw'], desired_files ] } ch_matrices_filtered = @@ -58,12 +58,13 @@ workflow CELLRANGER_ALIGN { mtx_files.each{ if ( it.toString().contains("filtered_feature_bc_matrix") ) { desired_files.add( it ) } } - [ meta, 
desired_files ] + [ meta + [input_type: 'filtered'], desired_files ] } emit: ch_versions - cellranger_out = CELLRANGER_COUNT.out.outs - cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered ) - star_index = cellranger_index + cellranger_out = CELLRANGER_COUNT.out.outs + cellranger_matrices_raw = ch_matrices_raw + cellranger_matrices_filtered = ch_matrices_filtered + star_index = cellranger_index } diff --git a/subworkflows/local/align_cellrangermulti.nf b/subworkflows/local/align_cellrangermulti.nf index 977bf478..f13c7bf1 100644 --- a/subworkflows/local/align_cellrangermulti.nf +++ b/subworkflows/local/align_cellrangermulti.nf @@ -204,8 +204,9 @@ workflow CELLRANGER_MULTI_ALIGN { emit: ch_versions - cellrangermulti_out = CELLRANGER_MULTI.out.outs - cellrangermulti_mtx = ch_matrices_raw.mix( ch_matrices_filtered ) + cellrangermulti_out = CELLRANGER_MULTI.out.outs + cellrangermulti_mtx_raw = ch_matrices_raw + cellrangermulti_mtx_filtered = ch_matrices_filtered } def parse_demultiplexed_output_channels(in_ch, pattern) { diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf new file mode 100644 index 00000000..2ccacc26 --- /dev/null +++ b/subworkflows/local/emptydrops_removal.nf @@ -0,0 +1,33 @@ +include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground' +include { ADATA_BARCODES } from '../../modules/local/adata_barcodes' + +workflow EMPTY_DROPLET_REMOVAL { + take: + ch_unfiltered + + main: + ch_versions = Channel.empty() + + CELLBENDER_REMOVEBACKGROUND(ch_unfiltered) + ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions) + + ch_combined = + ch_unfiltered + .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) + .map { meta, h5ad, csv -> + def meta_clone = meta.clone() + meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter') + + [ meta_clone, h5ad, csv ] + } + + ADATA_BARCODES(ch_combined) + ch_versions = 
ch_versions.mix(ADATA_BARCODES.out.versions) + + ch_h5ad = ADATA_BARCODES.out.h5ad + + emit: + h5ad = ch_h5ad + + versions = ch_versions +} diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3deee2c5..5f8d9bcc 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -55,20 +55,20 @@ workflow KALLISTO_BUSTOOLS { // get raw/filtered counts ch_raw_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir -> if (file("${kb_dir.toUriString()}/counts_unfiltered").exists()) { - [meta, file("${kb_dir.toUriString()}/counts_unfiltered")] + [meta + [input_type: 'raw'], file("${kb_dir.toUriString()}/counts_unfiltered")] } } ch_filtered_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir -> if (file("${kb_dir.toUriString()}/counts_filtered").exists()) { - [meta, file("${kb_dir.toUriString()}/counts_filtered")] + [meta + [input_type: 'filtered'], file("${kb_dir.toUriString()}/counts_filtered")] } } emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.count - raw_counts = ch_raw_counts - filtered_counts = ch_filtered_counts + counts_raw = ch_raw_counts + counts_filtered = ch_filtered_counts txp2gene = txp2gene.collect() } diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 98e49a2e..9891536d 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -1,62 +1,91 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' -include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' -include { MTX_TO_SEURAT } from '../../modules/local/mtx_to_seurat.nf' +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { ANNDATAR_CONVERT } from '../../modules/local/anndatar_convert' +include { EMPTY_DROPLET_REMOVAL } from '../../subworkflows/local/emptydrops_removal' 
workflow MTX_CONVERSION { take: mtx_matrices - samplesheet txp2gene star_index + samplesheet main: ch_versions = Channel.empty() - - // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. - // Keeping backwards compatibility with cellranger-arc. - // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here. - if (params.aligner in [ 'cellrangerarc' ]) { - mtx_matrices = mtx_matrices.map { meta, mtx_files -> - [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] - } - .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files - } + ch_h5ads = Channel.empty() // - // Convert matrix to h5ad + // MODULE: Convert matrices to h5ad // MTX_TO_H5AD ( mtx_matrices, txp2gene, star_index ) + ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first()) + + // fix channel size when kallisto non-standard workflow + if (params.aligner == 'kallisto' && !(params.kb_workflow == 'standard')) { + ch_h5ads = + MTX_TO_H5AD.out.h5ad + .transpose() + .map { meta, h5ad -> + def meta_clone = meta.clone() + def spc_prefix = h5ad.toString().contains('unspliced') ? 
'un' : '' + + meta_clone["input_type"] = "${meta.input_type}_${spc_prefix}spliced" + + [ meta_clone, h5ad ] + } + } else { + ch_h5ads = MTX_TO_H5AD.out.h5ad + } + + // + // SUBWORKFLOW: Run cellbender emptydrops filter + // + if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) { + + // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it + EMPTY_DROPLET_REMOVAL ( + ch_h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') } + ) + ch_h5ads = ch_h5ads.mix( EMPTY_DROPLET_REMOVAL.out.h5ad ) + + } // // Concat sample-specific h5ad in one // - ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type + ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) // which nextflow break because it is not a valid 'path' thus, we have to remove one level // making it as [ mtx_1, mtx_2 ] ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } } + CONCAT_H5AD ( ch_concat_h5ad_input, samplesheet ) + ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file -> + def meta_clone = meta.clone() + meta_clone.id = 'combined' // maintain output prefix + [ meta_clone, file ] + } // - // Convert matrix do seurat + // MODULE: Convert to Rds with AnndataR package // - MTX_TO_SEURAT ( - mtx_matrices + ANNDATAR_CONVERT ( + ch_h5ads.mix( ch_h5ad_concat ) ) //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output - ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions) + // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions) emit: ch_versions diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 
0c11acd1..aadda6b6 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -1,5 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { STAR_ALIGN } from '../../modules/local/star_align' +include { STAR_ALIGN } from '../../modules/local/star_align' +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/gunzip/main' @@ -53,14 +54,12 @@ workflow STARSOLO { ) ch_versions = ch_versions.mix(STAR_ALIGN.out.versions) - emit: ch_versions // get rid of meta for star index - star_index = star_index.map{ meta, index -> index } - star_result = STAR_ALIGN.out.tab - star_counts = STAR_ALIGN.out.counts - raw_counts = STAR_ALIGN.out.raw_counts - filtered_counts = STAR_ALIGN.out.filtered_counts - for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } + star_result = STAR_ALIGN.out.tab + star_counts = STAR_ALIGN.out.counts + raw_counts = STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] } + filtered_counts = STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } + for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } } diff --git a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf index dd54d352..16569d5a 100644 --- a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf @@ -202,9 +202,12 @@ def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { return params.genomes[ params.genome ][ attribute ] + } else { + return null } + } else { + return null } - return null } // diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 10ced221..418f6711 100644 --- a/workflows/scrnaseq.nf +++ 
b/workflows/scrnaseq.nf @@ -3,13 +3,12 @@ include { FASTQC_CHECK } from '../subworkflows/local/fastq include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' -include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" -include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" -include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" +include { CELLRANGER_ALIGN } from '../subworkflows/local/align_cellranger' +include { CELLRANGER_MULTI_ALIGN } from '../subworkflows/local/align_cellrangermulti' +include { CELLRANGERARC_ALIGN } from '../subworkflows/local/align_cellrangerarc' +include { UNIVERSC_ALIGN } from '../subworkflows/local/align_universc' +include { MTX_CONVERSION } from '../subworkflows/local/mtx_conversion' include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' -include { EMPTYDROPS_CELL_CALLING } from '../modules/local/emptydrops' include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -32,12 +31,11 @@ workflow SCRNASEQ { error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" } - params.fasta = getGenomeAttribute('fasta') - params.gtf = getGenomeAttribute('gtf') - params.star_index = getGenomeAttribute('star') - - ch_genome_fasta = params.fasta ? file(params.fasta, checkIfExists: true) : [] - ch_gtf = params.gtf ? file(params.gtf, checkIfExists: true) : [] + // search igenomes, but overwrite with user paths + // cannot use 'params. 
= ' in workflow, it does not overwrite parameter + def fasta_file = params.fasta ? params.fasta : getGenomeAttribute('fasta') + def gtf_file = params.gtf ? params.gtf : getGenomeAttribute('gtf') + def star_index = params.star_index ? params.star_index : getGenomeAttribute('star') // general input and params ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] @@ -71,7 +69,7 @@ workflow SCRNASEQ { ch_salmon_index = params.salmon_index ? file(params.salmon_index) : [] //star params - star_index = params.star_index ? file(params.star_index, checkIfExists: true) : null + star_index = star_index ? file(star_index, checkIfExists: true) : null ch_star_index = star_index ? [[id: star_index.baseName], star_index] : [] star_feature = params.star_feature @@ -86,7 +84,7 @@ workflow SCRNASEQ { ch_multi_samplesheet = params.cellranger_multi_barcodes ? file(params.cellranger_multi_barcodes, checkIfExists: true) : [] empty_file = file("$projectDir/assets/EMPTY", checkIfExists: true) - ch_versions = Channel.empty() + ch_versions = Channel.empty() ch_mtx_matrices = Channel.empty() // Run FastQC @@ -99,24 +97,24 @@ workflow SCRNASEQ { // // Uncompress genome fasta file if required // - if (params.fasta) { - if (params.fasta.endsWith('.gz')) { - ch_genome_fasta = GUNZIP_FASTA ( [ [:], file(params.fasta) ] ).gunzip.map { it[1] } + if (fasta_file) { + if (fasta_file.endsWith('.gz')) { + ch_genome_fasta = GUNZIP_FASTA ( [ [:], file(fasta_file) ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_genome_fasta = Channel.value( file(params.fasta) ) + ch_genome_fasta = Channel.value( file(fasta_file) ) } } // // Uncompress GTF annotation file or create from GFF3 if required // - if (params.gtf) { - if (params.gtf.endsWith('.gz')) { - ch_gtf = GUNZIP_GTF ( [ [:], file(params.gtf) ] ).gunzip.map { it[1] } + if (gtf_file) { + if (gtf_file.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [ [:], file(gtf_file) ] ).gunzip.map { 
it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { - ch_gtf = Channel.value( file(params.gtf) ) + ch_gtf = Channel.value( file(gtf_file) ) } } @@ -137,7 +135,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts) + ch_mtx_matrices = ch_mtx_matrices.mix( KALLISTO_BUSTOOLS.out.counts_raw, KALLISTO_BUSTOOLS.out.counts_filtered ) ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene } @@ -155,7 +153,7 @@ workflow SCRNASEQ { ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it }) - ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) + ch_mtx_matrices = ch_mtx_matrices.mix( SCRNASEQ_ALEVIN.out.alevin_results ) } // Run STARSolo pipeline @@ -171,9 +169,8 @@ workflow SCRNASEQ { protocol_config.get('extra_args', ""), ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts) - ch_star_index = STARSOLO.out.star_index ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc) + ch_mtx_matrices = ch_mtx_matrices.mix( STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts ) } // Run cellranger pipeline @@ -186,9 +183,8 @@ workflow SCRNASEQ { protocol_config['protocol'] ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices) - ch_star_index = CELLRANGER_ALIGN.out.star_index - ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{ + ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_ALIGN.out.cellranger_matrices_raw, CELLRANGER_ALIGN.out.cellranger_matrices_filtered ) + ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map { meta, outs -> 
outs.findAll{ it -> it.name == "web_summary.html"} }) } @@ -285,38 +281,16 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{ meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" } }) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx) - - } - - // Run emptydrops calling module - if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) { - - // - // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it - // - if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) { - ch_mtx_matrices_for_emptydrops = - ch_mtx_matrices.filter { meta, mtx_files -> - mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger - mtx_files.toString().contains("counts_unfiltered") || // kallisto - mtx_files.toString().contains("raw") // star - } - } else { - ch_mtx_matrices_for_emptydrops = ch_mtx_matrices - } - - EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops ) - ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices ) + ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_raw, CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_filtered ) } // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices, - ch_input, ch_txp2gene, - ch_star_index + ch_star_index, + ch_input ) //Add Versions from MTX Conversion workflow too