diff --git a/CHANGELOG.md b/CHANGELOG.md index b42791db..a9d9a7e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.6.0dev - [date] -- [[#91](https://github.com/nf-core/scrnaseq/issues/91)] - Change from pytests to nf-test +- Change from pytests to nf-test ([#291](https://github.com/nf-core/scrnaseq/pull/291)) +- Update template to v2.13.1 ([#309](https://github.com/nf-core/scrnaseq/pull/309)) +- Update to kallisto|bustools v0.28.2 ([#294](https://github.com/nf-core/scrnaseq/pull/294)) ## v2.5.1 diff --git a/modules.json b/modules.json index 8aa2641e..6040254b 100644 --- a/modules.json +++ b/modules.json @@ -52,12 +52,12 @@ }, "kallistobustools/count": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "de8215983defba48cd81961d620a9e844f11c7e7", "installed_by": ["modules"] }, "kallistobustools/ref": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "de8215983defba48cd81961d620a9e844f11c7e7", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf deleted file mode 100644 index 9fd29e0a..00000000 --- a/modules/local/gene_map.nf +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Reformat design file and check validity - */ -process GENE_MAP { - tag "$gtf" - label 'process_low' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path gtf - - output: - path "transcripts_to_genes.txt" , emit: gene_map - - when: - task.ext.when == null || task.ext.when - - script: - if("${gtf}".endsWith('.gz')){ - name = "${gtf.baseName}" - unzip = "gunzip -f ${gtf}" - } else { - unzip = "" - name = "${gtf}" - } - """ - $unzip - cat $name | t2g.py --use_version > transcripts_to_genes.txt - """ -} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 84d98608..c991b695 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -27,7 +27,7 @@ process MTX_TO_H5AD { if (params.aligner == 'kallisto') { mtx_matrix = "*count/counts_unfiltered/*.mtx" barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt" - features_tsv = "*count/counts_unfiltered/*.genes.txt" + features_tsv = "*count/counts_unfiltered/*.genes.names.txt" } else if (params.aligner == 'alevin') { mtx_matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" barcodes_tsv = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" @@ -54,13 +54,13 @@ process MTX_TO_H5AD { else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in nascent ambiguous mature ; do mtx_to_h5ad.py \\ --aligner ${params.aligner} \\ --sample ${meta.id} \\ - --input *count/counts_unfiltered/\${input_type}.mtx \\ - --barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - --feature *count/counts_unfiltered/\${input_type}.genes.txt \\ + --input *count/counts_unfiltered/cells_x_genes.\${input_type}.mtx \\ + --barcode $barcodes_tsv \\ + --feature $features_tsv \\ --txp2gene ${txp2gene} \\ --star_index ${star_index} \\ --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index d83575a4..82ee63cd 100644 --- a/modules/local/mtx_to_seurat.nf +++ 
b/modules/local/mtx_to_seurat.nf @@ -26,7 +26,7 @@ process MTX_TO_SEURAT { } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" - features = "*count/counts_unfiltered/*.genes.txt" + features = "*count/counts_unfiltered/*.genes.names.txt" } else if (params.aligner == "alevin") { matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" @@ -43,11 +43,11 @@ process MTX_TO_SEURAT { if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in nascent ambiguous mature ; do mtx_to_seurat.R \\ - *count/counts_unfiltered/\${input_type}.mtx \\ - *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - *count/counts_unfiltered/\${input_type}.genes.txt \\ + *count/counts_unfiltered/cells_x_genes.\${input_type}.mtx \\ + $barcodes \\ + $features \\ ${meta.id}/${meta.id}_\${input_type}_matrix.rds \\ ${aligner} done diff --git a/modules/nf-core/kallistobustools/count/environment.yml b/modules/nf-core/kallistobustools/count/environment.yml index 7ff8a2da..024f0afc 100644 --- a/modules/nf-core/kallistobustools/count/environment.yml +++ b/modules/nf-core/kallistobustools/count/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::kb-python=0.27.2 + - bioconda::kb-python=0.28.2 diff --git a/modules/nf-core/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf index 036bb35d..841ea2fe 100644 --- a/modules/nf-core/kallistobustools/count/main.nf +++ b/modules/nf-core/kallistobustools/count/main.nf @@ -4,8 +4,8 @@ process KALLISTOBUSTOOLS_COUNT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : - 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/kb-python:0.28.2--pyhdfd78af_2' : + 'biocontainers/kb-python:0.28.2--pyhdfd78af_2' }" input: tuple val(meta), path(reads) @@ -14,11 +14,12 @@ process KALLISTOBUSTOOLS_COUNT { path t1c path t2c val technology + val workflow_mode output: - tuple val(meta), path ("*.count"), emit: count - path "versions.yml" , emit: versions - path "*.count/*/*.mtx" , emit: matrix //Ensure that kallisto finished and produced outputs + tuple val(meta), path ("*.count") , emit: count + path "versions.yml" , emit: versions + path "*.count/*/*.mtx" , emit: matrix //Ensure that kallisto finished and produced outputs when: task.ext.when == null || task.ext.when @@ -27,7 +28,7 @@ process KALLISTOBUSTOOLS_COUNT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def cdna = t1c ? "-c1 $t1c" : '' - def introns = t2c ? "-c2 $t2c" : '' + def intron = t2c ? 
"-c2 $t2c" : '' def memory = task.memory.toGiga() - 1 """ kb \\ @@ -36,12 +37,25 @@ process KALLISTOBUSTOOLS_COUNT { -i $index \\ -g $t2g \\ $cdna \\ - $introns \\ + $intron \\ -x $technology \\ + --workflow $workflow_mode \\ $args \\ -o ${prefix}.count \\ - ${reads.join( " " )} \\ - -m ${memory}G + -m ${memory}G \\ + ${reads.join( " " )} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}.count/counts_unfiltered/ + touch ${prefix}.count/counts_unfiltered/cells_x_genes.mtx cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml index 7491248c..55d5dc6c 100644 --- a/modules/nf-core/kallistobustools/count/meta.yml +++ b/modules/nf-core/kallistobustools/count/meta.yml @@ -12,7 +12,7 @@ tools: homepage: https://www.kallistobus.tools/ documentation: https://kb-python.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/pachterlab/kb_python - licence: MIT License + licence: ["MIT"] input: - meta: type: map @@ -34,16 +34,16 @@ input: pattern: "*t2g.txt" - t1c: type: file - description: kb ref's c1 spliced_t2c file + description: kb ref's c1 cdna_t2c file pattern: "*.{cdna_t2c.txt}" - t2c: type: file - description: kb ref's c2 unspliced_t2c file - pattern: "*.{introns_t2c.txt}" + description: kb ref's c2 intron_t2c file + pattern: "*.{intron_t2c.txt}" - workflow_mode: type: string - description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" - pattern: "{standard,lamanno,nucleus,kite}" + description: String value defining workflow to use, can be one of "standard", "nac", "lamanno" (obsolete) + pattern: "{standard,lamanno,nac}" - technology: type: string description: String value defining the 
sequencing technology used. diff --git a/modules/nf-core/kallistobustools/count/tests/main.nf.test b/modules/nf-core/kallistobustools/count/tests/main.nf.test new file mode 100644 index 00000000..550001f9 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_process { + + name "Test Process KALLISTOBUSTOOLS_COUNT" + script "../main.nf" + process "KALLISTOBUSTOOLS_COUNT" + + tag "modules" + tag "modules_nfcore" + tag "kallistobustools" + tag "kallistobustools/count" + tag "kallistobustools/ref" + + setup { + run("KALLISTOBUSTOOLS_REF") { + script "../../ref/main.nf" + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "standard" + """ + } + } + } + + test("genome.fasta + genome.gtf + '10X3' + 'standard'") { + + when { + process { + """ + input[0] = Channel.of( + [ + [id:'test'], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = KALLISTOBUSTOOLS_REF.out.index + input[2] = KALLISTOBUSTOOLS_REF.out.t2g + input[3] = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. + input[4] = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. 
+ input[5] = "10XV3" + input[6] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.matrix, + path(process.out.count.get(0).get(1)).list().findAll { file(it.toString()).name != "run_info.json" && file(it.toString()).name != "kb_info.json" }, + file(path(process.out.count.get(0).get(1)).list().find { file(it.toString()).name == "kb_info.json" }.toString()).readLines()[15..22], + file(path(process.out.count.get(0).get(1)).list().find { file(it.toString()).name == "run_info.json" }.toString()).readLines()[0..9] + ).match() + } + ) + } + } + + test("genome.fasta + genome.gtf + '10X3' + 'standard' - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of( + [ + [id:'test'], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = KALLISTOBUSTOOLS_REF.out.index + input[2] = KALLISTOBUSTOOLS_REF.out.t2g + input[3] = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. + input[4] = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. 
+ input[5] = "10XV3" + input[6] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap b/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap new file mode 100644 index 00000000..3378c3c1 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "genome.fasta + genome.gtf + '10X3' + 'standard' - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "1": [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ], + "2": [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "count": [ + [ + { + "id": "test" + }, + [ + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "matrix": [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T15:48:45.775904562" + }, + "genome.fasta + genome.gtf + '10X3' + 'standard'": { + "content": [ + [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ], + [ + "cells_x_genes.mtx:md5,e12a45e7f7d6527f698dd9cb2e99ecd1" + ], + [ + "10x_version3_whitelist.txt:md5,3d36d0a4021fd292b265e2b5e72aaaf3", + [ + "cells_x_genes.barcodes.txt:md5,41f7adaf43b60f2f4f62d6a7073688de", + "cells_x_genes.genes.names.txt:md5,b29afa75be300c7f24fbd0740a66689b", + "cells_x_genes.genes.txt:md5,fe6d5501923867b514a0447aa4b4995f", + "cells_x_genes.mtx:md5,e12a45e7f7d6527f698dd9cb2e99ecd1" + ], + "inspect.json:md5,bafb47a58ac1bbf9be953f21c361d266", + "matrix.ec:md5,31a4c1a3e8e0c562b12f6569ffbf5459", + "output.bus:md5,d6fa0612a4a16eaf8a3e08bdc13ff49c", + "output.unfiltered.bus:md5,bf899b967657f612ba864188868d58cc", + 
"transcripts.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + " \"commands\": [", + " \"kallisto bus -i kb_ref_out.idx -o test.count -x 10XV3 -t 2 subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz\",", + " \"bustools sort -o test.count/tmp/output.s.bus -T test.count/tmp -t 2 -m 2G test.count/output.bus\",", + " \"bustools inspect -o test.count/inspect.json -w test.count/10x_version3_whitelist.txt test.count/tmp/output.s.bus\",", + " \"bustools correct -o test.count/tmp/output.s.c.bus -w test.count/10x_version3_whitelist.txt test.count/tmp/output.s.bus\",", + " \"bustools sort -o test.count/output.unfiltered.bus -T test.count/tmp -t 2 -m 2G test.count/tmp/output.s.c.bus\",", + " \"bustools count -o test.count/counts_unfiltered/cells_x_genes -g t2g.txt -e test.count/matrix.ec -t test.count/transcripts.txt --genecounts --umi-gene test.count/output.unfiltered.bus\"", + " ]," + ], + [ + "{", + "\t\"n_targets\": 12,", + "\t\"n_bootstraps\": 0,", + "\t\"n_processed\": 10000,", + "\t\"n_pseudoaligned\": 26,", + "\t\"n_unique\": 26,", + "\t\"p_pseudoaligned\": 0.3,", + "\t\"p_unique\": 0.3,", + "\t\"kallisto_version\": \"0.50.1\",", + "\t\"index_version\": 13," + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T15:48:24.476953631" + } +} \ No newline at end of file diff --git a/modules/nf-core/kallistobustools/count/tests/nextflow.config b/modules/nf-core/kallistobustools/count/tests/nextflow.config new file mode 100644 index 00000000..7a5cbfb4 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: KALLISTOBUSTOOLS_COUNT { + ext.args = '--cellranger -m 1' + } +} diff --git a/modules/nf-core/kallistobustools/count/tests/tags.yml b/modules/nf-core/kallistobustools/count/tests/tags.yml new file mode 100644 index 00000000..9c432071 --- /dev/null +++ 
b/modules/nf-core/kallistobustools/count/tests/tags.yml @@ -0,0 +1,2 @@ +kallistobustools/count: + - "modules/nf-core/kallistobustools/count/**" diff --git a/modules/nf-core/kallistobustools/ref/environment.yml b/modules/nf-core/kallistobustools/ref/environment.yml index acbd0e0a..6ae07a8c 100644 --- a/modules/nf-core/kallistobustools/ref/environment.yml +++ b/modules/nf-core/kallistobustools/ref/environment.yml @@ -4,5 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::kb-python=0.27.2 + - bioconda::kb-python=0.28.2 - conda-forge::requests>=2.23.0 diff --git a/modules/nf-core/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf index 68d72ca9..0b45203d 100644 --- a/modules/nf-core/kallistobustools/ref/main.nf +++ b/modules/nf-core/kallistobustools/ref/main.nf @@ -4,8 +4,8 @@ process KALLISTOBUSTOOLS_REF { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : - 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/kb-python:0.28.2--pyhdfd78af_2' : + 'biocontainers/kb-python:0.28.2--pyhdfd78af_2' }" input: path fasta @@ -62,4 +62,32 @@ process KALLISTOBUSTOOLS_REF { END_VERSIONS """ } + + stub: + if (workflow_mode == "standard") { + """ + touch kb_ref_out.idx + touch t2g.txt + touch cdna.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') + END_VERSIONS + """ + } else { + """ + touch kb_ref_out.idx + touch t2g.txt + touch cdna.fa + touch intron.fa + touch cdna_t2c.txt + touch intron_t2c.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') + END_VERSIONS + """ + } } diff 
--git a/modules/nf-core/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml index 00be5143..64deab9b 100644 --- a/modules/nf-core/kallistobustools/ref/meta.yml +++ b/modules/nf-core/kallistobustools/ref/meta.yml @@ -14,7 +14,7 @@ tools: documentation: https://kb-python.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/pachterlab/kb_python doi: "10.22002/D1.1876" - licence: MIT License + licence: ["MIT"] input: - fasta: type: file @@ -26,8 +26,8 @@ input: pattern: "*.{gtf,gtf.gz}" - workflow_mode: type: string - description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" - pattern: "{standard,lamanno,nucleus}" + description: String value defining workflow to use, can be one of "standard", "nac", "lamanno" (obsolete) + pattern: "{standard,lamanno,nac}" output: - versions: type: file @@ -43,19 +43,19 @@ output: pattern: "*t2g.{txt}" - cdna: type: file - description: Cdna fasta file + description: cDNA fasta file pattern: "*cdna.{fa}" - intron: type: file - description: intron fasta file + description: Intron fasta file pattern: "*intron.{fa}" - cdna_t2c: type: file - description: cdna transcript to capture file + description: cDNA transcript to capture file pattern: "*cdna_t2c.{txt}" - intron_t2c: type: file - description: intron transcript to capture file + description: Intron transcript to capture file pattern: "*intron_t2c.{txt}" authors: - "@flowuenne" diff --git a/modules/nf-core/kallistobustools/ref/tests/main.nf.test b/modules/nf-core/kallistobustools/ref/tests/main.nf.test new file mode 100644 index 00000000..dc49d9ac --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_process { + + name "Test Process KALLISTOBUSTOOLS_REF" + script "../main.nf" + process "KALLISTOBUSTOOLS_REF" + + tag "modules" + tag "modules_nfcore" + tag "kallistobustools" + tag "kallistobustools/ref" + + test("genome.fasta + genome.gtf + 'standard'") { + + 
when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.t2g, + process.out.cdna, + process.out.intron, + process.out.cdna_t2c, + process.out.intron_t2c, + ).match() + }, + { assert file(process.out.index.get(0)).exists() } + ) + } + } + + test("genome.fasta + genome.gtf + 'nac'") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "nac" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.t2g, + process.out.cdna, + process.out.intron, + process.out.cdna_t2c, + process.out.intron_t2c, + ).match() + }, + { assert file(process.out.index.get(0)).exists() } + ) + } + } + + test("genome.fasta + genome.gtf + 'standard' - stub") { + + options "-stub" + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("genome.fasta + genome.gtf + 'nac' - stub") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "nac" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.t2g, + process.out.cdna, + process.out.intron, + process.out.cdna_t2c, + process.out.intron_t2c, + ).match() + }, + { assert file(process.out.index.get(0)).exists() } + ) + } + } +} diff --git a/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap b/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap new file mode 100644 index 00000000..9c2be8a7 --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap @@ -0,0 +1,136 @@ +{ + "genome.fasta + genome.gtf + 'standard'": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,d1a8a22c59b9cb0bda39c0c9bb3f6afe" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:37:43.739374859" + }, + "genome.fasta + genome.gtf + 'nac'": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,58591306b33bb948bac7b40f346d0cd7" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + "intron.fa:md5,1aad4e3f5d006f495cc6647fa0bbf6ff" + ], + [ + "cdna_t2c.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + "intron_t2c.txt:md5,fe6d5501923867b514a0447aa4b4995f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:37:59.436989671" + }, + "genome.fasta + genome.gtf + 'standard' - stub": { + "content": [ + { + "0": [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + "1": [ + "kb_ref_out.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "t2g.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "3": [ + "cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + 
"cdna": [ + "cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "cdna_t2c": [ + + ], + "index": [ + "kb_ref_out.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "intron": [ + + ], + "intron_t2c": [ + + ], + "t2g": [ + "t2g.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:38:09.578411047" + }, + "genome.fasta + genome.gtf + 'nac' - stub": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,58591306b33bb948bac7b40f346d0cd7" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + "intron.fa:md5,1aad4e3f5d006f495cc6647fa0bbf6ff" + ], + [ + "cdna_t2c.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + "intron_t2c.txt:md5,fe6d5501923867b514a0447aa4b4995f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:38:25.355912473" + } +} \ No newline at end of file diff --git a/modules/nf-core/kallistobustools/ref/tests/tags.yml b/modules/nf-core/kallistobustools/ref/tests/tags.yml new file mode 100644 index 00000000..208c8d27 --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/tests/tags.yml @@ -0,0 +1,2 @@ +kallistobustools/ref: + - "modules/nf-core/kallistobustools/ref/**" diff --git a/nextflow.config b/nextflow.config index 3fe5c47f..54bd4ab3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,17 +19,18 @@ params { // reference files genome = null transcript_fasta = null + txp2gene = null // salmon alevin parameters (simpleaf) simpleaf_rlen = 91 barcode_whitelist = null - txp2gene = null salmon_index = null - // kallist bustools parameters - kallisto_gene_map = null + // kallisto bustools parameters kallisto_index = null kb_workflow = "standard" + kb_t1c = null + kb_t2c = null // STARsolo parameters star_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 
649b9b18..23f6e9b5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -214,26 +214,35 @@ "type": "object", "description": "Params related to Kallisto/BUS tool", "default": "", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-rainbow", "properties": { - "kallisto_gene_map": { - "type": "string", - "description": "Specify a Kallisto gene mapping file here. If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish" - }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t1c": { + "type": "string", + "description": "Specify a path to the cDNA transcripts-to-capture.", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t2c": { + "type": "string", + "description": "Specify a path to the intron transcripts-to-capture.", + "fa_icon": "fas fa-rainbow", "format": "file-path", "exists": true }, "kb_workflow": { "type": "string", "default": "standard", - "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", - "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + "description": "Type of workflow. Use `nac` for an index type that can quantify nascent and mature RNA. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. 
(default: standard)", + "fa_icon": "fas fa-rainbow", + "enum": ["standard", "lamanno", "nac"] } } }, diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3210e47a..b6549094 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,5 +1,4 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { GENE_MAP } from '../../modules/local/gene_map' include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/kallistobustools/count/main' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ @@ -14,6 +13,8 @@ workflow KALLISTO_BUSTOOLS { gtf kallisto_index txp2gene + t1c + t2c protocol kb_workflow ch_fastq @@ -21,26 +22,13 @@ workflow KALLISTO_BUSTOOLS { main: ch_versions = Channel.empty() - assert kallisto_index || (genome_fasta && gtf): + assert (txp2gene && kallisto_index) || (genome_fasta && gtf): - "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!" + "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') unless both a kallisto index ('--kallisto_index') and a transcript-to-gene map ('--txp2gene') are given!" - assert txp2gene || gtf: - "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!" 
- - /* - * Generate Kallisto Gene Map if not supplied and index is given - * If no index is given, the gene map will be generated in the 'kb ref' step - */ - if (!txp2gene && kallisto_index) { - GENE_MAP( gtf ) - txp2gene = GENE_MAP.out.gene_map - ch_versions = ch_versions.mix(GENE_MAP.out.versions) - } - /* - * Generate kallisto index + * Generate kallisto index and t2g if not already present */ - if (!kallisto_index) { + if (!(txp2gene && kallisto_index)) { KALLISTOBUSTOOLS_REF( genome_fasta, gtf, kb_workflow ) txp2gene = KALLISTOBUSTOOLS_REF.out.t2g.collect() kallisto_index = KALLISTOBUSTOOLS_REF.out.index.collect() @@ -58,7 +46,8 @@ workflow KALLISTO_BUSTOOLS { txp2gene, t1c, t2c, - protocol + protocol, + kb_workflow ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) @@ -66,7 +55,7 @@ workflow KALLISTO_BUSTOOLS { emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.count - txp2gene = txp2gene.collect() + txp2gene } diff --git a/tests/.nf-test.log b/tests/.nf-test.log new file mode 100644 index 00000000..8251fc75 --- /dev/null +++ b/tests/.nf-test.log @@ -0,0 +1,21 @@ +Feb-27 21:54:09.971 [main] INFO com.askimed.nf.test.App - nf-test 0.8.4 +Feb-27 21:54:09.988 [main] INFO com.askimed.nf.test.App - Arguments: [test, tests/main_pipeline_kallisto.test, --update-snapshot] +Feb-27 21:54:10.670 [main] INFO com.askimed.nf.test.App - Nextflow Version: 23.10.1 +Feb-27 21:54:10.674 [main] WARN com.askimed.nf.test.commands.RunTestsCommand - No nf-test config file found. +Feb-27 21:54:10.674 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files. +Feb-27 21:54:10.676 [main] ERROR com.askimed.nf.test.commands.RunTestsCommand - Running tests failed. +java.lang.Exception: Test file '/home/ec2-user/scrnaseq/tests/tests/main_pipeline_kallisto.test' not found. 
+ at com.askimed.nf.test.core.TestExecutionEngine.parse(TestExecutionEngine.java:116) + at com.askimed.nf.test.core.TestExecutionEngine.execute(TestExecutionEngine.java:159) + at com.askimed.nf.test.commands.RunTestsCommand.execute(RunTestsCommand.java:184) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:43) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:18) + at picocli.CommandLine.executeUserObject(CommandLine.java:1953) + at picocli.CommandLine.access$1300(CommandLine.java:145) + at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) + at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) + at picocli.CommandLine.execute(CommandLine.java:2078) + at com.askimed.nf.test.App.run(App.java:44) + at com.askimed.nf.test.App.main(App.java:51) diff --git a/tests/main_pipeline_kallisto.test.snap b/tests/main_pipeline_kallisto.test.snap index e57b77af..1eb15749 100644 --- a/tests/main_pipeline_kallisto.test.snap +++ b/tests/main_pipeline_kallisto.test.snap @@ -20,15 +20,15 @@ "name": "workflow", "success": true }, - "cells_x_genes.barcodes.txt:md5,18be561873e435d4587f6b3f95a0e301", + "cells_x_genes.barcodes.txt:md5,72d78bb1c1ee7cb174520b30f695aa48", "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", - "cells_x_genes.mtx:md5,37d2cd8c712f9c70463e87485bf6cd36", - "cells_x_genes.barcodes.txt:md5,488437e1f5477243697efb93366e5676", + "cells_x_genes.mtx:md5,894d60da192e3788de11fa8fc1fa711d", + "cells_x_genes.barcodes.txt:md5,a8cf7ea4b2d075296a94bf066a64b7a4", "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", - "cells_x_genes.mtx:md5,af90e05b404490f6cb133ab7f62949f8", - "Sample_X_matrix.rds:md5,f0e43f69403f4b2e7704065421592ad0", - "Sample_Y_matrix.rds:md5,61809156e64dbdaf254cbc1c3456588e" + 
"cells_x_genes.mtx:md5,abd83de117204d0a77df3c92d00cc025", + "Sample_X_matrix.rds:md5,0938f4189b7a7fd1030abfcee798741c", + "Sample_Y_matrix.rds:md5,93c12abe283ab37c5f37e5cd3cb25302" ], - "timestamp": "2024-01-23T12:19:47.921508953" + "timestamp": "2024-02-27T12:19:47.921508953" } } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index f2a0ba3f..64d75390 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -43,6 +43,12 @@ workflow SCRNASEQ { ch_barcode_whitelist = [] } + //kallisto params + ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] + kb_workflow = params.kb_workflow + kb_t1c = params.kb_t1c ? file(params.kb_t1c) : [] + kb_t2c = params.kb_t2c ? file(params.kb_t2c) : [] + // samplesheet - this is passed to the MTX conversion functions to add metadata to the // AnnData objects. ch_input = file(params.input) @@ -83,6 +89,8 @@ workflow SCRNASEQ { ch_filter_gtf, ch_kallisto_index, ch_txp2gene, + kb_t1c, + kb_t2c, protocol_config['protocol'], kb_workflow, ch_fastq