From 8df7a8e2928630529a0e57879fd6513d583d4547 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 12:46:29 +0100 Subject: [PATCH 1/9] Update modules and fix missing variable error * Somehow the cellranger module was incomplete, don't ask me how we ended up there * params.aligner was referenced incorrectly in scrnaseq.nf --- .github/workflows/ci.yml | 4 +- modules.json | 25 +- .../count/templates/cellranger_count.py | 84 ++++++ .../dumpsoftwareversions/environment.yml | 2 +- .../custom/dumpsoftwareversions/main.nf | 4 +- .../dumpsoftwareversions/tests/main.nf.test | 7 +- .../tests/main.nf.test.snap | 50 ++-- modules/nf-core/fastqc/tests/main.nf.test | 271 ++++++++++++------ .../nf-core/fastqc/tests/main.nf.test.snap | 12 +- modules/nf-core/gffread/tests/main.nf.test | 4 +- modules/nf-core/gunzip/tests/main.nf.test | 9 +- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 1 - modules/nf-core/multiqc/tests/main.nf.test | 48 +++- .../nf-core/multiqc/tests/main.nf.test.snap | 21 ++ modules/nf-core/unzip/environment.yml | 7 + modules/nf-core/unzip/main.nf | 37 +++ modules/nf-core/unzip/meta.yml | 42 +++ workflows/scrnaseq.nf | 2 +- 20 files changed, 498 insertions(+), 140 deletions(-) create mode 100644 modules/nf-core/cellranger/count/templates/cellranger_count.py create mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/unzip/environment.yml create mode 100644 modules/nf-core/unzip/main.nf create mode 100644 modules/nf-core/unzip/meta.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 368bd21a..7f8faa34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,8 +31,8 @@ jobs: "test,docker --aligner alevin", "test,docker --aligner kallisto", "test,docker --aligner star", - "test,docker --aligner cellranger -stub", - "test,docker --aligner universc -stub", + "test,docker --aligner cellranger", + "test,docker --aligner universc", ] steps: - name: Free some space diff --git a/modules.json b/modules.json index 1119657a..94fd5c8b 100644 --- a/modules.json +++ b/modules.json @@ -20,24 +20,39 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "cellrangerarc/count": { + "branch": "master", + "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", + "installed_by": ["modules"] + }, + "cellrangerarc/mkgtf": { + "branch": "master", + "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", + "installed_by": ["modules"] + }, + "cellrangerarc/mkref": { + "branch": "master", + "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", "installed_by": ["modules"] }, "gffread": { "branch": "master", - "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", + "git_sha": "b8858b10356b87db4325341872816f9672541b7b", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", "installed_by": ["modules"] }, "kallistobustools/count": { @@ -52,7 +67,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "star/genomegenerate": { diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py new file mode 100644 index 00000000..1527ba76 --- /dev/null +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +""" +Automatically rename staged files for input into cellranger count. + +Copyright (c) Gregor Sturm 2023 - MIT License +""" +from subprocess import run +from pathlib import Path +from textwrap import dedent +import shlex +import re + + +def chunk_iter(seq, size): + """iterate over `seq` in chunks of `size`""" + return (seq[pos : pos + size] for pos in range(0, len(seq), size)) + + +sample_id = "${meta.id}" + +# get fastqs, ordered by path. Files are staged into +# - "fastq_001/{original_name.fastq.gz}" +# - "fastq_002/{oritinal_name.fastq.gz}" +# - ... +# Since we require fastq files in the input channel to be ordered such that a R1/R2 pair +# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...] +fastqs = sorted(Path(".").glob("fastq_*/*")) +assert len(fastqs) % 2 == 0 + +# target directory in which the renamed fastqs will be placed +fastq_all = Path("./fastq_all") +fastq_all.mkdir(exist_ok=True) + +# Match R1 in the filename, but only if it is followed by a non-digit or non-character +# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but +# do not match "SRR12345", "file_INFIXR12", etc +filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])' + +for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): + # double escapes are required because nextflow processes this python 'template' + if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name: + raise AssertionError( + dedent( + f"""\ + We expect R1 and R2 of the same sample to have the same filename except for R1/R2. + This has been checked by replacing "R1" with "R2" in the first filename and comparing it to the second filename. + If you believe this check shouldn't have failed on your filenames, please report an issue on GitHub! + + Files involved: + - {r1} + - {r2} + """ + ) + ) + r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") + r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") + +run( + # fmt: off + [ + "cellranger", "count", + "--id", "${prefix}", + "--fastqs", str(fastq_all), + "--transcriptome", "${reference.name}", + "--localcores", "${task.cpus}", + "--localmem", "${task.memory.toGiga()}", + *shlex.split("""${args}""") + ], + # fmt: on + check=True, +) + +# Output version information +version = run( + ["cellranger", "-V"], + text=True, + check=True, + capture_output=True, +).stdout.replace("cellranger cellranger-", "") + +# alas, no `pyyaml` pre-installed in the cellranger container +with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(f' cellranger: "{version}"\\n') diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index f0c63f69..9b3272bc 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 7685b33c..f2187611 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db10..b1e1630b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 4274ed57..5f59a936 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-11-03T14:43:22.157011" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index b9e8f926..ad9bc54f 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,23 +3,21 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] ] """ } @@ -28,82 +26,195 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
Mon 2 Oct 2023
test.gz
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } ) } } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = [ + [ id:'mysample', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32ce..5ef5afbd 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,20 @@ { + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:48:05.126117287" + }, "versions": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "timestamp": "2023-12-29T02:46:49.507942667" } } \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 3c064b31..c4dfbdf4 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { } process { """ - input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) """ } } @@ -42,7 +42,7 @@ nextflow_process { } process { """ - input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) """ } } diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index d0317922..6406008e 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -15,10 +15,11 @@ nextflow_process { } process { """ - input[0] = [ - [], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) """ } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5b..7625b752 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.18 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d2..1b9f7c43 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : - 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660e..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad217..d0438eda 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -7,12 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -26,20 +23,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -53,9 +47,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..d37e7304 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:02:49.911994" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:03:14.524346" + } +} \ No newline at end of file diff --git a/modules/nf-core/unzip/environment.yml b/modules/nf-core/unzip/environment.yml new file mode 100644 index 00000000..d3a535f1 --- /dev/null +++ b/modules/nf-core/unzip/environment.yml @@ -0,0 +1,7 @@ +name: unzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf new file mode 100644 index 00000000..08cfc3c4 --- /dev/null +++ b/modules/nf-core/unzip/main.nf @@ -0,0 +1,37 @@ +process UNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + 7za \\ + x \\ + -o"${prefix}"/ \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml new file mode 100644 index 00000000..e8e377e2 --- /dev/null +++ b/modules/nf-core/unzip/meta.yml @@ -0,0 +1,42 @@ +name: unzip +description: Unzip ZIP archive files +keywords: + - unzip + - decompression + - zip + - archiving +tools: + - unzip: + description: p7zip is a quick port of 7z.exe and 7za.exe (command line version of 7zip, see www.7-zip.org) for Unix. + homepage: https://sourceforge.net/projects/p7zip/ + documentation: https://sourceforge.net/projects/p7zip/ + tool_dev_url: https://sourceforge.net/projects/p7zip" + licence: ["LGPL-2.1-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: ZIP file + pattern: "*.zip" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - unzipped_archive: + type: directory + description: Directory contents of the unzipped archive + pattern: "${archive.baseName}/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 0f37a17f..25740a8e 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -71,7 +71,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) protocol_config = WorkflowScrnaseq.getProtocol(workflow, log, params.aligner, params.protocol) -if (protocol_config['protocol'] == 'auto' && aligner != "cellranger") { +if (protocol_config['protocol'] == 'auto' && params.aligner != "cellranger") { error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" } From 91c479afffdf18e71351ced890311601ab236662 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 12:49:01 +0100 Subject: [PATCH 2/9] trigger ci From f15e8e5cf0ded606b646a48ce62586d9d2df2a26 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 12:49:30 +0100 Subject: [PATCH 3/9] format with black --- .../nf-core/cellranger/count/templates/cellranger_count.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 1527ba76..4bfb9f4f 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -34,11 +34,11 @@ def chunk_iter(seq, size): # Match R1 in the filename, but only if it is followed by a non-digit or non-character # match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but # do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])' +filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name: + if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: raise AssertionError( dedent( f"""\ From d56741d92cc9c72bc5c044c7fbe86700f95a3fc7 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 12:55:25 +0100 Subject: [PATCH 4/9] Temporarily disable universc CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7f8faa34..3d0441a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: "test,docker --aligner kallisto", "test,docker --aligner star", "test,docker --aligner cellranger", - "test,docker --aligner universc", + # "test,docker --aligner universc", ] steps: - name: Free some space From 1c1592d256ec05e4b33b56c512288e93a93b3ed0 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 14:27:41 +0100 Subject: [PATCH 5/9] Exclude CI For cellrangerarc and universc --- .github/workflows/ci.yml | 3 ++- modules/local/mtx_to_seurat.nf | 6 +++--- subworkflows/local/align_universc.nf | 7 +++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d0441a8..d6136a2b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,8 @@ jobs: "test,docker --aligner kallisto", "test,docker --aligner star", "test,docker --aligner cellranger", - # "test,docker --aligner universc", + # "test,docker --aligner cellrangerarc", // this currently lacks a suitable test profile, see issue # + # "test,docker --aligner universc", // this is broken, see issue # ] steps: - name: Free some space diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 73e260d2..d83575a4 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -20,9 +20,9 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { - matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" - barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" - features = "filtered_feature_bc_matrix/features.tsv.gz" + matrix = "matrix.mtx.gz" + barcodes = "barcodes.tsv.gz" + features = "features.tsv.gz" } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" diff --git a/subworkflows/local/align_universc.nf b/subworkflows/local/align_universc.nf index f4e4038e..acb2c560 100644 --- a/subworkflows/local/align_universc.nf +++ b/subworkflows/local/align_universc.nf @@ -34,8 +34,11 @@ workflow UNIVERSC_ALIGN { // Obtain read counts UNIVERSC ( - // TODO add technology and chemistry input parameters and set defaults - ch_fastq.map{ meta, reads -> [meta + ["id": meta.id, "samples": [meta.id], "technology": [universc_technology], "single_end": false, "strandedness": "forward"], reads] }, + ch_fastq.map{ meta, reads -> [ + // defaults + ["samples": [meta.id], "technology": universc_technology, "chemistry": "auto", "single_end": false, "strandedness": "forward"] + meta, // + meta overrides defaults with information already in meta + reads + ] }, universc_index ) ch_versions = ch_versions.mix(UNIVERSC.out.versions) From 505d221e2c8640a62a55f695de6009892e9bbcd4 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 18 Jan 2024 16:54:33 +0100 Subject: [PATCH 6/9] Refer to issue numbers for missing test --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d6136a2b..10599f3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,8 +32,8 @@ jobs: "test,docker --aligner kallisto", "test,docker --aligner star", "test,docker --aligner cellranger", - # "test,docker --aligner cellrangerarc", // this currently lacks a suitable test profile, see issue # - # "test,docker --aligner universc", // this is broken, see issue # + # "test,docker --aligner cellrangerarc", // this currently lacks a suitable test profile, see issue https://github.com/nf-core/scrnaseq/issues/290 + # "test,docker --aligner universc", // this is broken, see issue https://github.com/nf-core/scrnaseq/issues/289 ] steps: - name: Free some space From 46dd1430a2ebd2bf15631736b4c95b0ded1de18f Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Fri, 19 Jan 2024 08:06:10 +0100 Subject: [PATCH 7/9] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21f9e6d6..a7c8ed27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.5.1 + +- Fix that cellranger workflow couldn't be run and enable CI for this workflow ([#288](https://github.com/nf-core/scrnaseq/pull/288)). +- Update modules ([#288]()https://github.com/nf-core/scrnaseq/pull/288). + ## v2.5.0 - 2024-01-02 - Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) From 5e428c3c1b8a4bd765110f9bd16e15d19783c1d4 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 19 Jan 2024 07:07:06 +0000 Subject: [PATCH 8/9] [automated] Fix linting with Prettier --- .devcontainer/devcontainer.json | 8 ++++---- .github/workflows/ci.yml | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..4a9bc5c7 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10599f3e..e8907caf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,8 +26,7 @@ jobs: NXF_VER: - "23.04.0" - "latest-everything" - profile: - [ + profile: [ "test,docker --aligner alevin", "test,docker --aligner kallisto", "test,docker --aligner star", From 621f7cd68cb00221d220c5d29726e46f25b4a2d9 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Mon, 22 Jan 2024 08:20:47 +0100 Subject: [PATCH 9/9] bump version to 2.5.1 --- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 0db840ca..10ec1af4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/scrnaseq + This report has been generated by the nf-core/scrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-scrnaseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 6155b7c4..9e83c42a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -272,7 +272,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.5.0' + version = '2.5.1' doi = '10.5281/zenodo.3568187' }
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls
File typeConventional base calls