diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a66c8d8b..597c854d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e66587f6..073b2953 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/scrn - [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eab6bf67..368bd21a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 9adbb159..05b700e4 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out 
pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..acf72695 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 1344dcb0..21f9e6d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.5.0 - 2024-01-02 + +- Update template to v2.11.1 ([#279](https://github.com/nf-core/scrnaseq/pull/279)) +- Add support for paired GEX+ATAC sequencing using cellranger-arc ([#274](https://github.com/nf-core/scrnaseq/pull/274)) +- Increase default runtime limits for some processes ([#281](https://github.com/nf-core/scrnaseq/pull/281), [#284](https://github.com/nf-core/scrnaseq/pull/284)) +- Better support for custom protocols ([#273](https://github.com/nf-core/scrnaseq/pull/273)). 
+ - The universc protocol is now specified via the `--protocol` flag + - Any protocol specified is now passed to the respective aligner + - Added a section to the documentation + ## v2.4.1 - 2023-09-28 - Fix whitelist logic for dropseq ([#267](https://github.com/nf-core/scrnaseq/pull/267)) diff --git a/README.md b/README.md index 1148b491..55044582 100644 --- a/README.md +++ b/README.md @@ -31,11 +31,8 @@ The nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -62,11 +59,9 @@ nextflow run nf-core/scrnaseq \ --outdir ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). 
For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scrnaseq/usage) and the [parameter documentation](https://nf-co.re/scrnaseq/parameters). @@ -95,12 +90,13 @@ For more details about the output files and reports, please refer to the nf-core/scrnaseq was originally written by Bailey PJ, Botvinnik O, Marques de Almeida F, Gabernet G, Peltzer A, Sturm G. -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people and teams for their extensive assistance in the development of this pipeline: - @heylf - @KevinMenden - @FloWuenne - @rob-p +- [GHGA](https://www.ghga.de/) ## Contributions and Support diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b788419d..0db840ca 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,9 +1,7 @@ report_comment: > - - This report has been generated by the nf-core/scrnaseq + This report has been generated by the nf-core/scrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. - + documentation. 
report_section_order: "nf-core-scrnaseq-methods-description": order: -1000 diff --git a/assets/protocols.json b/assets/protocols.json new file mode 100644 index 00000000..23ff1328 --- /dev/null +++ b/assets/protocols.json @@ -0,0 +1,90 @@ +{ + "alevin": { + "10XV1": { + "protocol": "10xv1", + "whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz" + }, + "10XV2": { + "protocol": "10xv2", + "whitelist": "assets/whitelist/10x_V2_barcode_whitelist.txt.gz" + }, + "10XV3": { + "protocol": "10xv3", + "whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz" + }, + "dropseq": { + "protocol": "dropseq" + } + }, + "cellranger": { + "auto": { + "protocol": "auto" + }, + "10XV1": { + "protocol": "SC3Pv1" + }, + "10XV2": { + "protocol": "SC3Pv2" + }, + "10XV3": { + "protocol": "SC3Pv3" + } + }, + "star": { + "10XV1": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 10", + "whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz" + }, + "10XV2": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 10", + "whitelist": "assets/whitelist/10x_V2_barcode_whitelist.txt.gz" + }, + "10XV3": { + "protocol": "CB_UMI_Simple", + "extra_args": "--soloUMIlen 12", + "whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz" + }, + "dropseq": { + "protocol": "CB_UMI_Simple" + }, + "smartseq": { + "protocol": "SmartSeq" + } + }, + "kallisto": { + "10XV1": { + "protocol": "10XV1" + }, + "10XV2": { + "protocol": "10XV2" + }, + "10XV3": { + "protocol": "10XV3" + }, + "dropseq": { + "protocol": "DROPSEQ" + }, + "smartseq": { + "protocol": "SMARTSEQ" + } + }, + "universc": { + "auto": { + "protocol": "10x" + }, + "10XV1": { + "protocol": "10x-v1" + }, + "10XV2": { + "protocol": "10x-v2" + }, + "10XV3": { + "protocol": "10x-v3" + }, + "dropseq": { + "protocol": "dropseq" + } + } +} diff --git a/assets/slackreport.json b/assets/slackreport.json index f6ba6baf..9538f235 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { 
"fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/scrnaseq v${version} - ${runName}", + "author_name": "nf-core/scrnaseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 47d1b446..bd713438 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -87,7 +87,8 @@ def check_samplesheet(file_in, file_out): ## Check header MIN_COLS = 2 MIN_HEADER = ["sample", "fastq_1", "fastq_2"] - OPT_HEADER = ["expected_cells", "seq_center"] + OPT_HEADER = ["expected_cells", "seq_center", "fastq_barcode", "sample_type"] + SAMPLE_TYPES = ["gex", "atac"] header = [x.strip('"') for x in fin.readline().strip().split(",")] unknown_header = 0 @@ -101,8 +102,7 @@ def check_samplesheet(file_in, file_out): min_header_count = min_header_count + 1 colmap[h] = i i = i + 1 - if min_header_count < len(MIN_HEADER): - # code was checking for unknown_header or min_header_count however looking at the ifelse, unknown_header does not seem that it should be tested + if unknown_header or min_header_count < len(MIN_HEADER): given = ",".join(header) wanted = ",".join(MIN_HEADER) print(f"ERROR: Please check samplesheet header -> {given} != {wanted}") @@ -147,7 +147,26 @@ def check_samplesheet(file_in, file_out): seq_center = seq_center.replace(" ", "_") ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: + fastq_list = [fastq_1, fastq_2] + + fastq_barcode = "" + if "fastq_barcode" in header: + fastq_barcode = lspl[colmap["fastq_barcode"]] + fastq_list.append(fastq_barcode) + + sample_type = "" + if "sample_type" in header: + sample_type = lspl[colmap["sample_type"]] + if sample_type not in SAMPLE_TYPES: + print_error( + "Sample type {} is not 
supported! Please specify either {}".format( + sample_type, " or ".join(SAMPLE_TYPES) + ), + "Line", + line, + ) + + for fastq in fastq_list: if fastq: if fastq.find(" ") != -1: print_error("FastQ file contains spaces!", "Line", line) @@ -161,9 +180,9 @@ def check_samplesheet(file_in, file_out): ## Auto-detect paired-end/single-end sample_info = [] ## [single_end, fastq_1, fastq_2] if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] else: print_error("Invalid combination of columns provided!", "Line", line) @@ -180,7 +199,21 @@ def check_samplesheet(file_in, file_out): ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center"]) + "\n") + fout.write( + ",".join( + [ + "sample", + "single_end", + "fastq_1", + "fastq_2", + "expected_cells", + "seq_center", + "fastq_barcode", + "sample_type", + ] + ) + + "\n" + ) for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index e38ca80e..43ea071a 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -1,4 +1,10 @@ #!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ import scanpy as sc, anndata as ad, pandas as pd from pathlib import Path import argparse diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py new file mode 100755 index 00000000..5c1c0c4f --- /dev/null +++ b/bin/generate_lib_csv.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +import argparse +import os + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + + parser.add_argument("-t", "--sample_types", dest="sample_types", help="Comma seperated list of sample types.") + parser.add_argument("-n", "--sample_names", dest="sample_names", help="Comma seperated list of sample names.") + parser.add_argument("-f", "--fastq_folder", dest="fastq_folder", help="Folder of FASTQ files.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + + args = vars(parser.parse_args()) + + print(args) + + sample_types = args["sample_types"].split(",") + sample_names = args["sample_names"].split(",") + unique_samples_names = set(sample_names) + + lib_csv = open(args["out"], "w") + lib_csv.write("fastqs,sample,library_type") + + for i in range(0, len(sample_types)): + if sample_names[i] in unique_samples_names: + unique_samples_names.remove( + sample_names[i] + ) # this has to be done to account for different Lane files (e.g., L002) + if sample_types[i] == "gex": + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Gene Expression")) + else: + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Chromatin Accessibility")) + + lib_csv.close() + + print("Wrote lib.csv file to {}".format(args["out"])) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 40e1e025..3282122d 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,8 +1,13 @@ #!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ import scanpy as sc import pandas as pd import argparse -import os from scipy import io from anndata import AnnData diff --git a/conf/base.config b/conf/base.config index 8c6f6db9..eea930a9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,8 +60,4 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } - //Fix for issue 196 - withName: 'NFCORE_SCRNASEQ:SCRNASEQ:SCRNASEQ_ALEVIN:ALEVINQC' { - time = '20.h' - } } diff --git a/conf/modules.config b/conf/modules.config index 2fd974c5..5813926a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -17,6 +17,10 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + withName: FASTQC { + time = { check_max( 120.h * task.attempt, 'time' ) } + } + withName: SAMPLESHEET_CHECK { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -33,7 +37,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -74,11 +78,39 @@ if(params.aligner == "cellranger") { ] } withName: CELLRANGER_COUNT { + publishDir = [ + path: "${params.outdir}/${params.aligner}/count", + mode: params.publish_dir_mode + ] + ext.args = {"--chemistry ${meta.chemistry} " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')} + time = { check_max( 240.h * task.attempt, 'time' ) } + } + } +} + +if(params.aligner == "cellrangerarc") { + process { + withName: CELLRANGERARC_MKGTF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkgtf", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" + } + withName: CELLRANGERARC_MKREF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkref", + mode: params.publish_dir_mode + ] + } + withName: CELLRANGERARC_COUNT { publishDir = [ path: "${params.outdir}/${params.aligner}/count", mode: params.publish_dir_mode ] ext.args = {meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : ''} + time = { check_max( 240.h * task.attempt, 'time' ) } } } } @@ -108,6 +140,7 @@ if(params.aligner == "universc") { path: "${params.outdir}/universc", mode: params.publish_dir_mode ] + time = { check_max( 240.h * task.attempt, 'time' ) } } } } @@ -133,6 +166,10 @@ if (params.aligner == "alevin") { ] ext.args = "-r cr-like" } + //Fix for issue 196 + withName: 'ALEVINQC' { + time = '120.h' + } } } diff --git a/docs/output.md b/docs/output.md index c1e2b013..7e9f0cd8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) - [Cellranger](#cellranger) + - [Cellranger ARC](#cellranger-arc) - [UniverSC](#universc) - [Other output data](#other-output-data) - [MultiQC](#multiqc) @@ -103,6 +104,14 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell - Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger +## Cellranger ARC + +Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell Multiome ATAC + Gene Expression sequencing data to generate a variety of analyses pertaining to gene expression (GEX), chromatin accessibility, and their linkage. Furthermore, since the ATAC and GEX measurements are on the very same cell, we are able to perform analyses that link chromatin accessibility and GEX. 
See [Cellranger ARC](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc) for more information on Cellranger. + +**Output directory: `results/cellrangerarc`** + +- Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger ARC + ## UniverSC UniverSC is a wrapper that calls an open-source implementation of Cell Ranger v3.0.2 and adjusts run parameters for compatibility with a wide ranger of technologies. diff --git a/docs/usage.md b/docs/usage.md index f90dc242..ee5d9812 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,7 +14,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -25,19 +25,6 @@ CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz There is a strict requirement for the first 3 columns to match those defined in the table below. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
- -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, -``` - | Column | Description | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Required. Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | @@ -54,9 +41,9 @@ This parameter is currently supported by - [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html#expectcells) - [STARsolo](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md) +- [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) -In the future, support for this paramter will be added to cellranger and UniverSC. Note that since cellranger v7, -it is not recommended anymore to supply the `--expected-cells` parameter. +Note that since cellranger v7, it is **not recommended** anymore to supply the `--expected-cells` parameter. ## Aligning options @@ -71,7 +58,7 @@ Other aligner options for running the pipeline are: - [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) to perform both alignment and downstream analysis. 
- `--aligner cellranger` - [UniverSC](https://github.com/minoda-lab/universc) to run an open-source version of Cell Ranger on any technology - - '--aligner universc' + - `--aligner universc` ### If using cellranger or universc @@ -91,12 +78,86 @@ For more details, see As a sanity check, we verify that filenames of a pair of FASTQ files only differ by `R1`/`R2`. -#### UniverSC technology configuration +### Support for different scRNA-seq protocols + +The single-cell protocol used in the experiment can be specified using the `--protocol` flag. +For cellranger, it is recommended to stick with the default value `'auto'` for automatic detection of the protocol. +For all other aligners, you need to specify the protocol manually. + +The three 10x Genomics protocols 3' v1 (`10XV1`), 3' v2 (`10XV2`) and 3' v3 (`10XV3`) are universally supported +by all aligners in the pipeline and mapped to the correct options automatically. If the protocol is unknown to the +nf-core pipeline, the value specified to `--protocol` is passed to the aligner _verbatim_ to support additional protocols. + +Here are some hints on running the various aligners with different protocols + +#### Kallisto/bustools -UniverSC automatically updates the barcode whitelist and chemistry parameters. Use "universc_technology" to set the 'technology' parameter to configure the run. +The command `kb --list` shows all supported, preconfigured protocols. Additionally, a custom technology string such as +`0,0,16:0,16,26:1,0,0` can be specified: + +> Additionally kallisto bus will accept a string specifying a new technology in the format of bc:umi:seq where each of bc,umi and seq are a triplet of integers separated by a comma, denoting the file index, start and stop of the sequence used. For example to specify the 10xV2 technology we would use 0,0,16:0,16,26:1,0,0 + +For more details, please refer to the [Kallisto/bustools documentation](https://pachterlab.github.io/kallisto/manual#bus). 
+ +#### Alevin/fry + +Alevin/fry also supports custom chemistries in a slightly different format, e.g. `1{b[16]u[12]x:}2{r:}`. + +For more details, see the [simpleaf documentation](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag) + +#### UniverSC + +See the [UniverSC GitHub page](https://github.com/minoda-lab/universc#pre-set-configurations) for all supported protocols. Currently only 3\' scRNA-Seq parameters are supported in nextflow, although chemistry parameters for 5\' scRNA-Seq and full-length scRNA-Seq libraries are supported by the container. +### If using cellranger-arc + +#### Automatic file name detection + +This pipeline currently **does not** automatically rename input FASTQ files to follow the +[naming convention by 10x](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input): + +``` +[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz +``` + +Thus please make sure your files follow this naming convention. + +#### Sample sheet definition + +If you are using cellranger-arc you have to add the column _sample_type_ (atac for scATAC or gex for scRNA) and _fastq_barcode_ (part of the scATAC data) to your samplesheet as an input. + +**Beware of the following points:** + +- It is important that you give your scRNA and scATAC different [Sample Name]s. +- Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). +- If you have more than one sequencing run then you have to give them another suffix (e.g., rep\*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). 
+ +An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one sequencing run +from two lanes for the scRNA could look like this: + +```csv +sample,fastq_1,fastq_2,fastq_barcode,sample_type +test_scARC,path/test_scARC_atac_rep1_S1_L001_R1_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_R2_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_atac_rep2_S2_L001_R1_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_R2_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_gex_S1_L001_R1_001.fastq.gz,path/test_scARC_gex_S1_L001_R2_001.fastq.gz,,gex +test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L002_R2_001.fastq.gz,,gex +``` + +#### Config file and index + +Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use +for cellranger-arc the same index you use for cellranger ([see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please first check if +you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for +more information) + +If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline +can do this automatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can +also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` +the reference genome name that you have used and stated as _genome:_ in your config file. 
+ ## Running the pipeline The minimum typical command for running the pipeline is as follows: diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + 
FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index b58a89db..e4273887 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -4,6 +4,8 @@ import nextflow.Nextflow import groovy.text.SimpleTemplateEngine +import groovy.json.JsonSlurper + class WorkflowScrnaseq { @@ -121,90 +123,21 @@ class WorkflowScrnaseq { } } - /* - * Format the protocol - * Given the protocol paramter (params.protocol) and the aligner (params.aligner), - * this function formats the protocol such that it is fit for the respective - * subworkflow - */ - static formatProtocol(protocol, aligner) { - String new_protocol = protocol - String chemistry = '' - String other_parameters = '' - - // alevin - if (aligner == 'alevin') { - switch (protocol) { - case '10XV1': - new_protocol = '10xv1' - chemistry = 'V1' - break - case '10XV2': - new_protocol = '10xv2' - chemistry = 'V2' - break - case '10XV3': - new_protocol = '10xv3' - chemistry = 'V3' - break - // case 'dropseq': - // new_protocol = 'dropseq' - } - } - - // star - else if (aligner == 'star') { - switch (protocol) { - case 
'10XV1': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V1' - other_parameters = '--soloUMIlen 10' - break - case '10XV2': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V2' - other_parameters = '--soloUMIlen 10' - break - case '10XV3': - new_protocol = 'CB_UMI_Simple' - chemistry = 'V3' - other_parameters = '--soloUMIlen 12' - break - case 'dropseq': - new_protocol = 'CB_UMI_Simple' - break - case 'smartseq': - new_protocol = 'SmartSeq' - } - } - - // kallisto bustools - else if (aligner = 'kallisto' ) { - switch (protocol) { - case '10XV1': - new_protocol = '10XV1' - chemistry = 'V1' - break - case '10XV2': - new_protocol = '10XV2' - chemistry = 'V2' - break - case '10XV3': - new_protocol = '10XV3' - chemistry = 'V3' - break - case 'dropseq': - new_protocol = 'DROPSEQ' - break - case 'smartseq': - new_protocol = 'SMARTSEQ' - } - } - else { - exit 1, 'Aligner not recognized.' + // + // Retrieve the aligner-specific protocol based on the specified protocol. + // Returns a map ["protocol": protocol, "extra_args": , "whitelist": ] + // extra_args and whitelist are optional. + public static Map getProtocol(workflow, log, aligner, protocol) { + def jsonSlurper = new JsonSlurper() + def json = new File("${workflow.projectDir}/assets/protocols.json").text + def protocols = jsonSlurper.parseText(json) + def aligner_map = protocols[aligner] + if(aligner_map.containsKey(protocol)) { + return aligner_map[protocol] + } else { + log.warn("Protocol '${protocol}' not recognized by the pipeline. 
Passing on the protocol to the aligner unmodified.") + return ["protocol": protocol] } - - return [new_protocol, chemistry, other_parameters] } } diff --git a/modules.json b/modules.json index 5b4e4a3f..1119657a 100644 --- a/modules.json +++ b/modules.json @@ -7,62 +7,67 @@ "nf-core": { "cellranger/count": { "branch": "master", - "git_sha": "5df79e0383386a9e43462a6e81bf978ce0a6db09", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", - "git_sha": "716ef3019b66772a817b417078edce2f7b337858", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", - "git_sha": "716ef3019b66772a817b417078edce2f7b337858", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "gffread": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "kallistobustools/count": { "branch": "master", - "git_sha": "de204d3c950f091336539ad74f0e47ddffe69ed4", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "kallistobustools/ref": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - 
"git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a", "installed_by": ["modules"] }, "universc": { "branch": "master", - "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "unzip": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] } } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 7961e057..84d98608 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -41,11 +41,11 @@ process MTX_TO_H5AD { // // run script // - if (params.aligner == 'cellranger') + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) """ # convert file types mtx_to_h5ad.py \\ - --aligner ${params.aligner} \\ + --aligner cellranger \\ --input filtered_feature_bc_matrix.h5 \\ --sample ${meta.id} \\ --out ${meta.id}/${meta.id}_matrix.h5ad diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 4351f4b3..73e260d2 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -19,10 +19,10 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner - if (params.aligner == "cellranger") { - matrix = "matrix.mtx.gz" - barcodes = "barcodes.tsv.gz" - features = "features.tsv.gz" + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { + matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" + barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" + features = "filtered_feature_bc_matrix/features.tsv.gz" } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" diff --git a/modules/local/simpleaf_index.nf 
b/modules/local/simpleaf_index.nf index 5e8f5c42..8e8bd519 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -27,6 +27,7 @@ process SIMPLEAF_INDEX { """ # export required var export ALEVIN_FRY_HOME=. + export NUMBA_CACHE_DIR=. # prep simpleaf simpleaf set-paths diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 0c879ceb..abb58404 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -53,6 +53,7 @@ process SIMPLEAF_QUANT { """ # export required var export ALEVIN_FRY_HOME=. + export NUMBA_CACHE_DIR=. # prep simpleaf simpleaf set-paths @@ -66,7 +67,7 @@ process SIMPLEAF_QUANT { -o ${prefix}_alevin_results \\ -m $txp2gene \\ -t $task.cpus \\ - -c $protocol \\ + -c "$protocol" \\ $expect_cells \\ $unfiltered_command \\ $args diff --git a/modules/nf-core/cellranger/count/environment.yml b/modules/nf-core/cellranger/count/environment.yml new file mode 100644 index 00000000..662f747d --- /dev/null +++ b/modules/nf-core/cellranger/count/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_count +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml index c7d82bbc..a672180e 100644 --- a/modules/nf-core/cellranger/count/meta.yml +++ b/modules/nf-core/cellranger/count/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - meta: @@ -47,3 +46,6 @@ output: authors: - "@ggabernet" - "@Emiller88" +maintainers: + - "@ggabernet" + - "@Emiller88" diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py 
b/modules/nf-core/cellranger/count/templates/cellranger_count.py deleted file mode 100644 index 4bfb9f4f..00000000 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -""" -Automatically rename staged files for input into cellranger count. - -Copyright (c) Gregor Sturm 2023 - MIT License -""" -from subprocess import run -from pathlib import Path -from textwrap import dedent -import shlex -import re - - -def chunk_iter(seq, size): - """iterate over `seq` in chunks of `size`""" - return (seq[pos : pos + size] for pos in range(0, len(seq), size)) - - -sample_id = "${meta.id}" - -# get fastqs, ordered by path. Files are staged into -# - "fastq_001/{original_name.fastq.gz}" -# - "fastq_002/{oritinal_name.fastq.gz}" -# - ... -# Since we require fastq files in the input channel to be ordered such that a R1/R2 pair -# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...] -fastqs = sorted(Path(".").glob("fastq_*/*")) -assert len(fastqs) % 2 == 0 - -# target directory in which the renamed fastqs will be placed -fastq_all = Path("./fastq_all") -fastq_all.mkdir(exist_ok=True) - -# Match R1 in the filename, but only if it is followed by a non-digit or non-character -# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but -# do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" - -for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): - # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: - raise AssertionError( - dedent( - f"""\ - We expect R1 and R2 of the same sample to have the same filename except for R1/R2. - This has been checked by replacing "R1" with "R2" in the first filename and comparing it to the second filename. - If you believe this check shouldn't have failed on your filenames, please report an issue on GitHub! 
- - Files involved: - - {r1} - - {r2} - """ - ) - ) - r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") - r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") - -run( - # fmt: off - [ - "cellranger", "count", - "--id", "${prefix}", - "--fastqs", str(fastq_all), - "--transcriptome", "${reference.name}", - "--localcores", "${task.cpus}", - "--localmem", "${task.memory.toGiga()}", - *shlex.split("""${args}""") - ], - # fmt: on - check=True, -) - -# Output version information -version = run( - ["cellranger", "-V"], - text=True, - check=True, - capture_output=True, -).stdout.replace("cellranger cellranger-", "") - -# alas, no `pyyaml` pre-installed in the cellranger container -with open("versions.yml", "w") as f: - f.write('"${task.process}":\\n') - f.write(f' cellranger: "{version}"\\n') diff --git a/modules/nf-core/cellranger/mkgtf/environment.yml b/modules/nf-core/cellranger/mkgtf/environment.yml new file mode 100644 index 00000000..c81ef3e4 --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_mkgtf +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml index e226e42d..7ec0e0a3 100644 --- a/modules/nf-core/cellranger/mkgtf/meta.yml +++ b/modules/nf-core/cellranger/mkgtf/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - gtf: @@ -29,3 +28,6 @@ output: authors: - "@ggabernet" - "@Emiller88" +maintainers: + - "@ggabernet" + - "@Emiller88" diff --git a/modules/nf-core/cellranger/mkref/environment.yml 
b/modules/nf-core/cellranger/mkref/environment.yml new file mode 100644 index 00000000..9ca3e88c --- /dev/null +++ b/modules/nf-core/cellranger/mkref/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_mkref +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/mkref/meta.yml b/modules/nf-core/cellranger/mkref/meta.yml index 1ad5d6e3..4cd9091c 100644 --- a/modules/nf-core/cellranger/mkref/meta.yml +++ b/modules/nf-core/cellranger/mkref/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA input: - fasta: @@ -35,3 +34,5 @@ output: pattern: "versions.yml" authors: - "@ggabernet" +maintainers: + - "@ggabernet" diff --git a/modules/nf-core/cellrangerarc/Dockerfile b/modules/nf-core/cellrangerarc/Dockerfile new file mode 100644 index 00000000..812b64ba --- /dev/null +++ b/modules/nf-core/cellrangerarc/Dockerfile @@ -0,0 +1,28 @@ +# Dockerfile to create container with Cell Ranger v2.0.2 +# Push to quay.io/nf-core/cellranger-arc: + +FROM continuumio/miniconda3:4.8.2 +LABEL authors="Gisela Gabernet , Florian Heyl" \ + description="Docker image containing Cell Ranger Arc" +# Disclaimer: this container is not provided nor supported by Illumina or 10x Genomics. 
+ +# Install procps and clean apt cache +RUN apt-get update --allow-releaseinfo-change \ + && apt-get install -y \ + cpio \ + procps \ + rpm2cpio \ + unzip \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* + +# Copy pre-downloaded cellranger-arc file +ENV CELLRANGER_ARC_VER=2.0.2 +COPY cellranger-arc-$CELLRANGER_ARC_VER.tar.gz /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz + +# Install cellranger-arc +RUN \ + cd /opt && \ + tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ + export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ + ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz diff --git a/modules/nf-core/cellrangerarc/README.md b/modules/nf-core/cellrangerarc/README.md new file mode 100644 index 00000000..6089d994 --- /dev/null +++ b/modules/nf-core/cellrangerarc/README.md @@ -0,0 +1,23 @@ +# Updating the docker container and making a new module release + +Cell Ranger Arc is a commercial tool from 10X Genomics. The container provided for the cellranger-arc nf-core module is not provided nor supported by 10x Genomics. Updating the Cell Ranger Arc versions in the container and pushing the update to Dockerhub needs to be done manually. + +1. Navigate to the appropriate download page. - [Cell Ranger Arc](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/installation): download the tar ball of the desired Cell Ranger Arc version with `curl` or `wget`. Place this file in the same folder where the Dockerfile lies. + +2. Edit the Dockerfile. Update the Cell Ranger Arc versions in this line: + +```bash +ENV CELLRANGER_ARC_VER= +``` + +3. Create and test the container: + +```bash +docker build . -t quay.io/nf-core/cellranger-arc: +``` + +4. Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so. 
+ +```bash +docker push quay.io/nf-core/cellranger-arc: +``` diff --git a/modules/nf-core/cellrangerarc/count/main.nf b/modules/nf-core/cellrangerarc/count/main.nf new file mode 100644 index 00000000..2bf0193a --- /dev/null +++ b/modules/nf-core/cellrangerarc/count/main.nf @@ -0,0 +1,84 @@ +process CELLRANGERARC_COUNT { + tag "$meta.id" + label 'process_high' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + input: + tuple val(meta), val(sample_type), val(sub_sample), path(reads, stageAs: "fastqs/*") + path reference + + output: + tuple val(meta), path("${meta.id}/outs/*"), emit: outs + path("${meta.id}_lib.csv") , emit: lib + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference_name = reference.name + def sample_types = sample_type.join(",") + def sample_names = sub_sample.join(",") + def lib_csv = meta.id + "_lib.csv" + + """ + fastq_folder=\$(readlink -f fastqs) + + python3 < versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ + + stub: + """ + mkdir -p "${meta.id}/outs/" + touch ${meta.id}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellrangerarc/count/meta.yml b/modules/nf-core/cellrangerarc/count/meta.yml new file mode 100644 index 00000000..919de4dc --- /dev/null +++ b/modules/nf-core/cellrangerarc/count/meta.yml @@ -0,0 +1,40 @@ +name: cellrangerarc_count +description: 
Module to use Cell Ranger's ARC pipelines analyze sequencing data produced from Chromium Single Cell ARC. Uses the cellranger-arc count command. +keywords: + - align + - count + - reference +tools: + - cellrangerarc: + description: Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell ARC data. + homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: + - 10x Genomics EULA +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lib_csv: + type: file + description: | + Path to a 3-column CSV file declaring FASTQ paths, sample names and library types of input ATAC and GEX FASTQs. + - reference: + type: directory + description: Directory containing all the reference indices needed by Cell Ranger ARC +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger ARC + pattern: "${meta.id}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/nf-core/cellrangerarc/mkgtf/main.nf b/modules/nf-core/cellrangerarc/mkgtf/main.nf new file mode 100644 index 00000000..f304c6bc --- /dev/null +++ b/modules/nf-core/cellrangerarc/mkgtf/main.nf @@ -0,0 +1,36 @@ +process CELLRANGERARC_MKGTF { + tag "$gtf" + label 'process_low' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. 
Please use Docker / Singularity / Podman instead." + } + + input: + path gtf + + output: + path "*.filtered.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + cellranger-arc \\ + mkgtf \\ + $gtf \\ + ${gtf.baseName}.filtered.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellrangerarc/mkgtf/meta.yml b/modules/nf-core/cellrangerarc/mkgtf/meta.yml new file mode 100644 index 00000000..923c3e18 --- /dev/null +++ b/modules/nf-core/cellrangerarc/mkgtf/meta.yml @@ -0,0 +1,32 @@ +name: cellrangerarc_mkgtf +description: Module to build a filtered gtf needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkgtf command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: + - 10x Genomics EULA +input: + - gtf: + type: file + description: The reference GTF transcriptome file + pattern: "*.gtf" +output: + - gtf: + type: directory + description: The filtered GTF transcriptome file + pattern: "*.filtered.gtf" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/nf-core/cellrangerarc/mkref/main.nf b/modules/nf-core/cellrangerarc/mkref/main.nf new file mode 100644 index 00000000..079776ba --- /dev/null +++ b/modules/nf-core/cellrangerarc/mkref/main.nf @@ -0,0 +1,83 @@ +process CELLRANGERARC_MKREF { + tag "$reference_name" + label 'process_medium' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + path fasta + path gtf + path motifs + path reference_config + val reference_name + + output: + path "${reference_name}", emit: reference + path "config" , emit: config + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def fast_name = fasta.name + def gtf_name = gtf.name + def motifs_name = motifs.name + def reference_config = reference_config.name + def args = task.ext.args ?: '' + + if ( !reference_name ){ + reference_name = "cellrangerarc_reference" + } + + """ + + python3 < versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellrangerarc/mkref/meta.yml b/modules/nf-core/cellrangerarc/mkref/meta.yml new file mode 100644 index 00000000..cf98e60c --- /dev/null +++ b/modules/nf-core/cellrangerarc/mkref/meta.yml @@ -0,0 +1,46 @@ +name: cellrangerarc_mkref +description: Module to build the reference needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkref command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc is a set of analysis pipelines that process Chromium Single Cell Arc data. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: + - 10x Genomics EULA +input: + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: Reference transcriptome GTF file + pattern: "*.gtf" + - motifs: + type: file + description: Sequence motif file (e.g., from transcription factors) + pattern: "*.txt" + - reference_config: + type: file + description: JSON-like file holding organism, genome, reference fasta path, reference annotation gtf path, contigs that should be excluded and sequence format motif file path + pattern: config + - reference_name: + type: string + description: The name to give the new reference folder + pattern: str +output: + - reference: + type: directory + description: Folder called like the reference_name containing all the reference indices needed by Cell Ranger Arc + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@heylf" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..f0c63f69 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index c9d014b1..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ 
b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process 
CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..4274ed57 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- 
/dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 3961de60..b9e8f926 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -1,13 +1,18 @@ nextflow_process { name "Test Process FASTQC" - script "modules/nf-core/fastqc/main.nf" + 
script "../main.nf" process "FASTQC" + tag "modules" + tag "modules_nfcore" tag "fastqc" test("Single-Read") { when { + params { + outdir = "$outputDir" + } process { """ input[0] = [ @@ -21,12 +26,84 @@ nextflow_process { } then { - assert process.success - assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" - assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") - assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) } - } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] +// FASTQC ( input ) +// } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 00000000..5398f71c --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,7 @@ +name: gffread +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gffread=0.12.1 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index f4472b0e..d8a473e0 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,7 +2,7 @@ process GFFREAD { tag "$gff" label 'process_low' - conda "bioconda::gffread=0.12.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : 'biocontainers/gffread:0.12.1--h8b12597_0' }" @@ -11,20 +11,22 @@ process GFFREAD { path gff output: - path "*.gtf" , emit: gtf + path "*.gtf" , emit: gtf , optional: true + path "*.gff3" , emit: gffread_gff , optional: true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${gff.baseName}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' """ gffread \\ $gff \\ $args \\ - -o ${prefix}.gtf + -o ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": gffread: \$(gffread --version 2>&1) diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml index 20335747..27ac3105 100644 --- a/modules/nf-core/gffread/meta.yml +++ b/modules/nf-core/gffread/meta.yml @@ -12,22 +12,25 @@ tools: tool_dev_url: https://github.com/gpertea/gffread doi: 10.12688/f1000research.23297.1 licence: ["MIT"] - input: - gff: type: file description: A reference file in either the GFF3, GFF2 or GTF format. 
pattern: "*.{gff, gtf}" - output: - gtf: type: file - description: GTF file resulting from the conversion of the GFF input file + description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present pattern: "*.{gtf}" + - gffread_gff: + type: file + description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent + pattern: "*.{gff3}" - versions: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@emiller88" +maintainers: + - "@emiller88" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 00000000..3c064b31 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf != null }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff != null }, + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 00000000..1f1342e1 --- /dev/null +++ 
b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + + ], + "gtf": [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:30.006985" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "2": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ], + "gffread_gff": [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] + } + ], + "timestamp": "2023-11-29T15:39:34.636061" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..25910b34 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 73bf08cd..468a6f28 100644 --- a/modules/nf-core/gunzip/main.nf +++ 
b/modules/nf-core/gunzip/main.nf @@ -2,7 +2,7 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4c..231034f2 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -33,3 +33,7 @@ authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..d0317922 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..720fd9ff --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + 
"versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/kallistobustools/count/environment.yml b/modules/nf-core/kallistobustools/count/environment.yml new file mode 100644 index 00000000..7ff8a2da --- /dev/null +++ b/modules/nf-core/kallistobustools/count/environment.yml @@ -0,0 +1,7 @@ +name: kallistobustools_count +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kb-python=0.27.2 diff --git a/modules/nf-core/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf index b7942fc2..036bb35d 100644 --- a/modules/nf-core/kallistobustools/count/main.nf +++ b/modules/nf-core/kallistobustools/count/main.nf @@ -2,7 +2,7 @@ process KALLISTOBUSTOOLS_COUNT { tag "$meta.id" label 'process_medium' - conda "bioconda::kb-python=0.27.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" diff --git a/modules/nf-core/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml index f25b7bc4..7491248c 100644 --- a/modules/nf-core/kallistobustools/count/meta.yml +++ b/modules/nf-core/kallistobustools/count/meta.yml @@ -13,7 +13,6 @@ tools: documentation: https://kb-python.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/pachterlab/kb_python licence: MIT License - input: - meta: type: map @@ -49,7 +48,6 @@ input: type: string description: String value defining the sequencing technology used. 
pattern: "{10XV1,10XV2,10XV3,CELSEQ,CELSEQ2,DROPSEQ,INDROPSV1,INDROPSV2,INDROPSV3,SCRUBSEQ,SURECELL,SMARTSEQ}" - output: - meta: type: map @@ -64,6 +62,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@flowuenne" +maintainers: + - "@flowuenne" diff --git a/modules/nf-core/kallistobustools/ref/environment.yml b/modules/nf-core/kallistobustools/ref/environment.yml new file mode 100644 index 00000000..acbd0e0a --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/environment.yml @@ -0,0 +1,8 @@ +name: kallistobustools_ref +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kb-python=0.27.2 + - conda-forge::requests>=2.23.0 diff --git a/modules/nf-core/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf index 9d7f1741..68d72ca9 100644 --- a/modules/nf-core/kallistobustools/ref/main.nf +++ b/modules/nf-core/kallistobustools/ref/main.nf @@ -2,7 +2,7 @@ process KALLISTOBUSTOOLS_REF { tag "$fasta" label 'process_medium' - conda "bioconda::kb-python=0.27.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" diff --git a/modules/nf-core/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml index aca61082..00be5143 100644 --- a/modules/nf-core/kallistobustools/ref/meta.yml +++ b/modules/nf-core/kallistobustools/ref/meta.yml @@ -1,7 +1,11 @@ name: kallistobustools_ref description: index creation for kb count quantification of single-cell data. 
keywords: - - kallisto-bustools + - scRNA-seq + - count + - single-cell + - kallisto + - bustools - index tools: - kb: @@ -11,7 +15,6 @@ tools: tool_dev_url: https://github.com/pachterlab/kb_python doi: "10.22002/D1.1876" licence: MIT License - input: - fasta: type: file @@ -22,10 +25,9 @@ input: description: Genomic gtf file pattern: "*.{gtf,gtf.gz}" - workflow_mode: - type: value + type: string description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" pattern: "{standard,lamanno,nucleus}" - output: - versions: type: file @@ -55,6 +57,7 @@ output: type: file description: intron transcript to capture file pattern: "*intron_t2c.{txt}" - authors: - "@flowuenne" +maintainers: + - "@flowuenne" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..bc0bdb5b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 65d7dd0d..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? 
"--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..c2dad217 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..93e4476a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,11 @@ +name: star_genomegenerate + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 43424042..b8855715 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,10 +2,10 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: tuple val(meta), path(fasta) @@ -19,9 +19,10 @@ process STAR_GENOMEGENERATE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? 
"--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; 
s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index eba2d9cf..1061e1b8 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -31,7 +31,6 @@ input: - gtf: type: file description: GTF file of the reference genome - output: - meta: type: map @@ -46,7 +45,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..af0c9421 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("homo_sapiens") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], 
checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens-without_gtf-stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, + { assert 
snapshot(process.out.versions).match("versions") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..9de08c74 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,22 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "timestamp": "2023-12-19T11:05:51.741109" + }, + "index_with_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "timestamp": "2023-12-19T11:38:14.551548" + }, + "index_without_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "timestamp": "2023-12-19T11:38:22.382905" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/universc/environment.yml b/modules/nf-core/universc/environment.yml new file mode 100644 index 00000000..e9cdf650 --- /dev/null +++ b/modules/nf-core/universc/environment.yml @@ -0,0 +1,5 @@ +name: universc +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/universc/meta.yml b/modules/nf-core/universc/meta.yml index 681bb849..92a46bc6 100644 --- a/modules/nf-core/universc/meta.yml +++ b/modules/nf-core/universc/meta.yml @@ -15,7 +15,6 @@ tools: tool_dev_url: "https://github.com/minoda-lab/universc" doi: 
"10.1101/2021.01.19.427209" licence: ["GPL-3.0-or-later"] - input: - meta: type: map @@ -26,7 +25,6 @@ input: type: file description: FASTQ or FASTQ.GZ file, list of 2 files for paired-end data pattern: "*.{fastq,fq,fastq.gz,fq.gz}" - output: - outs: type: file @@ -36,7 +34,9 @@ output: type: file description: File containing software version pattern: "versions.yml" - authors: - "@kbattenb" - "@tomkellygenetics" +maintainers: + - "@kbattenb" + - "@tomkellygenetics" diff --git a/nextflow.config b/nextflow.config index ee3ba62d..6155b7c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { outdir = null input = null save_reference = false - protocol = '10XV3' + protocol = 'auto' // reference files genome = null @@ -40,16 +40,20 @@ params { // Cellranger parameters cellranger_index = null + // Cellranger ARC parameters + motifs = null + cellrangerarc_config = null + cellrangerarc_reference = null + // UniverSC paramaters universc_index = null - universc_technology = '10x' // Template Boilerplate options skip_multiqc = false // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false // QC and MultiQC options @@ -116,6 +120,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -138,16 +143,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -203,6 +208,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { 
includeConfig 'conf/test_full.config' } + test_multiome { includeConfig 'conf/test_multiome.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -215,7 +221,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -238,6 +244,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -263,7 +272,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.4.1' + version = '2.5.0' doi = '10.5281/zenodo.3568187' } diff --git a/nextflow_schema.json b/nextflow_schema.json index c2642a1b..34af4c64 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -58,14 +58,14 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. 
Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "universc"] + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] }, "protocol": { "type": "string", - "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).", - "default": "10XV2", - "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + "description": "The protocol that was used to generate the single cell data, e.g. 10x Genomics v2 Chemistry.\n\n Can be 'auto' (cellranger only), '10XV1', '10XV2', '10XV3', or any other protocol string that will get directly passed the respective aligner.", + "help_text": "The default is to auto-detect the protocol when running cellranger. For all other aligners the protocol MUST be manually specified. \n\n The following protocols are recognized by the pipeline and mapped to the corresponding protocol name of the respective aligner: '10XV1', '10XV2', '10XV3'. \n\nAny other protocol value is passed to the aligner in verbatim to support other sequencing platforms. See the [kallisto](https://pachterlab.github.io/kallisto/manual#bus), [simpleaf](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag), [starsolo](https://gensoft.pasteur.fr/docs/STAR/2.7.9a/STARsolo.html), and [universc](https://github.com/minoda-lab/universc#pre-set-configurations) documentations for more details.", + "default": "auto", + "fa_icon": "fas fa-cogs" } }, "fa_icon": "fas fa-terminal" @@ -234,6 +234,26 @@ } } }, + "cellrangerarc_options": { + "title": "Cellranger ARC Options", + "type": "object", + "description": "Params related to the Cellranger pipeline", + "default": "", + "properties": { + "motifs": { + "type": "string", + "description": "Specify a motif file to create a cellranger-arc index. 
Can be taken, e.g., from the JASPAR database." + }, + "cellrangerarc_config": { + "type": "string", + "description": "Specify a config file to create the cellranger-arc index." + }, + "cellrangerarc_reference": { + "type": "string", + "description": "Specify the genome reference name used in the config file to create a cellranger-arc index." + } + } + }, "universc_options": { "title": "UniverSC Options", "type": "object", @@ -243,11 +263,6 @@ "universc_index": { "type": "string", "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." - }, - "universc_technology": { - "type": "string", - "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x" } } }, @@ -471,6 +486,9 @@ { "$ref": "#/definitions/cellranger_options" }, + { + "$ref": "#/definitions/cellrangerarc_options" + }, { "$ref": "#/definitions/universc_options" }, diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 8fc0a983..764c08f8 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -20,7 +20,6 @@ workflow SCRNASEQ_ALEVIN { txp2gene barcode_whitelist protocol - chemistry ch_fastq diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 228edb06..bfdd533e 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -13,6 +13,7 @@ workflow CELLRANGER_ALIGN { gtf cellranger_index ch_fastq + protocol main: ch_versions = Channel.empty() @@ -34,7 +35,7 @@ workflow CELLRANGER_ALIGN { // Obtain read counts CELLRANGER_COUNT ( // TODO what is `gem` and why is it needed? 
- ch_fastq.map{ meta, reads -> [meta + ["gem": meta.id, "samples": [meta.id]], reads] }, + ch_fastq.map{ meta, reads -> [meta + ["chemistry": protocol, "gem": meta.id, "samples": [meta.id]], reads] }, cellranger_index ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf new file mode 100644 index 00000000..3232a020 --- /dev/null +++ b/subworkflows/local/align_cellrangerarc.nf @@ -0,0 +1,56 @@ +/* + * Alignment with Cellranger Arc + */ + +include {CELLRANGERARC_MKGTF} from "../../modules/nf-core/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/nf-core/cellrangerarc/mkref/main.nf" +include {CELLRANGERARC_COUNT} from "../../modules/nf-core/cellrangerarc/count/main.nf" + +// Define workflow to build a cellranger-arc reference (if none is given) and align reads with Cellranger ARC +workflow CELLRANGERARC_ALIGN { + take: + fasta + gtf + motifs + cellranger_index + ch_fastq + cellrangerarc_config + + main: + ch_versions = Channel.empty() + + assert cellranger_index || (fasta && gtf): + "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf')." + + if (!cellranger_index) { + // Filter GTF based on gene biotypes passed in params.modules + CELLRANGERARC_MKGTF( gtf ) + filtered_gtf = CELLRANGERARC_MKGTF.out.gtf + ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) + + // Make reference genome + assert ( ( !params.cellrangerarc_reference && !cellrangerarc_config ) || + ( params.cellrangerarc_reference && cellrangerarc_config ) ) : + "If you provide a config file you also have to specify the reference name and vice versa."
+ + cellrangerarc_reference = 'cellrangerarc_reference' + if ( params.cellrangerarc_reference ){ + cellrangerarc_reference = params.cellrangerarc_reference + } + + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) + ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) + cellranger_index = CELLRANGERARC_MKREF.out.reference + } + + // Obtain read counts + CELLRANGERARC_COUNT ( + ch_fastq, + cellranger_index + ) + ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) + + emit: + ch_versions + cellranger_arc_out = CELLRANGERARC_COUNT.out.outs +} \ No newline at end of file diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index f18214a1..6825a9e0 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -8,9 +8,9 @@ workflow FASTQC_CHECK { ch_fastq main: - ch_fastq - .map { ch -> [ ch[0], ch[1] ] } - .set { ch_fastq } + + def n = (params.aligner == 'cellrangerarc') ? 3 : 1 + ch_fastq.map { ch -> [ ch[0], ch[n] ] }.set { ch_fastq } /* * FastQ QC using FASTQC diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index f5a11b18..2e06e889 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -10,21 +10,35 @@ workflow INPUT_CHECK { samplesheet // file: /path/to/samplesheet.csv main: + + reads = null + versions = null + + grouped_ch = SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) .map { create_fastq_channel(it) } - .groupTuple(by: [0]) // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .map { meta, reads -> [ meta, reads.flatten() ] } // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by nf-core modules: [ val(meta), [ reads ] ] + // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: [0]) + + if (params.aligner 
== 'cellrangerarc' ) { + grouped_ch + .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), reads.flatten() ] } + .set { reads } + } else { + grouped_ch + .map { meta, reads -> [ meta, reads.flatten() ] } .set { reads } + } emit: - reads // channel: [ val(meta), [ reads ] ] + reads // channel: [ val(meta), [*], [ reads ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +// Function to get list of [ meta, [ multimeta ] , [ fastq_1, fastq_2 ] ] def create_fastq_channel(LinkedHashMap row) { // create meta map def meta = [:] @@ -35,16 +49,48 @@ def create_fastq_channel(LinkedHashMap row) { // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] + def fastqs = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] + fastqs = [ file(row.fastq_1) ] } else { if (!file(row.fastq_2).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastqs = [ file(row.fastq_1), file(row.fastq_2) ] + if (row.sample_type == "atac") { + if (row.fastq_barcode == "") { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file is missing!\n" + } + if (!file(row.fastq_barcode).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file does not exist!" + + "\n${row.fastq_barcode}" + } + fastqs.add(file(row.fastq_barcode)) + } + } + + // define meta_data for multiome + def sample_type = row.sample_type ? 
[row.sample_type] : ['gex'] + + def sub_sample = "" + if (params.aligner == "cellrangerarc"){ + sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") + fastqs.each{ + if(!it.name.contains(sub_sample)){ + exit 1, "ERROR: Please check input samplesheet -> Some files do not have the same sample name " + + "${sub_sample} in common!\n${it}" + } + } + } + + fastq_meta = [ meta, fastqs ] + + if (params.aligner == "cellrangerarc"){ + fastq_meta = [ meta, sample_type, sub_sample, fastqs ] } + return fastq_meta -} +} \ No newline at end of file diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 9d63ef1e..3210e47a 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -15,7 +15,6 @@ workflow KALLISTO_BUSTOOLS { kallisto_index txp2gene protocol - chemistry kb_workflow ch_fastq diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 1a8381ce..958da400 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,7 @@ workflow MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. 
- if ( params.aligner == "cellranger" ) { + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 581bf2c4..0f37a17f 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -43,9 +43,11 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -68,21 +70,26 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' // TODO: Are this channels still necessary? ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) -(protocol, chemistry, other_parameters) = WorkflowScrnaseq.formatProtocol(params.protocol, params.aligner) +protocol_config = WorkflowScrnaseq.getProtocol(workflow, log, params.aligner, params.protocol) +if (protocol_config['protocol'] == 'auto' && params.aligner != "cellranger") { + error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" +} // general input and params ch_input = file(params.input) ch_genome_fasta = Channel.value(params.fasta ? file(params.fasta) : []) ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ?
file(params.transcript_fasta): [] +ch_motifs = params.motifs ? file(params.motifs) : [] +ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] ch_multiqc_alevin = Channel.empty() ch_multiqc_star = Channel.empty() ch_multiqc_cellranger = Channel.empty() if (params.barcode_whitelist) { ch_barcode_whitelist = file(params.barcode_whitelist) -} else if (params.protocol.contains("10X")) { - ch_barcode_whitelist = file("$baseDir/assets/whitelist/10x_${chemistry}_barcode_whitelist.txt.gz", checkIfExists: true) +} else if (protocol_config.containsKey("whitelist")) { + ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}") } else { ch_barcode_whitelist = [] } @@ -137,8 +144,7 @@ workflow SCRNASEQ { ch_filter_gtf, ch_kallisto_index, ch_txp2gene, - protocol, - chemistry, + protocol_config['protocol'], kb_workflow, ch_fastq ) @@ -156,8 +162,7 @@ workflow SCRNASEQ { ch_salmon_index, ch_txp2gene, ch_barcode_whitelist, - protocol, - chemistry, + protocol_config['protocol'], ch_fastq ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) @@ -171,11 +176,11 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_star_index, - protocol, + protocol_config['protocol'], ch_barcode_whitelist, ch_fastq, star_feature, - other_parameters + protocol_config.get('extra_args', ""), ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts) @@ -189,7 +194,8 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_cellranger_index, - ch_fastq + ch_fastq, + protocol_config['protocol'] ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out) @@ -205,13 +211,27 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_universc_index, - params.universc_technology, + protocol_config['protocol'], ch_fastq ) ch_versions = 
ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out) } + // Run cellranger-arc pipeline + if (params.aligner == "cellrangerarc") { + CELLRANGERARC_ALIGN( + ch_genome_fasta, + ch_filter_gtf, + ch_motifs, + ch_cellranger_index, + ch_fastq, + ch_cellrangerarc_config + ) + ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) + } + // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices,