From c8ca4dab7ec75cef015534bce464e9ad5491a64c Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 07:47:25 +0100 Subject: [PATCH 01/13] feat added switches --- conf/modules.config | 14 +++++--- conf/test.config | 4 +++ docs/usage.md | 62 +++++++++++++++++++++++---------- nextflow.config | 11 +++--- nextflow_schema.json | 26 +++++++++++--- subworkflows/local/alignment.nf | 10 +++--- workflows/tomte.nf | 4 +-- 7 files changed, 94 insertions(+), 37 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dfa0244e..1c8fdb04 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -318,7 +318,7 @@ process { process { withName: '.*ANALYSE_TRANSCRIPTS:DROP_SAMPLE_ANNOT' { - ext.when = { params.run_drop_ae_switch | params.run_drop_as_switch } + ext.when = { params.switch_drop_ae | params.switch_drop_as } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop" }, mode: params.publish_dir_mode, @@ -327,7 +327,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AE' { - ext.when = { params.run_drop_ae_switch } + ext.when = { params.switch_drop_ae } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop/AE" }, mode: params.publish_dir_mode, @@ -336,7 +336,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AS' { - ext.when = { params.run_drop_as_switch } + ext.when = { params.switch_drop_as } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop/AS" }, mode: params.publish_dir_mode, @@ -345,7 +345,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:DROP_FILTER_RESULTS' { - ext.when = { params.run_drop_ae_switch | params.run_drop_as_switch } + ext.when = { params.switch_drop_ae | params.switch_drop_as } publishDir = [ path: { "${params.outdir}/analyse_transcripts/drop" }, mode: params.publish_dir_mode, @@ -354,6 +354,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:STRINGTIE_STRINGTIE' { + ext.when = { params.switch_stringtie } publishDir = [ path: { 
"${params.outdir}/analyse_transcripts" }, mode: params.publish_dir_mode, @@ -362,6 +363,7 @@ process { } withName: '.*ANALYSE_TRANSCRIPTS:GFFCOMPARE' { + ext.when = { params.switch_stringtie } publishDir = [ path: { "${params.outdir}/analyse_transcripts" }, mode: params.publish_dir_mode, @@ -465,6 +467,7 @@ process { process { withName: '.*ANNOTATE_SNV:ENSEMBLVEP' { + ext.when = { params.switch_vep } ext.prefix = { "${vcf.simpleName}_vep" } ext.args = { [ '--dir_plugins cache/Plugins', @@ -500,10 +503,12 @@ process { process { withName: '.*IGV_TRACKS:UCSC_WIGTOBIGWIG' { + ext.when = { params.switch_build_tracks } ext.args = { '-clip' } } withName: '.*IGV_TRACKS:JUNCTION_TRACK' { + ext.when = { params.switch_build_tracks } publishDir = [ path: { "${params.outdir}/ucsc" }, mode: params.publish_dir_mode, @@ -512,6 +517,7 @@ process { } withName: '.*IGV_TRACKS:TABIX_BGZIPTABIX' { + ext.when = { params.switch_build_tracks } ext.args2 = { '--preset bed' } publishDir = [ path: { "${params.outdir}/ucsc" }, diff --git a/conf/test.config b/conf/test.config index c7a9bcba..06deb617 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,6 +23,10 @@ params { // Input data input = "${projectDir}/test_data/samplesheet_chr21.csv" + switch_vep=false + switch_build_tracks=false + switch_stringtie=false + // Genome references genome = 'GRCh37' fasta = "${projectDir}/test_data/grch37_chr21.fa.gz" diff --git a/docs/usage.md b/docs/usage.md index ddabdb21..f70822f3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -123,15 +123,17 @@ If you would like to see more examples of what a typical samplesheet looks like In genomic-medicine-sweden/tomte, references can be supplied using parameters. -Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. You also have the option to turn off sections of the pipeline if you do not want to run them. 
For example, drop aberrant expression module can be turned off by setting `--run_drop_ae_switch FALSE`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. +Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can call SNVs either with BCFtools or with GATK. You also have the option to turn off sections of the pipeline if you do not want to run them. For example, drop aberrant expression module can be turned off by setting `--switch_drop_ae FALSE`. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, if you are not running DROP aberrant splicing, you do not need to provide `--reference_drop_splice_folder`. genomic-medicine-sweden/tomte consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories: 1. Alignment and pseudo quantification (STAR & Salmon) -2. Subsample_region (Samtools) -3. Variant calling - SNV (BCFTools or GATK's GermlineCNVCaller) -4. SNV annotation (ensembl VEP) -5. DROP +2. Junction track and bigwig +3. Subsample_region (Samtools) +4. Variant calling - SNV (BCFTools or GATK's HaplotypeCaller) +5. SNV annotation (ensembl VEP) +6. Stringtie & gffcompare +7. 
DROP > We have only listed the groups that require at least one input from the user. For example, the pipeline also runs WigToBigWig, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md). @@ -159,17 +161,26 @@ The mandatory and optional parameters for each category are tabulated below. 4 If it is not provided by the user, the default value is 40. 5 If it is not provided by the user, the default value is Basic. -##### 2. Subsample region +##### 2. Junction track and bigwig | Mandatory | Optional | | ------------- | ------------------------------------ | -| subsample_bed | subsample_region_switch 1 | +| | switch_build_tracks 1 | + +1 If it is not provided by the user, the default value is true + + +##### 3. Subsample region + +| Mandatory | Optional | +| ------------- | ------------------------------------ | +| subsample_bed | switch_subsample_region1 | | | seed_frac2 | 1 If it is not provided by the user, the default value is true 2 If it is not provided by the user, the default value is 0.001 -##### 3. Variant calling - SNV +##### 4. Variant calling - SNV | Mandatory | Optional | | --------- | -------------------------------- | @@ -179,28 +190,40 @@ The mandatory and optional parameters for each category are tabulated below. 1 If it is not provided by the user, the default value is bcftools 2 If it is not provided by the user, the default value is multiallelic -#### 4. SNV annotation (ensembl VEP) +#### 5. SNV annotation (ensembl VEP) | Mandatory | Optional | | --------- | ----------------------------- | -| vep_cache | vep_cache_version1 | +| vep_cache | switch_vep1 | +| | vep_cache_version2 | | | vep_filters | -1 For the time being, only 107 is suported +1 If it is not provided by the user, the default value is true +2 For the time being, only 107 is supported + +#### 6. 
Stringtie & gffcompare + +| Mandatory | Optional | +| --------- | -----------------------------| +| fasta | switch_stringtie1 | +| gtf | | + +1 If it is not provided by the user, the default value is true -#### 5. DROP +#### 7. DROP DROP - aberrant expression | Mandatory | Optional | | ------------------------------------- | --------------------------------- | -| reference_drop_annot_file1 | run_drop_ae_switch2 | +| reference_drop_annot_file1 | switch_drop_ae2 | | reference_drop_count_file | drop_group_samples_ae3 | -| | drop_padjcutoff_ae4 | -| | drop_zscorecutoff5 | +| fasta | drop_padjcutoff_ae4 | +| gtf | drop_zscorecutoff5 | | | gene_panel_clinical_filter | -| | downsample_switch6 | +| | switch_downsample6 | | | num_reads7 | +| | genome8 | 1 To get more information on how to format it, see below 2 If it is not provided by the user, the default value is true @@ -209,17 +232,19 @@ DROP - aberrant expression 5 If it is not provided by the user, the default value is 0 6 If it is not provided by the user, the default value is true 7 If it is not provided by the user, the default value is 120000000 +8 If it is not provided by the user, the default value is GRCh38 DROP - aberrant splicing | Mandatory | Optional | | ------------------------------------- | --------------------------------- | -| reference_drop_annot_file1 | run_drop_as_switch2 | +| reference_drop_annot_file1 | switch_drop_as2 | | reference_drop_splice_folder | drop_group_samples_as3 | | | drop_padjcutoff_as4 | | | gene_panel_clinical_filter | -| | downsample_switch5 | +| | switch_downsample5 | | | num_reads6 | +| | genome7 | 1 To get more information on how to format it, see below 2 If it is not provided by the user, the default value is true @@ -227,6 +252,7 @@ DROP - aberrant splicing 4 If it is not provided by the user, the default value is 0.1 5 If it is not provided by the user, the default value is true 6 If it is not provided by the user, the default value is 120000000 +7 If it is not provided by 
the user, the default value is GRCh38 ##### Preparing input for DROP diff --git a/nextflow.config b/nextflow.config index 0eef9fe6..8865f833 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,8 +25,8 @@ params { // Alignment star_two_pass_mode = 'Basic' - subsample_region_switch = true - downsample_switch = true + switch_subsample_region = true + switch_downsample = true num_reads = 120000000 seed_frac = 0.001 save_mapped_as_cram = true @@ -34,13 +34,16 @@ params { // Variant calling variant_caller = 'bcftools' bcftools_caller_mode = 'multiallelic' - run_drop_ae_switch = true - run_drop_as_switch = true + switch_build_tracks = true + switch_stringtie = true + switch_drop_ae = true + switch_drop_as = true drop_group_samples_ae = 'outrider' drop_group_samples_as = 'fraser' drop_padjcutoff_ae = 0.05 drop_padjcutoff_as = 0.1 drop_zscorecutoff = 0 + switch_vep = true // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b8cfd4c9..46c36892 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -193,13 +193,13 @@ "enum": ["Basic", "None"], "fa_icon": "fas fa-tachometer-alt" }, - "subsample_region_switch": { + "switch_subsample_region": { "type": "boolean", "default": true, "description": "Do you want to subsample the region subsample_bed to the fraction given in seed_frac?", "fa_icon": "fas fa-toggle-off" }, - "downsample_switch": { + "switch_downsample": { "type": "boolean", "default": true, "description": "Do you want to downsample the number of reads to num_reads?", @@ -249,13 +249,31 @@ "enum": ["consensus", "multiallelic"], "help_text": "Bcftools call can eitherbe run in multiallelic mode or in consensus mode. In consensus mode a p-value threshold of 0.01 is applied." 
}, - "run_drop_ae_switch": { + "switch_build_tracks": { + "type": "boolean", + "default": true, + "description": "Should tracks be built?", + "fa_icon": "fas fa-toggle-off" + }, + "switch_stringtie": { + "type": "boolean", + "default": true, + "description": "Should stringtie module be run?", + "fa_icon": "fas fa-toggle-off" + }, + "switch_vep": { + "type": "boolean", + "default": true, + "description": "Should VEP module be run?", + "fa_icon": "fas fa-toggle-off" + }, + "switch_drop_ae": { "type": "boolean", "default": true, "description": "Should DROP Aberrant Expression module be run?", "fa_icon": "fas fa-toggle-off" }, - "run_drop_as_switch": { + "switch_drop_as": { "type": "boolean", "default": true, "description": "Should DROP Aberrant Splicing module be run?", "fa_icon": "fas fa-toggle-off" }, diff --git a/subworkflows/local/alignment.nf b/subworkflows/local/alignment.nf index b6f26f72..fda8ff1d 100644 --- a/subworkflows/local/alignment.nf +++ b/subworkflows/local/alignment.nf @@ -20,8 +20,8 @@ workflow ALIGNMENT { subsample_bed seed_frac num_reads - subsample_region_switch - downsample_switch + switch_subsample_region + switch_downsample salmon_index ch_genome_fasta @@ -44,10 +44,10 @@ workflow ALIGNMENT { ch_bam_bai = Channel.empty() ch_bam_bai_out = Channel.empty() - if (subsample_region_switch) { + if (switch_subsample_region) { RNA_SUBSAMPLE_REGION( STAR_ALIGN.out.bam, subsample_bed, seed_frac) ch_bam_bai = ch_bam_bai.mix(RNA_SUBSAMPLE_REGION.out.bam_bai) - if (!downsample_switch) { + if (!switch_downsample) { ch_bam_bai_out = RNA_SUBSAMPLE_REGION.out.bam_bai } else { RNA_DOWNSAMPLE( ch_bam_bai, num_reads) @@ -55,7 +55,7 @@ workflow ALIGNMENT { } } else { ch_bam_bai = ch_bam_bai.mix(STAR_ALIGN.out.bam.join(SAMTOOLS_INDEX.out.bai)) - if (!downsample_switch) { + if (!switch_downsample) { ch_bam_bai_out = STAR_ALIGN.out.bam.join(SAMTOOLS_INDEX.out.bai) } else { RNA_DOWNSAMPLE( ch_bam_bai, num_reads) diff --git a/workflows/tomte.nf b/workflows/tomte.nf index 13cc1e9d..5c8f7b88 100644 --- 
a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -175,8 +175,8 @@ workflow TOMTE { params.subsample_bed, params.seed_frac, params.num_reads, - params.subsample_region_switch, - params.downsample_switch, + params.switch_subsample_region, + params.switch_downsample, ch_references.salmon_index, ch_references.fasta_meta ).set {ch_alignment} From 3a91f1269d8993edc549791b35ba9e81fec2a8e9 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 08:05:07 +0100 Subject: [PATCH 02/13] feat updated changelog --- CHANGELOG.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6777da5..1fb41c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,37 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 1.0.0 - Nisse [2023-11-06] - +## 1.1.0 - Rudolph [xxxx-xx-xx] Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. 
### `Added` +- switch_vep, switch_build_tracks and switch_stringtie to make the pipeline more versatile ### `Fixed` +- Names of other swtiches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch_ (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) ### `Dependencies` ### `Deprecated` + +## 1.0.0 - Nisse [2023-11-06] + +### `Added` +- Trim reads with FASTP +- Read mapping with STAR +- Transcript quantification with Salmon +- Output junction tracks +- Output bigwig +- Choice to subsample overrepresented regions with Samtools +- Choice to downsample number of reads with Samtools +- Detection of aberrant expression with DROP +- Detection of aberrant splicing with DROP +- Filter aberrant expression and aberrant splicing results +- Guided transcript assembly with StringTie +- Filtering results of guided transcript assembly with GffCompare +- SNVs calling with GATK or BCFtools Mpileups +- Allele Specific Read Counter with ASEReadCounter +- Assess allelic imbalance with BootstrapAnn +- Annotation with VEP +- Alignment QC with Picard CollectRnaSeqMetrics +- Present QCs with MultiQC From 4d69d51a5acee42d4a4b5a7c2bd663d1f145a776 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 08:49:41 +0100 Subject: [PATCH 03/13] feat separate modules.config into smaller files --- CHANGELOG.md | 1 + conf/modules.config | 528 -------------------- conf/modules/alignment.config | 92 ++++ conf/modules/allele_specific_calling.config | 32 ++ conf/modules/analyse_transcripts.config | 71 +++ conf/modules/annotate_snv.config | 47 ++ conf/modules/bam_qc.config | 30 ++ conf/modules/call_variants.config | 79 +++ conf/modules/general_tomte.config | 49 ++ conf/modules/igv_tracks.config | 41 ++ conf/modules/prepare_references.config | 174 +++++++ nextflow.config | 10 +- 12 files changed, 625 insertions(+), 529 deletions(-) delete mode 100644 conf/modules.config create mode 
100644 conf/modules/alignment.config create mode 100644 conf/modules/allele_specific_calling.config create mode 100644 conf/modules/analyse_transcripts.config create mode 100644 conf/modules/annotate_snv.config create mode 100644 conf/modules/bam_qc.config create mode 100644 conf/modules/call_variants.config create mode 100644 conf/modules/general_tomte.config create mode 100644 conf/modules/igv_tracks.config create mode 100644 conf/modules/prepare_references.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fb41c8f..c7ef269b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](htt ### `Fixed` - Names of other swtiches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch_ (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) +- Separated modules.config into smaller configs ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config deleted file mode 100644 index 1c8fdb04..00000000 --- a/conf/modules.config +++ /dev/null @@ -1,528 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
----------------------------------------------------------------------------------------- -*/ - -process { - - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: FASTQC { - ext.args = { '--quiet' } - } - - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - -} - -// -// Genome preparation options -// - -process { - - withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { - ext.when = {params.fasta.endsWith(".gz")} - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*.{fasta,fa}" - ] - } - - withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' { - ext.when = { !params.fai } - ext.args2 = " --no-same-owner " - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference, - pattern: "*fai" - ] - } - - withName: '.*PREPARE_REFERENCES:BUILD_DICT' { - ext.when = { !params.sequence_dict } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*dict" - ] - } - - withName: '.*PREPARE_REFERENCES:GET_RRNA_TRANSCRIPTS' { - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' { - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: '.*PREPARE_REFERENCES:UNTAR_STAR_INDEX' { - ext.when = { params.star_index && params.star_index.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: '.*PREPARE_REFERENCES:BUILD_STAR_GENOME' { - ext.when = { !params.star_index } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - - withName: '.*PREPARE_REFERENCES:GUNZIP_GTF' { - ext.when = { params.gtf && params.gtf.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference, - pattern: "*gtf" - ] - } - - withName: '.*PREPARE_REFERENCES:GTF_TO_REFFLAT' { - ext.args = { [ - '-genePredExt', - '-geneNameAsName2' - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*refflat" - ] - } - - withName: '.*PREPARE_REFERENCES:BEDTOINTERVALLIST' { - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - ] - } - - withName: '.*PREPARE_REFERENCES:UNTAR_VEP_CACHE' { - ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) } - publishDir = [ - enabled: false - ] - } - - withName: '.*PREPARE_REFERENCES:GUNZIP_TRFASTA' { - ext.when = { params.transcript_fasta && params.transcript_fasta.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*.{fasta,fa}" - ] - } - - withName: '.*PREPARE_REFERENCES:GFFREAD' { - ext.when = { !params.transcript_fasta } - ext.args = { '-w' } - publishDir = [ - path: { "${params.outdir}/references" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference, - pattern: "*.{fasta,fa}" - ] - } - - withName: '.*PREPARE_REFERENCES:SALMON_INDEX' { - ext.when = { !params.salmon_index } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - - withName: '.*PREPARE_REFERENCES:UNTAR_SALMON_INDEX' { - ext.when = { params.salmon_index.endsWith(".gz") } - publishDir = [ - path: { "${params.outdir}/references" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - -} - -// -// Alignment options -// - -process { - withName: '.*ALIGNMENT:FASTP' { - ext.args = { [ - params.min_trimmed_length ? "--length_required ${params.min_trimmed_length}" : '', - '--correction', - '--overrepresentation_analysis' - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/trimming" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ALIGNMENT:STAR_ALIGN' { - ext.args = { [ - "--twopassMode ${params.star_two_pass_mode}", - '--outReadsUnmapped None', - '--outSAMtype BAM SortedByCoordinate', - '--quantMode GeneCounts', - '--outSAMstrandField intronMotif', - '--peOverlapNbasesMin 10', - '--peOverlapMMp 0.1', - '--chimSegmentMin 12', - '--chimJunctionOverhangMin 12', - '--chimOutType WithinBAM', - '--readFilesCommand gunzip -c', - '--outWigType wiggle', - '--outWigStrand Unstranded' - ].join(' ') } - publishDir = [ - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ALIGNMENT:SAMTOOLS_INDEX' { - publishDir = [ - path: { "${params.outdir}/trimming" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ALIGNMENT:SAMTOOLS_VIEW' { - ext.args = { '--output-fmt cram --write-index' } - ext.when = params.save_mapped_as_cram - publishDir = [ - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - - withName: '.*ALIGNMENT:RNA_DOWNSAMPLE' { - publishDir = [ - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ALIGNMENT:SALMON_QUANT' { - ext.args = { [ - '--gcBias', - '--validateMappings' - ].join(' ') } - publishDir = [ - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } -} - -// -// BAM_QC -// - -process { - withName: '.*BAM_QC:PICARD_COLLECTRNASEQMETRICS' { - ext.args = { [ - meta.strandedness.equals('unstranded') ? '--STRAND_SPECIFICITY NONE' : '', - meta.strandedness.equals('forward') ? '--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND' : '', - meta.strandedness.equals('reverse') ? '--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND' : '' - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/bam_qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } -} - -// -// ANALYSE_TRANSCRIPTS -// - -process { - withName: '.*ANALYSE_TRANSCRIPTS:DROP_SAMPLE_ANNOT' { - ext.when = { params.switch_drop_ae | params.switch_drop_as } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts/drop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AE' { - ext.when = { params.switch_drop_ae } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts/drop/AE" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - ] - } - - withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AS' { - ext.when = { params.switch_drop_as } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts/drop/AS" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ANALYSE_TRANSCRIPTS:DROP_FILTER_RESULTS' { - ext.when = { params.switch_drop_ae | params.switch_drop_as } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts/drop" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ANALYSE_TRANSCRIPTS:STRINGTIE_STRINGTIE' { - ext.when = { params.switch_stringtie } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*ANALYSE_TRANSCRIPTS:GFFCOMPARE' { - ext.when = { params.switch_stringtie } - publishDir = [ - path: { "${params.outdir}/analyse_transcripts" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - ] - } -} - -// -// CALL_VARIANTS -// - -process { - withName: '.*CALL_VARIANTS_GATK:GATK4_SPLITNCIGARREADS' { - ext.prefix = { "${meta.id}_splitncigar" } - publishDir = [ - enabled: false, - ] - } - - withName: '.*CALL_VARIANTS_GATK:SAMTOOLS_INDEX' { - publishDir = [ - enabled: false, - ] - } - - withName: '.*CALL_VARIANTS_GATK:GATK4_HAPLOTYPECALLER' { - ext.args = { [ - '--dont-use-soft-clipped-bases', - '--create-output-variant-index', - '--standard-min-confidence-threshold-for-calling 20' - ].join(' ').trim() } - ext.prefix = { "${meta.id}_gatk" } - publishDir = [ - enabled: false, - ] - } - - withName: '.*CALL_VARIANTS_GATK:GATK4_VARIANTFILTRATION' { - ext.args = { [ - '--window 3', - '--cluster 35', - '--filter-name "QD" --filter-expression "QD < 2.0"', - '--filter-name "FS" --filter-expression "FS > 30.0"' - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/call_variants" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*CALL_VARIANTS:BCFTOOLS_MPILEUP' { - ext.args = { '--max-depth 2000' } - ext.args2 = { [ - '--variants-only', - params.bcftools_caller_mode.equals('multiallelic') - ? '--multiallelic-caller' - : '--consensus-caller --pval-threshold 0.01', - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/call_variants" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: '.*BCFTOOLS_STATS' { - publishDir = [ - path: { "${params.outdir}/call_variants" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - ] - } -} - -// -// ALLELE_SPECIFIC_CALLING -// - -process { - withName: '.*ALLELE_SPECIFIC_CALLING:BCFTOOLS_VIEW' { - ext.prefix = { "${meta.id}_biallelic" } - ext.args = { '--genotype het --max-alleles 2 --min-alleles 2 --types snps' } - } - - withName: '.*ALLELE_SPECIFIC_CALLING:BCFTOOLS_INDEX' { - ext.args = { '--tbi' } - } - - withName: '.*GATK4_ASEREADCOUNTER' { - publishDir = [ - enabled: false - ] - } -} - -// -// ANNOTATE_SNV -// - -process { - withName: '.*ANNOTATE_SNV:ENSEMBLVEP' { - ext.when = { params.switch_vep } - ext.prefix = { "${vcf.simpleName}_vep" } - ext.args = { [ - '--dir_plugins cache/Plugins', - '--plugin LoFtool,cache/Plugins/LoFtool_scores.txt', - '--plugin pLI,cache/Plugins/pLI_values_107.txt', - '--distance 5000', - '--buffer_size 20000', - '--format vcf --max_sv_size 248956422', - '--appris --biotype --cache --canonical --ccds --compress_output bgzip', - '--domains --exclude_predicted --force_overwrite', - '--hgvs --humdiv --no_progress --no_stats --numbers', - '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl', - '--uniprot --vcf' - ].join(' ') } - publishDir = [ - path: { "${params.outdir}/annotate_vep" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*ANNOTATE_SNV:TABIX_VEP' { - publishDir = [ - path: { "${params.outdir}/annotate_vep" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} - -// -// IGV_TRACKS -// - -process { - withName: '.*IGV_TRACKS:UCSC_WIGTOBIGWIG' { - ext.when = { params.switch_build_tracks } - ext.args = { '-clip' } - } - - withName: '.*IGV_TRACKS:JUNCTION_TRACK' { - ext.when = { params.switch_build_tracks } - publishDir = [ - path: { "${params.outdir}/ucsc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*IGV_TRACKS:TABIX_BGZIPTABIX' { - ext.when = { params.switch_build_tracks } - ext.args2 = { '--preset bed' } - publishDir = [ - path: { "${params.outdir}/ucsc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} diff --git a/conf/modules/alignment.config b/conf/modules/alignment.config new file mode 100644 index 00000000..5bd0534b --- /dev/null +++ b/conf/modules/alignment.config @@ -0,0 +1,92 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// Alignment options +// + +process { + withName: '.*ALIGNMENT:FASTP' { + ext.args = { [ + params.min_trimmed_length ? "--length_required ${params.min_trimmed_length}" : '', + '--correction', + '--overrepresentation_analysis' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/trimming" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*ALIGNMENT:STAR_ALIGN' { + ext.args = { [ + "--twopassMode ${params.star_two_pass_mode}", + '--outReadsUnmapped None', + '--outSAMtype BAM SortedByCoordinate', + '--quantMode GeneCounts', + '--outSAMstrandField intronMotif', + '--peOverlapNbasesMin 10', + '--peOverlapMMp 0.1', + '--chimSegmentMin 12', + '--chimJunctionOverhangMin 12', + '--chimOutType WithinBAM', + '--readFilesCommand gunzip -c', + '--outWigType wiggle', + '--outWigStrand Unstranded' + ].join(' ') } + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ALIGNMENT:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/trimming" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ALIGNMENT:SAMTOOLS_VIEW' { + ext.args = { '--output-fmt cram --write-index' } + ext.when = params.save_mapped_as_cram + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + + withName: '.*ALIGNMENT:RNA_DOWNSAMPLE' { + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ALIGNMENT:SALMON_QUANT' { + ext.args = { [ + '--gcBias', + '--validateMappings' + ].join(' ') } + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } +} diff --git a/conf/modules/allele_specific_calling.config b/conf/modules/allele_specific_calling.config new file mode 100644 index 00000000..b05e4958 --- /dev/null +++ b/conf/modules/allele_specific_calling.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// ALLELE_SPECIFIC_CALLING +// + +process { + withName: '.*ALLELE_SPECIFIC_CALLING:BCFTOOLS_VIEW' { + ext.prefix = { "${meta.id}_biallelic" } + ext.args = { '--genotype het --max-alleles 2 --min-alleles 2 --types snps' } + } + + withName: '.*ALLELE_SPECIFIC_CALLING:BCFTOOLS_INDEX' { + ext.args = { '--tbi' } + } + + withName: '.*GATK4_ASEREADCOUNTER' { + publishDir = [ + enabled: false + ] + } +} diff --git a/conf/modules/analyse_transcripts.config b/conf/modules/analyse_transcripts.config new file mode 100644 index 00000000..c9ed8ed7 --- /dev/null +++ b/conf/modules/analyse_transcripts.config @@ -0,0 +1,71 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// ANALYSE_TRANSCRIPTS +// + +process { + withName: '.*ANALYSE_TRANSCRIPTS:DROP_SAMPLE_ANNOT' { + ext.when = { params.switch_drop_ae | params.switch_drop_as } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts/drop" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AE' { + ext.when = { params.switch_drop_ae } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts/drop/AE" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ANALYSE_TRANSCRIPTS:DROP_CONFIG_RUN_AS' { + ext.when = { params.switch_drop_as } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts/drop/AS" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ANALYSE_TRANSCRIPTS:DROP_FILTER_RESULTS' { + ext.when = { params.switch_drop_ae | params.switch_drop_as } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts/drop" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*ANALYSE_TRANSCRIPTS:STRINGTIE_STRINGTIE' { + ext.when = { params.switch_stringtie } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*ANALYSE_TRANSCRIPTS:GFFCOMPARE' { + ext.when = { params.switch_stringtie } + publishDir = [ + path: { "${params.outdir}/analyse_transcripts" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } +} diff --git a/conf/modules/annotate_snv.config b/conf/modules/annotate_snv.config new file mode 100644 index 00000000..52421c7e --- /dev/null +++ b/conf/modules/annotate_snv.config @@ -0,0 +1,47 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +// +// ANNOTATE_SNV +// + +process { + withName: '.*ANNOTATE_SNV:ENSEMBLVEP' { + ext.when = { params.switch_vep } + ext.prefix = { "${vcf.simpleName}_vep" } + ext.args = { [ + '--dir_plugins cache/Plugins', + '--plugin LoFtool,cache/Plugins/LoFtool_scores.txt', + '--plugin pLI,cache/Plugins/pLI_values_107.txt', + '--distance 5000', + '--buffer_size 20000', + '--format vcf --max_sv_size 248956422', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --no_stats --numbers', + '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl', + '--uniprot --vcf' + ].join(' ') } + publishDir = [ + path: { "${params.outdir}/annotate_vep" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ANNOTATE_SNV:TABIX_VEP' { + publishDir = [ + path: { "${params.outdir}/annotate_vep" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/bam_qc.config b/conf/modules/bam_qc.config new file mode 100644 index 00000000..8f9fa2ef --- /dev/null +++ b/conf/modules/bam_qc.config @@ -0,0 +1,30 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +// +// BAM_QC +// + +process { + withName: '.*BAM_QC:PICARD_COLLECTRNASEQMETRICS' { + ext.args = { [ + meta.strandedness.equals('unstranded') ? '--STRAND_SPECIFICITY NONE' : '', + meta.strandedness.equals('forward') ? '--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND' : '', + meta.strandedness.equals('reverse') ? '--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND' : '' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/bam_qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } +} diff --git a/conf/modules/call_variants.config b/conf/modules/call_variants.config new file mode 100644 index 00000000..9a868a8b --- /dev/null +++ b/conf/modules/call_variants.config @@ -0,0 +1,79 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +// +// CALL_VARIANTS +// + +process { + withName: '.*CALL_VARIANTS_GATK:GATK4_SPLITNCIGARREADS' { + ext.prefix = { "${meta.id}_splitncigar" } + publishDir = [ + enabled: false, + ] + } + + withName: '.*CALL_VARIANTS_GATK:SAMTOOLS_INDEX' { + publishDir = [ + enabled: false, + ] + } + + withName: '.*CALL_VARIANTS_GATK:GATK4_HAPLOTYPECALLER' { + ext.args = { [ + '--dont-use-soft-clipped-bases', + '--create-output-variant-index', + '--standard-min-confidence-threshold-for-calling 20' + ].join(' ').trim() } + ext.prefix = { "${meta.id}_gatk" } + publishDir = [ + enabled: false, + ] + } + + withName: '.*CALL_VARIANTS_GATK:GATK4_VARIANTFILTRATION' { + ext.args = { [ + '--window 3', + '--cluster 35', + '--filter-name "QD" --filter-expression "QD < 2.0"', + '--filter-name "FS" --filter-expression "FS > 30.0"' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/call_variants" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*CALL_VARIANTS:BCFTOOLS_MPILEUP' { + ext.args = { '--max-depth 2000' } + ext.args2 = { [ + '--variants-only', + params.bcftools_caller_mode.equals('multiallelic') + ? '--multiallelic-caller' + : '--consensus-caller --pval-threshold 0.01', + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/call_variants" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*BCFTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/call_variants" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } +} diff --git a/conf/modules/general_tomte.config b/conf/modules/general_tomte.config new file mode 100644 index 00000000..dfaaa9af --- /dev/null +++ b/conf/modules/general_tomte.config @@ -0,0 +1,49 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: FASTQC { + ext.args = { '--quiet' } + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/igv_tracks.config b/conf/modules/igv_tracks.config new file mode 100644 index 00000000..ff29fa1a --- /dev/null +++ b/conf/modules/igv_tracks.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// IGV_TRACKS +// + +process { + withName: '.*IGV_TRACKS:UCSC_WIGTOBIGWIG' { + ext.when = { params.switch_build_tracks } + ext.args = { '-clip' } + } + + withName: '.*IGV_TRACKS:JUNCTION_TRACK' { + ext.when = { params.switch_build_tracks } + publishDir = [ + path: { "${params.outdir}/ucsc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*IGV_TRACKS:TABIX_BGZIPTABIX' { + ext.when = { params.switch_build_tracks } + ext.args2 = { '--preset bed' } + publishDir = [ + path: { "${params.outdir}/ucsc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config new file mode 100644 index 00000000..9559090b --- /dev/null +++ b/conf/modules/prepare_references.config @@ -0,0 +1,174 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// Genome preparation options +// + +process { + + withName: '.*PREPARE_REFERENCES:GUNZIP_FASTA' { + ext.when = {params.fasta.endsWith(".gz")} + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*.{fasta,fa}" + ] + } + + withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' { + ext.when = { !params.fai } + ext.args2 = " --no-same-owner " + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*fai" + ] + } + + withName: '.*PREPARE_REFERENCES:BUILD_DICT' { + ext.when = { !params.sequence_dict } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference, + pattern: "*dict" + ] + } + + withName: '.*PREPARE_REFERENCES:GET_RRNA_TRANSCRIPTS' { + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' { + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*PREPARE_REFERENCES:UNTAR_STAR_INDEX' { + ext.when = { params.star_index && params.star_index.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*PREPARE_REFERENCES:BUILD_STAR_GENOME' { + ext.when = { !params.star_index } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*PREPARE_REFERENCES:GUNZIP_GTF' { + ext.when = { params.gtf && params.gtf.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*gtf" + ] + } + + withName: '.*PREPARE_REFERENCES:GTF_TO_REFFLAT' { + ext.args = { [ + '-genePredExt', + '-geneNameAsName2' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference, + pattern: "*refflat" + ] + } + + withName: '.*PREPARE_REFERENCES:BEDTOINTERVALLIST' { + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + ] + } + + withName: '.*PREPARE_REFERENCES:UNTAR_VEP_CACHE' { + ext.when = { (params.vep_cache && params.vep_cache.endsWith("tar.gz")) } + publishDir = [ + enabled: false + ] + } + + withName: '.*PREPARE_REFERENCES:GUNZIP_TRFASTA' { + ext.when = { params.transcript_fasta && params.transcript_fasta.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*.{fasta,fa}" + ] + } + + withName: '.*PREPARE_REFERENCES:GFFREAD' { + ext.when = { !params.transcript_fasta } + ext.args = { '-w' } + publishDir = [ + path: { "${params.outdir}/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference, + pattern: "*.{fasta,fa}" + ] + } + + withName: '.*PREPARE_REFERENCES:SALMON_INDEX' { + ext.when = { !params.salmon_index } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*PREPARE_REFERENCES:UNTAR_SALMON_INDEX' { + ext.when = { params.salmon_index.endsWith(".gz") } + publishDir = [ + path: { "${params.outdir}/references" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + +} diff --git a/nextflow.config b/nextflow.config index 8865f833..36ade18c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -260,7 +260,15 @@ manifest { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/alignment.config' +includeConfig 'conf/modules/allele_specific_calling.config' +includeConfig 'conf/modules/analyse_transcripts.config' +includeConfig 'conf/modules/annotate_snv.config' +includeConfig 'conf/modules/bam_qc.config' +includeConfig 'conf/modules/call_variants.config' +includeConfig 'conf/modules/igv_tracks.config' +includeConfig 'conf/modules/general_tomte.config' +includeConfig 'conf/modules/prepare_references.config' // Function to ensure that resource requirements don't go beyond // a maximum limit From 0e02f1a86cac8a466d9ba365bded6dbd3b097727 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 09:07:31 +0100 Subject: [PATCH 04/13] fix test.config --- conf/test.config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conf/test.config b/conf/test.config index 06deb617..c7a9bcba 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,10 +23,6 @@ params { // Input data input = "${projectDir}/test_data/samplesheet_chr21.csv" - switch_vep=false - switch_build_tracks=false - switch_stringtie=false - // Genome references genome = 'GRCh37' fasta = "${projectDir}/test_data/grch37_chr21.fa.gz" From a874b5b766cabbc5b09bc1085de463f836bee7e5 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 10:16:22 +0100 Subject: [PATCH 05/13] fix linting --- .nf-core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.nf-core.yml b/.nf-core.yml index 52171fad..257c73b0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,6 +7,7 @@ lint: - docs/images/tomte_logo.eps - docs/images/tomte_pipeline_metromap.eps - docs/images/tomte_pipeline_metromap.png + - 
conf/modules.config files_unchanged: false - assets/sendmail_template.txt - .github/CONTRIBUTING.md From f57dc08646949cdc271c9a46d2bdc62f8711bfd2 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 28 Nov 2023 10:56:21 +0100 Subject: [PATCH 06/13] feat added option to change name of output --- bin/drop_filter_results.py | 42 +++++++++++++++++++++++----- modules/local/drop_filter_results.nf | 4 ++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index 64e4b95b..1defee59 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -31,7 +31,9 @@ def annotate_with_hgnc(df_family_aberrant_expression_top_hits: DataFrame, out_dr return df_genes.merge(df_family_aberrant_expression_top_hits, left_on="gene_id", right_on="geneID") -def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_name: str) -> DataFrame: +def filter_by_gene_panel( + df_family_top_hits: DataFrame, gene_panel: str, module_name: str, out_put_file_subfix: str +) -> DataFrame: """Filter out from results any gene that is not present in the provided gene panel.""" if gene_panel != "None": df_panel: DataFrame = read_csv( @@ -42,12 +44,16 @@ def filter_by_gene_panel(df_family_top_hits: DataFrame, gene_panel: str, module_ df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol" ) df_clinical = df_clinical.drop(columns=["hgnc_symbol"]) - file_name = f"{module_name}_provided_samples_top_hits_filtered.tsv" + file_name = f"{module_name}_{out_put_file_subfix}.tsv" df_clinical.to_csv(file_name, sep="\t", index=False, header=True) def filter_outrider_results( - samples: list, gene_panel: str, out_drop_aberrant_expression_rds: str, out_drop_gene_name: str + samples: list, + gene_panel: str, + out_drop_aberrant_expression_rds: str, + out_drop_gene_name: str, + out_put_file_subfix_ae: str, ): """ Filter results to get only those from the sample(s) provided. 
@@ -79,10 +85,14 @@ def filter_outrider_results( df_family_annotated_aberrant_expression_top_hits.to_csv( "OUTRIDER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) - filter_by_gene_panel(df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER") + filter_by_gene_panel( + df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", out_put_file_subfix_ae + ) -def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str): +def filter_fraser_result( + samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, out_put_file_subfix_as: str +): """ Filter results to get only those from the sample(s) provided. Two tsvs will be outputed: @@ -97,7 +107,7 @@ def filter_fraser_result(samples: list, gene_panel: str, out_drop_aberrant_splic df_results_family_aberrant_splicing.to_csv( "FRASER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) - filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER") + filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", out_put_file_subfix_as) def parse_args(argv=None): @@ -134,6 +144,13 @@ def parse_args(argv=None): help="Path to gene name annotion, output from DROP Aberrant Expression", required=False, ) + parser.add_argument( + "--out_put_file_subfix_ae", + type=str, + default="provided_samples_top_hits_filtered", + help="Subfix of Aberrant Expression output file", + required=False, + ) parser.add_argument( "--out_drop_as_tsv", type=str, @@ -141,6 +158,13 @@ def parse_args(argv=None): help="Path to tsv output from DROP Aberrant Splicing", required=False, ) + parser.add_argument( + "--out_put_file_subfix_as", + type=str, + default="provided_samples_top_hits_filtered", + help="Subfix of Aberrant Splicing output file", + required=False, + ) parser.add_argument( "--version", action="version", @@ -157,9 +181,13 @@ def main(): gene_panel=args.gene_panel, 
out_drop_aberrant_expression_rds=args.drop_ae_rds, out_drop_gene_name=args.out_drop_gene_name, + out_put_file_subfix_ae=args.out_put_file_subfix_ae, ) filter_fraser_result( - samples=args.samples, gene_panel=args.gene_panel, out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv + samples=args.samples, + gene_panel=args.gene_panel, + out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv, + out_put_file_subfix_as=args.out_put_file_subfix_as, ) diff --git a/modules/local/drop_filter_results.nf b/modules/local/drop_filter_results.nf index 619c57c3..50ebc17d 100644 --- a/modules/local/drop_filter_results.nf +++ b/modules/local/drop_filter_results.nf @@ -39,7 +39,9 @@ process DROP_FILTER_RESULTS { $gene_panel_filter \\ $drop_ae_rds \\ $out_drop_gene_name \\ - $out_drop_as_tsv + $out_drop_as_tsv \\ + --out_put_file_subfix_as "provided_samples_top_hits_filtered" \\ + --out_put_file_subfix_ae "provided_samples_top_hits_filtered" cat <<-END_VERSIONS > versions.yml "${task.process}": From 66503464f9511b652e6fa36e87186b693a30c0a5 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Mon, 4 Dec 2023 15:10:01 +0100 Subject: [PATCH 07/13] fix run prettier --- CHANGELOG.md | 10 +++++++--- docs/usage.md | 23 +++++++++++------------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7ef269b..6d25c051 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## 1.1.0 - Rudolph [xxxx-xx-xx] + Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](https://nf-co.re/) template. 
### `Added` -- switch_vep, switch_build_tracks and switch_stringtie to make the pipeline more versatile + +- switch_vep, switch_build_tracks and switch_stringtie to make the pipeline more versatile ### `Fixed` -- Names of other swtiches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch_ (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) + +- Names of other swtiches (subsample*region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) - Separated modules.config into smaller configs ### `Dependencies` @@ -20,11 +23,12 @@ Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](htt ## 1.0.0 - Nisse [2023-11-06] ### `Added` + - Trim reads with FASTP - Read mapping with STAR - Transcript quantification with Salmon - Output junction tracks -- Output bigwig +- Output bigwig - Choice to subsample overrepresented regions with Samtools - Choice to downsample number of reads with Samtools - Detection of aberrant expression with DROP diff --git a/docs/usage.md b/docs/usage.md index f70822f3..50e388c2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -163,19 +163,18 @@ The mandatory and optional parameters for each category are tabulated below. ##### 2. Junction track and bigwig -| Mandatory | Optional | -| ------------- | ------------------------------------ | -| | switch_build_tracks 1 | +| Mandatory | Optional | +| --------- | -------------------------------- | +| | switch_build_tracks 1 | 1 If it is not provided by the user, the default value is true - ##### 3. 
Subsample region -| Mandatory | Optional | -| ------------- | ------------------------------------ | -| subsample_bed | switch_subsample_region1 | -| | seed_frac2 | +| Mandatory | Optional | +| ------------- | ----------------------------------- | +| subsample_bed | switch_subsample_region1 | +| | seed_frac2 | 1 If it is not provided by the user, the default value is true 2 If it is not provided by the user, the default value is 0.001 @@ -198,17 +197,17 @@ The mandatory and optional parameters for each category are tabulated below. | | vep_cache_version2 | | | vep_filters | -1 If it is not provided by the user, the default value is true +1 If it is not provided by the user, the default value is true 2 For the time being, only 107 is suported #### 6. Stringtie & gffcompare | Mandatory | Optional | -| --------- | -----------------------------| +| --------- | ---------------------------- | | fasta | switch_stringtie1 | | gtf | | -1 If it is not provided by the user, the default value is true +1 If it is not provided by the user, the default value is true #### 7. 
DROP @@ -216,7 +215,7 @@ DROP - aberrant expression | Mandatory | Optional | | ------------------------------------- | --------------------------------- | -| reference_drop_annot_file1 | switch_drop_ae2 | +| reference_drop_annot_file1 | switch_drop_ae2 | | reference_drop_count_file | drop_group_samples_ae3 | | fasta | drop_padjcutoff_ae4 | | gtf | drop_zscorecutoff5 | From d0c285c1250196885c78b2c8df1be2ea3aa805a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Tue, 5 Dec 2023 17:49:29 +0100 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Anders Jemt --- CHANGELOG.md | 2 +- bin/drop_filter_results.py | 16 ++++++++-------- modules/local/drop_filter_results.nf | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d25c051..42f0c633 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](htt ### `Fixed` -- Names of other swtiches (subsample*region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) +- Renamed the other switches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) - Separated modules.config into smaller configs ### `Dependencies` diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index 1defee59..d7890186 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -32,7 +32,7 @@ def annotate_with_hgnc(df_family_aberrant_expression_top_hits: DataFrame, out_dr def filter_by_gene_panel( - df_family_top_hits: DataFrame, gene_panel: str, module_name: str, out_put_file_subfix: str + df_family_top_hits: DataFrame, gene_panel: str, module_name: str, 
output_file_subfix: str ) -> DataFrame: """Filter out from results any gene that is not present in the provided gene panel.""" if gene_panel != "None": @@ -44,7 +44,7 @@ def filter_by_gene_panel( df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol" ) df_clinical = df_clinical.drop(columns=["hgnc_symbol"]) - file_name = f"{module_name}_{out_put_file_subfix}.tsv" + file_name = f"{module_name}_{output_file_subfix}.tsv" df_clinical.to_csv(file_name, sep="\t", index=False, header=True) @@ -53,7 +53,7 @@ def filter_outrider_results( gene_panel: str, out_drop_aberrant_expression_rds: str, out_drop_gene_name: str, - out_put_file_subfix_ae: str, + output_file_subfix_ae: str, ): """ Filter results to get only those from the sample(s) provided. @@ -86,12 +86,12 @@ def filter_outrider_results( "OUTRIDER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) filter_by_gene_panel( - df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", out_put_file_subfix_ae + df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", output_file_subfix_ae ) def filter_fraser_result( - samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, out_put_file_subfix_as: str + samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, output_file_subfix_as: str ): """ Filter results to get only those from the sample(s) provided. 
@@ -107,7 +107,7 @@ def filter_fraser_result( df_results_family_aberrant_splicing.to_csv( "FRASER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) - filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", out_put_file_subfix_as) + filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", output_file_subfix_as) def parse_args(argv=None): @@ -145,7 +145,7 @@ def parse_args(argv=None): required=False, ) parser.add_argument( - "--out_put_file_subfix_ae", + "--output_file_subfix_ae", type=str, default="provided_samples_top_hits_filtered", help="Subfix of Aberrant Expression output file", @@ -159,7 +159,7 @@ def parse_args(argv=None): required=False, ) parser.add_argument( - "--out_put_file_subfix_as", + "--output_file_subfix_as", type=str, default="provided_samples_top_hits_filtered", help="Subfix of Aberrant Splicing output file", diff --git a/modules/local/drop_filter_results.nf b/modules/local/drop_filter_results.nf index 50ebc17d..3065dd26 100644 --- a/modules/local/drop_filter_results.nf +++ b/modules/local/drop_filter_results.nf @@ -40,8 +40,8 @@ process DROP_FILTER_RESULTS { $drop_ae_rds \\ $out_drop_gene_name \\ $out_drop_as_tsv \\ - --out_put_file_subfix_as "provided_samples_top_hits_filtered" \\ - --out_put_file_subfix_ae "provided_samples_top_hits_filtered" + --output_file_subfix_as "provided_samples_top_hits_filtered" \\ + --output_file_subfix_ae "provided_samples_top_hits_filtered" cat <<-END_VERSIONS > versions.yml "${task.process}": From 78fbc82e583c35afae3140b07ab54a1f45be2c64 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 5 Dec 2023 17:53:37 +0100 Subject: [PATCH 09/13] fix run prettier --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f0c633..060eb74b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Initial release of genomic-medicine-sweden/tomte, created with the [nf-core](htt 
### `Fixed` -- Renamed the other switches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) +- Renamed the other switches (subsample_region_switch, downsample_switch, run_drop_ae_switch and run_drop_as_switch) so that they all start with switch\* (switch_subsample_region, switch_downsample, switch_drop_ae and switch_drop_as) - Separated modules.config into smaller configs ### `Dependencies` From 5c7cdf565205877fb28d14bd146aa1a1b52c2422 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 5 Dec 2023 19:10:00 +0100 Subject: [PATCH 10/13] fix variable name --- bin/drop_filter_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index d7890186..db0137f9 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -181,7 +181,7 @@ def main(): gene_panel=args.gene_panel, out_drop_aberrant_expression_rds=args.drop_ae_rds, out_drop_gene_name=args.out_drop_gene_name, - out_put_file_subfix_ae=args.out_put_file_subfix_ae, + output_file_subfix_ae=args.output_file_subfix_ae, ) filter_fraser_result( samples=args.samples, From 38d96a23f296717a49d8adcddd55c2c19859036f Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Fri, 8 Dec 2023 23:31:48 +0100 Subject: [PATCH 11/13] fix variable name --- bin/drop_filter_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index db0137f9..f99eb17b 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -187,7 +187,7 @@ def main(): samples=args.samples, gene_panel=args.gene_panel, out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv, - out_put_file_subfix_as=args.out_put_file_subfix_as, + output_file_subfix_as=args.output_file_subfix_as, ) From 
ee058694a346a401ca2c60182e31ff7f21fe6c97 Mon Sep 17 00:00:00 2001 From: "lucia.pena.perez@scilifelab.se" Date: Tue, 12 Dec 2023 02:35:46 +0100 Subject: [PATCH 12/13] fix name of file and case id --- bin/drop_filter_results.py | 22 +++++++++++++++++----- modules/local/drop_filter_results.nf | 3 +++ subworkflows/local/analyse_transcripts.nf | 2 ++ subworkflows/local/input_check.nf | 2 +- test_data/samplesheet_chr21.csv | 2 +- workflows/tomte.nf | 3 ++- 6 files changed, 26 insertions(+), 8 deletions(-) diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index f99eb17b..644f46b5 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -32,9 +32,11 @@ def annotate_with_hgnc(df_family_aberrant_expression_top_hits: DataFrame, out_dr def filter_by_gene_panel( - df_family_top_hits: DataFrame, gene_panel: str, module_name: str, output_file_subfix: str + df_family_top_hits: DataFrame, gene_panel: str, module_name: str, case_id: str, output_file_subfix: str ) -> DataFrame: """Filter out from results any gene that is not present in the provided gene panel.""" + if case_id != "": + case_id_ = f"{case_id}_" if gene_panel != "None": df_panel: DataFrame = read_csv( gene_panel, sep="\t", names=GENE_PANEL_HEADER, header=None, comment="#", index_col=False @@ -44,7 +46,7 @@ def filter_by_gene_panel( df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol" ) df_clinical = df_clinical.drop(columns=["hgnc_symbol"]) - file_name = f"{module_name}_{output_file_subfix}.tsv" + file_name = f"{case_id_}{module_name}_{output_file_subfix}.tsv" df_clinical.to_csv(file_name, sep="\t", index=False, header=True) @@ -53,6 +55,7 @@ def filter_outrider_results( gene_panel: str, out_drop_aberrant_expression_rds: str, out_drop_gene_name: str, + case_id: str, output_file_subfix_ae: str, ): """ @@ -86,12 +89,12 @@ def filter_outrider_results( "OUTRIDER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) filter_by_gene_panel( - 
df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", output_file_subfix_ae + df_family_annotated_aberrant_expression_top_hits, gene_panel, "OUTRIDER", case_id, output_file_subfix_ae ) def filter_fraser_result( - samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, output_file_subfix_as: str + samples: list, gene_panel: str, out_drop_aberrant_splicing_tsv: str, case_id: str, output_file_subfix_as: str ): """ Filter results to get only those from the sample(s) provided. @@ -107,7 +110,7 @@ def filter_fraser_result( df_results_family_aberrant_splicing.to_csv( "FRASER_provided_samples_top_hits.tsv", sep="\t", index=False, header=True ) - filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", output_file_subfix_as) + filter_by_gene_panel(df_results_family_aberrant_splicing, gene_panel, "FRASER", case_id, output_file_subfix_as) def parse_args(argv=None): @@ -144,6 +147,13 @@ def parse_args(argv=None): help="Path to gene name annotion, output from DROP Aberrant Expression", required=False, ) + parser.add_argument( + "--case_id", + type=str, + default="", + help="Case id", + required=False, + ) parser.add_argument( "--output_file_subfix_ae", type=str, @@ -181,12 +191,14 @@ def main(): gene_panel=args.gene_panel, out_drop_aberrant_expression_rds=args.drop_ae_rds, out_drop_gene_name=args.out_drop_gene_name, + case_id=args.case_id, output_file_subfix_ae=args.output_file_subfix_ae, ) filter_fraser_result( samples=args.samples, gene_panel=args.gene_panel, out_drop_aberrant_splicing_tsv=args.out_drop_as_tsv, + case_id=args.case_id, output_file_subfix_as=args.output_file_subfix_as, ) diff --git a/modules/local/drop_filter_results.nf b/modules/local/drop_filter_results.nf index 3065dd26..dd8106d4 100644 --- a/modules/local/drop_filter_results.nf +++ b/modules/local/drop_filter_results.nf @@ -11,6 +11,7 @@ process DROP_FILTER_RESULTS { input: val(samples) + val(case_info) path gene_panel_clinical_filter path 
out_drop_ae_rds_in path out_drop_gene_name_in @@ -28,6 +29,7 @@ process DROP_FILTER_RESULTS { script: def ids = "${samples.id}".replace("[","").replace("]","").replace(",","") + def case_id = "${case_info.case_id}".replace("[","").replace("]","").replace(",","") def gene_panel_filter = gene_panel_clinical_filter ? "--gene_panel ${gene_panel_clinical_filter}" : '' def drop_ae_rds = out_drop_ae_rds_in ? "--drop_ae_rds ${out_drop_ae_rds_in}" : '' def out_drop_gene_name = out_drop_gene_name_in ? "--out_drop_gene_name ${out_drop_gene_name_in}" : '' @@ -40,6 +42,7 @@ process DROP_FILTER_RESULTS { $drop_ae_rds \\ $out_drop_gene_name \\ $out_drop_as_tsv \\ + --case $case_id \\ --output_file_subfix_as "provided_samples_top_hits_filtered" \\ --output_file_subfix_ae "provided_samples_top_hits_filtered" diff --git a/subworkflows/local/analyse_transcripts.nf b/subworkflows/local/analyse_transcripts.nf index 10c9927a..3923db36 100644 --- a/subworkflows/local/analyse_transcripts.nf +++ b/subworkflows/local/analyse_transcripts.nf @@ -26,6 +26,7 @@ workflow ANALYSE_TRANSCRIPTS { drop_padjcutoff_as // channel [val(drop_padjcutoff_as)] drop_zscorecutoff // channel [val(drop_zscorecutoff)] ch_gene_panel_clinical_filter // channel [ path(tsv) ] + case_info // channel [val(case_id)] main: ch_versions = Channel.empty() @@ -83,6 +84,7 @@ workflow ANALYSE_TRANSCRIPTS { DROP_FILTER_RESULTS( star_samples, + case_info, ch_gene_panel_clinical_filter, ch_out_drop_ae_rds, ch_out_drop_gene_name, diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 1b8f9283..791bb49a 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -51,7 +51,7 @@ def create_fastq_channel(LinkedHashMap row) { // Function to get a list of metadata (e.g. 
case id) for the case [ meta ] def create_case_channel(LinkedHashMap row) { def case_info = [:] - case_info.id = row.case_id + case_info.case = row.case return case_info } diff --git a/test_data/samplesheet_chr21.csv b/test_data/samplesheet_chr21.csv index 0e5e5d02..c7a52e0b 100644 --- a/test_data/samplesheet_chr21.csv +++ b/test_data/samplesheet_chr21.csv @@ -1,3 +1,3 @@ case,sample,fastq_1,fastq_2,strandedness finequagga,ACC5963A1,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A1_sub_1.fastq.gz,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A1_sub_2.fastq.gz,reverse -sharpparrot,ACC5963A2,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A2_sub_1.fastq.gz,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A2_sub_2.fastq.gz,reverse +finequagga,ACC5963A2,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A2_sub_1.fastq.gz,https://github.com/genomic-medicine-sweden/tomte/raw/master/test_data/ACC5963A2_sub_2.fastq.gz,reverse diff --git a/workflows/tomte.nf b/workflows/tomte.nf index 5c8f7b88..8a6e99a0 100644 --- a/workflows/tomte.nf +++ b/workflows/tomte.nf @@ -207,7 +207,8 @@ workflow TOMTE { params.drop_padjcutoff_ae, params.drop_padjcutoff_as, params.drop_zscorecutoff, - ch_gene_panel_clinical_filter + ch_gene_panel_clinical_filter, + CHECK_INPUT.out.case_info ) ch_versions = ch_versions.mix(ANALYSE_TRANSCRIPTS.out.versions) From 00bab64616bc44a3e77fe8891519278df98ac7d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luc=C3=ADa=20Pe=C3=B1a-P=C3=A9rez?= Date: Tue, 12 Dec 2023 09:32:25 -0600 Subject: [PATCH 13/13] Apply suggestions from code review Co-authored-by: Anders Jemt --- bin/drop_filter_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/drop_filter_results.py b/bin/drop_filter_results.py index 644f46b5..ed169ff1 100755 --- a/bin/drop_filter_results.py +++ b/bin/drop_filter_results.py @@ -36,7 
+36,7 @@ def filter_by_gene_panel( ) -> DataFrame: """Filter out from results any gene that is not present in the provided gene panel.""" if case_id != "": - case_id_ = f"{case_id}_" + case_id += "_" if gene_panel != "None": df_panel: DataFrame = read_csv( gene_panel, sep="\t", names=GENE_PANEL_HEADER, header=None, comment="#", index_col=False @@ -46,7 +46,7 @@ def filter_by_gene_panel( df_family_top_hits, left_on="hgnc_symbol", right_on="hgncSymbol" ) df_clinical = df_clinical.drop(columns=["hgnc_symbol"]) - file_name = f"{case_id_}{module_name}_{output_file_subfix}.tsv" + file_name = f"{case_id}{module_name}_{output_file_subfix}.tsv" df_clinical.to_csv(file_name, sep="\t", index=False, header=True)