Skip to content

Commit

Permalink
Merge pull request nf-core#47 from atrigila/add_glimpse2
Browse files Browse the repository at this point in the history
Add new functionality (sample removal, glimpse2 chunking)
  • Loading branch information
atrigila authored May 13, 2024
2 parents 9b10d86 + f248788 commit 70ff474
Show file tree
Hide file tree
Showing 21 changed files with 569 additions and 211 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co
- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs for the reference panel and automatic renaming available
- [#22](https://github.com/nf-core/phaseimpute/pull/22) - Add validation step for concordance analysis. Input channels changed to match inputs steps. Outdir folder organised by steps. Modules config by subworkflows.
- [#26](https://github.com/nf-core/phaseimpute/pull/26) - Added QUILT method
- [#47](https://github.com/nf-core/phaseimpute/pull/47) - Add possibility to remove samples from reference panel. Add glimpse2 chunking method. Add full-size test parameters.

### `Changed`

Expand All @@ -21,6 +22,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co
- correct meta map propagation
- Test impute and test sim works
- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5)
- [#40](https://github.com/nf-core/phaseimpute/pull/40) - Add STITCH method. Reorganize panelprep subworkflows.
- [#51](https://github.com/nf-core/phaseimpute/pull/51) - Update all processes and fix linting errors. Remove fastqc added by the template.

### `Fixed`
Expand Down
56 changes: 43 additions & 13 deletions README.md

Large diffs are not rendered by default.

33 changes: 0 additions & 33 deletions conf/steps/imputation.config

This file was deleted.

34 changes: 16 additions & 18 deletions conf/steps/imputation_quilt.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,24 @@ process {
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:.*' {

ext.prefix = { "${meta.id}_${meta.chr}" }

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' {
publishDir = [
[
path: { "${params.outdir}/imputation/quilt/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}_chunk" },
mode: params.publish_dir_mode,
enabled: false
],


path: { "${params.outdir}/prep_panel/chunks/" },
mode: params.publish_dir_mode,
enabled: true
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MAKE_CHUNKS:GLIMPSE_CHUNK' {
ext.prefix = { "${meta.id}_${meta.chr}" }
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' {
ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse1" }
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/glimpse1/" },
mode: params.publish_dir_mode,
enabled: true
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:.*' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:.*' {
publishDir = [
[
path: { "${params.outdir}/imputation/quilt/" },
Expand All @@ -48,22 +46,22 @@ process {
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:QUILT_QUILT' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:QUILT_QUILT' {
ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute" }
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:BCFTOOLS_INDEX_1' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_INDEX_1' {
ext.args = "--tbi"
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:BCFTOOLS_ANNOTATE' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_ANNOTATE' {
ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz"
ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute.annotate" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:IMPUTE_QUILT:BCFTOOLS_INDEX_2' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_INDEX_2' {
ext.args = "--tbi"
}

Expand Down
127 changes: 85 additions & 42 deletions conf/steps/panel_prep.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,62 @@ process {
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' {
// Subworkflow: VCF_NORMALIZE_BCFTOOLS

// Catch-all selector for every process under VCF_NORMALIZE_BCFTOOLS:
// publishing is switched off here; the process-specific selectors set their own publishDir.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:.*' {
publishDir = [ enabled: false ]
}

// Settings for the BCFTOOLS_NORM process of the VCF_NORMALIZE_BCFTOOLS subworkflow.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' {
// Extra command-line arguments: '-m +any', no version line, compressed VCF output ('z').
// NOTE(review): '-m +any' presumably joins split records into multiallelic ones
// (matches the "_multiallelic" prefix below) — confirm against the bcftools norm manual.
ext.args = '-m +any --no-version --output-type z'
// Output name is built from the id and chromosome stored in the meta map.
ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" }
// Allow up to two re-submissions.
// NOTE(review): no errorStrategy is set in this view — confirm one is configured elsewhere.
maxRetries = 2
// Output of this process is not published.
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' {
ext.args = "--tbi"
publishDir = [enabled: false]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_VIEW' {
ext.args = '-v snps -Oz'
ext.prefix = { "${meta.id}_${meta.chr}_biallelic" }
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_VIEW' {
ext.args = '-v snps -m 2 -M 2 -Oz'
ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" }
maxRetries = 2
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_2' {
ext.args = '--tbi'
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_2' {
ext.args = "--tbi"
publishDir = [enabled: false]
}

// Settings for the BCFTOOLS_CONVERT process of the VCF_NORMALIZE_BCFTOOLS subworkflow.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' {
// Closure so the argument is evaluated per task: passes '--haplegendsample'
// with an output prefix of "<meta.id>_<meta.chr>".
ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"}
// Allow up to two re-submissions.
maxRetries = 2
// Unlike the other processes of this subworkflow, these outputs are published,
// under <outdir>/prep_panel/haplegend/.
publishDir = [
path: { "${params.outdir}/prep_panel/haplegend/" },
mode: params.publish_dir_mode,
enabled: true
]
}

// (Optional) Subworkflow: Remove samples from panel
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_REMOVE' {
// Compressed VCF output plus a sample selection built from params.remove_samples.
// NOTE(review): the leading '^' presumably turns '-s' into an exclusion list
// (bcftools convention) — confirm the aliased module is bcftools view.
ext.args = { "-Oz -s^${params.remove_samples}" }
// Output name is built from the id and chromosome stored in the meta map.
ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" }
// Allow up to two re-submissions.
maxRetries = 2
// Output of this process is not published.
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:VIEW_VCF_SNPS' {
ext.args = [
"-m 2",
"-M 2",
"-v snps",
"--output-type z",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_SNPS" }
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' {
ext.args = "--tbi"
publishDir = [enabled: false]
}

// Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' {
publishDir = [ enabled: false ]
}

Expand All @@ -74,7 +96,17 @@ process {
"--output-type z",
"--no-version"
].join(' ')
ext.prefix = { "${meta.id}_C${meta.chr}_SITES" }
ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" }
publishDir = [
path: { "${params.outdir}/prep_panel/sites/vcf/" },
mode: params.publish_dir_mode,
enabled: true
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX_2' {
maxRetries = 2
ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" }
publishDir = [
path: { "${params.outdir}/prep_panel/sites/vcf/" },
mode: params.publish_dir_mode,
Expand All @@ -86,7 +118,12 @@ process {
ext.args = [
"-f'%CHROM\t%POS\t%REF,%ALT\\n'",
].join(' ')
ext.prefix = { "${meta.id}_glimpse_SITES_TSV" }
ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" }
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:TABIX_BGZIP' {
ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" }
publishDir = [
path: { "${params.outdir}/prep_panel/sites/tsv/" },
mode: params.publish_dir_mode,
Expand All @@ -100,26 +137,16 @@ process {
"-b2",
"-e2"
].join(' ')
ext.prefix = { "${meta.id}_glimpse_SITES_TSV" }
ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" }
publishDir = [
path: { "${params.outdir}/prep_panel/sites/tsv/" },
mode: params.publish_dir_mode,
enabled: true
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' {
ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"}
maxRetries = 2
publishDir = [
path: { "${params.outdir}/prep_panel/haplegend/" },
mode: params.publish_dir_mode,
enabled: true
]
}

// Phasing
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_PANEL:VCF_PHASE_SHAPEIT5:BEDTOOLS_MAKEWINDOWS' {
// (Optional) Subworkflow: Phasing
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_PANEL:VCF_PHASE_SHAPEIT5:BEDTOOLS_MAKEWINDOWS' {
ext.args = [
'-w 60000',
'-s 40000'
Expand All @@ -128,25 +155,41 @@ process {
publishDir = [ enabled: false ]
}

// TSV
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_QUERY_STITCH' {
ext.args = [
"-f'%CHROM\t%POS\t%REF\t%ALT\\n'",
].join(' ')
ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" }
// Subworkflow: Concat phased panel
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:.*' {
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:GAWK_STITCH' {
ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'"
ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" }
ext.suffix = "txt"
// Subworkflow: Make chunks

// Catch-all selector for every process under VCF_CHUNK_GLIMPSE:
// outputs are published under <outdir>/prep_panel/chunks/ using the pipeline-wide publish mode.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' {
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/" },
mode: params.publish_dir_mode,
enabled: true
]
}


// Settings for the GLIMPSE2_CHUNK process of the VCF_CHUNK_GLIMPSE subworkflow.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' {
// Extra command-line arguments, kept as a list so further options can be appended;
// currently only '--window-mb 2.0'.
ext.args = [
"--window-mb 2.0"
].join(' ')
// Output name is built from the panel name and chromosome stored in the meta map.
ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse2" }
// Published under <outdir>/prep_panel/chunks/glimpse2/.
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/glimpse2/" },
mode: params.publish_dir_mode,
enabled: true
]
}

// Settings for the GLIMPSE2_SPLITREFERENCE process of the VCF_CHUNK_GLIMPSE subworkflow.
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_SPLITREFERENCE' {
// Same prefix pattern as GLIMPSE2_CHUNK: panel name and chromosome from the meta map.
ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse2" }
// Published alongside the GLIMPSE2 chunk files, under <outdir>/prep_panel/chunks/glimpse2/.
publishDir = [
path: { "${params.outdir}/prep_panel/chunks/glimpse2/" },
mode: params.publish_dir_mode,
enabled: true
]
}

}
9 changes: 6 additions & 3 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'

// Genome references
map = "https://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/plink.GRCh38.map.zip"
//map = "https://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/plink.GRCh38.map.zip"
genome = "GRCh38"

// Resources increase incompatible with Github Action
Expand All @@ -26,6 +26,9 @@ params {
// Input data
input = "${projectDir}/tests/csv/sample_sim_full.csv"
panel = "${projectDir}/tests/csv/panel_full.csv"
input_region_string = "all"
step = "simulate"
step = "all"

// Settings
tools = "glimpse1"
remove_samples = "NA12878,NA12891,NA12892"
}
33 changes: 33 additions & 0 deletions conf/test_glimpse2.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/phaseimpute -profile test_glimpse2,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

// Parameter overrides for the test_glimpse2 profile.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function with GLIMPSE2'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '2.GB'
max_time = '1.h'

// Input data
// Sample sheet shipped with the pipeline repository.
input = "${projectDir}/tests/csv/sample_bam.csv"

// Genome references
// Remote FASTA from the nf-core test-datasets repository.
fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa"
// Reference panel sheet shipped with the pipeline repository.
panel = "${projectDir}/tests/csv/panel.csv"
// NOTE(review): presumably declares the panel as already phased — confirm against the parameter schema.
phased = true

// Impute parameters
// Comma-separated list of pipeline steps to run.
step = "panelprep,impute"
// Imputation tool to use.
tools = "glimpse2"
}
Loading

0 comments on commit 70ff474

Please sign in to comment.