diff --git a/CHANGELOG.md b/CHANGELOG.md index 643632a7..69d8dea8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#51](https://github.com/nf-core/phaseimpute/pull/51) - Update all process and fix linting errors. Remove fastqc added by the template. - [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. Add dedicated GLIMPSE1 subworkflow. Fix posfile generation to be done once for glimpse and stitch. - [#68](https://github.com/nf-core/phaseimpute/pull/68) - QUILT can handle external params chunks and hap-legend files. +- [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. ### `Fixed` diff --git a/assets/schema_posfile.json b/assets/schema_posfile.json index 98634b76..c44247ff 100644 --- a/assets/schema_posfile.json +++ b/assets/schema_posfile.json @@ -11,7 +11,7 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Panel name must be provided as a string and cannot contain spaces", - "meta": ["panel"] + "meta": ["id"] }, "chr": { "type": "string", diff --git a/conf/test_validate.config b/conf/test_validate.config index 332aca75..98641844 100644 --- a/conf/test_validate.config +++ b/conf/test_validate.config @@ -25,10 +25,9 @@ params { input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "${projectDir}/tests/csv/panel.csv" - phased = true - map = "${projectDir}/tests/csv/map.csv" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" + posfile = "${projectDir}/tests/csv/posfile.csv" + map = "${projectDir}/tests/csv/map.csv" // Pipeline steps steps = "validate" diff --git a/docs/usage.md b/docs/usage.md index a3cc2baf..b8d166a7 100644 --- 
a/docs/usage.md +++ b/docs/usage.md @@ -320,12 +320,13 @@ Make sure the csv with the input panel is the output from `--step panelprep` or ### Start with validation `--steps validate` This steps compares a _truth_ VCF to an _imputed_ VCF in order to compute imputation accuracy. +This step also requires the allele frequencies. They can either be computed from the reference panel by running `--steps panelprep` with the `--panel` and `--compute_freq` flags, or be supplied with `--posfile samplesheet.csv`. ```bash nextflow run nf-core/phaseimpute --input samplesheet.csv --input_truth truth.csv --steps validate --outdir results --genome GRCh37 -profile docker ``` -The required flags for this mode are: +The required flags when running this mode on its own are: - `--steps validate`: The steps to run. - `--input samplesheet.csv`: The samplesheet containing the input sample files in `vcf` format. diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 272125d0..5ab1cf6a 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -216,8 +216,7 @@ workflow PIPELINE_INITIALISATION { // if (params.posfile) { ch_posfile = Channel - .fromSamplesheet("posfile") - .map {meta, vcf, csi, txt -> [ meta, vcf, csi, txt ]} + .fromSamplesheet("posfile") } else { ch_posfile = [[],[]] } diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv index 437aa776..b2a113bd 100644 --- a/tests/csv/posfile.csv +++ b/tests/csv/posfile.csv @@ -1,3 +1,3 @@ panel,chr,vcf,index,txt 1000GP.s.norel,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.tsv.gz" 
-1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz" +1000GP.s.norel,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/22/1000GP.chr22.s.norel.tsv.gz" diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 5ac848e5..12933520 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -83,7 +83,7 @@ workflow PHASEIMPUTE { ch_region // channel: region to use [ [chr, region], region] ch_depth // channel: depth select [ [depth], depth ] ch_map // channel: genetic map [ [chr], map] - ch_posfile // channel: posfile [ [chr], txt] + ch_posfile // channel: posfile [ [chr], vcf, txt] ch_chunks // channel: chunks [ [chr], txt] ch_versions // channel: versions of software used @@ -135,7 +135,7 @@ workflow PHASEIMPUTE { // // Prepare panel // - if (params.steps.split(',').contains("panelprep") || params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { + if (params.steps.split(',').contains("panelprep") || params.steps.split(',').contains("all")) { // Check chr prefix and remove if necessary VCF_CHR_CHECK(ch_panel, ch_fasta) ch_versions = ch_versions.mix(VCF_CHR_CHECK.out.versions)