From 0206c4f4182422bc055c61aad89040114d8a73b6 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 17 Mar 2021 14:01:21 +0100 Subject: [PATCH 1/7] Update example config file and add to tests --- .github/workflows/ci.yml | 1 + config/example.json | 40 ++++++++++++---------------------------- tests/test_sanity.yml | 6 ++++++ 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7116bf2..2350038 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,7 @@ jobs: - sanity-targets-only - sanity-samples-overlapping-name - sanity-multisample + - sanity-example-config - dry-run-vanilla - dry-run-target-baits diff --git a/config/example.json b/config/example.json index 139a68a..0c9e189 100644 --- a/config/example.json +++ b/config/example.json @@ -1,31 +1,15 @@ { - "samples": { - "sample_01": { - "read_groups": { - "lib_l1": { - "R1": "1.fq.gz", - "R2": "2.fq.gz" - }, - "lib_l2": { - "R1": "1.1.fq.gz", - "R2": "1.2.fq.gz" - } - } - }, - "sample_02": { - "read_groups": { - "lib_l1": { - "R1": "3.1.fq.gz", - "R2": "3.2.fq.gz" - } - } + "samples": { + "micro": { + "read_groups": { + "lib_01": { + "R1": "tests/data/fastq/micro_R1.fq.gz", + "R2": "tests/data/fastq/micro_R2.fq.gz" } - }, - "reference": "/path/to/ref", - "dbsnp": "/path/to/vcf1", - "known_sites": ["/path/to/vcf1", "/path/to/vcf2"], - "scatter_size": 1000000000, - "female_threshold": 0.6, - "bedfile": "/path/to/bed", - "refflat": "/path/to/refflat" + } + } + }, + "reference":"tests/data/reference/ref.fa", + "dbsnp": "tests/data/reference/database.vcf.gz", + "known_sites": ["tests/data/reference/database.vcf.gz"] } diff --git a/tests/test_sanity.yml b/tests/test_sanity.yml index 37e74f7..d67b322 100644 --- a/tests/test_sanity.yml +++ b/tests/test_sanity.yml @@ -80,3 +80,9 @@ - sanity command: > snakemake --lint --configfile tests/data/config/sample_config.json + +- name: sanity-example-config + tags: + - sanity + 
command: > + jsonschema -i config/example.json config/schema.json From bc318dc8473bd8e615ba427d433569793e1a4347 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 17 Mar 2021 14:20:24 +0100 Subject: [PATCH 2/7] Automatically remove intermediate bam and fastq files --- CHANGELOG.md | 2 ++ Snakefile | 14 ++++++++------ common.smk | 5 +++++ tests/test_integration_run.yml | 8 ++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index acd3f87..ab9cdc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. v2.0.1 --------------------------- ++ Intermediate .bam, .bai and fastq files are automatically removed when no +longer needed. + Switch to using chunked-scatter v2.0.0 diff --git a/Snakefile b/Snakefile index 3d1a5c8..16e7ff1 100644 --- a/Snakefile +++ b/Snakefile @@ -80,8 +80,8 @@ rule cutadapt: r1 = lambda wc: (config['samples'][wc.sample]['read_groups'][wc.read_group]['R1']), r2 = lambda wc: (config['samples'][wc.sample]['read_groups'][wc.read_group]['R2']) output: - r1 = "{sample}/pre_process/{sample}-{read_group}_R1.fastq.gz", - r2 = "{sample}/pre_process/{sample}-{read_group}_R2.fastq.gz" + r1 = temp("{sample}/pre_process/{sample}-{read_group}_R1.fastq.gz"), + r2 = temp("{sample}/pre_process/{sample}-{read_group}_R2.fastq.gz") log: "{sample}/pre_process/{sample}-{read_group}.txt" container: @@ -104,11 +104,11 @@ rule align: ref = config["reference"], tmp = rules.create_tmp.output output: - "{sample}/bams/{sample}-{read_group}.sorted.bam" + bam = temp("{sample}/bams/{sample}-{read_group}.sorted.bam"), + bai = temp("{sample}/bams/{sample}-{read_group}.sorted.bam.bai") params: compression_level = 1, rg = "@RG\\tID:{sample}-library-{read_group}\\tSM:{sample}\\tLB:library\\tPL:ILLUMINA" - log: bwa = "log/{sample}/align.{read_group}.bwa.log", samtools = "log/{sample}/align.{read_group}.samtools.log" @@ -122,13 +122,14 @@ rule align:
"{input.r1} {input.r2} 2> {log.bwa} | " "samtools sort " "-l {params.compression_level} " - "- -o {output} 2> {log.samtools};" - "samtools index {output}" + "- -o {output.bam} 2> {log.samtools};" + "samtools index {output.bam}" rule markdup: """Mark duplicates in BAM file""" input: bam = sample_bamfiles, + bai = sample_baifiles, tmp = rules.create_tmp.output output: bam = "{sample}/bams/{sample}.bam", @@ -152,6 +153,7 @@ rule baserecal: """Base recalibrated BAM files""" input: bam = sample_bamfiles, + bai = sample_baifiles, ref = config["reference"], vcfs = config["known_sites"] output: diff --git a/common.smk b/common.smk index 1dabb22..7b1e9a3 100644 --- a/common.smk +++ b/common.smk @@ -99,6 +99,11 @@ def sample_bamfiles(wildcards): files.append(f'{sample_name}/bams/{sample_name}-{read_group}.sorted.bam') return files +def sample_baifiles(wildcards): + """ Determine the bai files for a sample (one for each readgroup) + """ + return [f"{bam}.bai" for bam in sample_bamfiles(wildcards)] + def gather_gvcf(wildcards): """ Gather the gvcf files based on the scatterregions checkpoint diff --git a/tests/test_integration_run.yml b/tests/test_integration_run.yml index b7db54e..2018005 100644 --- a/tests/test_integration_run.yml +++ b/tests/test_integration_run.yml @@ -46,6 +46,14 @@ - WIDTH_OF_99_PERCENT - picard_AlignmentSummaryMetrics - picard_DuplicationMetrics + - path: micro/pre_process/micro-lib_01_R1.fastq.gz + should_exist: false + - path: micro/pre_process/micro-lib_01_R2.fastq.gz + should_exist: false + - path: micro/bams/micro-lib_01.sorted.bam + should_exist: false + - path: micro/bams/micro-lib_01.sorted.bam.bai + should_exist: false - name: integration-small-scatter tags: From 3e7d785226e95acbc1087ad3254a69f6febd9c7c Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 17 Mar 2021 14:22:36 +0100 Subject: [PATCH 3/7] Update slurm cluster configuration --- cluster/slurm_cluster.yml | 30 +++--------------------------- 1 file changed, 3
insertions(+), 27 deletions(-) diff --git a/cluster/slurm_cluster.yml b/cluster/slurm_cluster.yml index e808a86..b61559d 100644 --- a/cluster/slurm_cluster.yml +++ b/cluster/slurm_cluster.yml @@ -7,36 +7,21 @@ __default__: align: threads: 8 - vmem: 4G + vmem: 8G time: 0-2 baserecal: threads: 8 - vmem: 6G + vmem: 48G time: 0-2 covstats: - vmem: 6G + vmem: 12G cutadapt: threads: 8 time: 0-2 -fastqc_raw: - threads: 4 - time: 0-1 - -fastqc_merged: - threads: 4 - time: 0-1 - -fastqc_postqc: - threads: 4 - time: 0-1 - -fqcount_postqc: - time: 0-1 - gvcf_scatter: vmem: 20G time: 0-1 @@ -58,12 +43,3 @@ markdup: multiqc: vmem: 30G time: 0-1 - -sickle: - time: 0-1 - -split_vcf: - vmem: 20G - -vcfstats: - time: 0-1 From 84dd18999b38b197762c3efe66099c12b6286b03 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 23 Mar 2021 08:34:11 +0100 Subject: [PATCH 4/7] Rewrite multi sample calling to use scatters Previously, multi sample calling was done on the merged per sample g.vcf files. However, in rare cases, VCF files that are merged by bcftools can trigger a crash in GATK. To resolve this, the multi sample calling now uses the scattered per sample g.vcf files, so that GATK does not have to act on VCF files that were modified by bcftools. As an added advantage, using the scatters directly for the multi sample calling allows for better parallelisation. --- CHANGELOG.md | 3 + Snakefile | 60 ++++++++++++------- common.smk | 18 ++++++ .../config/sample_config_multisample.json | 1 + tests/test_dry_run.yml | 2 +- tests/test_integration_run.yml | 8 ++- 6 files changed, 68 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab9cdc1..11d5dc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ that users understand how the changes affect the new version. v2.0.1 --------------------------- ++ `multisample_vcf` now acts on the scatters, instead of on the merged g.vcf +files. 
++ The multisample output is located in `multisample/multisample.vcf.gz`. + Intermediate .bam, .bai and fastq files are automatically removed when no longer needed. + Switch to using chunked-scatter diff --git a/Snakefile b/Snakefile index 16e7ff1..5679d83 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ rule all: gvcf_tbi = expand("{s}/vcf/{s}.g.vcf.gz.tbi", s=config["samples"]), coverage_stats = coverage_stats, coverage_files = coverage_files, - multisample_vcf = "multisample.vcf.gz" if config["multisample_vcf"] else [] + multisample_vcf = "multisample/multisample.vcf.gz" if config["multisample_vcf"] else [] rule create_tmp: """ @@ -285,6 +285,44 @@ rule genotype_gather: "--output {output.vcf} --output-type z 2> {log} && " "bcftools index --tbi --output-file {output.vcf_tbi} {output.vcf}" +rule multisample_scatter: + """ Generate a true multisample VCF file with all samples """ + input: + gvcfs = expand("{sample}/vcf/{sample}.{{chunk}}.g.vcf.gz", sample=config["samples"]), + tbis = expand("{sample}/vcf/{sample}.{{chunk}}.g.vcf.gz.tbi", sample=config["samples"]), + ref = config["reference"] + params: + gvcf_files = lambda wc: expand("-V {sample}/vcf/{sample}.{chunk}.g.vcf.gz", sample=config["samples"], chunk=wc.chunk), + output: + multisample_vcf = temp("multisample/{chunk}.vcf.gz"), + multisample_tbi = temp("multisample/{chunk}.vcf.gz.tbi") + log: + "log/multisample.{chunk}.log" + container: + containers["gatk"] + threads: + 8 + shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar -T " + "GenotypeGVCFs -R {input.ref} " + "{params.gvcf_files} -o {output.multisample_vcf} 2> {log}" + +rule multisample_gather: + """ Gather all multisample VCFs scatters, and join them together """ + input: + vcfs = gather_multisample_vcf, + vcfs_tbi = gather_multisample_vcf_tbi + output: + vcf = "multisample/multisample.vcf.gz", + vcf_tbi = "multisample/multisample.vcf.gz.tbi" + log: + "log/multisample_gather.log" + container: + containers["bcftools"] + 
shell: + "bcftools concat {input.vcfs} --allow-overlaps " + "--output {output.vcf} --output-type z 2> {log} && " + "bcftools index --tbi --output-file {output.vcf_tbi} {output.vcf}" + rule fastqc: """Run fastqc on fastq files post pre-processing""" input: @@ -525,23 +563,3 @@ rule gvcf2coverage: containers["gvcf2coverage"] shell: "gvcf2coverage -t {wildcards.threshold} < {input} 2> {log} | cut -f 1,2,3 > {output}" - -rule multisample_vcf: - """ Generate a true multisample VCF file with all samples """ - input: - gvcfs = expand("{sample}/vcf/{sample}.g.vcf.gz", sample=config["samples"]), - tbis = expand("{sample}/vcf/{sample}.g.vcf.gz.tbi", sample=config["samples"]), - ref = config["reference"] - params: - gvcf_files = lambda wc: expand("-V {sample}/vcf/{sample}.g.vcf.gz", sample=config["samples"]), - output: - "multisample.vcf.gz" - log: - "log/multisample.log" - container: - containers["gatk"] - threads: - 8 - shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar -T " - "GenotypeGVCFs -R {input.ref} " - "{params.gvcf_files} -o '{output}'" diff --git a/common.smk b/common.smk index 7b1e9a3..5474851 100644 --- a/common.smk +++ b/common.smk @@ -141,6 +141,24 @@ def gather_vcf_tbi(wildcards): return expand("{{sample}}/vcf/{{sample}}.{i}.vcf.gz.tbi", i=glob_wildcards(os.path.join(checkpoint_output, 'scatter-{i}.bed')).i) +def gather_multisample_vcf(wildcards): + """ Gather the multisample vcf files based on the scatterregions checkpoint + This depends on the 'scatter_size' parameter and the reference genome + used + """ + checkpoint_output = checkpoints.scatterregions.get(**wildcards).output[0] + return expand("multisample/{i}.vcf.gz", + i=glob_wildcards(os.path.join(checkpoint_output, 'scatter-{i}.bed')).i) + +def gather_multisample_vcf_tbi(wildcards): + """ Gather the multisample vcf index files based on the scatterregions checkpoint + This depends on the 'scatter_size' parameter and the reference genome + used + """ + checkpoint_output =
checkpoints.scatterregions.get(**wildcards).output[0] + return expand("multisample/{i}.vcf.gz.tbi", + i=glob_wildcards(os.path.join(checkpoint_output, 'scatter-{i}.bed')).i) + def sample_cutadapt_files(wildcards): """ Determine the cutadapt log files files for a sample (one for each readgroup). diff --git a/tests/data/config/sample_config_multisample.json b/tests/data/config/sample_config_multisample.json index ccbe7cb..2b82b0e 100644 --- a/tests/data/config/sample_config_multisample.json +++ b/tests/data/config/sample_config_multisample.json @@ -22,5 +22,6 @@ "known_sites": ["tests/data/reference/database.vcf.gz"], "targetsfile": "tests/data/reference/full_chrM.bed", "baitsfile": "tests/data/reference/target_baits.bed", + "scatter_size": 1000, "multisample_vcf": true } diff --git a/tests/test_dry_run.yml b/tests/test_dry_run.yml index 9b92222..959b363 100644 --- a/tests/test_dry_run.yml +++ b/tests/test_dry_run.yml @@ -64,4 +64,4 @@ stdout: contains: - Job counts - - rule multisample_vcf + - rule multisample_gather diff --git a/tests/test_integration_run.yml b/tests/test_integration_run.yml index 2018005..d3feb81 100644 --- a/tests/test_integration_run.yml +++ b/tests/test_integration_run.yml @@ -323,5 +323,9 @@ --jobs 1 -w 120 -r -p --configfile tests/data/config/sample_config_multisample.json files: - - path: 'multisample.vcf.gz' - - path: 'multisample.vcf.gz.tbi' + - path: 'multisample/multisample.vcf.gz' + - path: 'multisample/multisample.vcf.gz.tbi' + - path: 'multisample/0.vcf.gz' + should_exist: false + - path: 'multisample/0.vcf.gz.tbi' + should_exist: false From ea939fd4a37ed2781496af8011158cbb3748ce6c Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 7 Jun 2021 14:07:23 +0200 Subject: [PATCH 5/7] Move gvcf2coverage image to quay.io Docker hub has started to remove unused images from free accounts, which means that it might remove images used by this pipeline without notice. 
Therefore the pipeline now exclusively uses images from quay.io or official repositories from docker hub, which do not have this limitation. --- common.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.smk b/common.smk index 5474851..afbad5b 100644 --- a/common.smk +++ b/common.smk @@ -12,7 +12,7 @@ containers = { 'debian': 'docker://debian:buster-slim', 'fastqc': 'docker://quay.io/biocontainers/fastqc:0.11.7--4', 'gatk': 'docker://broadinstitute/gatk3:3.7-0', - 'gvcf2coverage': 'docker://lumc/gvcf2coverage:0.1-dirty-2', + 'gvcf2coverage': 'docker://quay.io/redmar_van_den_berg/gvcf2coverage:0.1-dirty-2', 'multiqc': 'docker://quay.io/biocontainers/multiqc:1.8--py_2', 'picard': 'docker://quay.io/biocontainers/picard:2.22.8--0', 'python3': 'docker://python:3.6-slim', From 5063dbb2eda951575130a542f7eb7f1673123b6c Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 7 Jun 2021 14:11:10 +0200 Subject: [PATCH 6/7] Remove slurm status submodule --- .gitmodules | 3 --- cluster/slurm-cluster-status | 1 - 2 files changed, 4 deletions(-) delete mode 100644 .gitmodules delete mode 160000 cluster/slurm-cluster-status diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index fd20905..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "cluster/slurm-cluster-status"] - path = cluster/slurm-cluster-status - url = https://github.com/LUMC/slurm-cluster-status.git diff --git a/cluster/slurm-cluster-status b/cluster/slurm-cluster-status deleted file mode 160000 index 4dd6917..0000000 --- a/cluster/slurm-cluster-status +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4dd69175adc5360cadea79feac6386ccef41d923 From 5649dff1d3dc2c7cba7065b060140ec167ff6114 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 12 Jan 2022 13:09:25 +0100 Subject: [PATCH 7/7] Add github CI testing * Use tmp on shared filesystem for align rule * Group up tests to reduce load on test runners * Only test small-scatter on github * Check size of created files on
failure * Reduce the number of test scatters * Add new entry to changelog * Pin Snakemake version --- .github/workflows/ci.yml | 37 ++++++------------- CHANGELOG.md | 3 ++ Snakefile | 1 + environment.yml | 4 +- .../config/sample_config_multisample.json | 2 +- tests/data/config/sample_config_scatter.json | 2 +- tests/test_integration_run.yml | 6 +-- 7 files changed, 21 insertions(+), 34 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2350038..ff458f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,35 +14,11 @@ jobs: strategy: matrix: test: - - sanity-snakemake - - sanity-snakemake-lint - - sanity-singularity - - sanity-no-reference - - sanity-reference-does-not-exist - - sanity-baits-only - - sanity-targets-only - - sanity-samples-overlapping-name - - sanity-multisample - - sanity-example-config + - sanity - - dry-run-vanilla - - dry-run-target-baits - - dry-run-bed-coverage - - dry-run-multisample + - dry-run - - integration-vanilla - integration-small-scatter - - integration-refflat - - integration-all-on-target - - integration-gene-bedfile - - integration-two-known-sites - - integration-two-readgroups - - integration-two-samples - - integration-target-baits - - integration-bed-coverage - - integration-restrict-BQSR - - integration-targets-only - - integration-multisample steps: - uses: actions/checkout@v2 @@ -102,3 +78,12 @@ jobs: echo $file; cat $file done ' + + - name: Check size of created files + if: ${{ failure() }} + run: >- + bash -c ' + for file in $(find /tmp/pytest_workflow_*/${{ matrix.test}}/ -type f); do + du -sh $file + done + ' diff --git a/CHANGELOG.md b/CHANGELOG.md index 11d5dc7..0dd9a48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> +v2.2.2-dev +--------------------------- + v2.0.1 --------------------------- + `multisample_vcf` now acts on the scatters, instead of on the merged g.vcf diff --git a/Snakefile b/Snakefile index 5679d83..8d6f21e 100644 --- a/Snakefile +++ b/Snakefile @@ -121,6 +121,7 @@ rule align: "bwa mem -t {threads} -R '{params.rg}' {input.ref} " "{input.r1} {input.r2} 2> {log.bwa} | " "samtools sort " + "-T {input.tmp} " "-l {params.compression_level} " "- -o {output.bam} 2> {log.samtools};" "samtools index {output.bam}" diff --git a/environment.yml b/environment.yml index b9804d8..25c4881 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,4 @@ channels: - conda-forge dependencies: - pytest-workflow>=1.4.0 - - snakemake-minimal - - boto3 - - smart_open + - snakemake-minimal=5.31.1 diff --git a/tests/data/config/sample_config_multisample.json b/tests/data/config/sample_config_multisample.json index 2b82b0e..23a8df1 100644 --- a/tests/data/config/sample_config_multisample.json +++ b/tests/data/config/sample_config_multisample.json @@ -22,6 +22,6 @@ "known_sites": ["tests/data/reference/database.vcf.gz"], "targetsfile": "tests/data/reference/full_chrM.bed", "baitsfile": "tests/data/reference/target_baits.bed", - "scatter_size": 1000, + "scatter_size": 8000, "multisample_vcf": true } diff --git a/tests/data/config/sample_config_scatter.json b/tests/data/config/sample_config_scatter.json index 57a9cd9..f914de5 100644 --- a/tests/data/config/sample_config_scatter.json +++ b/tests/data/config/sample_config_scatter.json @@ -12,5 +12,5 @@ "reference":"tests/data/reference/ref.fa", "dbsnp": "tests/data/reference/database.vcf.gz", "known_sites": ["tests/data/reference/database.vcf.gz"], - "scatter_size": 1000 + "scatter_size": 8000 } diff --git a/tests/test_integration_run.yml b/tests/test_integration_run.yml index d3feb81..d2a8db9 100644 --- a/tests/test_integration_run.yml +++ b/tests/test_integration_run.yml @@ -71,7 +71,7 @@ - rror files: - path: scatter/scatter-0.bed 
- - path: scatter/scatter-15.bed + - path: scatter/scatter-1.bed - path: micro/vcf/micro.vcf.gz.tbi - path: micro/vcf/micro.vcf.gz contains_regex: @@ -79,14 +79,14 @@ - 'chrM\t263\t.\tA\tG\t323.*GT:AD:DP:GQ:PL\t1/1:0,108:108:99:3267,323,0' - 'chrM\t4745\t.\tA\tG\t56.*GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,134:135:99:1|1:4745_A_G:5718,407,0' - 'chrM\t4769\t.\tA\tG\t5182.*GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,120:121:99:1|1:4745_A_G:5211,363,0' - - 'chrM\t16023\t.\tG\tA\t.*GT:AD:DP:GQ:PL\t0/1:75,74:' + - 'chrM\t16023\t.\tG\tA\t.*GT:AD:DP:GQ:PL\t0/1:74,74:' - path: micro/vcf/micro.g.vcf.gz contains: - "chrM\t1\t.\tG\t\t.\t.\tEND=151\tGT:DP:GQ:MIN_DP:PL\t0/0:164:99:137:0,120,1800" - "chrM\t16560\t.\tC\t\t.\t.\tEND=16569\tGT:DP:GQ:MIN_DP:PL\t0/0:195:0:187:0,0,0" contains_regex: - 'chrM\t152\t.\tT\tC,\t3960.*GT:AD:DP:GQ:PL:SB\t1/1:0,130,0:130:99:3989,388,0,3989,388,3989:0,0,47,83' - - 'chrM\t16023\t.\tG\tA,\t.*GT:AD:DP:GQ:PL:SB\t0/1:75,74,0:' + - 'chrM\t16023\t.\tG\tA,\t.*GT:AD:DP:GQ:PL:SB\t0/1:74,74,0:' - path: micro/vcf/micro.g.vcf.gz.tbi - path: micro/vcf/micro.0.vcf.gz should_exist: false