diff --git a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.rev.vcf b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.rev.vcf deleted file mode 100644 index 3c9af86..0000000 --- a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.rev.vcf +++ /dev/null @@ -1,164 +0,0 @@ -##contig= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 37 1 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 37 2 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 37 3 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 4 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 5 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 6 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 7 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 8 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 9 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 40 10 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 40 11 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 40 12 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 41 13 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 41 14 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 41 15 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 16 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 17 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 18 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 19 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 20 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 21 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 22 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 23 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 24 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 25 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 26 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 27 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 28 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 29 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 30 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 60 31 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 60 32 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 60 33 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 61 34 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 61 35 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 61 36 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 37 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 38 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 39 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 40 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 41 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 42 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 43 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 44 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 45 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 46 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 47 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 48 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 49 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 50 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 51 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 80 52 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 80 53 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 80 54 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 81 55 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 81 56 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 81 57 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 82 58 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 82 59 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 82 60 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 83 61 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 83 62 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 83 63 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 106 64 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 106 65 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 106 66 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 67 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 68 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 69 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 70 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 71 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 72 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 109 73 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 109 74 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 109 75 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 76 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 77 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 78 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 111 79 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 111 80 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 111 81 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 112 82 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 112 83 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 112 84 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 85 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 86 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 87 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 88 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 89 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 90 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 91 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 92 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 93 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 94 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 95 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 96 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 97 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 98 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 99 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 100 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 101 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 102 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 103 GT AC . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:2:1:2:2:0:0.0:0:FP -ref 140 104 GT G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 160 105 A AG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:1:1:1:2:1:0.5:0:Partial_TP -ref 160 106 A AT . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:2:2:1.0:0:TP -ref 160 107 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 160 108 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 161 109 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 110 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 111 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 112 AAC ACGTACGGGTGGTGTGTTTGAAAGATAG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:2:0:4:28:1:0.03571:0:Partial_TP -ref 161 113 AACAAGACGTCCTCT ACG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:2:0:3:16:1:0.0625:0:Partial_TP -ref 162 114 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 162 115 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 162 116 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 117 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 118 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 119 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 120 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 121 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 122 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 230 123 TG T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 250 124 . G . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_NO_REF_SEQ:NO_REF_SEQ -ref 250 125 C G . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_REF_STRING_MISMATCH:REF_STRING_MISMATCH -ref 250 126 G . . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_NO_ALTS:NO_ALTS -ref 250 127 G C . FILTER_X . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 250 128 G C . MISMAPPED_UNPLACEABLE . GT:EXPECT:VFR_FILTER 1/1:TP:MISMAPPED_UNPLACEABLE -ref 250 129 G C . PASS . EXPECT:VFR_FILTER VFR_FILTER_NO_GT:NO_GT -ref 250 130 G C . PASS . GT:EXPECT:VFR_FILTER ./.:VFR_FILTER_CANNOT_USE_GT:CANNOT_USE_GT -ref 250 131 G C . PASS . GT:EXPECT:VFR_FILTER 0/1:VFR_FILTER_CANNOT_USE_GT:CANNOT_USE_GT -ref 298 132 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 298 133 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 298 134 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 135 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 136 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 137 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 138 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 139 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 140 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 141 ATGCGTATCG AGGTAACCCC . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:7:8:5:10:5:0.5:0:Partial_TP -ref 300 142 ATGCGTATCG AGGTACGAGA . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:8:8:0:10:10:1.0:0:TP -ref 301 143 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 301 144 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 302 145 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 302 146 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 302 147 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 303 148 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 303 149 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 304 150 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 304 151 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 400 152 G AAAAA . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_RESULT:VFR_ED_SCORE 1/1:FP_PROBE_UNMAPPED:PASS:2:FP_PROBE_UNMAPPED:0 diff --git a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.vcf b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.vcf index a863576..7debd80 100644 --- a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.vcf +++ b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.expect.vcf @@ -1,5 +1,4 @@ ##contig= -##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= @@ -10,155 +9,17 @@ ##FORMAT= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 37 1 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 37 2 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 37 3 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 4 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 5 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 38 6 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 7 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 8 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 39 9 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 40 10 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 40 11 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 40 12 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 41 13 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 41 14 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 41 15 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 16 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 17 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 42 18 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 19 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 20 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 43 21 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 22 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 23 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 57 24 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 25 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 26 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 58 27 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 28 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 29 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 59 30 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 60 31 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 60 32 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 60 33 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 61 34 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 61 35 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 61 36 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 37 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 38 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 62 39 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 40 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 41 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 63 42 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 43 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 44 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 77 45 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 46 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 47 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 78 48 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 49 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 50 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 79 51 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 80 52 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 80 53 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 80 54 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 81 55 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 81 56 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 81 57 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 82 58 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 82 59 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 82 60 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 83 61 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 83 62 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 83 63 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:1:FP -ref 106 64 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 106 65 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 106 66 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 67 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 68 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 107 69 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 70 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 71 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 108 72 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 109 73 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 109 74 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 109 75 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 76 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 77 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 110 78 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 111 79 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 111 80 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 111 81 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 112 82 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 112 83 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 112 84 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 85 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 86 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 113 87 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 88 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 89 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 114 90 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 91 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 92 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 137 93 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 94 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 95 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 138 96 T G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 97 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 98 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 139 99 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 100 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 101 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 102 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 140 103 GT AC . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:2:1:2:2:0:0.0:0:FP -ref 140 104 GT G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 160 105 A AG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:1:1:1:2:1:0.5:0:Partial_TP -ref 160 106 A AT . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:2:2:1.0:0:TP -ref 160 107 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 160 108 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 161 109 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 110 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 111 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 161 112 AAC ACGTACGGGTGGTGTGTTTGAAAGATAG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:2:0:4:28:1:0.03571:0:Partial_TP -ref 161 113 AACAAGACGTCCTCT ACG . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:2:0:2:16:1:0.0625:0:Partial_TP -ref 162 114 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 162 115 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 162 116 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 117 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 118 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 163 119 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 120 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 121 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 164 122 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 230 123 TG T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 250 124 . G . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_NO_REF_SEQ:NO_REF_SEQ -ref 250 125 C G . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_REF_STRING_MISMATCH:REF_STRING_MISMATCH -ref 250 126 G . . PASS . GT:EXPECT:VFR_FILTER 1/1:VFR_FILTER_NO_ALTS:NO_ALTS -ref 250 127 G C . FILTER_X . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:1:1:0:1:1:1.0:0:TP -ref 250 128 G C . MISMAPPED_UNPLACEABLE . GT:EXPECT:VFR_FILTER 1/1:TP:MISMAPPED_UNPLACEABLE -ref 250 129 G C . PASS . EXPECT:VFR_FILTER VFR_FILTER_NO_GT:NO_GT -ref 250 130 G C . PASS . GT:EXPECT:VFR_FILTER ./.:VFR_FILTER_CANNOT_USE_GT:CANNOT_USE_GT -ref 250 131 G C . PASS . GT:EXPECT:VFR_FILTER 0/1:VFR_FILTER_CANNOT_USE_GT:CANNOT_USE_GT -ref 298 132 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 298 133 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 298 134 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 135 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 136 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 299 137 C T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 138 A C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 139 A G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 140 A T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 300 141 ATGCGTATCG AGGTAACCCC . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:FAIL_CONFLICT:7:8:5:10:5:0.5:0:Partial_TP -ref 300 142 ATGCGTATCG AGGTACGAGA . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:FAIL_CONFLICT:8:8:0:10:10:1.0:0:TP -ref 301 143 T A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 301 144 T C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 302 145 G A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 302 146 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 302 147 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:0:1:1:0:0.0:0:FP -ref 303 148 C A . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 303 149 C G . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 304 150 G C . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 304 151 G T . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:FAIL_CONFLICT:1:1:1:1:0:0.0:0:FP -ref 400 152 G AAAAA . PASS . GT:EXPECT:VFR_FILTER:VFR_ED_RA:VFR_RESULT:VFR_ED_SCORE 1/1:FP_PROBE_UNMAPPED:PASS:2:FP_PROBE_UNMAPPED:0 +ref 40 1 A G . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 60 2 A G . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 80 3 T C . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 109 4 G C . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 110 5 C T . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 111 6 A G . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 140 7 GT AC . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:2:1:2:2:0:0.0:0:FP +ref 140 8 GT G . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:1:0:1:1:1.0:0:TP +ref 160 9 A AG . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:1:0:1:2:1:0.5:0:Partial_TP +ref 160 10 A AT . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:1:0:0:2:2:1.0:0:TP +ref 230 11 TG T . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:FP:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH +ref 300 12 ATGCGTATCG AGGTAACCCC . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:Partial_TP:7:8:5:10:5:0.5:0:Partial_TP +ref 300 13 ATGCGTATCG AGGTACGAGA . PASS . GT:EXPECT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:TP:8:8:0:10:10:1.0:0:TP +ref 400 14 G AAAAA . PASS . GT:EXPECT:VFR_ED_RA:VFR_RESULT:VFR_ED_SCORE 1/1:FP_PROBE_UNMAPPED:2:FP_PROBE_UNMAPPED:0 diff --git a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.in.vcf b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.in.vcf index ba64057..9369b95 100644 --- a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.in.vcf +++ b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.in.vcf @@ -1,154 +1,16 @@ ##contig= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 37 1 T A . PASS . GT:EXPECT 1/1:FP -ref 37 2 T C . PASS . GT:EXPECT 1/1:FP -ref 37 3 T G . PASS . GT:EXPECT 1/1:FP -ref 38 4 G A . PASS . GT:EXPECT 1/1:FP -ref 38 5 G C . PASS . GT:EXPECT 1/1:FP -ref 38 6 G T . PASS . GT:EXPECT 1/1:FP -ref 39 7 T A . PASS . GT:EXPECT 1/1:FP -ref 39 8 T C . PASS . GT:EXPECT 1/1:FP -ref 39 9 T G . PASS . GT:EXPECT 1/1:FP -ref 40 10 A C . PASS . GT:EXPECT 1/1:FP -ref 40 11 A G . PASS . GT:EXPECT 1/1:TP -ref 40 12 A T . PASS . GT:EXPECT 1/1:FP -ref 41 13 C A . PASS . GT:EXPECT 1/1:FP -ref 41 14 C G . PASS . GT:EXPECT 1/1:FP -ref 41 15 C T . PASS . GT:EXPECT 1/1:FP -ref 42 16 A C . PASS . GT:EXPECT 1/1:FP -ref 42 17 A G . PASS . GT:EXPECT 1/1:FP -ref 42 18 A T . PASS . GT:EXPECT 1/1:FP -ref 43 19 G A . PASS . GT:EXPECT 1/1:FP -ref 43 20 G C . PASS . GT:EXPECT 1/1:FP -ref 43 21 G T . PASS . GT:EXPECT 1/1:FP -ref 57 22 G A . PASS . GT:EXPECT 1/1:FP -ref 57 23 G C . PASS . GT:EXPECT 1/1:FP -ref 57 24 G T . PASS . GT:EXPECT 1/1:FP -ref 58 25 C A . PASS . GT:EXPECT 1/1:FP -ref 58 26 C G . PASS . GT:EXPECT 1/1:FP -ref 58 27 C T . PASS . GT:EXPECT 1/1:FP -ref 59 28 A C . PASS . GT:EXPECT 1/1:FP -ref 59 29 A G . PASS . GT:EXPECT 1/1:FP -ref 59 30 A T . PASS . GT:EXPECT 1/1:FP -ref 60 31 A C . PASS . GT:EXPECT 1/1:FP -ref 60 32 A G . PASS . GT:EXPECT 1/1:TP -ref 60 33 A T . PASS . GT:EXPECT 1/1:FP -ref 61 34 A C . PASS . GT:EXPECT 1/1:FP -ref 61 35 A G . PASS . GT:EXPECT 1/1:FP -ref 61 36 A T . PASS . GT:EXPECT 1/1:FP -ref 62 37 G A . PASS . GT:EXPECT 1/1:FP -ref 62 38 G C . PASS . GT:EXPECT 1/1:FP -ref 62 39 G T . PASS . GT:EXPECT 1/1:FP -ref 63 40 A C . PASS . GT:EXPECT 1/1:FP -ref 63 41 A G . PASS . GT:EXPECT 1/1:FP -ref 63 42 A T . PASS . GT:EXPECT 1/1:FP -ref 77 43 G A . PASS . GT:EXPECT 1/1:FP -ref 77 44 G C . PASS . GT:EXPECT 1/1:FP -ref 77 45 G T . PASS . GT:EXPECT 1/1:FP -ref 78 46 C A . PASS . GT:EXPECT 1/1:FP -ref 78 47 C G . PASS . GT:EXPECT 1/1:FP -ref 78 48 C T . PASS . GT:EXPECT 1/1:FP -ref 79 49 C A . PASS . GT:EXPECT 1/1:FP -ref 79 50 C G . PASS . GT:EXPECT 1/1:FP -ref 79 51 C T . PASS . GT:EXPECT 1/1:FP -ref 80 52 T A . PASS . GT:EXPECT 1/1:FP -ref 80 53 T C . PASS . GT:EXPECT 1/1:TP -ref 80 54 T G . PASS . GT:EXPECT 1/1:FP -ref 81 55 C A . PASS . GT:EXPECT 1/1:FP -ref 81 56 C G . PASS . GT:EXPECT 1/1:FP -ref 81 57 C T . PASS . GT:EXPECT 1/1:FP -ref 82 58 C A . PASS . GT:EXPECT 1/1:FP -ref 82 59 C G . PASS . GT:EXPECT 1/1:FP -ref 82 60 C T . PASS . GT:EXPECT 1/1:FP -ref 83 61 G A . PASS . GT:EXPECT 1/1:FP -ref 83 62 G C . PASS . GT:EXPECT 1/1:FP -ref 83 63 G T . PASS . GT:EXPECT 1/1:FP -ref 106 64 G A . PASS . GT:EXPECT 1/1:FP -ref 106 65 G C . PASS . GT:EXPECT 1/1:FP -ref 106 66 G T . PASS . GT:EXPECT 1/1:FP -ref 107 67 T A . PASS . GT:EXPECT 1/1:FP -ref 107 68 T C . PASS . GT:EXPECT 1/1:FP -ref 107 69 T G . PASS . GT:EXPECT 1/1:FP -ref 108 70 C A . PASS . GT:EXPECT 1/1:FP -ref 108 71 C G . PASS . GT:EXPECT 1/1:FP -ref 108 72 C T . PASS . GT:EXPECT 1/1:FP -ref 109 73 G A . PASS . GT:EXPECT 1/1:FP -ref 109 74 G C . PASS . GT:EXPECT 1/1:TP -ref 109 75 G T . PASS . GT:EXPECT 1/1:FP -ref 110 76 C A . PASS . GT:EXPECT 1/1:FP -ref 110 77 C G . PASS . GT:EXPECT 1/1:FP -ref 110 78 C T . PASS . GT:EXPECT 1/1:TP -ref 111 79 A C . PASS . GT:EXPECT 1/1:FP -ref 111 80 A G . PASS . GT:EXPECT 1/1:TP -ref 111 81 A T . PASS . GT:EXPECT 1/1:FP -ref 112 82 T A . PASS . GT:EXPECT 1/1:FP -ref 112 83 T C . PASS . GT:EXPECT 1/1:FP -ref 112 84 T G . PASS . GT:EXPECT 1/1:FP -ref 113 85 A C . PASS . GT:EXPECT 1/1:FP -ref 113 86 A G . PASS . GT:EXPECT 1/1:FP -ref 113 87 A T . PASS . GT:EXPECT 1/1:FP -ref 114 88 G A . PASS . GT:EXPECT 1/1:FP -ref 114 89 G C . PASS . GT:EXPECT 1/1:FP -ref 114 90 G T . PASS . GT:EXPECT 1/1:FP -ref 137 91 C A . PASS . GT:EXPECT 1/1:FP -ref 137 92 C G . PASS . GT:EXPECT 1/1:FP -ref 137 93 C T . PASS . GT:EXPECT 1/1:FP -ref 138 94 T A . PASS . GT:EXPECT 1/1:FP -ref 138 95 T C . PASS . GT:EXPECT 1/1:FP -ref 138 96 T G . PASS . GT:EXPECT 1/1:FP -ref 139 97 C A . PASS . GT:EXPECT 1/1:FP -ref 139 98 C G . PASS . GT:EXPECT 1/1:FP -ref 139 99 C T . PASS . GT:EXPECT 1/1:FP -ref 140 100 G A . PASS . GT:EXPECT 1/1:FP -ref 140 101 G C . PASS . GT:EXPECT 1/1:FP -ref 140 102 G T . PASS . GT:EXPECT 1/1:FP -ref 140 103 GT AC . PASS . GT:EXPECT 1/1:FP -ref 140 104 GT G . PASS . GT:EXPECT 1/1:TP -ref 160 105 A AG . PASS . GT:EXPECT 1/1:Partial_TP -ref 160 106 A AT . PASS . GT:EXPECT 1/1:TP -ref 160 107 A C . PASS . GT:EXPECT 1/1:FP -ref 160 108 A G . PASS . GT:EXPECT 1/1:FP -ref 161 109 A C . PASS . GT:EXPECT 1/1:FP -ref 161 110 A G . PASS . GT:EXPECT 1/1:FP -ref 161 111 A T . PASS . GT:EXPECT 1/1:FP -ref 161 112 AAC ACGTACGGGTGGTGTGTTTGAAAGATAG . PASS . GT:EXPECT 1/1:Partial_TP -ref 161 113 AACAAGACGTCCTCT ACG . PASS . GT:EXPECT 1/1:Partial_TP -ref 162 114 A C . PASS . GT:EXPECT 1/1:FP -ref 162 115 A G . PASS . GT:EXPECT 1/1:FP -ref 162 116 A T . PASS . GT:EXPECT 1/1:FP -ref 163 117 C A . PASS . GT:EXPECT 1/1:FP -ref 163 118 C G . PASS . GT:EXPECT 1/1:FP -ref 163 119 C T . PASS . GT:EXPECT 1/1:FP -ref 164 120 A C . PASS . GT:EXPECT 1/1:FP -ref 164 121 A G . PASS . GT:EXPECT 1/1:FP -ref 164 122 A T . PASS . GT:EXPECT 1/1:FP -ref 230 123 TG T . PASS . GT:EXPECT 1/1:FP -ref 250 124 . G . PASS . GT:EXPECT 1/1:VFR_FILTER_NO_REF_SEQ -ref 250 125 C G . PASS . GT:EXPECT 1/1:VFR_FILTER_REF_STRING_MISMATCH -ref 250 126 G . . PASS . GT:EXPECT 1/1:VFR_FILTER_NO_ALTS -ref 250 127 G C . FILTER_X . GT:EXPECT 1/1:TP -ref 250 128 G C . MISMAPPED_UNPLACEABLE . GT:EXPECT 1/1:TP -ref 250 129 G C . PASS . EXPECT VFR_FILTER_NO_GT -ref 250 130 G C . PASS . GT:EXPECT ./.:VFR_FILTER_CANNOT_USE_GT -ref 250 131 G C . PASS . GT:EXPECT 0/1:VFR_FILTER_CANNOT_USE_GT -ref 298 132 G A . PASS . GT:EXPECT 1/1:FP -ref 298 133 G C . PASS . GT:EXPECT 1/1:FP -ref 298 134 G T . PASS . GT:EXPECT 1/1:FP -ref 299 135 C A . PASS . GT:EXPECT 1/1:FP -ref 299 136 C G . PASS . GT:EXPECT 1/1:FP -ref 299 137 C T . PASS . GT:EXPECT 1/1:FP -ref 300 138 A C . PASS . GT:EXPECT 1/1:FP -ref 300 139 A G . PASS . GT:EXPECT 1/1:FP -ref 300 140 A T . PASS . GT:EXPECT 1/1:FP -ref 300 141 ATGCGTATCG AGGTAACCCC . PASS . GT:EXPECT 1/1:Partial_TP -ref 300 142 ATGCGTATCG AGGTACGAGA . PASS . GT:EXPECT 1/1:TP -ref 301 143 T A . PASS . GT:EXPECT 1/1:FP -ref 301 144 T C . PASS . GT:EXPECT 1/1:FP -ref 302 145 G A . PASS . GT:EXPECT 1/1:FP -ref 302 146 G C . PASS . GT:EXPECT 1/1:FP -ref 302 147 G T . PASS . GT:EXPECT 1/1:FP -ref 303 148 C A . PASS . GT:EXPECT 1/1:FP -ref 303 149 C G . PASS . GT:EXPECT 1/1:FP -ref 304 150 G C . PASS . GT:EXPECT 1/1:FP -ref 304 151 G T . PASS . GT:EXPECT 1/1:FP -ref 400 152 G AAAAA . PASS . GT:EXPECT 1/1:FP_PROBE_UNMAPPED +ref 40 1 A G . PASS . GT:EXPECT 1/1:TP +ref 60 2 A G . PASS . GT:EXPECT 1/1:TP +ref 80 3 T C . PASS . GT:EXPECT 1/1:TP +ref 109 4 G C . PASS . GT:EXPECT 1/1:TP +ref 110 5 C T . PASS . GT:EXPECT 1/1:TP +ref 111 6 A G . PASS . GT:EXPECT 1/1:TP +ref 140 7 GT AC . PASS . GT:EXPECT 1/1:FP +ref 140 8 GT G . PASS . GT:EXPECT 1/1:TP +ref 160 9 A AG . PASS . GT:EXPECT 1/1:Partial_TP +ref 160 10 A AT . PASS . GT:EXPECT 1/1:TP +ref 230 11 TG T . PASS . GT:EXPECT 1/1:FP +ref 300 12 ATGCGTATCG AGGTAACCCC . PASS . GT:EXPECT 1/1:Partial_TP +ref 300 13 ATGCGTATCG AGGTACGAGA . PASS . GT:EXPECT 1/1:TP +ref 400 14 G AAAAA . PASS . GT:EXPECT 1/1:FP_PROBE_UNMAPPED diff --git a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.fa b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.fa index 9aa7e68..dbd2bc0 100644 --- a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.fa +++ b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.fa @@ -3,7 +3,7 @@ CTGACTGGCCGAATAGGTCAGATATAGGCAACGACATGTGCAGTGCGGCGACCCTTGCAG AGACAGTGACGCTTTCGCCCCCGTTGCCTAAACCTATTTGAAGGAGTCCTGTAGCAGCCG CAGTAAGGCACAATACCTCGGTCCGTGTTACCAGACCAATAACAAGACGTCCTCTTCAAT GTTTAAATGACCCTCTCGTCATAAAACCTTTCTACTATGTGTTCCGCAATGATCAACAAC -TACAATGGCCCGTCGTGAATAACGCGACGGCTGAGACGAACGGCGCGTGAATGAAGCGCA +TACAATGGCGCGTCGTGAATAACGCGACGGCTGAGACGAACGGCGCGTGAATGAAGCGCA GGTACGAGATTAAACAGCTCAGGAGCCAGTTTTCCAATCCTACATCTGTTTCTTGCGTCG TAGCGGGACCCTCCATTGTTACTTATTAGGTTCTCGTTATGTCTCATAATCTCAGTGCTG GTGTGATAAG diff --git a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.revcomp.fa b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.revcomp.fa index 2736c70..e2c7210 100644 --- a/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.revcomp.fa +++ b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping.truth.revcomp.fa @@ -2,7 +2,7 @@ CTTATCACACCAGCACTGAGATTATGAGACATAACGAGAACCTAATAAGTAACAATGGAG GGTCCCGCTACGACGCAAGAAACAGATGTAGGATTGGAAAACTGGCTCCTGAGCTGTTTA ATCTCGTACCTGCGCTTCATTCACGCGCCGTTCGTCTCAGCCGTCGCGTTATTCACGACG -GGCCATTGTAGTTGTTGATCATTGCGGAACACATAGTAGAAAGGTTTTATGACGAGAGGG +CGCCATTGTAGTTGTTGATCATTGCGGAACACATAGTAGAAAGGTTTTATGACGAGAGGG TCATTTAAACATTGAAGAGGACGTCTTGTTATTGGTCTGGTAACACGGACCGAGGTATTG TGCCTTACTGCGGCTGCTACAGGACTCCTTCAAATAGGTTTAGGCAACGGGGGCGAAAGC GTCACTGTCTCTGCAAGGGTCGCCGCACTGCACATGTCGTTGCCTATATCTGACCTATTC diff --git a/tests/data/probe_mapping/make_test_data.py b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping_make_data.py similarity index 82% rename from tests/data/probe_mapping/make_test_data.py rename to tests/data/probe_mapping/annotate_vcf_with_probe_mapping_make_data.py index 7dd393f..344448d 100755 --- a/tests/data/probe_mapping/make_test_data.py +++ b/tests/data/probe_mapping/annotate_vcf_with_probe_mapping_make_data.py @@ -97,9 +97,11 @@ def add_fp_snps(positions, vcf_lines, ref_seq, truth_seq): # These two make sure we hit lines in the code where the cigar string has I or D # while the probe position that is tracked is inside the allele of interest. -# The other variants in this test file don't fo that. -vcf_lines.append(["ref", 161, ".", "".join(ref_seq[160:163]), "ACGTACGGGTGGTGTGTTTGAAAGATAG", ".", "PASS", ".", "GT:EXPECT", "1/1:Partial_TP"]) -vcf_lines.append(["ref", 161, ".", "".join(ref_seq[160:175]), "ACG", ".", "PASS", ".", "GT:EXPECT", "1/1:Partial_TP"]) +# The other variants in this test file don't do that. +# EDIT: have to remove these because they mess up the previous variants at position 160. +# Added this case into the cluster_snp_indel test instead +#vcf_lines.append(["ref", 161, ".", "".join(ref_seq[160:163]), "ACGTACGGGTGGTGTGTTTGAAAGATAG", ".", "PASS", ".", "GT:EXPECT", "1/1:Partial_TP"]) +#vcf_lines.append(["ref", 161, ".", "".join(ref_seq[160:175]), "ACG", ".", "PASS", ".", "GT:EXPECT", "1/1:Partial_TP"]) fp_positions.extend(range(159, 164)) assert len(ref_seq) == len(truth_seq) == 299 @@ -131,24 +133,17 @@ def add_fp_snps(positions, vcf_lines, ref_seq, truth_seq): ref_seq[230] = truth_seq[230] = "G" vcf_lines.append(["ref", 230, ".", "TG", "T", ".", "PASS", ".", "GT:EXPECT", "1/1:FP"]) - # SNP so near the end that probe won't map vcf_lines.append(["ref", 400, ".", ref_seq[399], "AAAAA", ".", "PASS", ".", "GT:EXPECT", "1/1:FP_PROBE_UNMAPPED"]) -# Add in records that can't be evaluated for various reasons -ref_seq[249] = "G" -truth_seq[249] = "C" -vcf_lines.append(["ref", 250, ".", "G", "C", ".", "FILTER_X", ".", "GT:EXPECT", "1/1:TP"]) -vcf_lines.append(["ref", 250, ".", "G", "C", ".", "MISMAPPED_UNPLACEABLE", ".", "GT:EXPECT", "1/1:TP"]) -vcf_lines.append(["ref", 250, ".", "G", "C", ".", "PASS", ".", "GT:EXPECT", "0/1:VFR_FILTER_CANNOT_USE_GT"]) -vcf_lines.append(["ref", 250, ".", "G", "C", ".", "PASS", ".", "GT:EXPECT", "./.:VFR_FILTER_CANNOT_USE_GT"]) -vcf_lines.append(["ref", 250, ".", "C", "G", ".", "PASS", ".", "GT:EXPECT", "1/1:VFR_FILTER_REF_STRING_MISMATCH"]) -vcf_lines.append(["ref", 250, ".", "G", "C", ".", "PASS", ".", "EXPECT", "VFR_FILTER_NO_GT"]) -vcf_lines.append(["ref", 250, ".", "G", ".", ".", "PASS", ".", "GT:EXPECT", "1/1:VFR_FILTER_NO_ALTS"]) -vcf_lines.append(["ref", 250, ".", ".", "G", ".", "PASS", ".", "GT:EXPECT", "1/1:VFR_FILTER_NO_REF_SEQ"]) - - -add_fp_snps(fp_positions, vcf_lines, ref_seq, truth_seq) +# We now have an underlying assumption that the input VCF does not contain a +# large number of errors, allowing us to apply called variants in the flanks +# of probes. This is to correctly call clusters of variants. This means that +# we can no longer add in all these FP SNPs at every position in this test - +# it results in flanks having junk and messing up the TP/FP call that +# we're testing. So don't do it for now (but leave here commented out in case +# we change the method again the future). +#add_fp_snps(fp_positions, vcf_lines, ref_seq, truth_seq) with open("annotate_vcf_with_probe_mapping.in.vcf", "w") as f: diff --git a/tests/data/probe_mapping/clustered_snp_indel.expect.vcf b/tests/data/probe_mapping/clustered_snp_indel.expect.vcf new file mode 100644 index 0000000..a045bf4 --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel.expect.vcf @@ -0,0 +1,32 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##contig= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +NC_000962 1001 . A T . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1004 . T C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1005 . GGAGA G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1013 . G C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1015 . A AC . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:2:2:1.0:0:TP +NC_000962 1019 . C G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1022 . T TAG . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:3:3:1.0:0:TP +NC_000962 1024 . T TGC . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:3:3:1.0:0:TP +NC_000962 1026 . G A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1028 . GC G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1033 . A T . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1035 . A G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1038 . A C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1041 . G A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1044 . G A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1045 . G C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1046 . A T . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +NC_000962 1048 . C CG . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:2:2:1.0:0:TP diff --git a/tests/data/probe_mapping/clustered_snp_indel.in.vcf b/tests/data/probe_mapping/clustered_snp_indel.in.vcf new file mode 100644 index 0000000..138a4ba --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel.in.vcf @@ -0,0 +1,23 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +NC_000962 1001 . A T . PASS . GT 1/1 +NC_000962 1004 . T C . PASS . GT 1/1 +NC_000962 1005 . GGAGA G . PASS . GT 1/1 +NC_000962 1013 . G C . PASS . GT 1/1 +NC_000962 1015 . A AC . PASS . GT 1/1 +NC_000962 1019 . C G . PASS . GT 1/1 +NC_000962 1022 . T TAG . PASS . GT 1/1 +NC_000962 1024 . T TGC . PASS . GT 1/1 +NC_000962 1026 . G A . PASS . GT 1/1 +NC_000962 1028 . GC G . PASS . GT 1/1 +NC_000962 1033 . A T . PASS . GT 1/1 +NC_000962 1035 . A G . PASS . GT 1/1 +NC_000962 1038 . A C . PASS . GT 1/1 +NC_000962 1041 . G A . PASS . GT 1/1 +NC_000962 1044 . G A . PASS . GT 1/1 +NC_000962 1045 . G C . PASS . GT 1/1 +NC_000962 1046 . A T . PASS . GT 1/1 +NC_000962 1048 . C CG . PASS . GT 1/1 diff --git a/tests/data/probe_mapping/clustered_snp_indel.ref.fa b/tests/data/probe_mapping/clustered_snp_indel.ref.fa new file mode 100644 index 0000000..5e27d67 --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel.ref.fa @@ -0,0 +1,35 @@ +>NC_000962 +TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTT +AACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTG +ACCCCTCAGCAAAGGGCTTGGCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTT +GCTCTGTTATCCGTGCCGAGCAGCTTTGTCCAAAACGAAATCGAGCGCCATCTGCGGGCC +CCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGATCCAACTCGGGGTCCGCATC +GCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGAAAATCCTGCT +ACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG +GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCACAATACCGATTCC +GCTACCGCTGGCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGC +GCCTCCAACCGGTTCGCGCACGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCT +TACAACCCCCTGTTCATCTGGGGCGAGTCCGGTCTCGGCAAGACACACCTGCTACACGCG +GCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGGTCAAATATGTCTCCACCGAG +GAATTCACCAACGACTTCATTAACTCGCTCCGCGATGACCGCAAGGTCGCATTCAAACGC +AGCTACCGCGACGTAGACGTGCTGTTGGTCGACGACATCCAATTCATTGAAGGCAAAGAG +GGTATTCAAGAGGAGTTCTTCCACACCTTCAACACCTTGCACAATGCCAACAAGCAAATC +GTCATCTCATCTGACCGCCCACCCAAGCAGCTCGCCACCCTCGAGGACCGGCTGAGAACC +CGCTTTGAGTGGGGGCTGATCACTGACGTACAACCACCCGAGCTGGAGACCCGCATCGCC +ATCTTGCGCAAGAAAGCACAGATGGAACGGCTCGCGGTCCCCGACGATGTCCTCGAACTC +ATCGCCAGCAGTATCGAACGCAATATCCGTGAACTCGAGGGCGCGCTGATCCGGGTCACC +GCGTTCGCCTCATTGAACAAAACACCAATCGACAAAGCGCTGGCCGAGATTGTGCTTCGC +GATCTGATCGCCGACGCCAACACCATGCAAATCAGCGCGGCGACGATCATGGCTGCCACC +GCCGAATACTTCGACACTACCGTCGAAGAGCTTCGCGGGCCCGGCAAGACCCGAGCACTG +GCCCAGTCACGACAGATTGCGATGTACCTGTGTCGTGAGCTCACCGATCTTTCGTTGCCC +AAAATCGGCCAAGCGTTCGGCCGTGATCACACAACCGTCATGTACGCCCAACGCAAGATC +CTGTCCGAGATGGCCGAGCGCCGTGAGGTCTTTGATCACGTCAAAGAACTCACCACTCGC +ATCCGTCAGCGCTCCAAGCGCTAGCACGGCGTGTTCTTCCGACAACGTTCTTAAAAAAAC +TTCTCTCTCCCAGGTCACACCAGTCACAGAGATTGGCTGTGAGTGTCGCTGTGCACAAAC +CGCGCACAGACTCATACAGTCCCGGCGGTTCCGTTCACAACCCACGCCTCATCCCCACCG +ACCCAACACACACCCCACAGTCATCGCCACCGTCATCCACAACTCCGACCGACGTCGACC +TGCACCAAGACCAGACTGTCCCCAAACTGCACACCCTCTAATACTGTTACCGAGATTTCT +TCGTCGTTTGTTCTTGGAAAGACAGCGCTGGGGATCGTTCGCTGGATACCACCCGCATAA +CTGGCTCGTCGCGGTGGGTCAGAGGTCAATGATGAACTTTCAAGTTGACGTGAGAAGCTC +TACGGTTGTTGTTCGACTGCTGTTGCGGCCGTCGTGGCGGGTCACGCGTCATGGGCATTC +GTCGTTGGCAGTCCCCACGC diff --git a/tests/data/probe_mapping/clustered_snp_indel.truth.fa b/tests/data/probe_mapping/clustered_snp_indel.truth.fa new file mode 100644 index 0000000..d716821 --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel.truth.fa @@ -0,0 +1,35 @@ +>truth +TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTT +AACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTG +ACCCCTCAGCAAAGGGCTTGGCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTT +GCTCTGTTATCCGTGCCGAGCAGCTTTGTCCAAAACGAAATCGAGCGCCATCTGCGGGCC +CCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGATCCAACTCGGGGTCCGCATC +GCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGAAAATCCTGCT +ACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG +GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCACAATACCGATTCC +GCTACCGCTGGCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGC +GCCTCCAACCGGTTCGCGCACGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCT +TACAACCCCCTGTTCATCTGGGGCGAGTCCGGTCTCGGCAAGACACACCTGCTACACGCG +GCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGGTCAAATATGTCTCCACCGAG +GAATTCACCAACGACTTCATTAACTCGCTCCGCGATGACCGCAAGGTCGCATTCAAACGC +AGCTACCGCGACGTAGACGTGCTGTTGGTCGACGACATCCAATTCATTGAAGGCAAAGAG +GGTATTCAAGAGGAGTTCTTCCACACCTTCAACACCTTGCACAATGCCAACAAGCAAATC +GTCATCTCATCTGACCGCCCACCCAAGCAGCTCGCCACCCTCGAGGACCGGCTGAGAACC +CGCTTTGAGTGGGGGCTGATCACTGACGTATGAACCACCCGTGCCGCCCCCACTCGGCAT +AGCTGCTACGAAGTAGGCCCAAATACTACGGGCTCGGGTCCCCGACGATGTCCTCGAACT +CATCGCCAGCAGTATCGAACGCAATATCCGTGAACTCGAGGGCGCGCTGATCCGGGTCAC +CGCGTTCGCCTCATTGAACAAAACACCAATCGACAAAGCGCTGGCCGAGATTGTGCTTCG +CGATCTGATCGCCGACGCCAACACCATGCAAATCAGCGCGGCGACGATCATGGCTGCCAC +CGCCGAATACTTCGACACTACCGTCGAAGAGCTTCGCGGGCCCGGCAAGACCCGAGCACT +GGCCCAGTCACGACAGATTGCGATGTACCTGTGTCGTGAGCTCACCGATCTTTCGTTGCC +CAAAATCGGCCAAGCGTTCGGCCGTGATCACACAACCGTCATGTACGCCCAACGCAAGAT +CCTGTCCGAGATGGCCGAGCGCCGTGAGGTCTTTGATCACGTCAAAGAACTCACCACTCG +CATCCGTCAGCGCTCCAAGCGCTAGCACGGCGTGTTCTTCCGACAACGTTCTTAAAAAAA +CTTCTCTCTCCCAGGTCACACCAGTCACAGAGATTGGCTGTGAGTGTCGCTGTGCACAAA +CCGCGCACAGACTCATACAGTCCCGGCGGTTCCGTTCACAACCCACGCCTCATCCCCACC +GACCCAACACACACCCCACAGTCATCGCCACCGTCATCCACAACTCCGACCGACGTCGAC +CTGCACCAAGACCAGACTGTCCCCAAACTGCACACCCTCTAATACTGTTACCGAGATTTC +TTCGTCGTTTGTTCTTGGAAAGACAGCGCTGGGGATCGTTCGCTGGATACCACCCGCATA +ACTGGCTCGTCGCGGTGGGTCAGAGGTCAATGATGAACTTTCAAGTTGACGTGAGAAGCT +CTACGGTTGTTGTTCGACTGCTGTTGCGGCCGTCGTGGCGGGTCACGCGTCATGGGCATT +CGTCGTTGGCAGTCCCCACGC diff --git a/tests/data/probe_mapping/clustered_snp_indel.truth.revcomp.fa b/tests/data/probe_mapping/clustered_snp_indel.truth.revcomp.fa new file mode 100644 index 0000000..92c19f8 --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel.truth.revcomp.fa @@ -0,0 +1,35 @@ +>truth +GCGTGGGGACTGCCAACGACGAATGCCCATGACGCGTGACCCGCCACGACGGCCGCAACA +GCAGTCGAACAACAACCGTAGAGCTTCTCACGTCAACTTGAAAGTTCATCATTGACCTCT +GACCCACCGCGACGAGCCAGTTATGCGGGTGGTATCCAGCGAACGATCCCCAGCGCTGTC +TTTCCAAGAACAAACGACGAAGAAATCTCGGTAACAGTATTAGAGGGTGTGCAGTTTGGG +GACAGTCTGGTCTTGGTGCAGGTCGACGTCGGTCGGAGTTGTGGATGACGGTGGCGATGA +CTGTGGGGTGTGTGTTGGGTCGGTGGGGATGAGGCGTGGGTTGTGAACGGAACCGCCGGG +ACTGTATGAGTCTGTGCGCGGTTTGTGCACAGCGACACTCACAGCCAATCTCTGTGACTG +GTGTGACCTGGGAGAGAGAAGTTTTTTTAAGAACGTTGTCGGAAGAACACGCCGTGCTAG +CGCTTGGAGCGCTGACGGATGCGAGTGGTGAGTTCTTTGACGTGATCAAAGACCTCACGG +CGCTCGGCCATCTCGGACAGGATCTTGCGTTGGGCGTACATGACGGTTGTGTGATCACGG +CCGAACGCTTGGCCGATTTTGGGCAACGAAAGATCGGTGAGCTCACGACACAGGTACATC +GCAATCTGTCGTGACTGGGCCAGTGCTCGGGTCTTGCCGGGCCCGCGAAGCTCTTCGACG +GTAGTGTCGAAGTATTCGGCGGTGGCAGCCATGATCGTCGCCGCGCTGATTTGCATGGTG +TTGGCGTCGGCGATCAGATCGCGAAGCACAATCTCGGCCAGCGCTTTGTCGATTGGTGTT +TTGTTCAATGAGGCGAACGCGGTGACCCGGATCAGCGCGCCCTCGAGTTCACGGATATTG +CGTTCGATACTGCTGGCGATGAGTTCGAGGACATCGTCGGGGACCCGAGCCCGTAGTATT +TGGGCCTACTTCGTAGCAGCTATGCCGAGTGGGGGCGGCACGGGTGGTTCATACGTCAGT +GATCAGCCCCCACTCAAAGCGGGTTCTCAGCCGGTCCTCGAGGGTGGCGAGCTGCTTGGG +TGGGCGGTCAGATGAGATGACGATTTGCTTGTTGGCATTGTGCAAGGTGTTGAAGGTGTG +GAAGAACTCCTCTTGAATACCCTCTTTGCCTTCAATGAATTGGATGTCGTCGACCAACAG +CACGTCTACGTCGCGGTAGCTGCGTTTGAATGCGACCTTGCGGTCATCGCGGAGCGAGTT +AATGAAGTCGTTGGTGAATTCCTCGGTGGAGACATATTTGACCCGCATTCCCGGGAACAA +CCGTTGGGCATAGTTGCCTGCCGCGTGTAGCAGGTGTGTCTTGCCGAGACCGGACTCGCC +CCAGATGAACAGGGGGTTGTAAGCGCGGGCGGGTGCTTCTGCGATCGCCAAGGCGGCGGC +GTGCGCGAACCGGTTGGAGGCGCCGATAACGAACGTATCAAAGGTGTAGCGACGGTTAAG +GCTGGTTACGCCAGCGGTAGCGGAATCGGTATTGTGCGGGCGCTCGGTGAAGTAACTTGG +CCAACTGTGCTGGTTATCGCCCCGTGCCGCAGCGCTGTCATCAATCTCGTCGTTGTCGGT +TGTGGTGTCTGGCGATGTGGTAGCAGGATTTTCGGAAGGCGGCACGGTAGTGTCGTCGGC +TTCGTCGGTCGCCGGCGGAGCGATGCGGACCCCGAGTTGGATCTGATGTCCGAGTCGGCG +GCTGAGAGCGTCGGTAATCGGGGCCCGCAGATGGCGCTCGATTTCGTTTTGGACAAAGCT +GCTCGGCACGGATAACAGAGCAAACCCCTCGACGATGGTCAATGGCTGGACGAGATTGAG +CCAAGCCCTTTGCTGAGGGGTCAGCGGAGCGCTGAGATTAGCATCACTGCTGGGTCCGTC +GTCAACCTTAGGGTCGCCGTTAAGTTCGGAGACGACCGCGTTCCACACTGTGGTGAAGCC +TGAACCGGGGTCATCGGTCAA diff --git a/tests/data/probe_mapping/clustered_snp_indel_make_data.py b/tests/data/probe_mapping/clustered_snp_indel_make_data.py new file mode 100755 index 0000000..a61bddc --- /dev/null +++ b/tests/data/probe_mapping/clustered_snp_indel_make_data.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +import pyfastaq + +# These variants are at 0-based positions +variants = [ + (1000, "A", "T"), + (1003, "T", "C"), + (1004, "GGAGA", "G"), + (1012, "G", "C"), + (1014, "A", "AC"), + (1018, "C", "G"), + (1021, "T", "TAG"), + (1023, "T", "TGC"), + (1025, "G", "A"), + (1027, "GC", "G"), + (1032, "A", "T"), + (1034, "A", "G"), + (1037, "A", "C"), + (1040, "G", "A"), + (1043, "G", "A"), + (1044, "G", "C"), + (1045, "A", "T"), + (1047, "C", "CG"), +] + + +ref = next(pyfastaq.sequences.file_reader("clustered_snp_indel.ref.fa")) + +# Make the truth sequence by applying the variants. Start at the end and work +# backwards so indels don't mess up the coordinates. +truth_seq = list(ref.seq) +vcf_lines = [] +for (position, ref_allele, alt_allele) in reversed(variants): + vcf_lines.append("\t".join([ref.id, str(position + 1), ".", ref_allele, alt_allele, ".", "PASS", ".", "GT", "1/1"])) + assert "".join(truth_seq[position:position + len(ref_allele)]) == ref_allele + truth_seq[position:position + len(ref_allele)] = alt_allele + +# varifier expects the variants to be sorted by position +vcf_lines.reverse() + +with open("clustered_snp_indel.in.vcf", "w") as f: + print("##fileformat=VCFv4.2", file=f) + print('##FILTER=', file=f) + print('##FORMAT=', file=f) + print(f"##contig=", file=f) + print("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "sample", sep="\t", file=f) + print(*vcf_lines, sep="\n", file=f) + + +# Add in insertion and deletion before and after the cluster, so that the +# probe mapping will have I and/or D in it (this case had to be removed +# from test_annotate_vcf_with_probe_mapping(), so put it in here) +truth_seq[990] = "TG" +truth_seq[1055] = "" + +truth_seq = pyfastaq.sequences.Fasta("truth", "".join(truth_seq)) +with open("clustered_snp_indel.truth.fa", "w") as f: + print(truth_seq, file=f) + +truth_seq.revcomp() +with open("clustered_snp_indel.truth.revcomp.fa", "w") as f: + print(truth_seq, file=f) + diff --git a/tests/data/recall/get_recall.expect.all.masked.vcf b/tests/data/recall/get_recall.expect.all.masked.vcf deleted file mode 100644 index 5996b6b..0000000 --- a/tests/data/recall/get_recall.expect.all.masked.vcf +++ /dev/null @@ -1,23 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##contig= -##FORMAT= -##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp8sanadrv tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Mon Jul 6 17:36:08 2020 -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp2oqcrgql tmp.get_recall/truth_vcf/04.truth.vcf; Date=Mon Jul 6 17:36:08 2020 -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 111 5 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 140 6 GT G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 160 7 A AT . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:2:2:1.0:0:TP -ref 300 9 AT A . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 302 10 GC G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 309 12 G GAGA . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:4:4:1.0:0:TP diff --git a/tests/data/recall/get_recall.expect.all.vcf b/tests/data/recall/get_recall.expect.all.vcf deleted file mode 100644 index be2e27d..0000000 --- a/tests/data/recall/get_recall.expect.all.vcf +++ /dev/null @@ -1,29 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##contig= -##FORMAT= -##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp0yemnfic tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Mon Jul 6 15:06:22 2020 -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp_65jbi0e tmp.get_recall/truth_vcf/04.truth.vcf; Date=Mon Jul 6 15:06:22 2020 -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 40 0 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 60 1 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:1:1:0:0.0:0:FP -ref 80 2 T C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 109 3 G C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 110 4 C T . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 111 5 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 140 6 GT G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 160 7 A AT . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:2:2:1.0:0:TP -ref 250 8 G C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 300 9 AT A . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 302 10 GC G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 309 12 G GAGA . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:4:4:1.0:0:TP diff --git a/tests/data/recall/get_recall.expect.filtered.masked.vcf b/tests/data/recall/get_recall.expect.filtered.masked.vcf deleted file mode 100644 index 532a381..0000000 --- a/tests/data/recall/get_recall.expect.filtered.masked.vcf +++ /dev/null @@ -1,23 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##contig= -##FORMAT= -##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmpx5zxn426 tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Mon Jul 6 17:36:58 2020 -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp13hlpt90 tmp.get_recall/truth_vcf/04.truth.vcf; Date=Mon Jul 6 17:36:58 2020 -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 111 5 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 140 6 GT G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 160 7 A AT . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:2:2:1.0:0:TP -ref 300 9 AT A . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 302 10 GC G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 309 12 G GAGA . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:4:4:1.0:0:TP diff --git a/tests/data/recall/get_recall.expect.filtered.vcf b/tests/data/recall/get_recall.expect.filtered.vcf deleted file mode 100644 index c534e1e..0000000 --- a/tests/data/recall/get_recall.expect.filtered.vcf +++ /dev/null @@ -1,29 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##contig= -##FORMAT= -##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp_hzrnxfo tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Mon Jul 6 15:08:20 2020 -##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp80_dhqu5 tmp.get_recall/truth_vcf/04.truth.vcf; Date=Mon Jul 6 15:08:20 2020 -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 40 0 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:1:1:0:0.0:0:FP -ref 60 1 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:1:1:0:0.0:0:FP -ref 80 2 T C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 109 3 G C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 110 4 C T . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 111 5 A G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 140 6 GT G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH -ref 160 7 A AT . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:0:0:2:2:1.0:0:TP -ref 250 8 G C . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 300 9 AT A . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 302 10 GC G . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:1:1:1.0:0:TP -ref 309 12 G GAGA . PASS . GT:VFR_FILTER:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:PASS:1:1:0:4:4:1.0:0:TP diff --git a/tests/data/recall/get_recall.expect.masked.vcf b/tests/data/recall/get_recall.expect.masked.vcf new file mode 100644 index 0000000..e001651 --- /dev/null +++ b/tests/data/recall/get_recall.expect.masked.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##FORMAT= +##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) +##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp465mi5zq tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Tue Oct 6 08:21:03 2020 +##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmphabt46n5 tmp.get_recall/truth_vcf/04.truth.vcf; Date=Tue Oct 6 08:21:03 2020 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 111 5 A G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 140 6 GT G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH +ref 160 7 A AT . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:2:2:1.0:0:TP +ref 300 9 AT A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 302 10 GC G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 306 11 AT A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 309 12 G GAGA . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:4:4:1.0:0:TP diff --git a/tests/data/recall/get_recall.expect.vcf b/tests/data/recall/get_recall.expect.vcf new file mode 100644 index 0000000..a5ac03e --- /dev/null +++ b/tests/data/recall/get_recall.expect.vcf @@ -0,0 +1,30 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##FORMAT= +##bcftools_normVersion=1.10.2 (pysam)+htslib-1.10.2 (pysam) +##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmp98yq2irc tmp.get_recall/truth_vcf/03.probe_filtered.vcf; Date=Tue Oct 6 08:19:46 2020 +##bcftools_normCommand=norm -c x -d any -f /home/vagrant/git/varifier/tests/data/recall/get_recall.ref.fa -o /tmp/tmpxls0qgau tmp.get_recall/truth_vcf/04.truth.vcf; Date=Tue Oct 6 08:19:46 2020 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 40 0 A G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 60 1 A G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:1:1:0:0.0:0:FP +ref 80 2 T C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 109 3 G C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 110 4 C T . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 111 5 A G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 140 6 GT G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:1:1:1.0:0:FP_REF_PROBE_BETTER_MATCH +ref 160 7 A AT . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:2:2:1.0:0:TP +ref 250 8 G C . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 300 9 AT A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 302 10 GC G . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 306 11 AT A . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:1:0:1:1:1.0:0:TP +ref 309 12 G GAGA . PASS . GT:VFR_ED_RA:VFR_ED_TR:VFR_ED_TA:VFR_ALLELE_LEN:VFR_ALLELE_MATCH_COUNT:VFR_ALLELE_MATCH_FRAC:VFR_IN_MASK:VFR_RESULT 1/1:1:0:0:4:4:1.0:0:TP diff --git a/tests/data/recall/vcf_file_to_dict.vcf b/tests/data/recall/vcf_file_to_dict.vcf index b595dbc..beea2db 100644 --- a/tests/data/recall/vcf_file_to_dict.vcf +++ b/tests/data/recall/vcf_file_to_dict.vcf @@ -1,6 +1,5 @@ #header line 1 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref1 42 1 T A . PASS . GT:VFR_FILTER 1/1:PASS -ref2 44 2 T A . PASS . GT:VFR_FILTER 1/1:PASS -ref2 43 3 T A,C . PASS . GT:VFR_FILTER 2/2:PASS -ref2 45 4 T A . FAIL . GT:VFR_FILTER 1/1:FAIL_BUT_TEST +ref1 42 1 T A . PASS . GT 1/1 +ref2 44 2 T A . PASS . GT 1/1 +ref2 43 3 T A,C . PASS . GT 2/2 diff --git a/tests/data/tasks/vcf_eval.expect.masked.summary_stats.json b/tests/data/tasks/vcf_eval.expect.masked.summary_stats.json index 81790d4..2f95be4 100644 --- a/tests/data/tasks/vcf_eval.expect.masked.summary_stats.json +++ b/tests/data/tasks/vcf_eval.expect.masked.summary_stats.json @@ -1,87 +1,57 @@ { + "Excluded_record_counts": { + "filter_fail": 1, + "heterozygous": 0, + "no_genotype": 0, + "other": 0, + "ref_call": 1 + }, "Precision": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 10, - "numerator": 9 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.9, - "Precision_edit_dist": 0.9, - "Precision_frac": 0.9, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "EDIT_DIST_COUNTS": { + "denominator": 9, + "numerator": 8 + }, + "FP": { + "Count": 1, + "SUM_ALLELE_MATCH_FRAC": 0.0, + "SUM_EDIT_DIST": 1 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 9, - "numerator": 8 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.88888889, - "Precision_edit_dist": 0.88888889, - "Precision_frac": 0.88888889, - "TP": { - "Count": 8, - "SUM_ALLELE_MATCH_FRAC": 8.0, - "SUM_EDIT_DIST": 8 - } + "Precision": 0.88888889, + "Precision_edit_dist": 0.88888889, + "Precision_frac": 0.88888889, + "TP": { + "Count": 8, + "SUM_ALLELE_MATCH_FRAC": 8.0, + "SUM_EDIT_DIST": 8 }, "UNUSED": { "CONFLICT": 0, "MASKED": 0, - "OTHER": 1 + "OTHER": 0 } }, "Recall": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 12, - "numerator": 9 - }, - "FN": { - "Count": 3, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 3 - }, - "Recall": 0.75, - "Recall_edit_dist": 0.75, - "Recall_frac": 0.83333333, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "EDIT_DIST_COUNTS": { + "denominator": 12, + "numerator": 8 + }, + "FN": { + "Count": 4, + "SUM_ALLELE_MATCH_FRAC": 1.0, + "SUM_EDIT_DIST": 4 + }, + "Recall": 0.66666667, + "Recall_edit_dist": 0.66666667, + "Recall_frac": 0.75, + "TP": { + "Count": 8, + "SUM_ALLELE_MATCH_FRAC": 8.0, + "SUM_EDIT_DIST": 8 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 12, - "numerator": 8 - }, - "FN": { - "Count": 4, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 4 - }, - "Recall": 0.66666667, - "Recall_edit_dist": 0.66666667, - "Recall_frac": 0.75, - "TP": { - "Count": 8, - "SUM_ALLELE_MATCH_FRAC": 8.0, - "SUM_EDIT_DIST": 8 - } + "UNUSED": { + "CONFLICT": 0, + "MASKED": 0, + "OTHER": 0 } } } \ No newline at end of file diff --git a/tests/data/tasks/vcf_eval.expect.summary_stats.json b/tests/data/tasks/vcf_eval.expect.summary_stats.json index a76d58f..d7ef846 100644 --- a/tests/data/tasks/vcf_eval.expect.summary_stats.json +++ b/tests/data/tasks/vcf_eval.expect.summary_stats.json @@ -1,87 +1,57 @@ { + "Excluded_record_counts": { + "filter_fail": 2, + "heterozygous": 0, + "no_genotype": 0, + "other": 0, + "ref_call": 1 + }, "Precision": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 11, - "numerator": 10 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.90909091, - "Precision_edit_dist": 0.90909091, - "Precision_frac": 0.90909091, - "TP": { - "Count": 10, - "SUM_ALLELE_MATCH_FRAC": 10.0, - "SUM_EDIT_DIST": 10 - } + "EDIT_DIST_COUNTS": { + "denominator": 10, + "numerator": 9 + }, + "FP": { + "Count": 1, + "SUM_ALLELE_MATCH_FRAC": 0.0, + "SUM_EDIT_DIST": 1 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 10, - "numerator": 9 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.9, - "Precision_edit_dist": 0.9, - "Precision_frac": 0.9, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "Precision": 0.9, + "Precision_edit_dist": 0.9, + "Precision_frac": 0.9, + "TP": { + "Count": 9, + "SUM_ALLELE_MATCH_FRAC": 9.0, + "SUM_EDIT_DIST": 9 }, "UNUSED": { - "CONFLICT": 1, + "CONFLICT": 0, "MASKED": 0, - "OTHER": 1 + "OTHER": 0 } }, "Recall": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 13, - "numerator": 10 - }, - "FN": { - "Count": 3, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 3 - }, - "Recall": 0.76923077, - "Recall_edit_dist": 0.76923077, - "Recall_frac": 0.84615385, - "TP": { - "Count": 10, - "SUM_ALLELE_MATCH_FRAC": 10.0, - "SUM_EDIT_DIST": 10 - } + "EDIT_DIST_COUNTS": { + "denominator": 13, + "numerator": 9 + }, + "FN": { + "Count": 4, + "SUM_ALLELE_MATCH_FRAC": 1.0, + "SUM_EDIT_DIST": 4 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 13, - "numerator": 9 - }, - "FN": { - "Count": 4, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 4 - }, - "Recall": 0.69230769, - "Recall_edit_dist": 0.69230769, - "Recall_frac": 0.76923077, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "Recall": 0.69230769, + "Recall_edit_dist": 0.69230769, + "Recall_frac": 0.76923077, + "TP": { + "Count": 9, + "SUM_ALLELE_MATCH_FRAC": 9.0, + "SUM_EDIT_DIST": 9 + }, + "UNUSED": { + "CONFLICT": 0, + "MASKED": 0, + "OTHER": 0 } } } \ No newline at end of file diff --git a/tests/data/truth_variant_finding/merge_vcf_files_for_probe_mapping.expect.vcf b/tests/data/truth_variant_finding/merge_vcf_files_for_probe_mapping.expect.vcf index 85925a2..4137ed3 100644 --- a/tests/data/truth_variant_finding/merge_vcf_files_for_probe_mapping.expect.vcf +++ b/tests/data/truth_variant_finding/merge_vcf_files_for_probe_mapping.expect.vcf @@ -2,8 +2,8 @@ ##contig= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 40 0 A C . PASS . GT:VFR_FILTER 1/1:PASS -ref 40 0 A G . PASS . GT:VFR_FILTER 1/1:PASS -ref 41 1 C G . PASS . GT:VFR_FILTER 1/1:PASS -ref 60 2 A G . PASS . GT:VFR_FILTER 1/1:PASS -ref 80 3 T C . PASS . GT:VFR_FILTER 1/1:PASS +ref 40 0 A C . PASS . GT 1/1 +ref 40 0 A G . PASS . GT 1/1 +ref 41 1 C G . PASS . GT 1/1 +ref 60 2 A G . PASS . GT 1/1 +ref 80 3 T C . PASS . GT 1/1 diff --git a/tests/data/vcf_evaluate/evaluate_vcf.expect.masked.summary_stats.json b/tests/data/vcf_evaluate/evaluate_vcf.expect.masked.summary_stats.json index 81790d4..a9c5616 100644 --- a/tests/data/vcf_evaluate/evaluate_vcf.expect.masked.summary_stats.json +++ b/tests/data/vcf_evaluate/evaluate_vcf.expect.masked.summary_stats.json @@ -1,87 +1,57 @@ { + "Excluded_record_counts": { + "filter_fail": 1, + "heterozygous": 0, + "no_genotype": 0, + "other": 0, + "ref_call": 1 + }, "Precision": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 10, - "numerator": 9 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.9, - "Precision_edit_dist": 0.9, - "Precision_frac": 0.9, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "EDIT_DIST_COUNTS": { + "denominator": 8, + "numerator": 8 + }, + "FP": { + "Count": 0, + "SUM_ALLELE_MATCH_FRAC": 0, + "SUM_EDIT_DIST": 0 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 9, - "numerator": 8 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.88888889, - "Precision_edit_dist": 0.88888889, - "Precision_frac": 0.88888889, - "TP": { - "Count": 8, - "SUM_ALLELE_MATCH_FRAC": 8.0, - "SUM_EDIT_DIST": 8 - } + "Precision": 1.0, + "Precision_edit_dist": 1.0, + "Precision_frac": 1.0, + "TP": { + "Count": 8, + "SUM_ALLELE_MATCH_FRAC": 8.0, + "SUM_EDIT_DIST": 8 }, "UNUSED": { "CONFLICT": 0, - "MASKED": 0, - "OTHER": 1 + "MASKED": 1, + "OTHER": 0 } }, "Recall": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 12, - "numerator": 9 - }, - "FN": { - "Count": 3, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 3 - }, - "Recall": 0.75, - "Recall_edit_dist": 0.75, - "Recall_frac": 0.83333333, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "EDIT_DIST_COUNTS": { + "denominator": 12, + "numerator": 8 + }, + "FN": { + "Count": 4, + "SUM_ALLELE_MATCH_FRAC": 1.0, + "SUM_EDIT_DIST": 4 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 12, - "numerator": 8 - }, - "FN": { - "Count": 4, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 4 - }, - "Recall": 0.66666667, - "Recall_edit_dist": 0.66666667, - "Recall_frac": 0.75, - "TP": { - "Count": 8, - "SUM_ALLELE_MATCH_FRAC": 8.0, - "SUM_EDIT_DIST": 8 - } + "Recall": 0.66666667, + "Recall_edit_dist": 0.66666667, + "Recall_frac": 0.75, + "TP": { + "Count": 8, + "SUM_ALLELE_MATCH_FRAC": 8.0, + "SUM_EDIT_DIST": 8 + }, + "UNUSED": { + "CONFLICT": 0, + "MASKED": 0, + "OTHER": 0 } } } \ No newline at end of file diff --git a/tests/data/vcf_evaluate/evaluate_vcf.expect.summary_stats.json b/tests/data/vcf_evaluate/evaluate_vcf.expect.summary_stats.json index a76d58f..d7ef846 100644 --- a/tests/data/vcf_evaluate/evaluate_vcf.expect.summary_stats.json +++ b/tests/data/vcf_evaluate/evaluate_vcf.expect.summary_stats.json @@ -1,87 +1,57 @@ { + "Excluded_record_counts": { + "filter_fail": 2, + "heterozygous": 0, + "no_genotype": 0, + "other": 0, + "ref_call": 1 + }, "Precision": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 11, - "numerator": 10 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.90909091, - "Precision_edit_dist": 0.90909091, - "Precision_frac": 0.90909091, - "TP": { - "Count": 10, - "SUM_ALLELE_MATCH_FRAC": 10.0, - "SUM_EDIT_DIST": 10 - } + "EDIT_DIST_COUNTS": { + "denominator": 10, + "numerator": 9 + }, + "FP": { + "Count": 1, + "SUM_ALLELE_MATCH_FRAC": 0.0, + "SUM_EDIT_DIST": 1 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 10, - "numerator": 9 - }, - "FP": { - "Count": 1, - "SUM_ALLELE_MATCH_FRAC": 0.0, - "SUM_EDIT_DIST": 1 - }, - "Precision": 0.9, - "Precision_edit_dist": 0.9, - "Precision_frac": 0.9, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "Precision": 0.9, + "Precision_edit_dist": 0.9, + "Precision_frac": 0.9, + "TP": { + "Count": 9, + "SUM_ALLELE_MATCH_FRAC": 9.0, + "SUM_EDIT_DIST": 9 }, "UNUSED": { - "CONFLICT": 1, + "CONFLICT": 0, "MASKED": 0, - "OTHER": 1 + "OTHER": 0 } }, "Recall": { - "ALL": { - "EDIT_DIST_COUNTS": { - "denominator": 13, - "numerator": 10 - }, - "FN": { - "Count": 3, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 3 - }, - "Recall": 0.76923077, - "Recall_edit_dist": 0.76923077, - "Recall_frac": 0.84615385, - "TP": { - "Count": 10, - "SUM_ALLELE_MATCH_FRAC": 10.0, - "SUM_EDIT_DIST": 10 - } + "EDIT_DIST_COUNTS": { + "denominator": 13, + "numerator": 9 + }, + "FN": { + "Count": 4, + "SUM_ALLELE_MATCH_FRAC": 1.0, + "SUM_EDIT_DIST": 4 }, - "FILT": { - "EDIT_DIST_COUNTS": { - "denominator": 13, - "numerator": 9 - }, - "FN": { - "Count": 4, - "SUM_ALLELE_MATCH_FRAC": 1.0, - "SUM_EDIT_DIST": 4 - }, - "Recall": 0.69230769, - "Recall_edit_dist": 0.69230769, - "Recall_frac": 0.76923077, - "TP": { - "Count": 9, - "SUM_ALLELE_MATCH_FRAC": 9.0, - "SUM_EDIT_DIST": 9 - } + "Recall": 0.69230769, + "Recall_edit_dist": 0.69230769, + "Recall_frac": 0.76923077, + "TP": { + "Count": 9, + "SUM_ALLELE_MATCH_FRAC": 9.0, + "SUM_EDIT_DIST": 9 + }, + "UNUSED": { + "CONFLICT": 0, + "MASKED": 0, + "OTHER": 0 } } } \ No newline at end of file diff --git a/tests/data/vcf_evaluate/evaluate_vcf.truth_mask.bed b/tests/data/vcf_evaluate/evaluate_vcf.truth_mask.bed new file mode 100644 index 0000000..1475054 --- /dev/null +++ b/tests/data/vcf_evaluate/evaluate_vcf.truth_mask.bed @@ -0,0 +1 @@ +truth 85 95 diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_exclude_ref_calls.exclude.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_exclude_ref_calls.exclude.vcf new file mode 100644 index 0000000..59065bc --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_exclude_ref_calls.exclude.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 3 . G C . MISMAPPED_UNPLACEABLE . GT:NOTES:VFR_EXCLUDE_REASON 0:Should always get removed because of FILTER column:filter_fail +ref 4 . T A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF does not match ref genome:other +ref 5 . A . . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because ALT is .:other +ref 6 . T C . PASS . NOTES:VFR_EXCLUDE_REASON Should always get removed because no GT:no_genotype +ref 7 . . A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF is .:other +ref 8 . A T . PASS . GT:NOTES:VFR_EXCLUDE_REASON 0/1:Should always get removed because heterozygous:heterozygous diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_keep_ref_calls.keep.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_keep_ref_calls.keep.vcf new file mode 100644 index 0000000..c1de474 --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.no_filter_pass_keep_ref_calls.keep.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 1 . A G . PASS . GT:NOTES 0:Good ref call +ref 2 . T G . PASS . GT:NOTES 1:Good alt call +ref 9 . C G . . . GT:NOTES 1/1:Removal depends on filter_pass option +ref 10 . T G . FILTER_1 . GT:NOTES 1/1:Removal depends on filter_pass option +ref 11 . G G . FILTER_1;FILTER_2 . GT:NOTES 1/1:Removal depends on filter_pass option diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.exclude.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.exclude.vcf new file mode 100644 index 0000000..8ff519d --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.exclude.vcf @@ -0,0 +1,15 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 1 . A G . PASS . GT:NOTES:VFR_EXCLUDE_REASON 0:Good ref call:ref_call +ref 3 . G C . MISMAPPED_UNPLACEABLE . GT:NOTES:VFR_EXCLUDE_REASON 0:Should always get removed because of FILTER column:filter_fail +ref 4 . T A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF does not match ref genome:other +ref 5 . A . . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because ALT is .:other +ref 6 . T C . PASS . NOTES:VFR_EXCLUDE_REASON Should always get removed because no GT:no_genotype +ref 7 . . A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF is .:other +ref 8 . A T . PASS . GT:NOTES:VFR_EXCLUDE_REASON 0/1:Should always get removed because heterozygous:heterozygous +ref 9 . C G . . . GT:NOTES:VFR_EXCLUDE_REASON 1/1:Removal depends on filter_pass option:filter_fail +ref 10 . T G . FILTER_1 . GT:NOTES:VFR_EXCLUDE_REASON 1/1:Removal depends on filter_pass option:filter_fail +ref 11 . G G . FILTER_1;FILTER_2 . GT:NOTES:VFR_EXCLUDE_REASON 1/1:Removal depends on filter_pass option:filter_fail diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.keep.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.keep.vcf new file mode 100644 index 0000000..d8d2c6d --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.1.keep.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 2 . T G . PASS . GT:NOTES 1:Good alt call diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.exclude.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.exclude.vcf new file mode 100644 index 0000000..e2af4fa --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.exclude.vcf @@ -0,0 +1,13 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 1 . A G . PASS . GT:NOTES:VFR_EXCLUDE_REASON 0:Good ref call:ref_call +ref 3 . G C . MISMAPPED_UNPLACEABLE . GT:NOTES:VFR_EXCLUDE_REASON 0:Should always get removed because of FILTER column:filter_fail +ref 4 . T A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF does not match ref genome:other +ref 5 . A . . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because ALT is .:other +ref 6 . T C . PASS . NOTES:VFR_EXCLUDE_REASON Should always get removed because no GT:no_genotype +ref 7 . . A . PASS . GT:NOTES:VFR_EXCLUDE_REASON 1:Should always get removed because REF is .:other +ref 8 . A T . PASS . GT:NOTES:VFR_EXCLUDE_REASON 0/1:Should always get removed because heterozygous:heterozygous +ref 10 . T G . FILTER_1 . GT:NOTES:VFR_EXCLUDE_REASON 1/1:Removal depends on filter_pass option:filter_fail diff --git a/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.keep.vcf b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.keep.vcf new file mode 100644 index 0000000..b0d3012 --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.expect.with_filtering.2.keep.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 2 . T G . PASS . GT:NOTES 1:Good alt call +ref 9 . C G . . . GT:NOTES 1/1:Removal depends on filter_pass option +ref 11 . G G . FILTER_1;FILTER_2 . GT:NOTES 1/1:Removal depends on filter_pass option diff --git a/tests/data/vcf_evaluate/filter_vcf.in.vcf b/tests/data/vcf_evaluate/filter_vcf.in.vcf new file mode 100644 index 0000000..d246a3f --- /dev/null +++ b/tests/data/vcf_evaluate/filter_vcf.in.vcf @@ -0,0 +1,16 @@ +##fileformat=VCFv4.2 +##contig= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample +ref 1 . A G . PASS . GT:NOTES 0:Good ref call +ref 2 . T G . PASS . GT:NOTES 1:Good alt call +ref 3 . G C . MISMAPPED_UNPLACEABLE . GT:NOTES 0:Should always get removed because of FILTER column +ref 4 . T A . PASS . GT:NOTES 1:Should always get removed because REF does not match ref genome +ref 5 . A . . PASS . GT:NOTES 1:Should always get removed because ALT is . +ref 6 . T C . PASS . NOTES Should always get removed because no GT +ref 7 . . A . PASS . GT:NOTES 1:Should always get removed because REF is . +ref 8 . A T . PASS . GT:NOTES 0/1:Should always get removed because heterozygous +ref 9 . C G . . . GT:NOTES 1/1:Removal depends on filter_pass option +ref 10 . T G . FILTER_1 . GT:NOTES 1/1:Removal depends on filter_pass option +ref 11 . G G . FILTER_1;FILTER_2 . GT:NOTES 1/1:Removal depends on filter_pass option diff --git a/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.not_want_ref_calls.vcf b/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.not_want_ref_calls.vcf deleted file mode 100644 index 0604483..0000000 --- a/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.not_want_ref_calls.vcf +++ /dev/null @@ -1,12 +0,0 @@ -##fileformat=VCFv4.2 -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 1 . A G . PASS . GT:VFR_FILTER 1/1:PASS -ref 1 . A C . FAIL . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 5 . G T . PASS . GT:VFR_FILTER 1/1:PASS -ref 10 . CTG C . PASS . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 12 . G A . PASS . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 42 . C G . PASS . GT:VFR_FILTER 1/1:PASS -ref 50 . C G . PASS . GT:VFR_FILTER 1/1:PASS -ref 50 . C A . PASS . GT:VFR_FILTER 0/0:CANNOT_USE_GT -ref 60 . T G . PASS . GT:VFR_FILTER 0/0:CANNOT_USE_GT diff --git a/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.want_ref_calls.vcf b/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.want_ref_calls.vcf deleted file mode 100644 index 2b42c77..0000000 --- a/tests/data/vcf_qc_annotate/add_qc_to_vcf.expect.want_ref_calls.vcf +++ /dev/null @@ -1,12 +0,0 @@ -##fileformat=VCFv4.2 -##FORMAT= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 1 . A G . PASS . GT:VFR_FILTER 1/1:PASS -ref 1 . A C . FAIL . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 5 . G T . PASS . GT:VFR_FILTER 1/1:PASS -ref 10 . CTG C . PASS . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 12 . G A . PASS . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 42 . C G . PASS . GT:VFR_FILTER 1/1:PASS -ref 50 . C G . PASS . GT:VFR_FILTER 1/1:FAIL_CONFLICT -ref 50 . C A . PASS . GT:VFR_FILTER 0/0:FAIL_CONFLICT -ref 60 . T G . PASS . GT:VFR_FILTER 0/0:PASS diff --git a/tests/data/vcf_qc_annotate/add_qc_to_vcf.in.vcf b/tests/data/vcf_qc_annotate/add_qc_to_vcf.in.vcf deleted file mode 100644 index 38e6ea1..0000000 --- a/tests/data/vcf_qc_annotate/add_qc_to_vcf.in.vcf +++ /dev/null @@ -1,11 +0,0 @@ -##fileformat=VCFv4.2 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -ref 1 . A G . PASS . GT 1/1 -ref 1 . A C . FAIL . GT 1/1 -ref 5 . G T . PASS . GT 1/1 -ref 10 . CTG C . PASS . GT 1/1 -ref 12 . G A . PASS . GT 1/1 -ref 42 . C G . PASS . GT 1/1 -ref 50 . C G . PASS . GT 1/1 -ref 50 . C A . PASS . GT 0/0 -ref 60 . T G . PASS . GT 0/0 diff --git a/tests/probe_mapping_test.py b/tests/probe_mapping_test.py index c9511ed..57791a1 100644 --- a/tests/probe_mapping_test.py +++ b/tests/probe_mapping_test.py @@ -19,17 +19,9 @@ def test_annotate_vcf_with_probe_mapping(): # Input files are made by the script tests/data/probe_mapping/make_test_data.py. # It makes a VCF file + matching ref FASTA, and a truth reference FASTA. # - # The aim was to make this as comprehensive as reasonably possible. - # Tests calling TPs correctly, and also calling FPs correctly - particularly - # in the positions flanking the true variants in case of off-by-one errors - # or the minimap2/mappy mapping doing unexpected things. - # # Also, test reverse-complementing the truth genome results in exactly the # same output. Important to test because probes then all map to the reverse # strand, which is a potential source of bugs. - # There is one variant that is slightly different because of how minimap - # aligns the ref to the probe. Can't do anything about this, it's just - # how alignemnts work. vcf_ref_fa = os.path.join(data_dir, "annotate_vcf_with_probe_mapping.ref.fa") vcf_in = os.path.join(data_dir, "annotate_vcf_with_probe_mapping.in.vcf") truth_ref_fa = os.path.join(data_dir, "annotate_vcf_with_probe_mapping.truth.fa") @@ -40,7 +32,7 @@ def test_annotate_vcf_with_probe_mapping(): tmp_vcf_revcomp = f"{tmp_vcf}.revcomp" tmp_map = "tmp.probe_mapping.annotate_vcf_with_probe_mapping.map" clean_files((tmp_vcf, tmp_vcf_revcomp, tmp_map)) - truth_mask = {"truth":{80, 81, 82}} + truth_mask = {"truth": {80, 81, 82}} probe_mapping.annotate_vcf_with_probe_mapping( vcf_in, vcf_ref_fa, @@ -61,9 +53,42 @@ def test_annotate_vcf_with_probe_mapping(): truth_mask=truth_mask, ) expect_vcf = os.path.join(data_dir, "annotate_vcf_with_probe_mapping.expect.vcf") - expect_rev_vcf = os.path.join( - data_dir, "annotate_vcf_with_probe_mapping.expect.rev.vcf" + assert filecmp.cmp(tmp_vcf, expect_vcf, shallow=False) + assert filecmp.cmp(tmp_vcf_revcomp, expect_vcf, shallow=False) + clean_files((tmp_vcf, tmp_vcf_revcomp, tmp_map)) + + +# Clusters of SNPs and indels are hard to evaluate when they are in separate +# records. This test is to check that it works. It was found when testing +# simulated TB data, where there was a cluster of SNPs and indels. This test +# was added when the code was changed to apply variants to the flanks of each +# probe. It is all TPs. Without adding variants to the flanks, the probe mapping +# gets messed up and about 1/3 of the calls in this test were incorrectly called +# as FP instead of TP. It's the first 2kb of the H37Rv version 3 reference +# genome with a cluster of variants at around 1000-1050 +def test_annotate_with_probe_mapping_clustered_snps_and_indels(): + vcf_ref_fa = os.path.join(data_dir, "clustered_snp_indel.ref.fa") + vcf_in = os.path.join(data_dir, "clustered_snp_indel.in.vcf") + truth_ref_fa = os.path.join(data_dir, "clustered_snp_indel.truth.fa") + truth_ref_revcomp_fa = os.path.join( + data_dir, "clustered_snp_indel.truth.revcomp.fa" + ) + tmp_vcf = "tmp.probe_mapping.clustered_snp_indel.vcf" + tmp_vcf_revcomp = f"{tmp_vcf}.revcomp" + tmp_map = "tmp.probe_mapping.clustered_snp_indel.map" + clean_files((tmp_vcf, tmp_vcf_revcomp, tmp_map)) + probe_mapping.annotate_vcf_with_probe_mapping( + vcf_in, vcf_ref_fa, truth_ref_fa, 100, tmp_vcf, map_outfile=tmp_map, + ) + probe_mapping.annotate_vcf_with_probe_mapping( + vcf_in, + vcf_ref_fa, + truth_ref_revcomp_fa, + 100, + tmp_vcf_revcomp, + map_outfile=tmp_map, ) + expect_vcf = os.path.join(data_dir, "clustered_snp_indel.expect.vcf") assert filecmp.cmp(tmp_vcf, expect_vcf, shallow=False) - assert filecmp.cmp(tmp_vcf_revcomp, expect_rev_vcf, shallow=False) + assert filecmp.cmp(tmp_vcf_revcomp, expect_vcf, shallow=False) clean_files((tmp_vcf, tmp_vcf_revcomp, tmp_map)) diff --git a/tests/recall_test.py b/tests/recall_test.py index 7ee5223..5950ff3 100644 --- a/tests/recall_test.py +++ b/tests/recall_test.py @@ -11,42 +11,18 @@ data_dir = os.path.join(this_dir, "data", "recall") -def vcf_records_are_the_same(file1, file2): - """Returns True if records in the two VCF files are the same. - Ignores header lines in the files. Returns False if any lines are different""" - _, expect_records = vcf_file_read.vcf_file_to_list(file1) - _, got_records = vcf_file_read.vcf_file_to_list(file2) - return got_records == expect_records - - def test_vcf_file_to_dict(): vcf_file = os.path.join(data_dir, "vcf_file_to_dict.vcf") expect = { - "ref1": [ - vcf_record.VcfRecord( - "ref1\t42\t1\tT\tA\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:PASS" - ) - ], + "ref1": [vcf_record.VcfRecord("ref1\t42\t1\tT\tA\t.\tPASS\t.\tGT\t1/1")], "ref2": [ - vcf_record.VcfRecord( - "ref2\t43\t3\tT\tA,C\t.\tPASS\t.\tGT:VFR_FILTER\t2/2:PASS" - ), - vcf_record.VcfRecord( - "ref2\t44\t2\tT\tA\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:PASS" - ), + vcf_record.VcfRecord("ref2\t43\t3\tT\tA,C\t.\tPASS\t.\tGT\t2/2"), + vcf_record.VcfRecord("ref2\t44\t2\tT\tA\t.\tPASS\t.\tGT\t1/1"), ], } got = recall._vcf_file_to_dict(vcf_file) assert got == expect - expect["ref2"].append( - vcf_record.VcfRecord( - "ref2\t45\t4\tT\tA\t.\tFAIL\t.\tGT:VFR_FILTER\t1/1:FAIL_BUT_TEST" - ) - ) - got = recall._vcf_file_to_dict(vcf_file, pass_only=False) - assert got == expect - def test_apply_variants_to_genome(): ref_fasta = os.path.join(data_dir, "apply_variants_to_genome.ref.fa") @@ -65,23 +41,25 @@ def test_get_recall(): vcf_to_test = os.path.join(data_dir, "get_recall.to_test.vcf") tmp_out = "tmp.get_recall" subprocess.check_output(f"rm -rf {tmp_out}", shell=True) - got_vcf_all, got_vcf_filtered = recall.get_recall( + got_vcf = recall.get_recall( ref_fasta, vcf_to_test, tmp_out, 100, debug=True, truth_fasta=truth_fasta, ) - expect_vcf_all = os.path.join(data_dir, "get_recall.expect.all.vcf") - assert utils.vcf_records_are_the_same(got_vcf_all, expect_vcf_all) - expect_vcf_filtered = os.path.join(data_dir, "get_recall.expect.filtered.vcf") - assert utils.vcf_records_are_the_same(got_vcf_filtered, expect_vcf_filtered) + expect_vcf = os.path.join(data_dir, "get_recall.expect.vcf") + assert utils.vcf_records_are_the_same(got_vcf, expect_vcf) subprocess.check_output(f"rm -r {tmp_out}", shell=True) # Same again, but with a mask that removes a few variants mask = {"truth": set(list(range(320, 391)))} mask["truth"].add(180) - got_vcf_all, got_vcf_filtered = recall.get_recall( - ref_fasta, vcf_to_test, tmp_out, 100, debug=True, truth_fasta=truth_fasta, truth_mask=mask, + got_vcf = recall.get_recall( + ref_fasta, + vcf_to_test, + tmp_out, + 100, + debug=True, + truth_fasta=truth_fasta, + truth_mask=mask, ) - expect_vcf_all = os.path.join(data_dir, "get_recall.expect.all.masked.vcf") - assert utils.vcf_records_are_the_same(got_vcf_all, expect_vcf_all) - expect_vcf_filtered = os.path.join(data_dir, "get_recall.expect.filtered.masked.vcf") - assert utils.vcf_records_are_the_same(got_vcf_filtered, expect_vcf_filtered) + expect_vcf = os.path.join(data_dir, "get_recall.expect.masked.vcf") + assert utils.vcf_records_are_the_same(got_vcf, expect_vcf) subprocess.check_output(f"rm -r {tmp_out}", shell=True) diff --git a/tests/tasks_test.py b/tests/tasks_test.py index f07e98c..16abb6f 100644 --- a/tests/tasks_test.py +++ b/tests/tasks_test.py @@ -47,6 +47,7 @@ def test_vcf_eval(): options.truth_mask = None options.use_ref_calls = False options.max_recall_ref_len = None + options.filter_pass = "PASS,." subprocess.check_output(f"rm -rf {options.outdir}", shell=True) tasks.vcf_eval.run(options) expect_json = os.path.join(data_dir, "vcf_eval.expect.summary_stats.json") diff --git a/tests/truth_variant_finding_test.py b/tests/truth_variant_finding_test.py index 419c1f3..29781cc 100644 --- a/tests/truth_variant_finding_test.py +++ b/tests/truth_variant_finding_test.py @@ -3,8 +3,6 @@ import pytest import subprocess -from cluster_vcf_records import vcf_file_read - from varifier import truth_variant_finding, utils this_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/tests/utils_test.py b/tests/utils_test.py index 37a1f15..eb2cd9f 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -4,7 +4,6 @@ import subprocess import pyfastaq -from cluster_vcf_records import vcf_record from varifier import utils diff --git a/tests/vcf_evaluate_test.py b/tests/vcf_evaluate_test.py index 12e3a5e..d08a232 100644 --- a/tests/vcf_evaluate_test.py +++ b/tests/vcf_evaluate_test.py @@ -4,7 +4,7 @@ import pytest import subprocess -from varifier import vcf_evaluate +from varifier import utils, vcf_evaluate this_dir = os.path.dirname(os.path.abspath(__file__)) data_dir = os.path.join(this_dir, "data", "vcf_evaluate") @@ -13,50 +13,108 @@ def test_add_overall_precision_and_recall_to_summary_stats(): stats = { "Precision": { - "ALL": { - "TP": {"Count": 9, "SUM_ALLELE_MATCH_FRAC": 9.0}, - "EDIT_DIST_COUNTS": {"numerator": 18, "denominator": 20}, - "FP": {"Count": 1, "SUM_ALLELE_MATCH_FRAC": 0.99}, - }, - "FILT": { - "TP": {"Count": 5, "SUM_ALLELE_MATCH_FRAC": 5.0}, - "EDIT_DIST_COUNTS": {"numerator": 16, "denominator": 18}, - "FP": {"Count": 0, "SUM_ALLELE_MATCH_FRAC": 0}, - }, + "TP": {"Count": 9, "SUM_ALLELE_MATCH_FRAC": 9.0}, + "EDIT_DIST_COUNTS": {"numerator": 18, "denominator": 20}, + "FP": {"Count": 1, "SUM_ALLELE_MATCH_FRAC": 0.99}, }, "Recall": { - "ALL": { - "TP": {"Count": 5, "SUM_ALLELE_MATCH_FRAC": 5.0}, - "EDIT_DIST_COUNTS": {"numerator": 15, "denominator": 20}, - "FN": {"Count": 1, "SUM_ALLELE_MATCH_FRAC": 0.2}, - }, - "FILT": { - "TP": {"Count": 0, "SUM_ALLELE_MATCH_FRAC": 0.0}, - "EDIT_DIST_COUNTS": {"numerator": 14, "denominator": 15}, - "FN": {"Count": 0, "SUM_ALLELE_MATCH_FRAC": 0.0}, - }, + "TP": {"Count": 5, "SUM_ALLELE_MATCH_FRAC": 5.0}, + "EDIT_DIST_COUNTS": {"numerator": 15, "denominator": 20}, + "FN": {"Count": 1, "SUM_ALLELE_MATCH_FRAC": 0.2}, }, } expect = copy.deepcopy(stats) vcf_evaluate._add_overall_precision_and_recall_to_summary_stats(stats) assert stats != expect - expect["Precision"]["ALL"]["Precision"] = 0.9 - expect["Precision"]["FILT"]["Precision"] = 1 - expect["Recall"]["ALL"]["Recall"] = 0.83333333 - expect["Recall"]["FILT"]["Recall"] = 0 - expect["Precision"]["ALL"]["Precision_frac"] = 0.999 - expect["Precision"]["FILT"]["Precision_frac"] = 1.0 - expect["Recall"]["ALL"]["Recall_frac"] = 0.86666667 - expect["Recall"]["FILT"]["Recall_frac"] = 0 - expect["Precision"]["ALL"]["Precision_edit_dist"] = 0.9 - expect["Precision"]["FILT"]["Precision_edit_dist"] = 0.88888889 - expect["Recall"]["ALL"]["Recall_edit_dist"] = 0.75 - expect["Recall"]["FILT"]["Recall_edit_dist"] = 0.93333333 + expect["Precision"]["Precision"] = 0.9 + expect["Recall"]["Recall"] = 0.83333333 + expect["Precision"]["Precision_frac"] = 0.999 + expect["Recall"]["Recall_frac"] = 0.86666667 + expect["Precision"]["Precision_edit_dist"] = 0.9 + expect["Recall"]["Recall_edit_dist"] = 0.75 assert stats == expect +def test_filter_vcf(): + infile = os.path.join(data_dir, "filter_vcf.in.vcf") + got_keep = "tmp.filter_vcf.keep.vcf" + got_exclude = "tmp.filter_vcf.exclude.vcf" + subprocess.check_output(f"rm -rf {got_keep} {got_exclude}", shell=True) + ref_seqs = {"ref": "ATGCATGACTGCATTACTCATCATCGAATG"} + got_counts = vcf_evaluate._filter_vcf( + infile, got_keep, got_exclude, ref_seqs, filter_pass=None, keep_ref_calls=True + ) + expect_counts = { + "filter_fail": 1, + "heterozygous": 1, + "no_genotype": 1, + "ref_call": 0, + "other": 3, + } + assert got_counts == expect_counts + expect_keep = os.path.join( + data_dir, "filter_vcf.expect.no_filter_pass_keep_ref_calls.keep.vcf" + ) + expect_exclude = os.path.join( + data_dir, "filter_vcf.expect.no_filter_pass_exclude_ref_calls.exclude.vcf" + ) + utils.vcf_records_are_the_same(got_keep, expect_keep) + utils.vcf_records_are_the_same(got_exclude, expect_exclude) + os.unlink(got_keep) + os.unlink(got_exclude) + + got_counts = vcf_evaluate._filter_vcf( + infile, + got_keep, + got_exclude, + ref_seqs, + filter_pass={"PASS"}, + keep_ref_calls=False, + ) + expect_counts = { + "filter_fail": 4, + "heterozygous": 1, + "no_genotype": 1, + "ref_call": 1, + "other": 3, + } + assert got_counts == expect_counts + expect_keep = os.path.join(data_dir, "filter_vcf.expect.with_filtering.1.keep.vcf") + expect_exclude = os.path.join( + data_dir, "filter_vcf.expect.with_filtering.1.exclude.vcf" + ) + utils.vcf_records_are_the_same(got_keep, expect_keep) + utils.vcf_records_are_the_same(got_exclude, expect_exclude) + os.unlink(got_keep) + os.unlink(got_exclude) + + got_counts = vcf_evaluate._filter_vcf( + infile, + got_keep, + got_exclude, + ref_seqs, + filter_pass={".", "FILTER_2", "PASS"}, + keep_ref_calls=False, + ) + expect_counts = { + "filter_fail": 2, + "heterozygous": 1, + "no_genotype": 1, + "ref_call": 1, + "other": 3, + } + assert got_counts == expect_counts + expect_keep = os.path.join(data_dir, "filter_vcf.expect.with_filtering.2.keep.vcf") + expect_exclude = os.path.join( + data_dir, "filter_vcf.expect.with_filtering.2.exclude.vcf" + ) + utils.vcf_records_are_the_same(got_keep, expect_keep) + utils.vcf_records_are_the_same(got_exclude, expect_exclude) + os.unlink(got_keep) + os.unlink(got_exclude) + + def test_evaluate_vcf(): - ref_mask_bed_file = os.path.join(data_dir, "evaluate_vcf.ref_mask.bed") truth_fasta = os.path.join(data_dir, "evaluate_vcf.truth.fa") ref_fasta = os.path.join(data_dir, "evaluate_vcf.ref.fa") vcf_to_eval = os.path.join(data_dir, "evaluate_vcf.to_eval.vcf") @@ -64,7 +122,14 @@ def test_evaluate_vcf(): subprocess.check_output(f"rm -rf {tmp_out}", shell=True) vcf_evaluate.evaluate_vcf( - vcf_to_eval, ref_fasta, truth_fasta, 100, tmp_out, debug=True, force=True + vcf_to_eval, + ref_fasta, + truth_fasta, + 100, + tmp_out, + debug=True, + force=True, + filter_pass={"PASS"}, ) summary_stats_expect_json = os.path.join( data_dir, "evaluate_vcf.expect.summary_stats.json" @@ -73,6 +138,8 @@ def test_evaluate_vcf(): assert filecmp.cmp(summary_stats_got_json, summary_stats_expect_json, shallow=False) subprocess.check_output(f"rm -r {tmp_out}", shell=True) + ref_mask_bed_file = os.path.join(data_dir, "evaluate_vcf.ref_mask.bed") + truth_mask_bed_file = os.path.join(data_dir, "evaluate_vcf.truth_mask.bed") vcf_evaluate.evaluate_vcf( vcf_to_eval, ref_fasta, @@ -81,7 +148,9 @@ def test_evaluate_vcf(): tmp_out, debug=True, force=True, + filter_pass={"PASS"}, ref_mask_bed_file=ref_mask_bed_file, + truth_mask_bed_file=truth_mask_bed_file, ) summary_stats_expect_json = os.path.join( data_dir, "evaluate_vcf.expect.masked.summary_stats.json" diff --git a/tests/vcf_qc_annotate_test.py b/tests/vcf_qc_annotate_test.py deleted file mode 100644 index a34d4fa..0000000 --- a/tests/vcf_qc_annotate_test.py +++ /dev/null @@ -1,147 +0,0 @@ -import filecmp -import os -import pytest - -from cluster_vcf_records import vcf_record - -from varifier import vcf_qc_annotate - -this_dir = os.path.dirname(os.path.abspath(__file__)) -data_dir = os.path.join(this_dir, "data", "vcf_qc_annotate") - - -def test_add_vfr_filter_to_record(): - record = vcf_record.VcfRecord("ref\t42\t.\tT\t.\t.\tPASS\t.\tGT\t0/0") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "NO_ALTS" - - record = vcf_record.VcfRecord( - "ref\t42\t.\tT\tA\t.\tMISMAPPED_UNPLACEABLE\t.\tGT\t0/0" - ) - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "MISMAPPED_UNPLACEABLE" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tFOO\tBAR") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "NO_GT" - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\t\t") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "NO_GT" - - record = vcf_record.VcfRecord("ref\t42\t.\t.\tA\t.\tPASS\t.\tGT\t1/1") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "NO_REF_SEQ" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tGT\t0/1") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "CANNOT_USE_GT" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA,*\t.\tPASS\t.\tGT\t1/1") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "PASS" - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA,*\t.\tPASS\t.\tGT\t2/2") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "CANNOT_USE_GT" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tGT\t0/0") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "CANNOT_USE_GT" - vcf_qc_annotate._add_vfr_filter_to_record(record, want_ref_calls=True) - assert record.FORMAT["VFR_FILTER"] == "PASS" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tGT\t1/1") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "PASS" - - record = vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tFAIL_FILTER\t.\tGT\t1/1") - vcf_qc_annotate._add_vfr_filter_to_record(record) - assert record.FORMAT["VFR_FILTER"] == "FAIL_BUT_TEST" - - -def test_fix_cluster_filter_tag(): - cluster = [vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tPASS")] - vcf_qc_annotate._fix_cluster_filter_tag(cluster) - assert len(cluster) == 1 - assert cluster[0].FORMAT["VFR_FILTER"] == "PASS" - - cluster = [ - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tFAIL_BUT_TEST") - ] - vcf_qc_annotate._fix_cluster_filter_tag(cluster) - assert len(cluster) == 1 - assert cluster[0].FORMAT["VFR_FILTER"] == "FAIL_BUT_TEST" - - cluster = [ - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tPASS"), - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tFAIL_BUT_TEST"), - ] - vcf_qc_annotate._fix_cluster_filter_tag(cluster) - assert len(cluster) == 2 - assert cluster[0].FORMAT["VFR_FILTER"] == "PASS" - assert cluster[1].FORMAT["VFR_FILTER"] == "FAIL_CONFLICT" - - cluster = [ - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tFAIL_BUT_TEST"), - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tFAIL_BUT_TEST"), - ] - vcf_qc_annotate._fix_cluster_filter_tag(cluster) - assert len(cluster) == 2 - assert cluster[0].FORMAT["VFR_FILTER"] == "FAIL_CONFLICT" - assert cluster[1].FORMAT["VFR_FILTER"] == "FAIL_CONFLICT" - - cluster = [ - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tPASS"), - vcf_record.VcfRecord("ref\t42\t.\tT\tA\t.\tPASS\t.\tVFR_FILTER\tPASS"), - ] - vcf_qc_annotate._fix_cluster_filter_tag(cluster) - assert len(cluster) == 2 - assert cluster[0].FORMAT["VFR_FILTER"] == "FAIL_CONFLICT" - assert cluster[1].FORMAT["VFR_FILTER"] == "FAIL_CONFLICT" - - -def test_annotate_sorted_list_of_records(): - records = [ - vcf_record.VcfRecord("ref\t1\t.\tTA\tA\t.\tPASS\t.\tGT\t1/1"), - vcf_record.VcfRecord("ref\t1\t.\tT\tA\t.\tPASS\t.\tGT\t1/1"), - vcf_record.VcfRecord("ref\t5\t.\tG\tA\t.\tPASS\t.\tGT\t1/1"), - vcf_record.VcfRecord("ref\t5\t.\tG\tT\t.\tPASS\t.\tFOO\tBAR"), - vcf_record.VcfRecord("ref\t5\t.\tG\tC\t.\tPASS\t.\tGT\t0/1"), - vcf_record.VcfRecord("ref\t10\t.\tTA\tA\t.\tFAIL\t.\tGT\t1/1"), - vcf_record.VcfRecord("ref\t11\t.\tA\tC\t.\tPASS\t.\tGT\t1/1"), - ] - vcf_qc_annotate._annotate_sorted_list_of_records(records) - expect = [ - vcf_record.VcfRecord( - "ref\t1\t.\tTA\tA\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:FAIL_CONFLICT" - ), - vcf_record.VcfRecord( - "ref\t1\t.\tT\tA\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:FAIL_CONFLICT" - ), - vcf_record.VcfRecord("ref\t5\t.\tG\tA\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:PASS"), - vcf_record.VcfRecord("ref\t5\t.\tG\tT\t.\tPASS\t.\tFOO:VFR_FILTER\tBAR:NO_GT"), - vcf_record.VcfRecord( - "ref\t5\t.\tG\tC\t.\tPASS\t.\tGT:VFR_FILTER\t0/1:CANNOT_USE_GT" - ), - vcf_record.VcfRecord( - "ref\t10\t.\tTA\tA\t.\tFAIL\t.\tGT:VFR_FILTER\t1/1:FAIL_CONFLICT" - ), - vcf_record.VcfRecord("ref\t11\t.\tA\tC\t.\tPASS\t.\tGT:VFR_FILTER\t1/1:PASS"), - ] - assert records == expect - - -def test_add_qc_to_vcf(): - infile = os.path.join(data_dir, "add_qc_to_vcf.in.vcf") - expect_want_ref = os.path.join(data_dir, "add_qc_to_vcf.expect.want_ref_calls.vcf") - expect_not_want_ref = os.path.join( - data_dir, "add_qc_to_vcf.expect.not_want_ref_calls.vcf" - ) - outfile = "tmp.add_qc_to_vcf.out.vcf" - if os.path.exists(outfile): - os.unlink(outfile) - vcf_qc_annotate.add_qc_to_vcf(infile, outfile, want_ref_calls=True) - assert filecmp.cmp(outfile, expect_want_ref, shallow=False) - os.unlink(outfile) - vcf_qc_annotate.add_qc_to_vcf(infile, outfile, want_ref_calls=False) - assert filecmp.cmp(outfile, expect_not_want_ref, shallow=False) - os.unlink(outfile) diff --git a/tests/vcf_stats_test.py b/tests/vcf_stats_test.py index d848f65..ece7eeb 100644 --- a/tests/vcf_stats_test.py +++ b/tests/vcf_stats_test.py @@ -87,6 +87,7 @@ def test_format_dict_to_edit_dist_scores(): } assert (0.75, 1) == vcf_stats.format_dict_to_edit_dist_scores(format_dict) + def test_per_record_stats_from_vcf_file(): infile = os.path.join(data_dir, "per_record_stats_from_vcf_file.vcf") expect = [ @@ -105,7 +106,6 @@ def test_per_record_stats_from_vcf_file(): "VFR_ALLELE_LEN": 1, "VFR_ALLELE_MATCH_COUNT": 0, "VFR_ALLELE_MATCH_FRAC": 0.0, - "VFR_FILTER": "PASS", "VFR_RESULT": "FP", }, { @@ -123,19 +123,16 @@ def test_per_record_stats_from_vcf_file(): "VFR_ALLELE_LEN": 1, "VFR_ALLELE_MATCH_COUNT": 0, "VFR_ALLELE_MATCH_FRAC": 0.0, - "VFR_FILTER": "PASS", "VFR_RESULT": "FP", }, ] got = vcf_stats.per_record_stats_from_vcf_file(infile) - print(got) assert got == expect def test_summary_stats_from_per_record_stats(): record_stats = [ { - "VFR_FILTER": "PASS", "VFR_RESULT": "TP", "VFR_ALLELE_MATCH_FRAC": 0.1, "VFR_ED_RA": 1, @@ -143,7 +140,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 0, }, { - "VFR_FILTER": "PASS", "VFR_RESULT": "TP", "VFR_ALLELE_MATCH_FRAC": 0.12, "VFR_ED_RA": 1, @@ -151,7 +147,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 0, }, { - "VFR_FILTER": "PASS", "VFR_RESULT": "FP", "VFR_ALLELE_MATCH_FRAC": 0.2, "VFR_ED_RA": 1, @@ -159,7 +154,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 1, }, { - "VFR_FILTER": "PASS", "VFR_RESULT": "Partial_TP", "VFR_ALLELE_MATCH_FRAC": 0.25, "VFR_ED_RA": 1, @@ -167,7 +161,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 1, }, { - "VFR_FILTER": "PASS", "VFR_RESULT": "FP", "VFR_ALLELE_MATCH_FRAC": 0.3, "VFR_ED_RA": 1, @@ -175,7 +168,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 1, }, { - "VFR_FILTER": "FAIL", "VFR_RESULT": "FP", "VFR_ALLELE_MATCH_FRAC": 0.4, "VFR_ED_RA": 1, @@ -183,7 +175,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 1, }, { - "VFR_FILTER": "FAIL_BUT_TEST", "VFR_RESULT": "TP", "VFR_ALLELE_MATCH_FRAC": 0.5, "VFR_ED_RA": 1, @@ -191,7 +182,6 @@ def test_summary_stats_from_per_record_stats(): "VFR_ED_TA": 0, }, { - "VFR_FILTER": "PASS", "VFR_IN_MASK": 1, "VFR_RESULT": "TP", "VFR_ALLELE_MATCH_FRAC": 0.1, @@ -201,23 +191,15 @@ def test_summary_stats_from_per_record_stats(): }, ] expect = { - "UNUSED": {"CONFLICT": 0, "OTHER": 1, "MASKED": 1}, - "ALL": { - "TP": {"Count": 3, "SUM_ALLELE_MATCH_FRAC": 0.72, "SUM_EDIT_DIST": 3}, - "FP": {"Count": 3, "SUM_ALLELE_MATCH_FRAC": 0.75, "SUM_EDIT_DIST": 3}, - "EDIT_DIST_COUNTS": {"numerator": 4, "denominator": 7}, - }, - "FILT": { - "TP": {"Count": 2, "SUM_ALLELE_MATCH_FRAC": 0.22, "SUM_EDIT_DIST": 2}, - "FP": {"Count": 3, "SUM_ALLELE_MATCH_FRAC": 0.75, "SUM_EDIT_DIST": 3}, - "EDIT_DIST_COUNTS": {"numerator": 3, "denominator": 6}, - }, + "TP": {"Count": 3, "SUM_ALLELE_MATCH_FRAC": 0.72, "SUM_EDIT_DIST": 3}, + "FP": {"Count": 4, "SUM_ALLELE_MATCH_FRAC": 1.15, "SUM_EDIT_DIST": 4}, + "EDIT_DIST_COUNTS": {"numerator": 6, "denominator": 10}, + "UNUSED": {"CONFLICT": 0, "MASKED": 1, "OTHER": 0}, } got = vcf_stats.summary_stats_from_per_record_stats(record_stats) assert got == expect - for all_or_filt in "ALL", "FILT": - expect[all_or_filt]["FN"] = expect[all_or_filt]["FP"] - del expect[all_or_filt]["FP"] got = vcf_stats.summary_stats_from_per_record_stats(record_stats, for_recall=True) + expect["FN"] = expect["FP"] + del expect["FP"] assert got == expect diff --git a/varifier/__init__.py b/varifier/__init__.py index eaf2a89..4b17a1a 100644 --- a/varifier/__init__.py +++ b/varifier/__init__.py @@ -16,7 +16,6 @@ "truth_variant_finding", "utils", "vcf_evaluate", - "vcf_qc_annotate", "vcf_stats", ] diff --git a/varifier/__main__.py b/varifier/__main__.py index 3254df8..538cd3a 100755 --- a/varifier/__main__.py +++ b/varifier/__main__.py @@ -76,6 +76,11 @@ def main(args=None): subparser_vcf_eval.add_argument( "--force", help="Replace outdir if it already exists", action="store_true" ) + subparser_vcf_eval.add_argument( + "--filter_pass", + help="Defines how to handle FILTER column of input VCF file. Comma-separated list of filter names. A VCF line is kept if any of its FILTER entries are in the provided list. Put '.' in the list to keep records where the filter column is '.'. Default behaviour is to ignore the filter column and use all records", + metavar="FILTER1[,FILTER2[,...]]", + ) subparser_vcf_eval.add_argument( "--ref_mask", help="BED file of ref regions to mask. Any variants in the VCF overlapping the mask are removed at the start of the pipeline", diff --git a/varifier/probe.py b/varifier/probe.py index 3a79189..9585c97 100644 --- a/varifier/probe.py +++ b/varifier/probe.py @@ -170,8 +170,6 @@ def edit_distance_vs_ref(self, map_hit, ref_seq, ref_mask=None): map_hit, ref_seq=ref_seq, ref_mask=ref_mask ) start, end = self.padded_seq_allele_start_end_coords(padded_probe_seq) - x = "".join([{"N": "N", False: "0", True: "1"}[x] for x in padded_ref_mask]) - diffs = ["|" if padded_probe_seq[i] == padded_ref_seq[i] else " " for i in range(len(padded_ref_seq))] if start == None: return -1, False probe_allele = padded_probe_seq[start : end + 1] diff --git a/varifier/probe_mapping.py b/varifier/probe_mapping.py index 41bfeb7..0d4af2f 100644 --- a/varifier/probe_mapping.py +++ b/varifier/probe_mapping.py @@ -1,70 +1,110 @@ import operator -import os import mappy -import pyfastaq from cluster_vcf_records import vcf_file_read -from varifier import edit_distance, probe, utils, vcf_qc_annotate +from varifier import edit_distance, probe, utils -def _get_wanted_format(use_fail_conflict): - wanted_format = {"PASS", "FAIL_BUT_TEST"} - if use_fail_conflict: - wanted_format.add("FAIL_CONFLICT") - return wanted_format +def get_flanking_variants(vcf_records, record_index, end_pos, left=True): + centre_record = vcf_records[record_index] + used_ref_positions = {centre_record.POS} + wanted_variants = [] + i = record_index - 1 if left else record_index + 1 + i_add = -1 if left else 1 + + while 0 <= i < len(vcf_records): + if ( + vcf_records[i].CHROM != centre_record.CHROM + or (left and vcf_records[i].POS < end_pos) + or (not left and vcf_records[i].ref_end_pos() > end_pos) + ): + break + + if not used_ref_positions.isdisjoint( + range(vcf_records[i].POS, vcf_records[i].ref_end_pos() + 1) + ): + i += i_add + continue + genotype = set(vcf_records[i].FORMAT["GT"].split("/")) + assert len(genotype) == 1 + genotype = genotype.pop() + if genotype != "0": + used_ref_positions.update( + range(vcf_records[i].POS, vcf_records[i].ref_end_pos() + 1) + ) + allele = vcf_records[i].ALT[int(genotype) - 1] + wanted_variants.append( + (vcf_records[i].POS, vcf_records[i].ref_end_pos(), allele) + ) + i += i_add + + wanted_variants.sort(key=operator.itemgetter(0)) + return wanted_variants + + +def apply_variants_to_seq(seq, seq_start_in_ref, variants): + for ref_start, ref_end, allele in reversed(variants): + seq_start = ref_start - seq_start_in_ref + seq_end = ref_end - seq_start_in_ref + seq[seq_start : seq_end + 1] = [allele] + + +def make_probes(ref_seqs, vcf_records, record_index, flank_length): + record = vcf_records[record_index] + ref_seq = ref_seqs[record.CHROM] + left_flank_start = max(0, record.POS - flank_length) + right_flank_start = record.ref_end_pos() + 1 + right_flank_end = min(len(ref_seq) - 1, record.ref_end_pos() + flank_length) + left_variants = get_flanking_variants( + vcf_records, record_index, left_flank_start, left=True + ) + right_variants = get_flanking_variants( + vcf_records, record_index, right_flank_end, left=False + ) + left_flank = list(ref_seq[left_flank_start : record.POS]) + right_flank = list(ref_seq[record.ref_end_pos() + 1 : right_flank_end + 1]) + apply_variants_to_seq(left_flank, left_flank_start, left_variants) + apply_variants_to_seq(right_flank, right_flank_start, right_variants) + left_flank = "".join(left_flank)[-flank_length:] + right_flank = "".join(right_flank)[:flank_length] + # We should not ever see a VCF record without a GT entry at this point in + # the code, because the VCF file to be evaluated is filtered at the start, + # removing records without GT. + try: + alt_index = int(record.FORMAT["GT"].split("/")[0]) + except KeyError: + raise KeyError( + f"GT not found in the following VCF record. Cannot continue\n{record}" + ) + alt_allele = record.REF if alt_index == 0 else record.ALT[alt_index - 1] + ref_probe_seq = left_flank + record.REF + right_flank + alt_probe_seq = left_flank + alt_allele + right_flank + ref_probe = probe.Probe( + ref_probe_seq, len(left_flank), len(left_flank) + len(record.REF) - 1 + ) + alt_probe = probe.Probe( + alt_probe_seq, len(left_flank), len(left_flank) + len(alt_allele) - 1 + ) + assert ref_probe.allele_seq() == record.REF + assert alt_probe.allele_seq() == alt_allele + return ref_probe, alt_probe def get_probes_and_vcf_records( vcf_file, ref_seqs, flank_length, use_fail_conflict=False ): - """Input vcf_file is assumed to have been made by vcf_qc_annotate.add_qc_to_vcf(), - so that each record has the FORMAT tag VFR_FILTER. - For each line of the input VCF file, yields a + """For each line of the input VCF file, yields a tuple (vcf_record, alt probe sequence). vcf_file = name of VCF file. ref_seqs = dictionary of sequence name -> sequence. flank_length = number of nucleotides to add either side of variant sequence.""" header_lines, vcf_records = vcf_file_read.vcf_file_to_list(vcf_file) yield header_lines - wanted_format = _get_wanted_format(use_fail_conflict) - - for record in vcf_records: - if record.FORMAT["VFR_FILTER"] not in wanted_format: - yield record, None, None - continue - - flank_start = max(0, record.POS - flank_length) - ref_seq = ref_seqs[record.CHROM] - if ref_seq[record.POS : record.POS + len(record.REF)] != record.REF: - record.set_format_key_value("VFR_FILTER", "REF_STRING_MISMATCH") - yield record, None, None - continue - - flank_end = min(len(ref_seq) - 1, record.ref_end_pos() + flank_length) - probe_allele_start = record.POS - flank_start - - alt_index = int(record.FORMAT["GT"].split("/")[0]) - alt_allele = record.REF if alt_index == 0 else record.ALT[alt_index - 1] - alt_probe_allele_end = probe_allele_start + len(alt_allele) - 1 - alt_probe_seq = ( - ref_seq[flank_start : record.POS] - + alt_allele - + ref_seq[record.ref_end_pos() + 1 : flank_end + 1] - ) - alt_probe = probe.Probe(alt_probe_seq, probe_allele_start, alt_probe_allele_end) - assert alt_probe.allele_seq() == alt_allele - - ref_probe_allele_end = probe_allele_start + len(record.REF) - 1 - ref_probe_seq = ( - ref_seq[flank_start : record.POS] - + record.REF - + ref_seq[record.ref_end_pos() + 1 : flank_end + 1] - ) - ref_probe = probe.Probe(ref_probe_seq, probe_allele_start, ref_probe_allele_end) - assert ref_probe.allele_seq() == record.REF - yield record, ref_probe, alt_probe + for i, vcf_record in enumerate(vcf_records): + ref_probe, alt_probe = make_probes(ref_seqs, vcf_records, i, flank_length) + yield vcf_record, ref_probe, alt_probe def probe_hits_to_best_allele_counts(probe, hits, debug_outfile=None): @@ -103,9 +143,6 @@ def evaluate_vcf_record( use_fail_conflict=False, truth_mask=None, ): - if vcf_record.FORMAT["VFR_FILTER"] not in _get_wanted_format(use_fail_conflict): - return - edit_dist_allele_v_ref = edit_distance.edit_distance_between_seqs( ref_probe.allele_seq(), alt_probe.allele_seq() ) @@ -130,7 +167,9 @@ def evaluate_vcf_record( file=map_outfile, ) - alt_hits = [x for x in alt_hits if alt_probe.map_hit_includes_allele(x) and x.mapq > 0] + alt_hits = [ + x for x in alt_hits if alt_probe.map_hit_includes_allele(x) and x.mapq > 0 + ] alt_match, alt_allele_length, alt_best_hit = probe_hits_to_best_allele_counts( alt_probe, alt_hits, debug_outfile=map_outfile ) @@ -218,10 +257,8 @@ def annotate_vcf_with_probe_mapping( ): vcf_ref_seqs = utils.file_to_dict_of_seqs(vcf_ref_fasta) truth_ref_seqs = utils.file_to_dict_of_seqs(truth_ref_fasta) - vcf_with_qc = vcf_out + ".debug.vcf" - vcf_qc_annotate.add_qc_to_vcf(vcf_in, vcf_with_qc, want_ref_calls=use_ref_calls) probes_and_vcf_reader = get_probes_and_vcf_records( - vcf_with_qc, vcf_ref_seqs, flank_length, use_fail_conflict=use_fail_conflict, + vcf_in, vcf_ref_seqs, flank_length, use_fail_conflict=use_fail_conflict, ) # Some notes on the mapper options... @@ -295,6 +332,3 @@ def annotate_vcf_with_probe_mapping( if map_outfile is not None: f_map.close() - - if not debug: - os.unlink(vcf_with_qc) diff --git a/varifier/recall.py b/varifier/recall.py index 7a93d02..026c552 100644 --- a/varifier/recall.py +++ b/varifier/recall.py @@ -1,3 +1,4 @@ +import logging import operator import os @@ -7,19 +8,13 @@ from varifier import probe_mapping, truth_variant_finding, utils -def _vcf_file_to_dict(vcf_file, pass_only=True): +def _vcf_file_to_dict(vcf_file): """Loads VCF file. Returns a dictionary of sequence name -> sorted list by position of variants""" records = {} - wanted_format = {"PASS"} - if not pass_only: - wanted_format.add("FAIL_BUT_TEST") header_lines, vcf_records = vcf_file_read.vcf_file_to_list(vcf_file) for record in vcf_records: - if record.FORMAT["VFR_FILTER"] not in wanted_format: - continue - if record.CHROM not in records: records[record.CHROM] = [] records[record.CHROM].append(record) @@ -30,16 +25,17 @@ def _vcf_file_to_dict(vcf_file, pass_only=True): return records -def apply_variants_to_genome(ref_fasta, vcf_file, out_fasta, pass_only=True): +def apply_variants_to_genome(ref_fasta, vcf_file, out_fasta): """Takes the variants in vcf_file, and applies them to the associated reference genome in ref_fasta. Writes a new file out_fasta that has those variants applied""" ref_sequences = utils.file_to_dict_of_seqs(ref_fasta) - vcf_dict = _vcf_file_to_dict(vcf_file, pass_only=pass_only) + vcf_dict = _vcf_file_to_dict(vcf_file) with open(out_fasta, "w") as f: for ref_name, vcf_records in sorted(vcf_dict.items()): old_seq = ref_sequences[ref_name] new_seq = list(old_seq.seq) + previous_ref_start = None # Applying indels messes up the coords of any subsequent variant, # so start at the end and work backwards for vcf_record in reversed(vcf_records): @@ -48,6 +44,23 @@ def apply_variants_to_genome(ref_fasta, vcf_file, out_fasta, pass_only=True): allele_index = int(genotype.pop()) if allele_index == 0: continue + + # Some tools report two (or more) variants that overlap. + # No clear "right" option here. + # If the current record overlaps the previous one, ignore it. + # We could try to be cleverer about this (take best records + # based on likelihoods or whatever else), but every tool is + # different so no sane consistent way of doing this across tools + if ( + previous_ref_start is not None + and vcf_record.ref_end_pos() >= previous_ref_start + ): + logging.warn( + f"Skipping this record when calculating recall because it overlaps another record: {vcf_record}" + ) + continue + + previous_ref_start = vcf_record.POS allele = vcf_record.ALT[allele_index - 1] start, end = vcf_record.POS, vcf_record.ref_end_pos() + 1 assert old_seq[start:end] == "".join(new_seq[start:end]) @@ -71,10 +84,6 @@ def get_recall( if truth_vcf is None: assert truth_fasta is not None - # Make truth VCF. This only depends on ref_fasta and truth_fasta, not - # on VCF to test. In particular, is independent of whether or not - # were using all records in vcf_to_test, or PASS records only. This means - # only need to make one truth VCF, which can be used for both cases. truth_outdir = os.path.join(outdir, "truth_vcf") truth_vcf = truth_variant_finding.make_truth_vcf( ref_fasta, @@ -88,28 +97,17 @@ def get_recall( else: assert truth_fasta is None - vcfs_out = {} - for all_or_filt in "ALL", "FILT": - run_outdir = os.path.join(outdir, all_or_filt) - os.mkdir(run_outdir) - mutated_ref_fasta = os.path.join(run_outdir, "00.ref_with_mutations_added.fa") - apply_variants_to_genome( - ref_fasta, vcf_to_test, mutated_ref_fasta, pass_only=all_or_filt == "FILT" - ) + mutated_ref_fasta = os.path.join(outdir, "ref_with_mutations_added.fa") + apply_variants_to_genome(ref_fasta, vcf_to_test, mutated_ref_fasta) - # For each record in the truth VCF, make a probe and map to the mutated genome - vcfs_out[all_or_filt] = os.path.join( - run_outdir, "02.truth.probe_mapped_to_mutated_genome.vcf" - ) - map_outfile = ( - os.path.join(run_outdir, "02.probe_map_debug.txt") if debug else None - ) - probe_mapping.annotate_vcf_with_probe_mapping( - truth_vcf, - ref_fasta, - mutated_ref_fasta, - flank_length, - vcfs_out[all_or_filt], - map_outfile=map_outfile, - ) - return vcfs_out["ALL"], vcfs_out["FILT"] + vcf_out = os.path.join(outdir, "recall.vcf") + map_outfile = os.path.join(outdir, "probe_map_debug.txt") if debug else None + probe_mapping.annotate_vcf_with_probe_mapping( + truth_vcf, + ref_fasta, + mutated_ref_fasta, + flank_length, + vcf_out, + map_outfile=map_outfile, + ) + return vcf_out diff --git a/varifier/tasks/vcf_eval.py b/varifier/tasks/vcf_eval.py index 3b61ffa..4c4bdef 100644 --- a/varifier/tasks/vcf_eval.py +++ b/varifier/tasks/vcf_eval.py @@ -2,6 +2,9 @@ def run(options): + filter_pass = ( + None if options.filter_pass is None else set(options.filter_pass.split(",")) + ) vcf_evaluate.evaluate_vcf( options.vcf_in, options.vcf_fasta, @@ -11,6 +14,7 @@ def run(options): truth_vcf=options.truth_vcf, debug=options.debug, force=options.force, + filter_pass=filter_pass, ref_mask_bed_file=options.ref_mask, truth_mask_bed_file=options.truth_mask, discard_ref_calls=not options.use_ref_calls, diff --git a/varifier/truth_variant_finding.py b/varifier/truth_variant_finding.py index de6f51f..e62e155 100644 --- a/varifier/truth_variant_finding.py +++ b/varifier/truth_variant_finding.py @@ -4,8 +4,6 @@ import shutil import subprocess -import pyfastaq -import pymummer import pysam import pysam.bcftools @@ -57,7 +55,7 @@ def _merge_vcf_files_for_probe_mapping(list_of_vcf_files, ref_fasta, vcf_out): new_record.ALT = [alt] new_record.INFO = {} new_record.FILTER = set(["PASS"]) - new_record.FORMAT = {"GT": "1/1", "VFR_FILTER": "PASS"} + new_record.FORMAT = {"GT": "1/1"} print(new_record, file=f) @@ -145,7 +143,7 @@ def make_truth_vcf( to_merge = [dnadiff_vcf, minimap2_vcf] _merge_vcf_files_for_probe_mapping(to_merge, ref_fasta, merged_vcf) logging.info(f"Made merged VCF file {merged_vcf}") - logging.info(f"Probe mapping to remove incorrect calls") + logging.info("Probe mapping to remove incorrect calls") probe_mapping.annotate_vcf_with_probe_mapping( merged_vcf, ref_fasta, @@ -159,7 +157,7 @@ def make_truth_vcf( probe_mapped_vcf, probe_filtered_vcf, max_ref_len ) logging.info(f"Made filtered VCF file {probe_filtered_vcf}") - logging.info(f"Using bcftools to normalise and remove duplicates") + logging.info("Using bcftools to normalise and remove duplicates") _bcftools_norm(ref_fasta, probe_filtered_vcf, truth_vcf) logging.info(f"Finished making truth VCF file {truth_vcf}") return truth_vcf diff --git a/varifier/vcf_evaluate.py b/varifier/vcf_evaluate.py index a2510b0..e02108f 100644 --- a/varifier/vcf_evaluate.py +++ b/varifier/vcf_evaluate.py @@ -3,6 +3,8 @@ import os import subprocess +from cluster_vcf_records import vcf_file_read, vcf_record + from varifier import probe_mapping, recall, utils, vcf_stats @@ -13,29 +15,98 @@ def _add_overall_precision_and_recall_to_summary_stats(summary_stats): # So we either have FP or FN for the "wrong" variants from probe mapping. for prec_or_recall in "Precision", "Recall": fp_key = "FP" if prec_or_recall == "Precision" else "FN" - for all_or_filt in "ALL", "FILT": - d = summary_stats[prec_or_recall][all_or_filt] - tp = d["TP"]["Count"] - tp_frac = ( - d["TP"]["SUM_ALLELE_MATCH_FRAC"] + d[fp_key]["SUM_ALLELE_MATCH_FRAC"] + d = summary_stats[prec_or_recall] + tp = d["TP"]["Count"] + tp_frac = d["TP"]["SUM_ALLELE_MATCH_FRAC"] + d[fp_key]["SUM_ALLELE_MATCH_FRAC"] + fp = d[fp_key]["Count"] + total_calls = tp + fp + if total_calls > 0: + d[prec_or_recall] = round(tp / total_calls, 8) + d[f"{prec_or_recall}_frac"] = round(tp_frac / total_calls, 8) + else: + d[prec_or_recall] = 0 + d[f"{prec_or_recall}_frac"] = 0 + + if d["EDIT_DIST_COUNTS"]["denominator"] > 0: + d[f"{prec_or_recall}_edit_dist"] = round( + d["EDIT_DIST_COUNTS"]["numerator"] + / d["EDIT_DIST_COUNTS"]["denominator"], + 8, ) - fp = d[fp_key]["Count"] - total_calls = tp + fp - if total_calls > 0: - d[prec_or_recall] = round(tp / total_calls, 8) - d[f"{prec_or_recall}_frac"] = round(tp_frac / total_calls, 8) - else: - d[prec_or_recall] = 0 - d[f"{prec_or_recall}_frac"] = 0 - - if d["EDIT_DIST_COUNTS"]["denominator"] > 0: - d[f"{prec_or_recall}_edit_dist"] = round( - d["EDIT_DIST_COUNTS"]["numerator"] - / d["EDIT_DIST_COUNTS"]["denominator"], - 8, - ) - else: - d[f"{prec_or_recall}_edit_dist"] = 0 + else: + d[f"{prec_or_recall}_edit_dist"] = 0 + + +def _filter_vcf( + infile, + outfile_keep, + outfile_exclude, + ref_seqs, + filter_pass=None, + keep_ref_calls=False, +): + counts = { + "filter_fail": 0, + "heterozygous": 0, + "no_genotype": 0, + "ref_call": 0, + "other": 0, + } + with vcf_file_read.open_vcf_file_for_reading(infile) as f_in, open( + outfile_keep, "w" + ) as f_out_keep, open(outfile_exclude, "w") as f_out_exclude: + for line in f_in: + if line.startswith("#"): + print(line, end="", file=f_out_keep) + print(line, end="", file=f_out_exclude) + continue + + record = vcf_record.VcfRecord(line) + exclude_reason = None + + filter_is_dot_and_fails = ( + filter_pass is not None + and len(record.FILTER) == 0 + and "." not in filter_pass + ) + filter_not_dot_and_fails = ( + filter_pass is not None + and len(record.FILTER) > 0 + and record.FILTER.isdisjoint(filter_pass) + ) + filter_fails = filter_is_dot_and_fails or filter_not_dot_and_fails + + if "MISMAPPED_UNPLACEABLE" in record.FILTER or filter_fails: + exclude_reason = "filter_fail" + elif len(record.ALT) == 0 or record.ALT == ["."]: + exclude_reason = "other" + elif record.FORMAT is None: + exclude_reason = "no_genotype" + elif record.REF in [".", ""]: + exclude_reason = "other" + if ( + ref_seqs[record.CHROM][record.POS : record.POS + len(record.REF)] + != record.REF + ): + exclude_reason = "other" + + if exclude_reason is None: + gt = set(record.FORMAT.get("GT", ".").split("/")) + if len(gt) > 1: + exclude_reason = "heterozygous" + elif "." in gt: + exclude_reason = "no_genotype" + elif not keep_ref_calls and "0" in gt: + exclude_reason = "ref_call" + else: + print(record, file=f_out_keep) + + if exclude_reason is not None: + record.set_format_key_value("VFR_EXCLUDE_REASON", exclude_reason) + print(record, file=f_out_exclude) + counts[exclude_reason] += 1 + + return counts def evaluate_vcf( @@ -51,35 +122,46 @@ def evaluate_vcf( truth_mask_bed_file=None, discard_ref_calls=True, max_recall_ref_len=None, + filter_pass=None, ): if force: subprocess.check_output(f"rm -rf {outdir}", shell=True) os.mkdir(outdir) # Mask if needed - if ref_mask_bed_file is not None: + if ref_mask_bed_file is None: + vcf_to_filter = vcf_to_eval + else: logging.info("Masking VCF...") masked_vcf = os.path.join(outdir, "variants_to_eval.masked.vcf") utils.mask_vcf_file(vcf_to_eval, ref_mask_bed_file, masked_vcf) - vcf_to_eval = masked_vcf - logging.info(f"Masked VCF") + vcf_to_filter = masked_vcf + logging.info("Masked VCF") + + vcf_ref_seqs = utils.file_to_dict_of_seqs(vcf_ref_fasta) + filtered_vcf = os.path.join(outdir, "variants_to_eval.filtered.vcf") + excluded_vcf = os.path.join(outdir, "variants_to_eval.excluded.vcf") + logging.info("Filtering VCF...") + filtered_counts = _filter_vcf( + vcf_to_filter, + filtered_vcf, + excluded_vcf, + vcf_ref_seqs, + filter_pass=filter_pass, + keep_ref_calls=not discard_ref_calls, + ) + logging.info("Filtering VCF done") - logging.info("Annotate VCF with TP/FP for precision...") vcf_for_precision = os.path.join(outdir, "precision.vcf") - if debug: - map_outfile = f"{vcf_for_precision}.debug.map" - else: - map_outfile = None - logging.info("Annotation for precision done") - + map_outfile = f"{vcf_for_precision}.debug.map" if debug else None if truth_mask_bed_file is None: truth_mask = None else: truth_mask = utils.load_mask_bed_file(truth_mask_bed_file) - logging.info("Annotating VCF with probe mapping...") + logging.info("Annotating VCF with TP/FP for precision...") probe_mapping.annotate_vcf_with_probe_mapping( - vcf_to_eval, + filtered_vcf, vcf_ref_fasta, truth_ref_fasta, flank_length, @@ -88,13 +170,13 @@ def evaluate_vcf( use_ref_calls=not discard_ref_calls, truth_mask=truth_mask, ) - logging.info("Annotation of probe mapping done") + logging.info("Annotatiing VCF with with TP/FP for precision done") logging.info("Calculating recall...") recall_dir = os.path.join(outdir, "recall") - vcf_for_recall_all, vcf_for_recall_filtered = recall.get_recall( + vcf_for_recall = recall.get_recall( vcf_ref_fasta, - vcf_for_precision, + filtered_vcf, recall_dir, flank_length, debug=debug, @@ -106,34 +188,18 @@ def evaluate_vcf( if ref_mask_bed_file is not None: logging.info("Masking recall VCF...") utils.mask_vcf_file( - vcf_for_recall_all, ref_mask_bed_file, f"{vcf_for_recall_all}.masked.vcf" + vcf_for_recall, ref_mask_bed_file, f"{vcf_for_recall}.masked.vcf" ) - vcf_for_recall_all = f"{vcf_for_recall_all}.masked.vcf" - utils.mask_vcf_file( - vcf_for_recall_filtered, - ref_mask_bed_file, - f"{vcf_for_recall_filtered}.masked.vcf", - ) - vcf_for_recall_filtered = f"{vcf_for_recall_filtered}.masked.vcf" - os.unlink(masked_vcf) + vcf_for_recall = f"{vcf_for_recall}.masked.vcf" + logging.info("Masking recall VCF done") logging.info("Recall calculation done") # Gather stats and make plots - logging.info("Gathering stats and making plots...") - per_record_recall_all = vcf_stats.per_record_stats_from_vcf_file(vcf_for_recall_all) - per_record_recall_filtered = vcf_stats.per_record_stats_from_vcf_file( - vcf_for_recall_filtered - ) - recall_stats_all = vcf_stats.summary_stats_from_per_record_stats( - per_record_recall_all, for_recall=True + logging.info("Gathering stats...") + per_record_recall = vcf_stats.per_record_stats_from_vcf_file(vcf_for_recall) + recall_stats = vcf_stats.summary_stats_from_per_record_stats( + per_record_recall, for_recall=True ) - recall_stats_filtered = vcf_stats.summary_stats_from_per_record_stats( - per_record_recall_filtered, for_recall=True - ) - recall_stats = { - "ALL": recall_stats_all["ALL"], - "FILT": recall_stats_filtered["ALL"], - } per_record_precision = vcf_stats.per_record_stats_from_vcf_file(vcf_for_precision) precision_stats = vcf_stats.summary_stats_from_per_record_stats( @@ -142,6 +208,7 @@ def evaluate_vcf( summary_stats = {"Recall": recall_stats, "Precision": precision_stats} _add_overall_precision_and_recall_to_summary_stats(summary_stats) + summary_stats["Excluded_record_counts"] = filtered_counts summary_stats_json = os.path.join(outdir, "summary_stats.json") with open(summary_stats_json, "w") as f: diff --git a/varifier/vcf_qc_annotate.py b/varifier/vcf_qc_annotate.py deleted file mode 100644 index f82831d..0000000 --- a/varifier/vcf_qc_annotate.py +++ /dev/null @@ -1,98 +0,0 @@ -from cluster_vcf_records import vcf_file_read - - -def _add_vfr_filter_to_record(record, want_ref_calls=False): - record.remove_useless_start_nucleotides() - if len(record.ALT) == 0 or record.ALT == ["."]: - record.set_format_key_value("VFR_FILTER", "NO_ALTS") - return - if "MISMAPPED_UNPLACEABLE" in record.FILTER: - record.set_format_key_value("VFR_FILTER", "MISMAPPED_UNPLACEABLE") - return - if record.FORMAT is None or "GT" not in record.FORMAT: - record.set_format_key_value("VFR_FILTER", "NO_GT") - return - if record.REF in [".", ""]: - record.set_format_key_value("VFR_FILTER", "NO_REF_SEQ") - return - - genotype = record.FORMAT["GT"] - genotypes = genotype.split("/") - called_alleles = set(genotypes) - - if ( - len(called_alleles) != 1 - or "." in called_alleles - or (called_alleles == {"0"} and not want_ref_calls) - ): - record.set_format_key_value("VFR_FILTER", "CANNOT_USE_GT") - return - - allele_index = int(called_alleles.pop()) - if allele_index > 0 and record.ALT[allele_index - 1] == "*": - record.set_format_key_value("VFR_FILTER", "CANNOT_USE_GT") - return - - if record.FILTER != {"PASS"}: - record.set_format_key_value("VFR_FILTER", "FAIL_BUT_TEST") - return - - record.set_format_key_value("VFR_FILTER", "PASS") - - -def _fix_cluster_filter_tag(records): - if len(records) > 1: - pass_filter = [x for x in records if x.FORMAT["VFR_FILTER"] == "PASS"] - fail_filter = [x for x in records if x.FORMAT["VFR_FILTER"] == "FAIL_BUT_TEST"] - for x in fail_filter: - x.set_format_key_value("VFR_FILTER", "FAIL_CONFLICT") - if len(pass_filter) > 1: - for x in pass_filter: - x.set_format_key_value("VFR_FILTER", "FAIL_CONFLICT") - - -def _annotate_sorted_list_of_records(records, want_ref_calls=False): - """Annotated sorted list of VCF records. Assumes they all belong to - the same CHROM. Changes the records in place. No copying""" - wanted_filters = {"PASS", "FAIL_BUT_TEST"} - cluster = [] - cluster_end = None - - for record in records: - _add_vfr_filter_to_record(record, want_ref_calls=want_ref_calls) - if record.FORMAT["VFR_FILTER"] not in wanted_filters: - continue - - if len(cluster) == 0: - cluster = [record] - cluster_end = record.ref_end_pos() - elif record.POS > cluster_end: - _fix_cluster_filter_tag(cluster) - cluster = [record] - cluster_end = record.ref_end_pos() - else: - cluster.append(record) - cluster_end = max(cluster_end, record.ref_end_pos()) - - _fix_cluster_filter_tag(cluster) - - -def add_qc_to_vcf(infile, outfile, want_ref_calls=False): - """Annotated VCF file with QC info needed for calculating precision and recall. - Adds various tags to each record.""" - header_lines, vcf_records = vcf_file_read.vcf_file_to_dict( - infile, remove_useless_start_nucleotides=True - ) - assert header_lines[-1].startswith("#CHROM") - - with open(outfile, "w") as f: - print(*header_lines[:-1], sep="\n", file=f) - print( - '##FORMAT=', - file=f, - ) - print(header_lines[-1], file=f) - - for chrom, records in sorted(vcf_records.items()): - _annotate_sorted_list_of_records(records, want_ref_calls=want_ref_calls) - print(*records, sep="\n", file=f) diff --git a/varifier/vcf_stats.py b/varifier/vcf_stats.py index 279609e..fb16b8a 100644 --- a/varifier/vcf_stats.py +++ b/varifier/vcf_stats.py @@ -47,7 +47,6 @@ def per_record_stats_from_vcf_file(infile): "VFR_ED_RA", "VFR_ED_TR", "VFR_ED_TA", - "VFR_FILTER", "VFR_ALLELE_LEN", "VFR_ALLELE_MATCH_COUNT", "VFR_ALLELE_MATCH_FRAC", @@ -124,7 +123,6 @@ def summary_stats_from_per_record_stats(per_record_stats, for_recall=False): returns a dictionary of summary stats. Set for_recall to True if the VCF was made for getting recall""" default_counts = {k: 0 for k in ("Count", "SUM_ALLELE_MATCH_FRAC", "SUM_EDIT_DIST")} - stats = {"UNUSED": {"CONFLICT": 0, "OTHER": 0, "MASKED": 0}} # By default, this is for getting the precision. Which means counting up # TPs and FPs. For recall, each call is an expected call from the truth. @@ -132,44 +130,34 @@ def summary_stats_from_per_record_stats(per_record_stats, for_recall=False): # a FN. We expected to find the variant, but didn't. fp_key = "FN" if for_recall else "FP" - for key in "ALL", "FILT": - stats[key] = { - "TP": copy.copy(default_counts), - fp_key: copy.copy(default_counts), - } - stats[key]["EDIT_DIST_COUNTS"] = {"numerator": 0, "denominator": 0} + stats = { + "UNUSED": {"CONFLICT": 0, "OTHER": 0, "MASKED": 0}, + "TP": copy.copy(default_counts), + fp_key: copy.copy(default_counts), + "EDIT_DIST_COUNTS": {"numerator": 0, "denominator": 0}, + } for d in per_record_stats: - if d["VFR_FILTER"] == "FAIL_CONFLICT": - stats["UNUSED"]["CONFLICT"] += 1 - elif d.get("VFR_IN_MASK", 0) == 1: + if d.get("VFR_IN_MASK", 0) == 1: stats["UNUSED"]["MASKED"] += 1 - elif d["VFR_FILTER"] not in ["PASS", "FAIL_BUT_TEST"]: - stats["UNUSED"]["OTHER"] += 1 else: if d["VFR_RESULT"] == "TP": result = "TP" else: result = fp_key - keys_to_update = ["ALL"] - if d["VFR_FILTER"] == "PASS": - keys_to_update.append("FILT") ed_num, ed_den = format_dict_to_edit_dist_scores(d) - for key in keys_to_update: - try: - stats[key][result]["SUM_ALLELE_MATCH_FRAC"] += d[ - "VFR_ALLELE_MATCH_FRAC" - ] - except TypeError: # the value could be "NA" - pass + try: + stats[result]["SUM_ALLELE_MATCH_FRAC"] += d["VFR_ALLELE_MATCH_FRAC"] + except TypeError: # the value could be "NA" + pass - stats[key][result]["SUM_EDIT_DIST"] += d["VFR_ED_RA"] - stats[key][result]["Count"] += 1 + stats[result]["SUM_EDIT_DIST"] += d["VFR_ED_RA"] + stats[result]["Count"] += 1 - if ed_num is not None: - stats[key]["EDIT_DIST_COUNTS"]["numerator"] += ed_num - stats[key]["EDIT_DIST_COUNTS"]["denominator"] += ed_den + if ed_num is not None: + stats["EDIT_DIST_COUNTS"]["numerator"] += ed_num + stats["EDIT_DIST_COUNTS"]["denominator"] += ed_den return stats