From e0d115757db19f977daa03415e1fab00b199a87d Mon Sep 17 00:00:00 2001 From: fubar2 Date: Mon, 30 Sep 2024 10:00:12 +1000 Subject: [PATCH 1/2] Add option to find regions with zeros - important for VGP where some large 200k+ regions on one or both haplotypes have absolutely no coverage from Pacbio hifi reads. Possibly from bionano optical stuff - and it may not really exist since absolutely nothing maps there! --- .../bigwig_outlier_bed/bigwig_outlier_bed.py | 29 +++++++++++++++++-- .../bigwig_outlier_bed/bigwig_outlier_bed.xml | 25 ++++++++++++---- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py index c6f4a0bbec9..052745bf86a 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py @@ -117,6 +117,7 @@ def __init__(self, args): self.bedouthilo = args.bedouthilo self.tableoutfile = args.tableoutfile self.bedwin = args.minwin + self.bedoutzero = args.bedoutzero self.qlo = None self.qhi = None if args.outbeds != "outtab": @@ -133,7 +134,7 @@ def __init__(self, args): self.bwlabels += ["Nolabel"] * (nbw - nlab) self.makeBed() - def processVals(self, bw, isTop): + def processVals(self, bw, isTop, isZero): """ idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex. @@ -143,6 +144,8 @@ def processVals(self, bw, isTop): """ if isTop: bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s + elif isZero: + bwex = np.r_[False, bw == 0, False] # extend with 0s else: bwex = np.r_[False, bw <= self.bwbot, False] bwexd = np.diff(bwex) @@ -190,6 +193,7 @@ def makeTableRow(self, bw, bwlabel, chr): def makeBed(self): bedhi = [] bedlo = [] + bedzero = [] restab = [] bwlabels = self.bwlabels bwnames = self.bwnames @@ -229,9 +233,24 @@ def makeBed(self): + histo ) bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered + if self.bedoutzero is not None: + bwzero = self.processVals(bw, isTop=False, isZero=True) + for j, seg in enumerate(bwzero): + seglen = seg[1] - seg[0] + if seglen >= self.bedwin: + score = seglen + bedzero.append( + ( + chr, + seg[0], + seg[1], + "%s_%d" % (bwlabel, score), + score, + ) + ) if self.qhi is not None: self.bwtop = np.quantile(bw, self.qhi) - bwhi = self.processVals(bw, isTop=True) + bwhi = self.processVals(bw, isTop=True, isZero=False) for j, seg in enumerate(bwhi): seglen = seg[1] - seg[0] if seglen >= self.bedwin: @@ -247,7 +266,7 @@ def makeBed(self): ) if self.qlo is not None: self.bwbot = np.quantile(bw, self.qlo) - bwlo = self.processVals(bw, isTop=False) + bwlo = self.processVals(bw, isTop=False, isZero=False) for j, seg in enumerate(bwlo): seglen = seg[1] - seg[0] if seg[1] - seg[0] >= self.bedwin: @@ -280,6 +299,9 @@ def makeBed(self): t.write(stable) t.write("\n") some = False + if self.outbeds in ["outzero"]: + self.writeBed(bedzero, self.bedoutzeo) + some = True if self.qlo: if self.outbeds in ["outall", "outlo", "outlohi"]: self.writeBed(bedlo, self.bedoutlo) @@ -308,6 +330,7 @@ def makeBed(self): a("--bedouthi", default=None) a("--bedoutlo", default=None) a("--bedouthilo", default=None) + a("--bedoutzero", default=None) a("-w", "--bigwig", nargs="+") a("-n", "--bigwiglabels", nargs="+") a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed") diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml index 8750a174a39..209c3835a48 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml @@ -1,9 +1,9 @@ - + Writes high and low bigwig runs as features in a bed file 0.2.0 3.12.3 - 1 + 2 topic_0157 @@ -43,6 +43,9 @@ #if $outbeds in ['outlo', 'outall', 'outlohi']: --bedoutlo '$bedoutlo' #end if +#if $outbeds in ['outzero']: + --bedoutzero '$bedoutzero' +#end if --minwin '$minwin' #if $qhi: --qhi '$qhi' @@ -67,9 +70,10 @@ + - + @@ -86,6 +90,9 @@ outbeds in ["outall", "outlohi", "outlo"] + + outbeds in ["outzero"] + tableout == "create" or outbeds == "outtab" @@ -150,9 +157,15 @@ - - - + + + + + + + + + Date: Mon, 30 Sep 2024 10:24:29 +1000 Subject: [PATCH 2/2] fix new test for bedzero - must have qhi add new test output --- tools/bigwig_outlier_bed/bigwig_outlier_bed.py | 4 ++-- tools/bigwig_outlier_bed/bigwig_outlier_bed.xml | 4 ++-- tools/bigwig_outlier_bed/test-data/bedoutzero_sample | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 tools/bigwig_outlier_bed/test-data/bedoutzero_sample diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py index 052745bf86a..1c41eebdf6d 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.py +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.py @@ -300,8 +300,8 @@ def makeBed(self): t.write("\n") some = False if self.outbeds in ["outzero"]: - self.writeBed(bedzero, self.bedoutzeo) - some = True + self.writeBed(bedzero, self.bedoutzero) + some = True if self.qlo: if self.outbeds in ["outall", "outlo", "outlohi"]: self.writeBed(bedlo, self.bedoutlo) diff --git a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml index 209c3835a48..963429a1f86 100644 --- a/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml +++ b/tools/bigwig_outlier_bed/bigwig_outlier_bed.xml @@ -162,8 +162,8 @@ - - + + diff --git a/tools/bigwig_outlier_bed/test-data/bedoutzero_sample b/tools/bigwig_outlier_bed/test-data/bedoutzero_sample new file mode 100644 index 00000000000..523f316b17b --- /dev/null +++ b/tools/bigwig_outlier_bed/test-data/bedoutzero_sample @@ -0,0 +1,2 @@ +Merlin 0 9 bigwig_sample_9 9 +Merlin 70049 180929 bigwig_sample_110880 110880