diff --git a/scripts/l_merge.py b/scripts/l_merge.py index b45c3f0..5eb642d 100755 --- a/scripts/l_merge.py +++ b/scripts/l_merge.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python -u +#!/usr/bin/env python + from operator import add import time import sys @@ -68,6 +69,7 @@ def print_var_line(l): PRPOS = m['PREND'] PREND = m['PRPOS'] SNAME = m['SNAME'] + SULIST = m.get('SULIST', m['SU']) # Fall back to single SU as default EVENT = A[2] A[4] = ALT @@ -84,6 +86,7 @@ def print_var_line(l): 'PRPOS=' + str(PRPOS), 'PREND=' + str(PREND), 'SNAME=' + str(SNAME), + 'SULIST=' + str(SULIST), 'EVENT=' + str(EVENT)]) # reconstruct the line @@ -123,6 +126,7 @@ def print_var_line(l): PRPOS = m['PREND'] PREND = m['PRPOS'] SNAME = m['SNAME'] + SULIST = m.get('SULIST', m['SU']) # Fall back to single SU as default EVENT = A[2] SECONDARY = 'SECONDARY' MATEID=A[2] + '_1' @@ -141,6 +145,7 @@ def print_var_line(l): 'PRPOS=' + str(PRPOS), 'PREND=' + str(PREND), 'SNAME=' + str(SNAME), + 'SULIST=' + str(SULIST), 'EVENT=' + str(EVENT), 'MATEID=' + str(MATEID)]) @@ -458,6 +463,8 @@ def merge(BP, sample_order, v_id, use_product): gt_list = [] + su_list = [] + #for g_i in c: for b_i in c: #A = G[g_i].b.l.rstrip().split('\t') @@ -481,10 +488,14 @@ def merge(BP, sample_order, v_id, use_product): gt_list += A[9:] + su_list.append(m['SU']) + SNAME=','.join(s_name_list) GTS = '\t'.join(gt_list) + SULIST = ','.join(su_list) + strand_types_counts = [] for strand in strand_map: strand_types_counts.append(strand + ':' + str(strand_map[strand])) @@ -516,7 +527,8 @@ def merge(BP, sample_order, v_id, use_product): 'PRPOS=' + str(PRPOS), 'PREND=' + str(PREND), 'ALG=' + str(ALG), - 'SNAME=' + str(SNAME)] + 'SNAME=' + str(SNAME), + 'SULIST=' + str(SULIST)] if BP[c[0]].sv_type == 'BND': I.append('EVENT=' + str(ID)) diff --git a/scripts/pairend_distro.py b/scripts/pairend_distro.py index 407476a..7661ed2 100755 --- a/scripts/pairend_distro.py +++ b/scripts/pairend_distro.py @@ -54,7 +54,13 @@ def unscaled_upper_mad(xs): above the median. """ med = np.median(xs) - return med, np.median(xs[xs > med] - med) + if len(xs[xs > med]): + umad = np.median(xs[xs > med] - med) + else: + # When lengths are all the same, return a safe default. + # This is common with simulated data used for testing. + umad = 1 + return med, umad (options, args) = parser.parse_args()