-
Notifications
You must be signed in to change notification settings - Fork 2
/
mp_val_ncol.sh
executable file
·99 lines (84 loc) · 2.26 KB
/
mp_val_ncol.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
############################################################
# Program:
# Author :
############################################################
## BEGIN SCRIPT
usage()
{
cat << EOF
usage: $0 OPTIONS
OPTIONS can be:
-h Show this message
-i BEDPE input file name
-s slop (default: 0)
-m Moleculo file (Default /mnt/hall13_local/cc2qe/na12878_pacbio...)
-p Pacbio file (Default /mnt/hall13_local/cc2qe/na12878_moleculo...)
-k Keep intermediate files
EOF
}
# Show usage when there are no arguments.
if test -z "$1"
then
usage
exit
fi
VERBOSE=
FILENAME=
PB="/mnt/hall13_local/cc2qe/na12878_pacbio/NA12878.pacbio.splitreads.excldups.breakpoint.dels.bedpe.gz"
MO="/mnt/hall13_local/cc2qe/na12878_moleculo/NA12878.moleculo.splitreads.excldups.breakpoint.dels.bedpe.gz"
L=
SLOP=0
KEEP=0
# Check options passed in.
while getopts "h m:p:i:s:k" OPTION
do
case $OPTION in
h)
usage
exit 1
;;
m)
MO=$OPTARG
;;
p)
PB=$OPTARG
;;
i)
L=$OPTARG
;;
s)
SLOP=$OPTARG
;;
k)
KEEP=1
;;
?)
usage
exit
;;
esac
done
# calculate the number of columns in the input file
NCOL=`head -n 1 $L | awk '{ print NF }'`
pairToPair -type both -is -slop $SLOP -a <(zcat $PB | awk '{ $1="chr"$1 ; $4="chr"$4 ; print $0 }' OFS="\t") -b $L \
| zapdups -u \
> $L.p.slop$SLOP.tmp
pairToPair -type both -is -slop $SLOP -a <(zcat $MO | awk '{ $1="chr"$1 ; $4="chr"$4 ; print $0 }' OFS="\t") -b $L \
| zapdups -u \
> $L.m.slop$SLOP.tmp
cat $L.p.slop$SLOP.tmp \
| cut -f 19- \
| sort -k7,7n \
| groupBy -g 1,2,3,4,5,6 -c 1 -o count -full \
| zjoin -r -a $L -b stdin -1 7 -2 7 \
| cut -f -$NCOL,$(($NCOL+$NCOL+1)) \
| awk '{ if ($NF=="NA") { $NF=0 } print $0 }' OFS="\t" \
| zjoin -r -1 7 -2 7 -a stdin -b <(cat $L.m.slop$SLOP.tmp | cut -f 19- | sort -k7,7n | groupBy -g 1,2,3,4,5,6 -c 1 -o count -full) \
| cut -f -$(($NCOL+1)),$(($NCOL+$NCOL+2)) \
| awk '{ if ($NF=="NA") { $NF=0 } print $0 }' OFS="\t" \
> $L.slop$SLOP.val
if [[ $KEEP -ne 1 ]]
then
rm $L.p.slop$SLOP.tmp $L.m.slop$SLOP.tmp
fi