-
Notifications
You must be signed in to change notification settings - Fork 5
/
summarize_qtl_results.sh
executable file
·60 lines (39 loc) · 1.61 KB
/
summarize_qtl_results.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
#Code to collect the QTL colocalization results after they have all been generated.
#set variables for date and time
today=`date +%F`
now=`date +%T`
pwd
#load config file
source qtl_config.sh
source $setup_config_sh
#write header to the summary results file
printf "SNP\tGene\tGeneID-Tissue\tTrait\tPPID\tPP\n" > $trait"_"$qtlType"_coloc_results_all_summary_"$today"_"$now".txt"
#go through each lead SNP directory
for dir in ./rs*/
do
#echo $dir
dirname=`echo $dir | tr "/" "\t" | cut -f 2`
echo $dirname
cd $dir
#for each GTEx coloc output in the directory
for colocOut in ./*coloc_results_summary.txt
do
#echo $colocOut
colocfilename=`basename $colocOut`
#echo $colocfilename
#save a "trait" string for formatting
trait_str="_"$trait"_"
#grab the information we need from the locus coloc results file
sed "s/^/$colocfilename /" $colocOut | sed "s/^/$dirname /" | sed "s,$trait_str, $trait," | sed "s/coloc_results_summary.txt//" | sed "s/_ENSG/ ENSG/"| tr " " "\t" | tail -n+3 >> ../$trait"_"$qtlType"_coloc_results_all_summary_"$today"_"$now".txt"
done
cd ..
done
#if using sQTL data you need to parse out the gene name
if [ "$qtlType" == "sqtl" ]
then
sed -i "s/_/\\t/" $trait"_"$qtlType"_coloc_results_all_summary_"$today"_"$now".txt"
fi
#run Rscript to generate a file with PP3, PP4, and PP4/(PP3 + PP4) for each lead SNP-Gene-Tissue result.
Rscript $colocquial_dir"/summarize_qtl_coloc_PP3_PP4_results.R" $trait"_"$qtlType"_coloc_results_all_summary_"$today"_"$now".txt"
echo "Your summary QTL file is ready!"