forked from stevekm/reportIT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
global_settings.sh
executable file
·134 lines (100 loc) · 4.57 KB
/
global_settings.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
umask 007 # permissions for all files; group + user all rw, no outside user access
# This file contains paths and settings to be used by all bash scripts in the pipeline
# this should be 'source'd
#~~~~~ DEFAULT LOCATIONS & ITEMS ~~~~~~#
# path to output directory
outdir="output"
# path to code dir
codedir="code"
samplesheet_dir="samplesheets"
# path to report templates directory
report_template_dir="report"
# file for per sample full report template
full_report_template="${report_template_dir}/full_report.Rmd"
# file for the overall analysis full report
overview_report_template="${report_template_dir}/overview_report.Rmd"
# file containing list of actionable genes
actionable_genes_file="data/actionable_genes.txt"
# list of canonical transcripts to use; one per line
transcr_file="ref/hg19/canonical_transcript_list.txt"
# genes in the panel
panel_genes_file="data/panel_genes.txt"
# file with server login information
server_info_file="data/server_info.txt"
# file with extended regex NC sample ID's
control_sample_regex_file="data/control_sample_regex.txt"
# file with control sample ID's
control_sample_ID_file="data/control_sample_IDs.txt"
# file with SC control sample ID's to be hidden in report template
SC_control_sample_IDs_file="data/SC_control_sample_IDs.txt"
#~~~~~ CUSTOM FUNCTIONS ~~~~~~#
if [ -f "${codedir}/custom_bash_functions.sh" ]; then
source "${codedir}/custom_bash_functions.sh"
elif [ ! -f "${codedir}/custom_bash_functions.sh" ]; then
echo -e "ERROR: file not found:\n${codedir}/custom_bash_functions.sh"
echo -e "WARNING: Pipeline might not work correctly unless the file has been sourced from elsewhere"
fi
#~~~~~ DEFAULT SCRIPTS ~~~~~~#
compile_report_script="${codedir}/compile_RMD_report.R"
# ~~~~~~ ANNOTATION PARAMETERS ~~~~~~ #
# ANNOVAR version:
# Version: $Date: 2015-06-17 21:43:53 -0700 (Wed, 17 Jun 2015) $
build_version="hg19"
annovar_db_dir="$(readlink -f bin/annovar/db)"
convert2annovar_bin="$(readlink -f bin/annovar/convert2annovar.pl)"
table_annovar_bin="$(readlink -f bin/annovar/table_annovar.pl)"
annovar_protocol="-protocol refGene,cosmic68,clinvar_20150629,1000g2015aug_all -operation g,f,f,f"
# bcftools-1.3.1
bcftools_bin="$(readlink -f bin/bcftools)"
vcf2tsv_bin="$(readlink -f bin/vcf2tsv)"
# ~~~~~~ REMOTE IONTORRENT SERVER LOCATION NAMES AND PATTERNS ~~~~~~ #
# e.g. a path to remote IT server results looks like this:
# /results/analysis/output/Home/Auto_user_SN2-282-IT17-26-1_375_368/plugin_out/coverageAnalysis_out.1070/IonXpress_001
# ^^ this is the coverage output dir (.bam, .bai) for a single sample
# /results/analysis/output/Home/Auto_user_SN2-282-IT17-26-1_375_368/plugin_out/variantCaller_out.1071/IonXpress_001
#
# ^^ this is the coverage output dir (.vcf) for a single sample
# path to the 'Home' directory on the IonTorrent remote server that holds the
# run output results
IT_server_results_home_dir="/results/analysis/output/Home/"
# subdir name prefix for IT results dir that hold the coverege files for all samples; .bam and .bai files
IT_coverage_dir_name_pattern="coverageAnalysis_out"
# same but for the variant calling output
IT_variant_dir_name_pattern="variantCaller_out"
# subdir basename pattern for each sample on the IonTorrent output coverage and variant dirs
IT_sample_results_dir_name_pattern="IonXpress"
# source VCF file name on the remote IonTorrent server: TSVC_variants.vcf
source_vcf_basename="TSVC_variants.vcf"
# ~~~~~~~~~~~~ # file extensions & naming # ~~~~~~~~~~~~ #
# split VCF extensions
split_vcf_ext=".split"
# ANNOVAR converted avinput
avinput_ext=".avinput"
# ANNOVAR annotated output; automatically added by ANNOVAR
annovar_output_ext=".${build_version}_multianno.txt" # .hg19_multianno.txt
# rebuilt VCF file from ANNOVAR avinput
rebuilt_ext=".rebuilt"
# VCF field query tables
query_ext="_query.tsv"
# VCF converted to TSV
tsv_ext=".tsv"
## NOTE: These are hardcoded in merge_vcf_annotations.py
# variant summary table
summary_table_ext="_summary.tsv"
# variant table without summary criteria (not actually filtered why is it called that??)
filtered_table_ext="_filtered.tsv"
# full table with all variants and all fields
full_table_ext="_full_table.tsv"
# summary table with version control information
summary_version_ext="_summary_version.tsv"
# current_date="$(date +%Y-%m-%d:%H:%M:%S)"
# ~~~~~~ REPORTING PARAMETERS ~~~~~~ #
# if the 'module' system is installed, try to load the correct versions
if (module)&>/dev/null ; then
module unload r
module load r/3.3.0
module load pandoc/1.13.1
else
echo "WARNING: 'module' system not found, R and pandoc were not loaded automatically"
fi