-
Notifications
You must be signed in to change notification settings - Fork 1
/
Makefile
59 lines (49 loc) · 1.87 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Download the Excel-formatted clinical interpretations published by the
# Weill Cornell Precision Medicine Knowledge Base (PMKB) and clean them up.

# Location of the interpretations workbook.
URL := https://pmkb.weill.cornell.edu/therapies/download.xlsx

# Common stem shared by every generated file name.
BASENAME := pmkb

# The downloaded workbook and the tab-separated dump derived from it.
XLSX := $(BASENAME).xlsx
TSV := $(BASENAME).Interpretations.tsv
# Default goal: fetch the workbook, set up the local conda, dump the sheet to
# .tsv and extract the tumor and tissue term lists.
# Declared phony so a stray file named `all` can never mask it.
.PHONY: all
all: download conda dump tumor tissue
# ~~~~~ download the PMKB database file ~~~~~ #
# Fetch the workbook from $(URL).
# The download is written to a temporary name and renamed only on success, so
# a failed or interrupted wget never leaves a truncated file that Make would
# consider up to date. The temporary file is removed on failure.
$(XLSX):
	@echo ">>> Downloading clinical interpretations sheet from PMKB"
	wget "$(URL)" -O "$@.tmp" && mv -f "$@.tmp" "$@" || { rm -f "$@.tmp"; exit 1; }

# Convenience alias for the workbook; a command name, not a file.
.PHONY: download
download: $(XLSX)
# ~~~~~ Setup Conda Python 3 needed to manipulate UTF-16 xlsx ~~~~~ #
CONDASH := Miniconda3-4.5.4-Linux-x86_64.sh
CONDAURL := https://repo.continuum.io/miniconda/$(CONDASH)

# Install Miniconda into ./conda (installer flags: -b batch mode, -p prefix),
# then delete the installer script.
# wget is given an explicit -O so a leftover installer from an earlier,
# interrupted run is overwritten; without it wget would save the new copy
# under a ".1" name and the stale file would be the one executed.
# The target is the ./conda directory, so this runs only while it is missing.
conda:
	wget "$(CONDAURL)" -O "$(CONDASH)" && \
	bash "$(CONDASH)" -b -p conda && \
	rm -f "$(CONDASH)"
# Install pandas and xlrd into the local ./conda environment.
# This is a command, not a file, so it is declared phony and runs every time
# it is requested.
# PYTHONHOME/PYTHONPATH are cleared so the surrounding environment cannot leak
# into the local interpreter; ./conda/bin is put first on PATH so `conda`
# resolves to the local install. The PATH value is quoted so a working
# directory containing whitespace does not split the assignment.
.PHONY: conda-install
conda-install: conda
	unset PYTHONHOME && \
	unset PYTHONPATH && \
	export PATH="$${PWD}/conda/bin:$${PATH}" && \
	conda install -y pandas 'xlrd>=0.9.0'
# ~~~~~ dump the .xlsx file to .tsv ~~~~~ #
# Run dump-xlsx.py on the workbook using the Python from ./conda/bin.
# `conda` is an order-only prerequisite: the directory must exist, but its
# timestamp (which changes whenever entries are added to it) must not force
# the dump to be regenerated.
# NOTE(review): dump-xlsx.py is not visible here; presumably it writes
# $(TSV) -- confirm the output name matches this target.
# NOTE(review): pandas/xlrd are installed by `conda-install`, which is not a
# prerequisite of this rule -- confirm whether dump-xlsx.py needs them.
$(TSV): $(XLSX) | conda
	@echo ">>> Dumping .xlsx to .tsv"
	unset PYTHONHOME && \
	unset PYTHONPATH && \
	export PATH="$${PWD}/conda/bin:$${PATH}" && \
	python dump-xlsx.py "$<"

# Convenience alias for the .tsv dump; a command name, not a file.
.PHONY: dump
dump: $(TSV)
# ~~~~~ get the Tumor and Tissue terms from the sheet ~~~~~ #
TUMORFILE := $(BASENAME)-tumor-terms.txt
TISSUEFILE := $(BASENAME)-tissue-terms.txt

# Extract field 2 of the .tsv with cut.py (presumably the tumor column --
# cut.py is not visible here), split comma-separated entries onto separate
# lines, trim surrounding whitespace, drop empty lines and write the sorted
# unique terms.
# The sed script strips *all* leading/trailing whitespace and deletes blank
# lines; the earlier form trimmed a single character per side and its
# `s|^$||` expression was a no-op, so an empty entry could reach the output.
$(TUMORFILE): $(TSV)
	unset PYTHONHOME && \
	unset PYTHONPATH && \
	export PATH="$${PWD}/conda/bin:$${PATH}" && \
	python cut.py "$<" -f 2 -e "utf-16" | \
	tr ',' '\n' | \
	sed -e 's|^[[:space:]]*||' -e 's|[[:space:]]*$$||' -e '/^$$/d' | \
	sort -u > "$@"

# Convenience alias for the tumor term list; a command name, not a file.
.PHONY: tumor
tumor: $(TUMORFILE)
# Extract field 3 of the .tsv with cut.py (presumably the tissue column --
# cut.py is not visible here), split comma-separated entries onto separate
# lines, trim surrounding whitespace, drop empty lines and write the sorted
# unique terms.
# The sed script strips *all* leading/trailing whitespace and deletes blank
# lines; the earlier form trimmed a single character per side and its
# `s|^$||` expression was a no-op, so an empty entry could reach the output.
$(TISSUEFILE): $(TSV)
	unset PYTHONHOME && \
	unset PYTHONPATH && \
	export PATH="$${PWD}/conda/bin:$${PATH}" && \
	python cut.py "$<" -f 3 -e "utf-16" | \
	tr ',' '\n' | \
	sed -e 's|^[[:space:]]*||' -e 's|[[:space:]]*$$||' -e '/^$$/d' | \
	sort -u > "$@"

# Convenience alias for the tissue term list; a command name, not a file.
.PHONY: tissue
tissue: $(TISSUEFILE)
# Remove the downloaded workbook and the files derived from it.
# The ./conda installation is not touched.
# Declared phony so a file named `clean` cannot disable the target.
.PHONY: clean
clean:
	rm -f "$(XLSX)" "$(TSV)" "$(TISSUEFILE)" "$(TUMORFILE)"