-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
89 lines (71 loc) · 4.01 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
# Top-level configuration for the MAMI meme-classification pipeline.
# NOTE: options placed in the shebang (the original used "#!/bin/bash -u")
# are silently dropped when the script is run as "bash run.sh", so strict
# mode is enabled with `set` instead.
set -u

datasetdir=./MAMI   # Raw data source dir; download the dataset here (e.g. MAMI/training and MAMI/test)
datadir=./Dataset   # Dir where processed data and extracted features are saved
modeldir=./model    # Dir where trained models are saved
results=./results   # Dir for model predictions; evaluation results are also saved here
ifgpu=false         # If true, use the GPU for training
ifdebug=false       # If true (test set not yet released), split part of the training set as test
num=700             # When ifdebug=true, number of training samples held out as the test set
best_model=acc      # Select the best model by accuracy ("acc") or by loss
adjdir=./           # Dir holding the GCAN adjacency matrix
stage=0             # First pipeline stage to run
stop_stage=0        # Last pipeline stage to run
#######################################
# Download a file from Google Drive, following the "confirm" token flow
# Drive uses for files too large for virus scanning.
# Arguments: $1 - Google Drive file id
#            $2 - local output path
#######################################
function gdrive_download () {
  local confirm
  # First request only harvests the session cookie and the confirm token.
  confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \
    "https://docs.google.com/uc?export=download&id=$1" -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')
  # Quote "$2": the original unquoted `-O $2` breaks on paths with spaces.
  wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=${confirm}&id=$1" -O "$2"
  rm -f /tmp/cookies.txt  # plain file, so -f suffices (-r was unnecessary)
}
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  ### Stage 0: split the raw MEME images, then extract image captions.
  for dset in training test; do
    python3 local/orgimagesplit.py "$datasetdir" "$dset" || exit 1
  done
  # Fetch the captioning code plus its pretrained checkpoint and word map.
  # These steps were previously unchecked; a failed clone/download left later
  # stages to fail with confusing errors, so fail fast here instead.
  git clone https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning.git || exit 1
  mv a-PyTorch-Tutorial-to-Image-Captioning local/imagecap || exit 1
  cp local/caption_multisplit.py local/imagecap/caption_multisplit.py || exit 1
  gdrive_download '1FYZ446OPEqhe-uLkgyVICjD_3-N3IZn1' 'local/imagecap/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar' || exit 1
  gdrive_download '1bt_TmTC_rUcss2MJsG_C_6DtwEttRVKc' 'local/imagecap/WORDMAP_coco_5_cap_per_img_5_min_word_freq.json' || exit 1
  ### Use image captioning to extract the image-text descriptions
  ### (https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning)
  for dset in training test; do   # once the test set is released this covers both splits
    savedir=$datadir/imagecap
    capdir=local/imagecap
    python3 local/imagecap/caption_multisplit.py "$datasetdir" "$dset" "$savedir" "$capdir" "$ifgpu" || exit 1
  done
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  ### Stage 1: train text, image, and fusion models.
  tasktype=classification
  featype=bert
  iffinetune=true

  # Train one model per fold of the 10-fold cross-validation for the given
  # stream. Factors out the five identical fold loops the script repeated.
  # Arguments: $1 - stream name passed through to local/train.py
  train_stream () {
    local stream=$1
    local i
    for i in {0..9}; do
      python3 ./local/train.py "$datadir" "$tasktype" "$featype" "$modeldir" "$results" \
        "$stream" "$best_model" "$ifgpu" "$iffinetune" "$i" "$adjdir" || exit 1
    done
  }

  ### Train text model
  train_stream ocr

  ### Train image model (fold 0 only, as in the original recipe)
  stream=image
  i=0
  python3 ./local/train.py "$datadir" "$tasktype" "$featype" "$modeldir" "$results" \
    "$stream" "$best_model" "$ifgpu" "$iffinetune" "$i" "$adjdir" || exit 1

  train_stream BERTC_bi       ## BERTC-VIT model
  train_stream GCAN_bi        ## GCAN-VIT model (original comment mislabeled this as BERTC-VIT)
  train_stream bi_BERTC_GCAN  ## BERTC-GCAN model
  train_stream bi             ## BERTC-GCAN-VIT model
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  ### Stage 2: ensemble the per-stream predictions and evaluate.
  # Enumerate all trained classification models for the ensembler.
  ls -R ./model/classification/ > ./classification.txt
  # $datasetdir/test_labels.txt holds the ground truth of the test set.
  # Error-checked for consistency with every other python step in this script.
  python3 ./local/ensemble.py "$results" "$datasetdir/test_labels.txt" ./classification.txt || exit 1
fi