forked from openhatch/oh-mainline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_importer.sh
executable file
·60 lines (47 loc) · 2.77 KB
/
run_importer.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
# Usage: run_importer.sh [BASE_DIR [MAX_TRACKERS [TRACKER_ID]]]
#
#   BASE_DIR      directory that receives this run's scrapy.<date>.XXXX.log
#                 (default: /var/web/inside.openhatch.org/crawl-logs)
#   MAX_TRACKERS  value sent as the "limit" query parameter (default: 500)
#   TRACKER_ID    value sent as the "tracker_id" query parameter; when the
#                 argument is missing or empty, a TRACKER_ID inherited from
#                 the environment is used, otherwise it is left empty

# In case anything returns non-zero, just fail immediately.
set -e

# Positional arguments; an empty argument counts as "not given".
BASE_DIR="${1:-/var/web/inside.openhatch.org/crawl-logs}"
MAX_TRACKERS="${2:-500}"
TRACKER_ID="${3:-${TRACKER_ID:-}}"

# Computed once so all three file names carry the same date.
RUN_DATE="$(date -I)"

BUG_TRACKER_LIST="$(mktemp --suffix=.yaml "/tmp/bug-trackers.${RUN_DATE}.XXXX")"
SCRAPY_RESULT_FILE="$(mktemp --suffix=.jsonlines "/tmp/scrapy-results.${RUN_DATE}.XXXX")"
# The template is quoted so a BASE_DIR containing whitespace or glob
# characters still reaches mktemp as a single argument.
SCRAPY_LOG="$(mktemp --suffix=.log "${BASE_DIR}/scrapy.${RUN_DATE}.XXXX")"

# Set our own output to go there.
exec >>"$SCRAPY_LOG" 2>&1
chmod 644 "$SCRAPY_LOG"

URL="https://openhatch.org/+api/v1/customs/tracker_model/?just_stale=yes&format=yaml&limit=${MAX_TRACKERS}&tracker_id=${TRACKER_ID}"
#######################################
# Download the bug tracker list and check that it parses as YAML.
# Globals:   URL (read)              - address handed to curl
#            BUG_TRACKER_LIST (read) - path of the file the download
#                                      is written to (overwritten)
# Arguments: none
# Outputs:   prints 'sleeping for 20 sec' to stdout before backing off
# Returns:   0 if the download succeeded and the file parsed,
#            1 otherwise (after sleeping 20 seconds)
#######################################
grab_bug_tracker_list() {
  # Try to download $URL. A curl failure backs off like a bad document
  # does, so that repeated calls do not hammer the server (or burn through
  # every retry within seconds) while the network is down.
  if ! curl "$URL" > "$BUG_TRACKER_LIST"; then
    echo 'sleeping for 20 sec'
    sleep 20
    return 1
  fi

  # Sanity-check the document -- is it actually YAML, or is it a "helpful"
  # CloudFlare error message? Python is asked to parse it; the path is
  # passed as an argument (sys.argv[1]) rather than pasted into the
  # program text, so no character in the path can alter the Python code.
  # NOTE(review): yaml.load runs on downloaded data with a project-chosen
  # Loader -- confirm TastypieLoader is a safe loader.
  if DJANGO_SETTINGS_MODULE='mysite.settings' python -c \
    'import sys; import vendor; vendor.vendorify(); import tastypie.serializers; import yaml; yaml.load(open(sys.argv[1]), Loader=tastypie.serializers.TastypieLoader)' \
    "$BUG_TRACKER_LIST"; then
    # Amazing. It is valid YAML. Exit successfully.
    return 0
  fi

  echo 'sleeping for 20 sec'
  sleep 20
  return 1
}
# It's OK if fetching the tracker list takes up to 40 tries. The first
# successful call ends the loop; once MAX_ATTEMPTS calls have all failed,
# the script exits with status 1.
MAX_ATTEMPTS=40
attempt=1
until grab_bug_tracker_list; do
  if [ "$attempt" -ge "$MAX_ATTEMPTS" ]; then
    exit 1
  fi
  attempt=$((attempt + 1))
done
# Run the bug importers from inside their own checkout, reading the tracker
# list and writing the results file. The path is relative, so the script
# must be started from a directory that has ../oh-bugimporters next to it
# (and manage.py in it, for the import step below).
pushd ../oh-bugimporters
env/bin/python bugimporters/main.py -i "$BUG_TRACKER_LIST" -o "$SCRAPY_RESULT_FILE"
popd

# Hand the results file to the import_bugimporter_data management command.
python manage.py import_bugimporter_data "$SCRAPY_RESULT_FILE"

# Remove old log files (last modified more than 14 days ago).
# find runs rm itself, and only when something matched. Piping into a plain
# GNU `xargs rm` would still run rm once with no operands when nothing is
# old enough; rm then fails and, being the last command, makes an otherwise
# successful run exit non-zero.
find "$BASE_DIR" -name 'scrapy.*.log' -mtime +14 -exec rm -f -- {} +