-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile.update
81 lines (69 loc) · 3.1 KB
/
Makefile.update
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Makefile for re-scraping cases and updating the case database
# Delete a target whose recipe exits non-zero, so a truncated CSV/JSON file or
# a half-transformed rescraped_cases.db is never mistaken for an up-to-date
# file on the next run.
.DELETE_ON_ERROR:

# rescraped_cases.db is a scratch database; declaring it intermediate lets
# make remove it once the goal that needed it has been built.
.INTERMEDIATE: rescraped_cases.db

.PHONY : update_db

# update_db: feed scripts/update_rescraped_cases.sql to sqlite3, opened on
# rescraped_cases.db ($< is the first prerequisite). cases.db is required to
# exist and be current, but is not named on the command line.
# NOTE(review): presumably the SQL script attaches cases.db itself - confirm.
update_db: rescraped_cases.db cases.db
	sqlite3 $< < scripts/update_rescraped_cases.sql
# rescraped_cases.db: build a scratch SQLite database from the five
# normalised CSV files.
#
# Steps, in order (later steps rely on earlier ones):
#   1. remove any existing file so the build never starts from a leftover
#      database (e.g. one kept after an interrupted run)
#   2. load every prerequisite CSV ($^) with csvs-to-sqlite
#   3. apply scripts/foreign_key.sql
#   4. add a text column `subdivision` to court_case and run
#      scripts/subdivision.sql
#   5. reshape court_case: drop _key, make case_number the primary key,
#      fix the column order
#   6. declare case_number foreign keys from attorney, defendant, plaintiff
#      and event to court_case
#   7. drop _key from, and reorder, each child table
#   8. convert court_case.filing_date and event.date with r.parsedate
rescraped_cases.db : attorney.csv defendant.csv plaintiff.csv court_case.csv event.csv
	rm -f $@
	csvs-to-sqlite $^ $@
	sqlite3 $@ < scripts/foreign_key.sql
	sqlite-utils add-column $@ court_case subdivision text
	sqlite3 $@ < scripts/subdivision.sql
	sqlite-utils transform $@ court_case \
		--drop _key \
		--pk case_number \
		--column-order case_number \
		--column-order filing_date \
		--column-order division \
		--column-order subdivision \
		--column-order case_type \
		--column-order calendar \
		--column-order ad_damnum
	sqlite-utils add-foreign-keys $@ \
		attorney case_number court_case case_number \
		defendant case_number court_case case_number \
		plaintiff case_number court_case case_number \
		event case_number court_case case_number
	sqlite-utils transform $@ defendant \
		--drop _key \
		--column-order case_number \
		--column-order defendant
	sqlite-utils transform $@ attorney \
		--drop _key \
		--column-order case_number \
		--column-order attorney
	sqlite-utils transform $@ event \
		--drop _key \
		--column-order case_number \
		--column-order date \
		--column-order description \
		--column-order comments
	sqlite-utils transform $@ plaintiff \
		--drop _key \
		--column-order case_number \
		--column-order plaintiff
	sqlite-utils convert $@ court_case filing_date 'r.parsedate(value)'
	sqlite-utils convert $@ event date 'r.parsedate(value)'
# <table>.csv from court_case_raw.<table>.csv: rewrite the header row only
# (both sed commands are restricted to line 1).
#   - first rename the column court_case_raw._key to case_number
#   - then strip every remaining "<lowercase/digit/underscore name>." prefix
# Data rows are copied through unchanged.
%.csv: court_case_raw.%.csv
	sed -r \
		-e '1s/court_case_raw\._key/case_number/g' \
		-e '1s/[a-z0-9_]+\.//g' \
		$< > $@
# court_case.csv: copy court_case_raw.csv, stripping every
# "<lowercase/digit/underscore name>." prefix from the header row (line 1).
# Data rows are left untouched.
court_case.csv : court_case_raw.csv
	sed -r '1s/[a-z0-9_]+\.//g' $< > $@
# Split rescraped_cases.json into one CSV per table with json-to-multicsv.pl.
#
# A single run of the recipe is expected to write all five target files, so
# they are declared as a grouped target (`&:`, GNU Make >= 4.3): make runs the
# recipe once for the whole group instead of treating each file as an
# independent target that could run the recipe again, or concurrently under -j.
# NOTE(review): on GNU Make older than 4.3 the `&` should be read as one more
# ordinary target name, leaving the previous behaviour - verify on the Make
# version in use.
#
# The --path arguments are single-quoted because they contain `*`, which the
# shell would otherwise try to expand as a filename glob.
court_case_raw.attorney.csv court_case_raw.defendant.csv court_case_raw.plaintiff.csv court_case_raw.csv court_case_raw.event.csv &: rescraped_cases.json
	json-to-multicsv.pl --file $< \
		--path '/:table:court_case_raw' \
		--path '/*/events/:table:event' \
		--path '/*/plaintiffs/:table:plaintiff' \
		--path '/*/defendants/:table:defendant' \
		--path '/*/attorneys/:table:attorney'
# rescraped_cases.json: gather every JSON value from both scraper outputs
# (chancery first, then civil) into one JSON array via jq --slurp.
rescraped_cases.json: rescraped_chancery_cases.jl rescraped_civil_cases.jl
	jq --slurp '.' $^ > $@
# rescraped_civil_cases.jl: run the `civil` spider over the case numbers in
# to_rescrape.civil.csv, overwriting the target with the scraped items.
# CLOSESPIDER_TIMEOUT=7200 is passed as a Scrapy setting (7200 s = 2 h).
rescraped_civil_cases.jl : to_rescrape.civil.csv
	scrapy crawl civil \
		--set CLOSESPIDER_TIMEOUT=7200 \
		-a case_numbers_file=$< \
		--overwrite-output $@
# rescraped_chancery_cases.jl: run the `chancery` spider over the case numbers
# in to_rescrape.chancery.csv, overwriting the target with the scraped items.
# CLOSESPIDER_TIMEOUT=7200 is passed as a Scrapy setting (7200 s = 2 h).
rescraped_chancery_cases.jl : to_rescrape.chancery.csv
	scrapy crawl chancery \
		--set CLOSESPIDER_TIMEOUT=7200 \
		-a case_numbers_file=$< \
		--overwrite-output $@
# SQL text used to pick the cases to re-scrape: scripts/to_scrape.sql from its
# sixth line onward (tail -n +6 skips the first five lines).
# Assigned with := so `tail` runs once when the makefile is read, instead of
# once per recipe that expands the variable.
# Note: $(shell ...) turns newlines into spaces, so the query reaches the
# recipes as a single line.
TO_SCRAPE_QUERY := $(shell tail -n +6 scripts/to_scrape.sql)
# to_rescrape.civil.csv: run the to-scrape query against cases.db ($<) with
# the parameter `court` set to `civil`, writing header-less CSV.
# scripts/to_scrape.sql is a prerequisite because the query text is read from
# it (via TO_SCRAPE_QUERY); editing the query must regenerate this list.
to_rescrape.civil.csv : cases.db scripts/to_scrape.sql
	sqlite-utils query --csv --no-headers $< "$(TO_SCRAPE_QUERY)" -p court civil > $@
# to_rescrape.chancery.csv: run the to-scrape query against cases.db ($<) with
# the parameter `court` set to `chancery`, writing header-less CSV.
# scripts/to_scrape.sql is a prerequisite because the query text is read from
# it (via TO_SCRAPE_QUERY); editing the query must regenerate this list.
to_rescrape.chancery.csv : cases.db scripts/to_scrape.sql
	sqlite-utils query --csv --no-headers $< "$(TO_SCRAPE_QUERY)" -p court chancery > $@