diff --git a/Makefile b/Makefile
index 0f1552d..877f962 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,9 @@ new_plaintiffs.csv: cases.json
 new_defendants.csv: cases.json
 	cat $^ | jq '.[] | . as $$p | .defendants[] | [., $$p.case_number] | @csv' -r > $@
 
-cases.json : chancery.jl
+cases.json : civil-2.jl civil-3.jl civil-4.jl civil-5.jl \
+	civil-6.jl civil-101.jl civil-104.jl civil-11.jl \
+	civil-13.jl civil-14.jl civil-15.jl civil-17.jl chancery.jl
 	cat $^ | sort | python scripts/remove_dupe_cases.py | jq --slurp '.' > $@
 
 # Query parameterized by civil case subdivision
@@ -50,6 +52,7 @@ CIVIL_SCRAPE_START_QUERY=$(shell tail -n +2 scripts/nightly_civil_start.sql)
 civil-%.jl: cases.db
 	START=$$(sqlite-utils query --csv --no-headers cases.db \
 	"$(CIVIL_SCRAPE_START_QUERY)" -p subdivision $*); \
+	echo $$START; \
 	scrapy crawl civil -a division=$* -a start=$$START -O $@;
 
 chancery.jl: cases.db
diff --git a/scripts/new_cases.sql b/scripts/new_cases.sql
index e7216c8..c7aaee0 100644
--- a/scripts/new_cases.sql
+++ b/scripts/new_cases.sql
@@ -6,7 +6,9 @@ CREATE TEMPORARY TABLE raw_case (
     court text,
     division text,
     filing_date text,
-    hash text
+    hash text,
+    scraped_at text DEFAULT current_timestamp,
+    updated_at text DEFAULT current_timestamp
 );
 
 -- noqa: disable=PRS
@@ -23,7 +25,9 @@ INSERT INTO
         calendar,
         ad_damnum,
         court,
-        hash
+        hash,
+        scraped_at,
+        updated_at
     )
 SELECT
     case_number,
@@ -33,6 +37,8 @@ SELECT
     calendar,
     ad_damnum,
     court,
-    hash
+    hash,
+    scraped_at,
+    updated_at
 FROM
     raw_case;
diff --git a/scripts/nightly_civil_start.sql b/scripts/nightly_civil_start.sql
index a1afafe..3e574ad 100644
--- a/scripts/nightly_civil_start.sql
+++ b/scripts/nightly_civil_start.sql
@@ -18,7 +18,6 @@ WITH serials AS (
         AND substr(case_number, 1, 4) = strftime('%Y', current_timestamp)
     )
 
--- If we don't have any cases for the current year, start from zero
 SELECT
     coalesce((
         SELECT serial FROM