Skip to content

Commit

Permalink
Merge pull request #29 from datamade/feature/28-nightly-civil
Browse files Browse the repository at this point in the history
Start nightly civil case scrape
  • Loading branch information
antidipyramid authored Feb 2, 2024
2 parents 8e3e50a + a7ce486 commit 9c6f348
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 5 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ new_plaintiffs.csv: cases.json
new_defendants.csv: cases.json
cat $^ | jq '.[] | . as $$p | .defendants[] | [., $$p.case_number] | @csv' -r > $@

-cases.json : chancery.jl
+cases.json : civil-2.jl civil-3.jl civil-4.jl civil-5.jl \
+civil-6.jl civil-101.jl civil-104.jl civil-11.jl \
+civil-13.jl civil-14.jl civil-15.jl civil-17.jl chancery.jl
cat $^ | sort | python scripts/remove_dupe_cases.py | jq --slurp '.' > $@

# Query parameterized by civil case subdivision
Expand All @@ -50,6 +52,7 @@ CIVIL_SCRAPE_START_QUERY=$(shell tail -n +2 scripts/nightly_civil_start.sql)
civil-%.jl: cases.db
START=$$(sqlite-utils query --csv --no-headers cases.db \
"$(CIVIL_SCRAPE_START_QUERY)" -p subdivision $*); \
+echo $$START; \
scrapy crawl civil -a division=$* -a start=$$START -O $@;

chancery.jl: cases.db
Expand Down
12 changes: 9 additions & 3 deletions scripts/new_cases.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ CREATE TEMPORARY TABLE raw_case (
court text,
division text,
filing_date text,
-hash text
+hash text,
+scraped_at text DEFAULT current_timestamp,
+updated_at text DEFAULT current_timestamp
);

-- noqa: disable=PRS
Expand All @@ -23,7 +25,9 @@ INSERT INTO
calendar,
ad_damnum,
court,
-hash
+hash,
+scraped_at,
+updated_at
)
SELECT
case_number,
Expand All @@ -33,6 +37,8 @@ SELECT
calendar,
ad_damnum,
court,
-hash
+hash,
+scraped_at,
+updated_at
FROM
raw_case;
1 change: 0 additions & 1 deletion scripts/nightly_civil_start.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ WITH serials AS (
AND substr(case_number, 1, 4) = strftime('%Y', current_timestamp)
)

--- If we don't have any cases for the current year, start from zero
SELECT coalesce((
SELECT serial
FROM
Expand Down

0 comments on commit 9c6f348

Please sign in to comment.