Skip to content

Commit

Permalink
Merge pull request #22 from datamade/feature/nightly-action
Browse files Browse the repository at this point in the history
Add nightly scrape action
  • Loading branch information
antidipyramid authored Dec 19, 2023
2 parents 32fbaba + 318989d commit d96d754
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 108 deletions.
99 changes: 0 additions & 99 deletions .github/workflows/build.yml

This file was deleted.

116 changes: 116 additions & 0 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Nightly case scrape workflow.
#
# Job `scrape`: downloads the encrypted cases.db.zip release asset, unzips it,
# runs `make get_new_records`, re-zips the database with the same password and
# uploads it to the nightly release.
# Job `deploy`: runs after `scrape`, downloads and unzips the database again
# and publishes it to Heroku with `datasette publish heroku`.
#
# Secrets are handed to shell steps through `env:` and referenced as quoted
# shell variables instead of being interpolated into the script text, so a
# quote or other shell metacharacter in a secret cannot alter the command.
name: Nightly case scrape

on:
  # Manual trigger only; the nightly schedule below is currently disabled.
  workflow_dispatch:
  # schedule:
  #   - cron: '15 4 * * *'

jobs:
  scrape:
    name: Scrape new cases
    runs-on: ubuntu-latest

    steps:
      # Record the start of the run as Unix epoch seconds for later steps.
      - name: Set current date as env variable
        run: echo "BEGIN_COURTS_RUN=$(date +'%s')" >> "$GITHUB_ENV"

      - uses: actions/checkout@v3

      - name: upgrade sqlite3
        # -y keeps apt-get from stopping at a confirmation prompt on the
        # non-interactive runner.
        run: |
          sudo apt-get update
          sudo apt-get install -y sqlite3

      - name: Install requirements
        run: |
          pip install -U pyopenssl cryptography
          pip install -r requirements.txt

      # NOTE(review): both `latest` and `tag` are set here; confirm which one
      # release-downloader honors and drop the other.
      - name: Download latest database zip
        uses: robinraju/release-downloader@v1.8
        with:
          latest: true
          tag: "nightly"
          fileName: "*.db.zip"

      - name: Decrypt database
        env:
          CASE_DB_PW: ${{ secrets.CASE_DB_PW }}
        run: |
          unzip -P "$CASE_DB_PW" cases.db.zip && rm cases.db.zip

      - name: Run scrape
        run: |
          echo "$BEGIN_COURTS_RUN"
          make get_new_records

      - name: Setup database for upload
        env:
          CASE_DB_PW: ${{ secrets.CASE_DB_PW }}
        run: |
          zip -P "$CASE_DB_PW" cases.db.zip cases.db

      - name: Upload new release
        uses: WebFreak001/deploy-nightly@v3.0.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: 'https://uploads.github.com/repos/datamade/court-scrapers/releases/131985702/assets{?name,label}'
          release_id: 131985702
          asset_path: ./cases.db.zip
          asset_name: cases.db.zip
          asset_content_type: application/zip  # required by GitHub API
          max_releases: 7

      - name: Keepalive
        uses: gautamkrishnar/keepalive-workflow@v1

  deploy:
    name: Deploy to Heroku
    needs: scrape
    runs-on: ubuntu-latest

    env:
      HEROKU_ORGANIZATION: ${{ secrets.HEROKU_ORG }}

    steps:
      - uses: actions/checkout@v3

      - name: Install requirements
        run: pip install -r requirements.txt

      # NOTE(review): same `latest` + `tag` combination as in the scrape job;
      # confirm this resolves to the release the scrape job just updated.
      - name: Download latest database zip
        uses: robinraju/release-downloader@v1.8
        with:
          latest: true
          tag: "nightly"
          fileName: "*.db.zip"

      - name: Decrypt database
        env:
          CASE_DB_PW: ${{ secrets.CASE_DB_PW }}
        run: |
          unzip -P "$CASE_DB_PW" cases.db.zip

      - name: Install heroku-builds plugin
        run: |
          heroku plugins:install heroku-builds

      # `justlogin: true` with an empty app name: this step is used for
      # authentication only; the publish itself happens in the last step.
      - name: Login to Heroku CLI
        uses: akhileshns/heroku-deploy@v3.12.14
        with:
          heroku_api_key: ${{ secrets.HEROKU_API_KEY }}
          heroku_app_name: ""
          heroku_email: ${{ secrets.HEROKU_EMAIL }}
          justlogin: true

      - name: Install Datasette plugins
        run: |
          datasette install datasette-auth-passwords datasette-auth-tokens

      - name: Get hashed Datasette password
        env:
          DATASETTE_INSTANCE_PW: ${{ secrets.DATASETTE_INSTANCE_PW }}
        run: |
          # Hash the instance password; printf avoids echo treating the value
          # as an option.
          hash=$(printf '%s\n' "$DATASETTE_INSTANCE_PW" \
            | datasette hash-password --no-confirm)
          # Mask the hash so it is redacted wherever it shows up in job logs.
          echo "::add-mask::$hash"
          # Store hash as an environment variable for the deploy step.
          echo "hash=$hash" >> "$GITHUB_ENV"

      - name: Deploy Datasette instance to Heroku
        # "$hash" is the variable exported through GITHUB_ENV above; reading
        # it in the shell keeps the value out of the rendered script text.
        run: |
          datasette publish heroku cases.db \
            -n court-scraper \
            -m metadata.json \
            --install datasette-auth-passwords \
            --plugin-secret datasette-auth-passwords root_password_hash "$hash"
6 changes: 1 addition & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
.INTERMEDIATE: *.csv *.jl *.json

.PHONY: all
all: upload

Expand Down Expand Up @@ -43,9 +41,7 @@ new_plaintiffs.csv: cases.json
new_defendants.csv: cases.json
cat $^ | jq '.[] | . as $$p | .defendants[] | [., $$p.case_number] | @csv' -r > $@

cases.json : civil-2.jl civil-3.jl civil-4.jl civil-5.jl \
civil-6.jl civil-101.jl civil-104.jl civil-11.jl \
civil-13.jl civil-14.jl civil-15.jl civil-17.jl chancery.jl
cases.json : chancery.jl
cat $^ | sort | python scripts/remove_dupe_cases.py | jq --slurp '.' > $@

# Query parameterized by civil case subdivision
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion courtscraper/spiders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class UnsuccessfulAutomation(Exception):

class CourtSpiderBase(ABC, Spider):
def __init__(
self, division="2", year=2022, start=0, case_numbers_file=None, **kwargs
self, division="2", year=2023, start=0, case_numbers_file=None, **kwargs
):
self.year = year
self.misses = set()
Expand Down
2 changes: 1 addition & 1 deletion courtscraper/spiders/chancery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class ChancerySpider(CourtSpiderBase):
name = "chancery"
url = "https://casesearch.cookcountyclerkofcourt.org/CivilCaseSearchAPI.aspx"

def __init__(self, year=2022, **kwargs):
def __init__(self, year=2023, **kwargs):
self.case_type = CASE_FORMAT
super().__init__(**kwargs)

Expand Down
2 changes: 1 addition & 1 deletion courtscraper/spiders/civil.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class CivilSpider(CourtSpiderBase):
name = "civil"
url = "https://casesearch.cookcountyclerkofcourt.org/CivilCaseSearchAPI.aspx"

def __init__(self, division="2", year=2022, **kwargs):
def __init__(self, division="2", year=2023, **kwargs):
self.case_type = DIVISIONS[division]
super().__init__(**kwargs)

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ datasette
csvs-to-sqlite
sqlite-utils
csvkit
sqlean.py
2 changes: 1 addition & 1 deletion scripts/nightly_civil_start.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ WITH serials AS (
court_case
WHERE
court = 'civil'
AND subdivision = ':subdivision'
AND subdivision = :subdivision /* noqa */
AND substr(case_number, 1, 4) = strftime('%Y', current_timestamp)
)

Expand Down

0 comments on commit d96d754

Please sign in to comment.