diff --git a/.github/actions/setup_dbt/action.yaml b/.github/actions/setup_dbt/action.yaml new file mode 100644 index 000000000..8d314f4ec --- /dev/null +++ b/.github/actions/setup_dbt/action.yaml @@ -0,0 +1,23 @@ +name: Setup dbt +description: Install dependencies for dbt and configure it for use in CI. +inputs: + role-to-assume: + description: AWS IAM role to assume when running dbt operations. + required: true +runs: + using: composite + steps: + - name: Install dbt requirements + uses: ./.github/actions/install_dbt_requirements + + - name: Load environment variables + uses: ./.github/actions/load_environment_variables + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: ${{ inputs.role-to-assume }} + aws-region: us-east-1 + + - name: Configure dbt environment + uses: ./.github/actions/configure_dbt_environment diff --git a/.github/workflows/build_and_test_dbt.yaml b/.github/workflows/build_and_test_dbt.yaml index fe55770b0..8d89b1e21 100644 --- a/.github/workflows/build_and_test_dbt.yaml +++ b/.github/workflows/build_and_test_dbt.yaml @@ -18,20 +18,10 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements - - - name: Load environment variables - uses: ./.github/actions/load_environment_variables - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 + - name: Setup dbt + uses: ./.github/actions/setup_dbt with: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} - aws-region: us-east-1 - - - name: Configure dbt environment - uses: ./.github/actions/configure_dbt_environment - name: Restore dbt state cache id: cache diff --git a/.github/workflows/cleanup_dbt_resources.yaml b/.github/workflows/cleanup_dbt_resources.yaml index 596982031..810e0e6ce 100644 --- a/.github/workflows/cleanup_dbt_resources.yaml +++ b/.github/workflows/cleanup_dbt_resources.yaml @@ -17,25 +17,15 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements + - name: Setup dbt + uses: ./.github/actions/setup_dbt + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} - name: Install requirements for cleaning up dbt resources run: sudo apt-get update && sudo apt-get install jq shell: bash - - name: Load environment variables - uses: ./.github/actions/load_environment_variables - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} - aws-region: us-east-1 - - - name: Configure dbt environment - uses: ./.github/actions/configure_dbt_environment - - name: Clean up dbt resources run: ../.github/scripts/cleanup_dbt_resources.sh ci working-directory: ${{ env.PROJECT_DIR }} diff --git a/.github/workflows/deploy_dbt_docs.yaml b/.github/workflows/deploy_dbt_docs.yaml index 13c20e2a9..6463ccb33 100644 --- a/.github/workflows/deploy_dbt_docs.yaml +++ b/.github/workflows/deploy_dbt_docs.yaml @@ -19,20 +19,10 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements - - - name: Load environment variables - uses: ./.github/actions/load_environment_variables - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 + - name: Setup dbt + uses: ./.github/actions/setup_dbt with: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} - aws-region: us-east-1 - - - name: Configure dbt environment - uses: ./.github/actions/configure_dbt_environment - name: Generate docs run: dbt docs generate --target "$TARGET" diff --git a/.github/workflows/test_dbt_models.yaml b/.github/workflows/test_dbt_models.yaml index 2db0f01d4..db4c4eb9a 100644 --- a/.github/workflows/test_dbt_models.yaml +++ b/.github/workflows/test_dbt_models.yaml @@ -14,20 +14,10 @@ jobs: - name: Checkout uses: actions/checkout@v3 - - name: Install dbt requirements - uses: ./.github/actions/install_dbt_requirements - - - name: Load environment variables - uses: ./.github/actions/load_environment_variables - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 + - name: Setup dbt + uses: ./.github/actions/setup_dbt with: role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} - aws-region: us-east-1 - - - name: Configure dbt environment - uses: ./.github/actions/configure_dbt_environment - name: Test models run: dbt test --target "$TARGET" diff --git a/.github/workflows/test_dbt_source_freshness.yaml b/.github/workflows/test_dbt_source_freshness.yaml new file mode 100644 index 000000000..80cc8d30f --- /dev/null +++ b/.github/workflows/test_dbt_source_freshness.yaml @@ -0,0 +1,28 @@ +name: test-dbt-source-freshness + +on: + schedule: + # Every day at 9am CST (3pm UTC) + - cron: '0 15 * * *' + +jobs: + test-dbt-source-freshness: + runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint + # so that we can authenticate with AWS + permissions: + id-token: write + contents: read + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup dbt + uses: ./.github/actions/setup_dbt + with: + role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }} + + - name: Test source freshness + run: dbt source freshness --target "$TARGET" + working-directory: ${{ env.PROJECT_DIR }} + shell: bash diff --git a/aws-athena/views/reporting-vw_most_recent_boundary.sql b/aws-athena/views/reporting-vw_most_recent_boundary.sql new file mode 100644 index 000000000..2c149fd15 --- /dev/null +++ b/aws-athena/views/reporting-vw_most_recent_boundary.sql @@ -0,0 +1,168 @@ +-- View that always contains the most recent political boundaries the +-- Data Department has ingested, by 10-digit parcel + +WITH parcel AS ( + SELECT + parcel.pin10, + parcel.town_code AS township_code, + parcel.year AS parcel_year, + ST_POINT(parcel.x_3435, parcel.y_3435) AS geom + FROM {{ source('spatial', 'parcel') }} AS parcel + WHERE parcel.year + = ( + SELECT MAX(parcel.year) + FROM {{ source('spatial', 'parcel') }} AS parcel + ) + AND parcel.town_code IS NOT NULL +), + +municipality AS ( + SELECT + municipality.municipality_name, + municipality.municipality_num, + municipality.year AS municipality_year, + ST_GEOMFROMBINARY(municipality.geometry_3435) AS geom + FROM {{ source('spatial', 'municipality') }} AS municipality + WHERE municipality.year + = ( + SELECT MAX(municipality.year) + FROM {{ source('spatial', 'municipality') }} AS municipality + ) +), + +ward_chicago AS ( + SELECT + ward_chicago.ward_chicago_name, + ward_chicago.ward_chicago_num, + ward_chicago.year AS ward_chicago_year, + ST_GEOMFROMBINARY(ward_chicago.geometry_3435) AS geom + FROM {{ source('spatial', 'ward_chicago') }} AS ward_chicago + WHERE ward_chicago.year + = ( + SELECT MAX(ward_chicago.year) + FROM {{ source('spatial', 'ward_chicago') }} AS ward_chicago + ) +), + +ward_evanston AS ( + SELECT + ward_evanston.ward_evanston_name, + ward_evanston.ward_evanston_num, + ward_evanston.year AS ward_evanston_year, + ST_GEOMFROMBINARY(ward_evanston.geometry_3435) AS geom + FROM {{ source('spatial', 'ward_evanston') }} AS ward_evanston + WHERE ward_evanston.year + = ( + SELECT MAX(ward_evanston.year) + FROM {{ source('spatial', 'ward_evanston') }} AS ward_evanston + ) +), + +community_area AS ( + SELECT + community_area.community AS community_area_name, + community_area.area_number AS community_area_num, + community_area.year AS community_area_year, + ST_GEOMFROMBINARY(community_area.geometry_3435) AS geom + FROM {{ source('spatial', 'community_area') }} AS community_area + WHERE community_area.year + = ( + SELECT MAX(community_area.year) + FROM {{ source('spatial', 'community_area') }} AS community_area + ) +), + +commissioner_district AS ( + SELECT + commissioner_district.commissioner_district_name, + commissioner_district.commissioner_district_num, + commissioner_district.year AS commissioner_district_year, + ST_GEOMFROMBINARY(commissioner_district.geometry_3435) AS geom + FROM + {{ source('spatial', 'commissioner_district') }} + AS commissioner_district + WHERE commissioner_district.year + = ( + SELECT MAX(commissioner_district.year) + FROM + {{ source('spatial', 'commissioner_district') }} + AS commissioner_district + ) +), + +state_representative_district AS ( + SELECT + state_representative_district.state_representative_district_name, + state_representative_district.state_representative_district_num, + state_representative_district.year + AS state_representative_district_year, + ST_GEOMFROMBINARY(state_representative_district.geometry_3435) AS geom + FROM + {{ source('spatial', 'state_representative_district') }} + AS state_representative_district + WHERE state_representative_district.year + = ( + SELECT MAX(state_representative_district.year) + FROM + {{ source('spatial', 'state_representative_district') }} + AS state_representative_district + ) +), + +state_senate_district AS ( + SELECT + state_senate_district.state_senate_district_name, + state_senate_district.state_senate_district_num, + state_senate_district.year AS state_senate_district_year, + ST_GEOMFROMBINARY(state_senate_district.geometry_3435) AS geom + FROM + {{ source('spatial', 'state_senate_district') }} + AS state_senate_district + WHERE state_senate_district.year + = ( + SELECT MAX(state_senate_district.year) + FROM + {{ source('spatial', 'state_senate_district') }} + AS state_senate_district + ) +) + +SELECT + parcel.pin10, + parcel.township_code, + parcel.parcel_year, + municipality.municipality_name, + municipality.municipality_num, + municipality.municipality_year, + ward_chicago.ward_chicago_name, + ward_chicago.ward_chicago_num, + ward_chicago.ward_chicago_year, + ward_evanston.ward_evanston_name, + ward_evanston.ward_evanston_num, + ward_evanston.ward_evanston_year, + community_area.community_area_name, + community_area.community_area_num, + community_area.community_area_year, + commissioner_district.commissioner_district_name, + commissioner_district.commissioner_district_num, + commissioner_district.commissioner_district_year, + state_representative_district.state_representative_district_name, + state_representative_district.state_representative_district_num, + state_representative_district.state_representative_district_year, + state_senate_district.state_senate_district_name, + state_senate_district.state_senate_district_num, + state_senate_district.state_senate_district_year +FROM parcel +LEFT JOIN municipality ON ST_WITHIN(parcel.geom, municipality.geom) +LEFT JOIN ward_chicago ON ST_WITHIN(parcel.geom, ward_chicago.geom) +LEFT JOIN ward_evanston ON ST_WITHIN(parcel.geom, ward_evanston.geom) +LEFT JOIN community_area ON ST_WITHIN(parcel.geom, community_area.geom) +LEFT JOIN + commissioner_district + ON ST_WITHIN(parcel.geom, commissioner_district.geom) +LEFT JOIN + state_representative_district + ON ST_WITHIN(parcel.geom, state_representative_district.geom) +LEFT JOIN + state_senate_district + ON ST_WITHIN(parcel.geom, state_senate_district.geom) diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml index 1466b63f3..c6d444fb2 100644 --- a/dbt/models/default/schema.yml +++ b/dbt/models/default/schema.yml @@ -115,7 +115,7 @@ models: - mail_address_zipcode_1 - mail_address_zipcode_2 config: - error_if: ">880607" + error_if: ">880615" # TODO: Mailing address changes after validated sale(?) # TODO: Site addresses are all in Cook County - name: default.vw_pin_condo_char diff --git a/dbt/models/iasworld/schema.yml b/dbt/models/iasworld/schema.yml index afdd9722f..19f112d03 100644 --- a/dbt/models/iasworld/schema.yml +++ b/dbt/models/iasworld/schema.yml @@ -3,12 +3,17 @@ version: 2 sources: - name: iasworld + loaded_at_field: date_parse(wen, '%Y-%m-%d %H:%i:%s.0') tables: - name: aasysjur - name: addn - name: addrindx - name: aprval - name: asmt_all + freshness: + filter: &latest_taxyr taxyr >= date_format(current_date - interval '1' year, '%Y') + warn_after: &24_hours {count: 24, period: hour} + error_after: &48_hours {count: 48, period: hour} - name: asmt_hist - name: cname - name: comdat @@ -26,6 +31,10 @@ sources: - name: htagnt - name: htdates - name: htpar + freshness: + filter: *latest_taxyr + warn_after: *24_hours + error_after: *48_hours - name: land - name: legdat - name: lpmod @@ -34,6 +43,10 @@ sources: - name: owndat - name: pardat - name: permit + freshness: + filter: date_format(date_parse(permdt, '%Y-%m-%d %H:%i:%s.0'), '%Y') >= date_format(current_date - interval '1' year, '%Y') + warn_after: *48_hours + error_after: &72_hours {count: 72, period: hour} - name: rcoby - name: sales - name: splcom diff --git a/dbt/models/reporting/reporting.vw_most_recent_boundary.sql b/dbt/models/reporting/reporting.vw_most_recent_boundary.sql new file mode 120000 index 000000000..277ac60f9 --- /dev/null +++ b/dbt/models/reporting/reporting.vw_most_recent_boundary.sql @@ -0,0 +1 @@ +../../../aws-athena/views/reporting-vw_most_recent_boundary.sql \ No newline at end of file