From bd4bc1769fe33fdba1dbe827791b5c41389cf6ec Mon Sep 17 00:00:00 2001 From: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> Date: Fri, 3 May 2024 18:10:10 -0500 Subject: [PATCH] Reporting views for municipalities (#413) * Initial commit * Typo * Typos, add temporary test error tolerance * Test 2024 muni filling * Test legdat as source of muni names * Correct uniqueness test since munis can cross townships * Use legdat for municipality * Typo * Add stage portion column * Switch portion construction to double from int * Remove triad from aggregatation groups * Clean up leg.cityname * Typo * More cityname cleaning * More cityname cleaning * Try location.tax method for muni names * Make missing muni names NULL * Remove blank munis * Update test failure threshold * Test pin count outer join * Typo * Muni reporting view functioning as intended * Commenting * Commenting * Typo * Commenting * Remove test allowance thresholds * Commenting * Commenting * Commenting * Commenting * Commenting * Commenting * Update dbt/models/reporting/docs.md Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> * Improve view column names * Commenting --------- Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- .../reporting-vw_assessment_roll_muni.sql | 118 ++++++++++++++++++ .../views/reporting-vw_pin_township_class.sql | 13 ++ dbt/models/reporting/docs.md | 9 ++ .../reporting.vw_assessment_roll_muni.sql | 1 + dbt/models/reporting/schema.yml | 17 +++ 5 files changed, 158 insertions(+) create mode 100644 aws-athena/views/reporting-vw_assessment_roll_muni.sql create mode 120000 dbt/models/reporting/reporting.vw_assessment_roll_muni.sql diff --git a/aws-athena/views/reporting-vw_assessment_roll_muni.sql b/aws-athena/views/reporting-vw_assessment_roll_muni.sql new file mode 100644 index 000000000..0d6a42858 --- /dev/null +++ b/aws-athena/views/reporting-vw_assessment_roll_muni.sql @@ -0,0 +1,118 @@ +-- Gathers AVs by year, major class, assessment stage, and municipality for +-- reporting + +/* Ensure every municipality/class/year has a row for every stage through +cross-joining. This is to make sure that combinations that do not yet +exist in iasworld.asmt_all for the current year will exist in the view, but have +largely empty columns. For example: even if no class 4s in the City of Chicago +have been mailed yet for the current assessment year, we would still like an +empty City of Chicago/class 4 row to exist for the mailed stage. */ +WITH stages AS ( + + SELECT 'mailed' AS stage + UNION + SELECT 'assessor certified' AS stage + UNION + SELECT 'bor certified' AS stage + +), + +/* This CTE removes historical PINs in reporting.vw_pin_township_class that are +not in reporting.vw_pin_value_long. We do this to make sure the samples we +derive the numerator and denominator from for pct_pin_w_value_in_group are the +same. These differences are inherent in the tables that feed these views +(iasworld.pardat and iasworld.asmt_all, respectively) and are data errors - not +emblematic of what portion of a municipality has actually progressed through an +assessment stage. + +It does NOT remove PINs from the most recent year of +reporting.vw_pin_township_class since we expect differences based on how +iasworld.asmt_all is populated through the year as the assessment cycle +progresses. This means data errors caused by differences between iasworld.pardat +and iasworld.asmt_all won't be addressed in the most recent year. Unfortunately, +we can't know what those errors are (or if they even exist) until asmt_all has +at least one fully complete stage for a given year. + +Starting in 2020 a small number of PINs are present in iasworld.asmt_all for +one or two but not all three stages of assessment when we would expect all three +stages to be present for said PINs. This is also a data error, but is NOT +addressed in this view and leads to a few instances where +pct_pin_w_value_in_group ends up being less than 1 when it should equal 1. +16-07-219-029-1032 missing a mailed value but having CCAO and BOR certified +values in 2021 is an example. */ +trimmed_town_class AS ( + SELECT vptc.* + FROM {{ ref('reporting.vw_pin_township_class') }} AS vptc + LEFT JOIN + ( + SELECT DISTINCT + pin, + year + FROM {{ ref('reporting.vw_pin_value_long') }} + ) + AS pins + ON vptc.pin = pins.pin + AND vptc.year = pins.year + WHERE pins.pin IS NOT NULL + OR vptc.year + = (SELECT MAX(year) FROM {{ ref('reporting.vw_pin_township_class') }}) + +), + +/* Calculate the denominator for the pct_pin_w_value_in_group column. +reporting.vw_pin_township_class serves as the universe of yearly PINs we expect +to see in reporting.vw_pin_value_long. */ +pin_counts AS ( + SELECT + vptc.municipality_name, + vptc.major_class, + vptc.year, + stages.stage, + COUNT(*) AS total_n + FROM trimmed_town_class AS vptc + CROSS JOIN stages + WHERE vptc.municipality_name IS NOT NULL + GROUP BY + vptc.municipality_name, + vptc.year, + vptc.major_class, + stages.stage +) + +-- Calculate total and median values by municipality, as well as the portion of +-- each municipality that has progressed through an assessment stage by class. +SELECT + pin_counts.year, + pin_counts.stage, + pin_counts.municipality_name, + munis.major_class AS class, + SUM(CAST(vpvl.pin IS NOT NULL AS INT)) AS num_pin_w_value, + pin_counts.total_n AS num_pin_total_in_group, + SUM(CAST(vpvl.pin IS NOT NULL AS DOUBLE)) + / CAST(pin_counts.total_n AS DOUBLE) AS pct_pin_w_value_in_group, + SUM(vpvl.bldg) AS bldg_sum, + CAST(APPROX_PERCENTILE(vpvl.bldg, 0.5) AS INT) AS bldg_median, + SUM(vpvl.land) AS land_sum, + CAST(APPROX_PERCENTILE(vpvl.land, 0.5) AS INT) AS land_median, + SUM(vpvl.tot) AS tot_sum, + CAST(APPROX_PERCENTILE(vpvl.tot, 0.5) AS INT) AS tot_median +FROM pin_counts +LEFT JOIN trimmed_town_class AS munis + ON pin_counts.municipality_name = munis.municipality_name + AND pin_counts.major_class = munis.major_class + AND pin_counts.year = munis.year +LEFT JOIN {{ ref('reporting.vw_pin_value_long') }} AS vpvl + ON munis.pin = vpvl.pin + AND munis.year = vpvl.year + AND pin_counts.stage = LOWER(vpvl.stage_name) +GROUP BY + pin_counts.municipality_name, + pin_counts.year, + munis.major_class, + pin_counts.stage, + pin_counts.total_n +ORDER BY + pin_counts.year DESC, + pin_counts.municipality_name ASC, + pin_counts.stage ASC, + munis.major_class ASC diff --git a/aws-athena/views/reporting-vw_pin_township_class.sql b/aws-athena/views/reporting-vw_pin_township_class.sql index ee3aaffa5..612d31784 100644 --- a/aws-athena/views/reporting-vw_pin_township_class.sql +++ b/aws-athena/views/reporting-vw_pin_township_class.sql @@ -19,6 +19,10 @@ SELECT town.township_name, leg.user1 AS township_code, SUBSTR(correct.nbhd, 3, 3) AS nbhd, + CASE + WHEN ARRAY_JOIN(tax.tax_municipality_name, ', ') = '' THEN NULL ELSE + ARRAY_JOIN(tax.tax_municipality_name, ', ') + END AS municipality_name, correct.class, groups.reporting_class_code AS major_class, groups.modeling_group AS property_group, @@ -48,5 +52,14 @@ LEFT JOIN {{ source('spatial', 'township') }} AS town -- Exclude classes without a reporting class INNER JOIN {{ ref('ccao.class_dict') }} AS groups ON correct.class = groups.class_code +-- Tax municipality data lags iasWorld data by a year or two at any given time +LEFT JOIN {{ ref('location.tax') }} AS tax + ON SUBSTR(correct.parid, 1, 10) = tax.pin10 + AND CASE + WHEN + correct.taxyr > (SELECT MAX(year) FROM {{ ref('location.tax') }}) + THEN (SELECT MAX(year) FROM {{ ref('location.tax') }}) + ELSE correct.taxyr + END = tax.year WHERE correct.cur = 'Y' AND correct.deactivat IS NULL diff --git a/dbt/models/reporting/docs.md b/dbt/models/reporting/docs.md index 57330d4ba..58d2bbcd7 100644 --- a/dbt/models/reporting/docs.md +++ b/dbt/models/reporting/docs.md @@ -30,6 +30,15 @@ assessment stage, and year. Feeds public reporting assets. **Primary Key**: `year`, `township_name`, `class`, `stage` {% enddocs %} +# vw_assessment_roll_muni + +{% docs view_vw_assessment_roll_muni %} +View for reporting total AVs and PIN counts per major class group, municipality, +assessment stage, and year. Feeds public reporting assets. + +**Primary Key**: `year`, `municipality_name`, `class`, `stage` +{% enddocs %} + # vw_pin_most_recent_boundary {% docs view_vw_pin_most_recent_boundary %} diff --git a/dbt/models/reporting/reporting.vw_assessment_roll_muni.sql b/dbt/models/reporting/reporting.vw_assessment_roll_muni.sql new file mode 120000 index 000000000..d9c998128 --- /dev/null +++ b/dbt/models/reporting/reporting.vw_assessment_roll_muni.sql @@ -0,0 +1 @@ +../../../aws-athena/views/reporting-vw_assessment_roll_muni.sql \ No newline at end of file diff --git a/dbt/models/reporting/schema.yml b/dbt/models/reporting/schema.yml index efef1e78a..becb94d8e 100644 --- a/dbt/models/reporting/schema.yml +++ b/dbt/models/reporting/schema.yml @@ -84,6 +84,23 @@ models: AND triad IS NOT NULL AND class IS NOT NULL + - name: reporting.vw_assessment_roll_muni + description: '{{ doc("view_vw_assessment_roll_muni") }}' + tests: + - unique_combination_of_columns: + name: reporting_vw_assessment_roll_muni_unique_by_keys + combination_of_columns: + - year + - stage + - municipality_name + - class + - expression_is_true: + name: reporting_vw_assessment_roll_muni_no_nulls + expression: | + stage IS NOT NULL + AND municipality_name IS NOT NULL + AND class IS NOT NULL + - name: reporting.vw_pin_most_recent_boundary description: '{{ doc("view_vw_pin_most_recent_boundary") }}' tests: