diff --git a/aws-athena/views/default-vw_card_res_char.sql b/aws-athena/views/default-vw_card_res_char.sql
index 3bebcfe1c..7144d2ace 100644
--- a/aws-athena/views/default-vw_card_res_char.sql
+++ b/aws-athena/views/default-vw_card_res_char.sql
@@ -12,13 +12,12 @@ WITH multicodes AS (
 
 aggregate_land AS (
     SELECT
-        parid,
-        taxyr,
-        COALESCE(COUNT(*) > 1, FALSE) AS pin_is_multiland,
-        COUNT(*) AS pin_num_landlines,
-        SUM(sf) AS total_land_sf
-    FROM {{ source('iasworld', 'land') }}
-    GROUP BY parid, taxyr
+        pin AS parid,
+        year AS taxyr,
+        COALESCE(num_landlines > 1, FALSE) AS pin_is_multiland,
+        num_landlines AS pin_num_landlines,
+        sf AS total_land_sf
+    FROM {{ ref('default.vw_pin_land') }}
 ),
 
 townships AS (
diff --git a/aws-athena/views/default-vw_pin_condo_char.sql b/aws-athena/views/default-vw_pin_condo_char.sql
index c1265861b..6d194ea9a 100644
--- a/aws-athena/views/default-vw_pin_condo_char.sql
+++ b/aws-athena/views/default-vw_pin_condo_char.sql
@@ -11,13 +11,12 @@ workbooks rather than iasWorld.
 
 WITH aggregate_land AS (
     SELECT
-        parid,
-        taxyr,
-        COALESCE(COUNT(*) > 1, FALSE) AS pin_is_multiland,
-        COUNT(*) AS pin_num_landlines,
-        SUM(sf) AS total_building_land_sf
-    FROM {{ source('iasworld', 'land') }}
-    GROUP BY parid, taxyr
+        pin AS parid,
+        year AS taxyr,
+        COALESCE(num_landlines > 1, FALSE) AS pin_is_multiland,
+        num_landlines AS pin_num_landlines,
+        sf AS total_building_land_sf
+    FROM {{ ref('default.vw_pin_land') }}
 ),
 
 -- Valuations-provided PINs that shouldn't be considered parking spaces
diff --git a/aws-athena/views/default-vw_pin_land.sql b/aws-athena/views/default-vw_pin_land.sql
new file mode 100644
index 000000000..687696536
--- /dev/null
+++ b/aws-athena/views/default-vw_pin_land.sql
@@ -0,0 +1,59 @@
+-- A view to properly aggregate land square footage at the PIN level. Parcels
+-- can have multiple land lines that are sometimes summed or ignored.
+--
+-- Output is built per PIN and year from iasworld.land rows that are
+-- current (cur = 'Y') and not deactivated (deactivat IS NULL):
+--   * pin, year: parid and taxyr from iasworld.land
+--   * num_landlines: count of qualifying land lines for the PIN/year
+--   * sf: top line sf when more than one line has a non-null influ and all
+--     lines share the same sf, otherwise the sum of sf across lines
+WITH total_influ AS (
+    SELECT
+        land.parid,
+        land.taxyr,
+        land.lline,
+        COUNT(*)
+            OVER (PARTITION BY land.parid, land.taxyr)
+            AS num_landlines,
+        SUM(land.sf) OVER (PARTITION BY land.parid, land.taxyr) AS sf_sum,
+        -- We explicitly want to take the top line land sf if we're only taking
+        -- one line.
+        FIRST_VALUE(land.sf)
+            OVER (PARTITION BY land.parid, land.taxyr ORDER BY land.lline)
+            AS sf_top,
+        -- COUNT(column) skips NULLs, so this is the number of land lines
+        -- with a non-null influ.
+        COUNT(land.influ)
+            OVER (PARTITION BY land.parid, land.taxyr)
+            AS non_null_influ,
+        MAX(land.sf) OVER (PARTITION BY land.parid, land.taxyr) AS max_sf,
+        MIN(land.sf) OVER (PARTITION BY land.parid, land.taxyr) AS min_sf,
+        -- When the first landline for a pin is deactivated we should take the
+        -- minimum value of lline as the top line.
+        MIN(land.lline) OVER (PARTITION BY land.parid, land.taxyr) AS top_line
+    FROM {{ source('iasworld', 'land') }} AS land
+    WHERE
+        land.cur = 'Y'
+        AND land.deactivat IS NULL
+)
+
+SELECT
+    total_influ.parid AS pin,
+    total_influ.taxyr AS year,
+    total_influ.num_landlines,
+    CASE
+        -- When there are multiple non-null values for influ across land lines
+        -- and all sf values are the same, we choose the topline land sf,
+        -- otherwise we sum land sf.
+        WHEN
+            total_influ.non_null_influ > 1
+            AND total_influ.max_sf = total_influ.min_sf
+            THEN total_influ.sf_top
+        ELSE total_influ.sf_sum
+    END AS sf
+FROM total_influ
+-- Window functions repeat the aggregates on every land line; keep only the
+-- top line so each PIN/year appears once.
+-- NOTE(review): assumes lline is unique per parid/taxyr among the filtered
+-- rows; duplicate top lines would yield duplicate PIN rows - confirm.
+WHERE total_influ.lline = total_influ.top_line
diff --git a/aws-athena/views/reporting-vw_res_report_summary.sql b/aws-athena/views/reporting-vw_res_report_summary.sql
index c966de647..09abf4c80 100644
--- a/aws-athena/views/reporting-vw_res_report_summary.sql
+++ b/aws-athena/views/reporting-vw_res_report_summary.sql
@@ -190,13 +190,12 @@ sales AS (
 -- Aggregate land for all parcels
 aggregate_land AS (
     SELECT
-        parid,
-        taxyr,
-        SUM(sf) AS total_land_sf
-    FROM {{ source('iasworld', 'land') }}
-    GROUP BY parid, taxyr
+        pin AS parid,
+        year AS taxyr,
+        sf AS total_land_sf
+    FROM {{ ref('default.vw_pin_land') }}
 ),
 
 -- Combined SF/MF and condo characteristics
 chars AS (
     SELECT
diff --git a/dbt/models/default/default.vw_pin_land.sql b/dbt/models/default/default.vw_pin_land.sql
new file mode 120000
index 000000000..286144d69
--- /dev/null
+++ b/dbt/models/default/default.vw_pin_land.sql
@@ -0,0 +1 @@
+../../../aws-athena/views/default-vw_pin_land.sql
\ No newline at end of file
diff --git a/dbt/models/default/docs.md b/dbt/models/default/docs.md
index 4046579f8..5f3609efd 100644
--- a/dbt/models/default/docs.md
+++ b/dbt/models/default/docs.md
@@ -98,3 +98,8 @@ Source of truth view for PIN location.
   for current year can be relatively sparse
 * `spatial.township` is not yearly
 {% enddocs %}
+
+{% docs vw_pin_land %}
+View containing aggregated land square footage. Aggregated to the PIN and
+year level from current, non-deactivated land lines in `iasworld.land`.
+{% enddocs %}
diff --git a/dbt/models/default/schema.yml b/dbt/models/default/schema.yml
index c6d444fb2..2ac482dbf 100644
--- a/dbt/models/default/schema.yml
+++ b/dbt/models/default/schema.yml
@@ -89,7 +89,7 @@ models:
             - char_renovation
             - char_recent_renovation
           config:
-            error_if: ">73941"
+            error_if: ">73942"
       # TODO: Characteristics columns should adhere to pre-determined criteria
   - name: default.vw_pin_address
     description: '{{ doc("vw_pin_address") }}'
@@ -135,6 +135,8 @@ models:
       # 10-digit PIN
       # TODO: Sum of proration rate never exceeds 1 per 10-digit PIN
       # TODO: Characteristics columns should adhere to pre-determined criteria
+  - name: default.vw_pin_land
+    description: '{{ doc("vw_pin_land") }}'
   - name: default.vw_pin_sale
     description: '{{ doc("vw_pin_sale") }}'
     columns: