diff --git a/aws-athena/ctas/location-access.sql b/aws-athena/ctas/location-access.sql index af170b470..b39cdcfec 100644 --- a/aws-athena/ctas/location-access.sql +++ b/aws-athena/ctas/location-access.sql @@ -7,71 +7,67 @@ ) }} -WITH access AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'walkability') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'walkability') }} +), - walkability AS ( +walkability AS ( + SELECT + dp.x_3435, + dp.y_3435, + CAST(CAST(MAX(cprod.walk_num) AS BIGINT) AS VARCHAR) + AS access_cmap_walk_id, + MAX(cprod.nta_score) AS access_cmap_walk_nta_score, + MAX(cprod.total_score) AS access_cmap_walk_total_score, + MAX(cprod.year) AS access_cmap_walk_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - CAST(CAST(MAX(cprod.walk_num) AS BIGINT) AS VARCHAR) - AS access_cmap_walk_id, - MAX(cprod.nta_score) AS access_cmap_walk_nta_score, - MAX(cprod.total_score) AS access_cmap_walk_total_score, - MAX(cprod.year) AS access_cmap_walk_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'walkability') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'walkability') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - 
ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) - - SELECT - pcl.pin10, - walk.access_cmap_walk_id, - walk.access_cmap_walk_nta_score, - walk.access_cmap_walk_total_score, - walk.access_cmap_walk_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN walkability AS walk - ON pcl.x_3435 = walk.x_3435 - AND pcl.y_3435 = walk.y_3435 - AND pcl.year = walk.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'walkability') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'walkability') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM access +SELECT + pcl.pin10, + walk.access_cmap_walk_id, + walk.access_cmap_walk_nta_score, + walk.access_cmap_walk_total_score, + walk.access_cmap_walk_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN walkability AS walk + ON pcl.x_3435 = walk.x_3435 + AND pcl.y_3435 = walk.y_3435 + AND pcl.year = walk.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-census.sql b/aws-athena/ctas/location-census.sql index 0e9d2c02c..e6bda679e 100644 --- a/aws-athena/ctas/location-census.sql +++ b/aws-athena/ctas/location-census.sql @@ -7,95 +7,91 @@ ) }} -WITH census AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'census') }} - ), - - distinct_joined 
AS ( - SELECT - dp.x_3435, - dp.y_3435, - MAX(CASE - WHEN cen.geography = 'block_group' THEN cen.geoid - END) AS census_block_group_geoid, - MAX(CASE - WHEN cen.geography = 'block' THEN cen.geoid - END) AS census_block_geoid, - MAX(CASE - WHEN cen.geography = 'congressional_district' THEN cen.geoid - END) AS census_congressional_district_geoid, - MAX(CASE - WHEN cen.geography = 'county_subdivision' THEN cen.geoid - END) AS census_county_subdivision_geoid, - MAX(CASE - WHEN cen.geography = 'place' THEN cen.geoid - END) AS census_place_geoid, - MAX(CASE - WHEN cen.geography = 'puma' THEN cen.geoid - END) AS census_puma_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_elementary' THEN cen.geoid - END) AS census_school_district_elementary_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_secondary' THEN cen.geoid - END) AS census_school_district_secondary_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_unified' THEN cen.geoid - END) AS census_school_district_unified_geoid, - MAX(CASE - WHEN cen.geography = 'state_representative' THEN cen.geoid - END) AS census_state_representative_geoid, - MAX(CASE - WHEN cen.geography = 'state_senate' THEN cen.geoid - END) AS census_state_senate_geoid, - MAX(CASE - WHEN cen.geography = 'tract' THEN cen.geoid - END) AS census_tract_geoid, - MAX(CASE - WHEN cen.geography = 'zcta' THEN cen.geoid - END) AS census_zcta_geoid, - cen.year - FROM distinct_pins AS dp - LEFT JOIN {{ source('spatial', 'census') }} AS cen - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cen.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cen.year - ) +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'census') }} +), +distinct_joined AS ( SELECT - pcl.pin10, - dj.census_block_group_geoid, - dj.census_block_geoid, - dj.census_congressional_district_geoid, - dj.census_county_subdivision_geoid, - dj.census_place_geoid, - dj.census_puma_geoid, - dj.census_school_district_elementary_geoid, - 
dj.census_school_district_secondary_geoid, - dj.census_school_district_unified_geoid, - dj.census_state_representative_geoid, - dj.census_state_senate_geoid, - dj.census_tract_geoid, - dj.census_zcta_geoid, - dj.year AS census_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN distinct_joined AS dj - ON pcl.year = dj.year - AND pcl.x_3435 = dj.x_3435 - AND pcl.y_3435 = dj.y_3435 - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dp.x_3435, + dp.y_3435, + MAX(CASE + WHEN cen.geography = 'block_group' THEN cen.geoid + END) AS census_block_group_geoid, + MAX(CASE + WHEN cen.geography = 'block' THEN cen.geoid + END) AS census_block_geoid, + MAX(CASE + WHEN cen.geography = 'congressional_district' THEN cen.geoid + END) AS census_congressional_district_geoid, + MAX(CASE + WHEN cen.geography = 'county_subdivision' THEN cen.geoid + END) AS census_county_subdivision_geoid, + MAX(CASE + WHEN cen.geography = 'place' THEN cen.geoid + END) AS census_place_geoid, + MAX(CASE + WHEN cen.geography = 'puma' THEN cen.geoid + END) AS census_puma_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_elementary' THEN cen.geoid + END) AS census_school_district_elementary_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_secondary' THEN cen.geoid + END) AS census_school_district_secondary_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_unified' THEN cen.geoid + END) AS census_school_district_unified_geoid, + MAX(CASE + WHEN cen.geography = 'state_representative' THEN cen.geoid + END) AS census_state_representative_geoid, + MAX(CASE + WHEN cen.geography = 'state_senate' THEN cen.geoid + END) AS census_state_senate_geoid, + MAX(CASE + WHEN cen.geography = 'tract' THEN cen.geoid + END) AS census_tract_geoid, + MAX(CASE + WHEN cen.geography = 'zcta' THEN cen.geoid + END) AS census_zcta_geoid, + cen.year + FROM distinct_pins AS dp + LEFT JOIN {{ source('spatial', 'census') }} AS cen + ON ST_WITHIN( + ST_POINT(dp.x_3435, 
dp.y_3435), + ST_GEOMFROMBINARY(cen.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cen.year ) -SELECT * FROM census +SELECT + pcl.pin10, + dj.census_block_group_geoid, + dj.census_block_geoid, + dj.census_congressional_district_geoid, + dj.census_county_subdivision_geoid, + dj.census_place_geoid, + dj.census_puma_geoid, + dj.census_school_district_elementary_geoid, + dj.census_school_district_secondary_geoid, + dj.census_school_district_unified_geoid, + dj.census_state_representative_geoid, + dj.census_state_senate_geoid, + dj.census_tract_geoid, + dj.census_zcta_geoid, + dj.year AS census_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN distinct_joined AS dj + ON pcl.year = dj.year + AND pcl.x_3435 = dj.x_3435 + AND pcl.y_3435 = dj.y_3435 +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-census_acs5.sql b/aws-athena/ctas/location-census_acs5.sql index 8dd91da60..8bfc8e43b 100644 --- a/aws-athena/ctas/location-census_acs5.sql +++ b/aws-athena/ctas/location-census_acs5.sql @@ -11,109 +11,105 @@ ) }} -WITH census_acs5 AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'census') }} - ), - - acs5_max_year AS ( - SELECT MAX(year) AS max_year - FROM {{ source('census', 'acs5') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'census') }} +), - acs5_year_fill AS ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('census', 'acs5') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP 
BY dy.year - ), +acs5_max_year AS ( + SELECT MAX(year) AS max_year + FROM {{ source('census', 'acs5') }} +), - distinct_joined AS ( - SELECT - dp.x_3435, - dp.y_3435, - MAX(CASE - WHEN cen.geography = 'congressional_district' THEN cen.geoid - END) AS census_acs5_congressional_district_geoid, - MAX(CASE - WHEN cen.geography = 'county_subdivision' THEN cen.geoid - END) AS census_acs5_county_subdivision_geoid, - MAX(CASE - WHEN cen.geography = 'place' THEN cen.geoid - END) AS census_acs5_place_geoid, - MAX(CASE - WHEN cen.geography = 'puma' THEN cen.geoid - END) AS census_acs5_puma_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_elementary' THEN cen.geoid - END) AS census_acs5_school_district_elementary_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_secondary' THEN cen.geoid - END) AS census_acs5_school_district_secondary_geoid, - MAX(CASE - WHEN cen.geography = 'school_district_unified' THEN cen.geoid - END) AS census_acs5_school_district_unified_geoid, - MAX(CASE - WHEN cen.geography = 'state_representative' THEN cen.geoid - END) AS census_acs5_state_representative_geoid, - MAX(CASE - WHEN cen.geography = 'state_senate' THEN cen.geoid - END) AS census_acs5_state_senate_geoid, - MAX(CASE - WHEN cen.geography = 'tract' THEN cen.geoid - END) AS census_acs5_tract_geoid, - cen.year - FROM distinct_pins AS dp - LEFT JOIN ( - SELECT * - FROM {{ source('spatial', 'census') }} - WHERE year <= (SELECT max_year FROM acs5_max_year) - ) AS cen - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cen.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cen.year - ) +acs5_year_fill AS ( + SELECT + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('census', 'acs5') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year +), +distinct_joined AS ( SELECT - pcl.pin10, - dj.census_acs5_congressional_district_geoid, - dj.census_acs5_county_subdivision_geoid, - dj.census_acs5_place_geoid, - 
dj.census_acs5_puma_geoid, - dj.census_acs5_school_district_elementary_geoid, - dj.census_acs5_school_district_secondary_geoid, - dj.census_acs5_school_district_unified_geoid, - dj.census_acs5_state_representative_geoid, - dj.census_acs5_state_senate_geoid, - dj.census_acs5_tract_geoid, - dj.year AS census_acs5_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN acs5_year_fill AS ayf - ON pcl.year = ayf.pin_year - LEFT JOIN distinct_joined AS dj - ON ayf.fill_year = dj.year - AND pcl.x_3435 = dj.x_3435 - AND pcl.y_3435 = dj.y_3435 - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dp.x_3435, + dp.y_3435, + MAX(CASE + WHEN cen.geography = 'congressional_district' THEN cen.geoid + END) AS census_acs5_congressional_district_geoid, + MAX(CASE + WHEN cen.geography = 'county_subdivision' THEN cen.geoid + END) AS census_acs5_county_subdivision_geoid, + MAX(CASE + WHEN cen.geography = 'place' THEN cen.geoid + END) AS census_acs5_place_geoid, + MAX(CASE + WHEN cen.geography = 'puma' THEN cen.geoid + END) AS census_acs5_puma_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_elementary' THEN cen.geoid + END) AS census_acs5_school_district_elementary_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_secondary' THEN cen.geoid + END) AS census_acs5_school_district_secondary_geoid, + MAX(CASE + WHEN cen.geography = 'school_district_unified' THEN cen.geoid + END) AS census_acs5_school_district_unified_geoid, + MAX(CASE + WHEN cen.geography = 'state_representative' THEN cen.geoid + END) AS census_acs5_state_representative_geoid, + MAX(CASE + WHEN cen.geography = 'state_senate' THEN cen.geoid + END) AS census_acs5_state_senate_geoid, + MAX(CASE + WHEN cen.geography = 'tract' THEN cen.geoid + END) AS census_acs5_tract_geoid, + cen.year + FROM distinct_pins AS dp + LEFT JOIN ( + SELECT * + FROM {{ source('spatial', 'census') }} + WHERE year <= (SELECT max_year FROM acs5_max_year) + ) AS cen + ON ST_WITHIN( + 
ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cen.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cen.year ) -SELECT * FROM census_acs5 +SELECT + pcl.pin10, + dj.census_acs5_congressional_district_geoid, + dj.census_acs5_county_subdivision_geoid, + dj.census_acs5_place_geoid, + dj.census_acs5_puma_geoid, + dj.census_acs5_school_district_elementary_geoid, + dj.census_acs5_school_district_secondary_geoid, + dj.census_acs5_school_district_unified_geoid, + dj.census_acs5_state_representative_geoid, + dj.census_acs5_state_senate_geoid, + dj.census_acs5_tract_geoid, + dj.year AS census_acs5_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN acs5_year_fill AS ayf + ON pcl.year = ayf.pin_year +LEFT JOIN distinct_joined AS dj + ON ayf.fill_year = dj.year + AND pcl.x_3435 = dj.x_3435 + AND pcl.y_3435 = dj.y_3435 +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-chicago.sql b/aws-athena/ctas/location-chicago.sql index 395b7f77c..bbf84de24 100644 --- a/aws-athena/ctas/location-chicago.sql +++ b/aws-athena/ctas/location-chicago.sql @@ -7,151 +7,147 @@ ) }} -WITH chicago AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year FROM {{ source('spatial', 'police_district') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'community_area') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'industrial_corridor') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year FROM {{ source('spatial', 'police_district') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 
'community_area') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 'industrial_corridor') }} +), - police_district AS ( +police_district AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(CAST(CAST(cprod.pd_num AS INTEGER) AS VARCHAR)) + AS chicago_police_district_num, + MAX(cprod.year) AS chicago_police_district_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(CAST(CAST(cprod.pd_num AS INTEGER) AS VARCHAR)) - AS chicago_police_district_num, - MAX(cprod.year) AS chicago_police_district_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'police_district') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'police_district') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'police_district') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'police_district') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - community_area AS ( +community_area AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(CAST(CAST(cprod.area_number AS INTEGER) AS VARCHAR)) + AS chicago_community_area_num, + MAX(cprod.community) AS chicago_community_area_name, + MAX(cprod.year) AS chicago_community_area_data_year, + cprod.pin_year + FROM 
distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(CAST(CAST(cprod.area_number AS INTEGER) AS VARCHAR)) - AS chicago_community_area_num, - MAX(cprod.community) AS chicago_community_area_name, - MAX(cprod.year) AS chicago_community_area_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'community_area') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'community_area') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'community_area') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'community_area') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - industrial_corridor AS ( +industrial_corridor AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(CAST(CAST(cprod.num AS INTEGER) AS VARCHAR)) + AS chicago_industrial_corridor_num, + MAX(cprod.name) AS chicago_industrial_corridor_name, + MAX(cprod.year) AS chicago_industrial_corridor_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(CAST(CAST(cprod.num AS INTEGER) AS VARCHAR)) - AS chicago_industrial_corridor_num, - MAX(cprod.name) AS chicago_industrial_corridor_name, - MAX(cprod.year) AS chicago_industrial_corridor_data_year, - cprod.pin_year - FROM 
distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'industrial_corridor') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'industrial_corridor') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) - - SELECT - pcl.pin10, - ca.chicago_community_area_num, - ca.chicago_community_area_name, - ca.chicago_community_area_data_year, - ic.chicago_industrial_corridor_num, - ic.chicago_industrial_corridor_name, - ic.chicago_industrial_corridor_data_year, - pd.chicago_police_district_num, - pd.chicago_police_district_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN police_district AS pd - ON pcl.x_3435 = pd.x_3435 - AND pcl.y_3435 = pd.y_3435 - AND pcl.year = pd.pin_year - LEFT JOIN community_area AS ca - ON pcl.x_3435 = ca.x_3435 - AND pcl.y_3435 = ca.y_3435 - AND pcl.year = ca.pin_year - LEFT JOIN industrial_corridor AS ic - ON pcl.x_3435 = ic.x_3435 - AND pcl.y_3435 = ic.y_3435 - AND pcl.year = ic.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'industrial_corridor') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'industrial_corridor') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM chicago; +SELECT + pcl.pin10, + ca.chicago_community_area_num, 
+ ca.chicago_community_area_name, + ca.chicago_community_area_data_year, + ic.chicago_industrial_corridor_num, + ic.chicago_industrial_corridor_name, + ic.chicago_industrial_corridor_data_year, + pd.chicago_police_district_num, + pd.chicago_police_district_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN police_district AS pd + ON pcl.x_3435 = pd.x_3435 + AND pcl.y_3435 = pd.y_3435 + AND pcl.year = pd.pin_year +LEFT JOIN community_area AS ca + ON pcl.x_3435 = ca.x_3435 + AND pcl.y_3435 = ca.y_3435 + AND pcl.year = ca.pin_year +LEFT JOIN industrial_corridor AS ic + ON pcl.x_3435 = ic.x_3435 + AND pcl.y_3435 = ic.y_3435 + AND pcl.year = ic.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-crosswalk_year_fill.sql b/aws-athena/ctas/location-crosswalk_year_fill.sql index 44e3e4a22..c1d36a96a 100644 --- a/aws-athena/ctas/location-crosswalk_year_fill.sql +++ b/aws-athena/ctas/location-crosswalk_year_fill.sql @@ -9,262 +9,258 @@ equivalent location data are filled thus: */ {{ config(materialized='table') }} -WITH crosswalk_year_fill AS ( - WITH unfilled AS ( - SELECT - pin.year, - MAX(census.census_data_year) - AS census_data_year, - MAX(census_acs5.census_acs5_data_year) - AS census_acs5_data_year, - MAX(political.cook_board_of_review_district_data_year) - AS cook_board_of_review_district_data_year, - MAX(political.cook_commissioner_district_data_year) - AS cook_commissioner_district_data_year, - MAX(political.cook_judicial_district_data_year) - AS cook_judicial_district_data_year, - MAX(political.ward_chicago_data_year) - AS ward_chicago_data_year, - MAX(political.ward_evanston_data_year) - AS ward_evanston_data_year, - MAX(chicago.chicago_community_area_data_year) - AS chicago_community_area_data_year, - MAX(chicago.chicago_industrial_corridor_data_year) - AS chicago_industrial_corridor_data_year, - MAX(chicago.chicago_police_district_data_year) - AS 
chicago_police_district_data_year, - MAX(economy.econ_coordinated_care_area_data_year) - AS econ_coordinated_care_area_data_year, - MAX(economy.econ_enterprise_zone_data_year) - AS econ_enterprise_zone_data_year, - MAX(economy.econ_industrial_growth_zone_data_year) - AS econ_industrial_growth_zone_data_year, - MAX(economy.econ_qualified_opportunity_zone_data_year) - AS econ_qualified_opportunity_zone_data_year, - MAX(environment.env_flood_fema_data_year) - AS env_flood_fema_data_year, - MAX(environment.env_flood_fs_data_year) - AS env_flood_fs_data_year, - MAX(environment.env_ohare_noise_contour_data_year) - AS env_ohare_noise_contour_data_year, - MAX(environment.env_airport_noise_data_year) - AS env_airport_noise_data_year, - MAX(school.school_data_year) - AS school_data_year, - MAX(tax.tax_data_year) - AS tax_data_year, - MAX(access.access_cmap_walk_data_year) - AS access_cmap_walk_data_year, - MAX(other.misc_subdivision_data_year) - AS misc_subdivision_data_year - - FROM ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ) AS pin - LEFT JOIN ( - SELECT DISTINCT - year, - census_data_year - FROM {{ ref('location.census') }} - ) AS census ON pin.year = census.year - LEFT JOIN ( - SELECT DISTINCT - year, - census_acs5_data_year - FROM {{ ref('location.census_acs5') }} - ) AS census_acs5 ON pin.year = census_acs5.year - LEFT JOIN ( - SELECT DISTINCT - year, - cook_board_of_review_district_data_year, - cook_commissioner_district_data_year, - cook_judicial_district_data_year, - ward_chicago_data_year, - ward_evanston_data_year - FROM {{ ref('location.political') }} - ) AS political ON pin.year = political.year - LEFT JOIN ( - SELECT DISTINCT - year, - chicago_community_area_data_year, - chicago_industrial_corridor_data_year, - chicago_police_district_data_year - FROM {{ ref('location.chicago') }} - ) AS chicago ON pin.year = chicago.year - LEFT JOIN ( - SELECT DISTINCT - year, - econ_coordinated_care_area_data_year, - econ_enterprise_zone_data_year, - 
econ_industrial_growth_zone_data_year, - econ_qualified_opportunity_zone_data_year - FROM {{ ref('location.economy') }} - ) AS economy ON pin.year = economy.year - LEFT JOIN ( - SELECT DISTINCT - year, - env_flood_fema_data_year, - env_flood_fs_data_year, - env_ohare_noise_contour_data_year, - env_airport_noise_data_year - FROM {{ ref('location.environment') }} - ) AS environment ON pin.year = environment.year - LEFT JOIN ( - SELECT DISTINCT - year, - school_data_year - FROM {{ ref('location.school') }} - ) AS school ON pin.year = school.year - LEFT JOIN ( - SELECT DISTINCT - year, - tax_data_year - FROM {{ ref('location.tax') }} - ) AS tax ON pin.year = tax.year - LEFT JOIN ( - SELECT DISTINCT - year, - access_cmap_walk_data_year - FROM {{ ref('location.access') }} - ) AS access ON pin.year = access.year - LEFT JOIN ( - SELECT DISTINCT - year, - misc_subdivision_data_year - FROM {{ ref('location.other') }} - ) AS other ON pin.year = other.year - GROUP BY pin.year - ) - +WITH unfilled AS ( SELECT - unfilled.year, - COALESCE( - census_data_year, LAST_VALUE(census_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS census_data_year, - COALESCE( - census_acs5_data_year, LAST_VALUE(census_acs5_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS census_acs5_data_year, - COALESCE( + pin.year, + MAX(census.census_data_year) + AS census_data_year, + MAX(census_acs5.census_acs5_data_year) + AS census_acs5_data_year, + MAX(political.cook_board_of_review_district_data_year) + AS cook_board_of_review_district_data_year, + MAX(political.cook_commissioner_district_data_year) + AS cook_commissioner_district_data_year, + MAX(political.cook_judicial_district_data_year) + AS cook_judicial_district_data_year, + MAX(political.ward_chicago_data_year) + AS ward_chicago_data_year, + MAX(political.ward_evanston_data_year) + AS ward_evanston_data_year, + MAX(chicago.chicago_community_area_data_year) + AS chicago_community_area_data_year, + 
MAX(chicago.chicago_industrial_corridor_data_year) + AS chicago_industrial_corridor_data_year, + MAX(chicago.chicago_police_district_data_year) + AS chicago_police_district_data_year, + MAX(economy.econ_coordinated_care_area_data_year) + AS econ_coordinated_care_area_data_year, + MAX(economy.econ_enterprise_zone_data_year) + AS econ_enterprise_zone_data_year, + MAX(economy.econ_industrial_growth_zone_data_year) + AS econ_industrial_growth_zone_data_year, + MAX(economy.econ_qualified_opportunity_zone_data_year) + AS econ_qualified_opportunity_zone_data_year, + MAX(environment.env_flood_fema_data_year) + AS env_flood_fema_data_year, + MAX(environment.env_flood_fs_data_year) + AS env_flood_fs_data_year, + MAX(environment.env_ohare_noise_contour_data_year) + AS env_ohare_noise_contour_data_year, + MAX(environment.env_airport_noise_data_year) + AS env_airport_noise_data_year, + MAX(school.school_data_year) + AS school_data_year, + MAX(tax.tax_data_year) + AS tax_data_year, + MAX(access.access_cmap_walk_data_year) + AS access_cmap_walk_data_year, + MAX(other.misc_subdivision_data_year) + AS misc_subdivision_data_year + + FROM ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} + ) AS pin + LEFT JOIN ( + SELECT DISTINCT + year, + census_data_year + FROM {{ ref('location.census') }} + ) AS census ON pin.year = census.year + LEFT JOIN ( + SELECT DISTINCT + year, + census_acs5_data_year + FROM {{ ref('location.census_acs5') }} + ) AS census_acs5 ON pin.year = census_acs5.year + LEFT JOIN ( + SELECT DISTINCT + year, cook_board_of_review_district_data_year, - LAST_VALUE(cook_board_of_review_district_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS cook_board_of_review_district_data_year, - COALESCE( cook_commissioner_district_data_year, - LAST_VALUE(cook_commissioner_district_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS cook_commissioner_district_data_year, - COALESCE( cook_judicial_district_data_year, - 
LAST_VALUE(cook_judicial_district_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS cook_judicial_district_data_year, - COALESCE( - ward_chicago_data_year, LAST_VALUE(ward_chicago_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS ward_chicago_data_year, - COALESCE( - ward_evanston_data_year, LAST_VALUE(ward_evanston_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS ward_evanston_data_year, - COALESCE( + ward_chicago_data_year, + ward_evanston_data_year + FROM {{ ref('location.political') }} + ) AS political ON pin.year = political.year + LEFT JOIN ( + SELECT DISTINCT + year, chicago_community_area_data_year, - LAST_VALUE(chicago_community_area_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS chicago_community_area_data_year, - COALESCE( chicago_industrial_corridor_data_year, - LAST_VALUE(chicago_industrial_corridor_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS chicago_industrial_corridor_data_year, - COALESCE( - chicago_police_district_data_year, - LAST_VALUE(chicago_police_district_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS chicago_police_district_data_year, - COALESCE( + chicago_police_district_data_year + FROM {{ ref('location.chicago') }} + ) AS chicago ON pin.year = chicago.year + LEFT JOIN ( + SELECT DISTINCT + year, econ_coordinated_care_area_data_year, - LAST_VALUE(econ_coordinated_care_area_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS econ_coordinated_care_area_data_year, - COALESCE( econ_enterprise_zone_data_year, - LAST_VALUE(econ_enterprise_zone_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS econ_enterprise_zone_data_year, - COALESCE( econ_industrial_growth_zone_data_year, - LAST_VALUE(econ_industrial_growth_zone_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS econ_industrial_growth_zone_data_year, - COALESCE( - econ_qualified_opportunity_zone_data_year, 
- LAST_VALUE(econ_qualified_opportunity_zone_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS econ_qualified_opportunity_zone_data_year, - COALESCE( - env_flood_fema_data_year, LAST_VALUE(env_flood_fema_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS env_flood_fema_data_year, - COALESCE( - env_flood_fs_data_year, LAST_VALUE(env_flood_fs_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS env_flood_fs_data_year, - COALESCE( + econ_qualified_opportunity_zone_data_year + FROM {{ ref('location.economy') }} + ) AS economy ON pin.year = economy.year + LEFT JOIN ( + SELECT DISTINCT + year, + env_flood_fema_data_year, + env_flood_fs_data_year, env_ohare_noise_contour_data_year, - LAST_VALUE(env_ohare_noise_contour_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS env_ohare_noise_contour_data_year, - COALESCE( - env_airport_noise_data_year, - LAST_VALUE(env_airport_noise_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS env_airport_noise_data_year, - COALESCE( - school_data_year, LAST_VALUE(school_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS school_data_year, - COALESCE( - tax_data_year, LAST_VALUE(tax_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS tax_data_year, - COALESCE( - access_cmap_walk_data_year, - LAST_VALUE(access_cmap_walk_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS access_cmap_walk_data_year, - COALESCE( - misc_subdivision_data_year, - LAST_VALUE(misc_subdivision_data_year) - IGNORE NULLS - OVER (ORDER BY unfilled.year DESC) - ) AS misc_subdivision_data_year - FROM unfilled - ORDER BY unfilled.year + env_airport_noise_data_year + FROM {{ ref('location.environment') }} + ) AS environment ON pin.year = environment.year + LEFT JOIN ( + SELECT DISTINCT + year, + school_data_year + FROM {{ ref('location.school') }} + ) AS school ON pin.year = school.year + LEFT JOIN ( + SELECT DISTINCT + year, + 
tax_data_year + FROM {{ ref('location.tax') }} + ) AS tax ON pin.year = tax.year + LEFT JOIN ( + SELECT DISTINCT + year, + access_cmap_walk_data_year + FROM {{ ref('location.access') }} + ) AS access ON pin.year = access.year + LEFT JOIN ( + SELECT DISTINCT + year, + misc_subdivision_data_year + FROM {{ ref('location.other') }} + ) AS other ON pin.year = other.year + GROUP BY pin.year ) -SELECT * FROM crosswalk_year_fill +SELECT + unfilled.year, + COALESCE( + census_data_year, LAST_VALUE(census_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS census_data_year, + COALESCE( + census_acs5_data_year, LAST_VALUE(census_acs5_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS census_acs5_data_year, + COALESCE( + cook_board_of_review_district_data_year, + LAST_VALUE(cook_board_of_review_district_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS cook_board_of_review_district_data_year, + COALESCE( + cook_commissioner_district_data_year, + LAST_VALUE(cook_commissioner_district_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS cook_commissioner_district_data_year, + COALESCE( + cook_judicial_district_data_year, + LAST_VALUE(cook_judicial_district_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS cook_judicial_district_data_year, + COALESCE( + ward_chicago_data_year, LAST_VALUE(ward_chicago_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS ward_chicago_data_year, + COALESCE( + ward_evanston_data_year, LAST_VALUE(ward_evanston_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS ward_evanston_data_year, + COALESCE( + chicago_community_area_data_year, + LAST_VALUE(chicago_community_area_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS chicago_community_area_data_year, + COALESCE( + chicago_industrial_corridor_data_year, + LAST_VALUE(chicago_industrial_corridor_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + 
) AS chicago_industrial_corridor_data_year, + COALESCE( + chicago_police_district_data_year, + LAST_VALUE(chicago_police_district_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS chicago_police_district_data_year, + COALESCE( + econ_coordinated_care_area_data_year, + LAST_VALUE(econ_coordinated_care_area_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS econ_coordinated_care_area_data_year, + COALESCE( + econ_enterprise_zone_data_year, + LAST_VALUE(econ_enterprise_zone_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS econ_enterprise_zone_data_year, + COALESCE( + econ_industrial_growth_zone_data_year, + LAST_VALUE(econ_industrial_growth_zone_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS econ_industrial_growth_zone_data_year, + COALESCE( + econ_qualified_opportunity_zone_data_year, + LAST_VALUE(econ_qualified_opportunity_zone_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS econ_qualified_opportunity_zone_data_year, + COALESCE( + env_flood_fema_data_year, LAST_VALUE(env_flood_fema_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS env_flood_fema_data_year, + COALESCE( + env_flood_fs_data_year, LAST_VALUE(env_flood_fs_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS env_flood_fs_data_year, + COALESCE( + env_ohare_noise_contour_data_year, + LAST_VALUE(env_ohare_noise_contour_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS env_ohare_noise_contour_data_year, + COALESCE( + env_airport_noise_data_year, + LAST_VALUE(env_airport_noise_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS env_airport_noise_data_year, + COALESCE( + school_data_year, LAST_VALUE(school_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS school_data_year, + COALESCE( + tax_data_year, LAST_VALUE(tax_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS tax_data_year, + COALESCE( + 
access_cmap_walk_data_year, + LAST_VALUE(access_cmap_walk_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS access_cmap_walk_data_year, + COALESCE( + misc_subdivision_data_year, + LAST_VALUE(misc_subdivision_data_year) + IGNORE NULLS + OVER (ORDER BY unfilled.year DESC) + ) AS misc_subdivision_data_year +FROM unfilled +ORDER BY unfilled.year diff --git a/aws-athena/ctas/location-economy.sql b/aws-athena/ctas/location-economy.sql index 13d173fa2..28312e75a 100644 --- a/aws-athena/ctas/location-economy.sql +++ b/aws-athena/ctas/location-economy.sql @@ -7,187 +7,183 @@ ) }} -WITH economy AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year FROM {{ source('spatial', 'coordinated_care') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'enterprise_zone') }} - UNION ALL - SELECT DISTINCT year - FROM {{ source('spatial', 'industrial_growth_zone') }} - UNION ALL - SELECT DISTINCT year - FROM {{ source('spatial', 'qualified_opportunity_zone') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year FROM {{ source('spatial', 'coordinated_care') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 'enterprise_zone') }} + UNION ALL + SELECT DISTINCT year + FROM {{ source('spatial', 'industrial_growth_zone') }} + UNION ALL + SELECT DISTINCT year + FROM {{ source('spatial', 'qualified_opportunity_zone') }} +), - coordinated_care AS ( +coordinated_care AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.cc_num) AS econ_coordinated_care_area_num, + MAX(cprod.year) AS econ_coordinated_care_area_data_year, + cprod.pin_year + FROM distinct_pins AS dp 
+ LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.cc_num) AS econ_coordinated_care_area_num, - MAX(cprod.year) AS econ_coordinated_care_area_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'coordinated_care') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'coordinated_care') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'coordinated_care') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'coordinated_care') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - enterprise_zone AS ( +enterprise_zone AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.ez_num) AS econ_enterprise_zone_num, + MAX(cprod.year) AS econ_enterprise_zone_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.ez_num) AS econ_enterprise_zone_num, - MAX(cprod.year) AS econ_enterprise_zone_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'enterprise_zone') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - 
GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'enterprise_zone') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'enterprise_zone') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'enterprise_zone') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - industrial_growth_zone AS ( +industrial_growth_zone AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.igz_num) AS econ_industrial_growth_zone_num, + MAX(cprod.year) AS econ_industrial_growth_zone_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.igz_num) AS econ_industrial_growth_zone_num, - MAX(cprod.year) AS econ_industrial_growth_zone_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'industrial_growth_zone') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'industrial_growth_zone') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'industrial_growth_zone') }} AS df + CROSS JOIN 
distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'industrial_growth_zone') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - qualified_opportunity_zone AS ( +qualified_opportunity_zone AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.geoid) AS econ_qualified_opportunity_zone_num, + MAX(cprod.year) AS econ_qualified_opportunity_zone_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.geoid) AS econ_qualified_opportunity_zone_num, - MAX(cprod.year) AS econ_qualified_opportunity_zone_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'qualified_opportunity_zone') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'qualified_opportunity_zone') }} - AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) - - SELECT - pcl.pin10, - cc.econ_coordinated_care_area_num, - cc.econ_coordinated_care_area_data_year, - ez.econ_enterprise_zone_num, - ez.econ_enterprise_zone_data_year, - igz.econ_industrial_growth_zone_num, - igz.econ_industrial_growth_zone_data_year, - qoz.econ_qualified_opportunity_zone_num, - qoz.econ_qualified_opportunity_zone_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN coordinated_care AS cc - ON pcl.x_3435 = cc.x_3435 - AND pcl.y_3435 = cc.y_3435 - AND pcl.year = cc.pin_year 
- LEFT JOIN enterprise_zone AS ez - ON pcl.x_3435 = ez.x_3435 - AND pcl.y_3435 = ez.y_3435 - AND pcl.year = ez.pin_year - LEFT JOIN industrial_growth_zone AS igz - ON pcl.x_3435 = igz.x_3435 - AND pcl.y_3435 = igz.y_3435 - AND pcl.year = igz.pin_year - LEFT JOIN qualified_opportunity_zone AS qoz - ON pcl.x_3435 = qoz.x_3435 - AND pcl.y_3435 = qoz.y_3435 - AND pcl.year = qoz.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'qualified_opportunity_zone') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'qualified_opportunity_zone') }} + AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM economy +SELECT + pcl.pin10, + cc.econ_coordinated_care_area_num, + cc.econ_coordinated_care_area_data_year, + ez.econ_enterprise_zone_num, + ez.econ_enterprise_zone_data_year, + igz.econ_industrial_growth_zone_num, + igz.econ_industrial_growth_zone_data_year, + qoz.econ_qualified_opportunity_zone_num, + qoz.econ_qualified_opportunity_zone_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN coordinated_care AS cc + ON pcl.x_3435 = cc.x_3435 + AND pcl.y_3435 = cc.y_3435 + AND pcl.year = cc.pin_year +LEFT JOIN enterprise_zone AS ez + ON pcl.x_3435 = ez.x_3435 + AND pcl.y_3435 = ez.y_3435 + AND pcl.year = ez.pin_year +LEFT JOIN industrial_growth_zone AS igz + ON pcl.x_3435 = igz.x_3435 + AND pcl.y_3435 = igz.y_3435 + AND pcl.year = igz.pin_year +LEFT JOIN qualified_opportunity_zone AS qoz + ON pcl.x_3435 = qoz.x_3435 + AND pcl.y_3435 = qoz.y_3435 + AND pcl.year = qoz.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-environment.sql 
b/aws-athena/ctas/location-environment.sql index dbcccea27..47481fc7a 100644 --- a/aws-athena/ctas/location-environment.sql +++ b/aws-athena/ctas/location-environment.sql @@ -7,204 +7,200 @@ ) }} -WITH environment AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year FROM {{ source('spatial', 'flood_fema') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('other', 'flood_first_street') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'ohare_noise_contour') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('other', 'airport_noise') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year FROM {{ source('spatial', 'flood_fema') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('other', 'flood_first_street') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 'ohare_noise_contour') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('other', 'airport_noise') }} +), - ohare_years AS ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'ohare_noise_contour') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ), +ohare_years AS ( + SELECT + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'ohare_noise_contour') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year +), - flood_fema AS ( +flood_fema AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.fema_special_flood_hazard_area) AS env_flood_fema_sfha, + MAX(cprod.year) AS env_flood_fema_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( 
SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.fema_special_flood_hazard_area) AS env_flood_fema_sfha, - MAX(cprod.year) AS env_flood_fema_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'flood_fema') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'flood_fema') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'flood_fema') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'flood_fema') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - ohare_noise_contour_0000 AS ( +ohare_noise_contour_0000 AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.airport) AS airport, + MAX(cprod.year) AS year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.airport) AS airport, - MAX(cprod.year) AS year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'ohare_noise_contour') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'ohare_noise_contour') }} AS 
fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'ohare_noise_contour') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'ohare_noise_contour') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - ohare_noise_contour_2640 AS ( +ohare_noise_contour_2640 AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.airport) AS airport, + MAX(cprod.year) AS year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.airport) AS airport, - MAX(cprod.year) AS year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'ohare_noise_contour') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'ohare_noise_contour') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_BUFFER(ST_GEOMFROMBINARY(cprod.geometry_3435), 2640) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) - - SELECT - pcl.pin10, - flood_fema.env_flood_fema_sfha, - flood_fema.env_flood_fema_data_year, - flood_first_street.fs_flood_factor AS env_flood_fs_factor, - flood_first_street.fs_flood_risk_direction - AS env_flood_fs_risk_direction, - flood_first_street.year AS env_flood_fs_data_year, - CASE - WHEN - pcl.year >= 
oy.fill_year AND onc0000.airport IS NOT NULL - THEN TRUE - WHEN pcl.year >= oy.fill_year AND onc0000.airport IS NULL THEN FALSE - END AS env_ohare_noise_contour_no_buffer_bool, - CASE - WHEN - pcl.year >= oy.fill_year AND onc2640.airport IS NOT NULL - THEN TRUE - WHEN pcl.year >= oy.fill_year AND onc2640.airport IS NULL THEN FALSE - END AS env_ohare_noise_contour_half_mile_buffer_bool, - CASE - WHEN pcl.year >= oy.fill_year THEN oy.fill_year - END AS env_ohare_noise_contour_data_year, - CASE - WHEN pcl.year <= '2020' THEN an.airport_noise_dnl - WHEN pcl.year > '2020' THEN omp.airport_noise_dnl - ELSE 52.5 - END AS env_airport_noise_dnl, - CASE - WHEN pcl.year <= '2020' THEN an.year - ELSE 'omp' - END AS env_airport_noise_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN flood_fema - ON pcl.x_3435 = flood_fema.x_3435 - AND pcl.y_3435 = flood_fema.y_3435 - AND pcl.year = flood_fema.pin_year - LEFT JOIN {{ source('other', 'flood_first_street') }} AS flood_first_street - ON pcl.pin10 = flood_first_street.pin10 - AND pcl.year >= flood_first_street.year - LEFT JOIN - ( - SELECT * - FROM {{ source('other', 'airport_noise') }} - WHERE year != 'omp' + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'ohare_noise_contour') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'ohare_noise_contour') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_BUFFER(ST_GEOMFROMBINARY(cprod.geometry_3435), 2640) ) - AS an - ON pcl.pin10 = an.pin10 - AND pcl.year = an.year - LEFT JOIN - ( - SELECT * - FROM {{ source('other', 'airport_noise') }} - WHERE year = 'omp' - ) - AS omp - ON pcl.pin10 = omp.pin10 - AND pcl.year >= '2021' - LEFT JOIN ohare_years AS oy - ON pcl.year = oy.pin_year - LEFT JOIN ohare_noise_contour_0000 AS onc0000 - ON pcl.x_3435 = onc0000.x_3435 - 
AND pcl.y_3435 = onc0000.y_3435 - AND pcl.year = onc0000.pin_year - LEFT JOIN ohare_noise_contour_2640 AS onc2640 - ON pcl.x_3435 = onc2640.x_3435 - AND pcl.y_3435 = onc2640.y_3435 - AND pcl.year = onc2640.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM environment +SELECT + pcl.pin10, + flood_fema.env_flood_fema_sfha, + flood_fema.env_flood_fema_data_year, + flood_first_street.fs_flood_factor AS env_flood_fs_factor, + flood_first_street.fs_flood_risk_direction + AS env_flood_fs_risk_direction, + flood_first_street.year AS env_flood_fs_data_year, + CASE + WHEN + pcl.year >= oy.fill_year AND onc0000.airport IS NOT NULL + THEN TRUE + WHEN pcl.year >= oy.fill_year AND onc0000.airport IS NULL THEN FALSE + END AS env_ohare_noise_contour_no_buffer_bool, + CASE + WHEN + pcl.year >= oy.fill_year AND onc2640.airport IS NOT NULL + THEN TRUE + WHEN pcl.year >= oy.fill_year AND onc2640.airport IS NULL THEN FALSE + END AS env_ohare_noise_contour_half_mile_buffer_bool, + CASE + WHEN pcl.year >= oy.fill_year THEN oy.fill_year + END AS env_ohare_noise_contour_data_year, + CASE + WHEN pcl.year <= '2020' THEN an.airport_noise_dnl + WHEN pcl.year > '2020' THEN omp.airport_noise_dnl + ELSE 52.5 + END AS env_airport_noise_dnl, + CASE + WHEN pcl.year <= '2020' THEN an.year + ELSE 'omp' + END AS env_airport_noise_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN flood_fema + ON pcl.x_3435 = flood_fema.x_3435 + AND pcl.y_3435 = flood_fema.y_3435 + AND pcl.year = flood_fema.pin_year +LEFT JOIN {{ source('other', 'flood_first_street') }} AS flood_first_street + ON pcl.pin10 = flood_first_street.pin10 + AND pcl.year >= flood_first_street.year +LEFT JOIN + ( + SELECT * + FROM {{ source('other', 'airport_noise') }} + WHERE year != 'omp' + ) + AS an + ON pcl.pin10 = an.pin10 + AND pcl.year = an.year +LEFT JOIN + ( + SELECT * + FROM {{ source('other', 'airport_noise') }} + WHERE 
year = 'omp' + ) + AS omp + ON pcl.pin10 = omp.pin10 + AND pcl.year >= '2021' +LEFT JOIN ohare_years AS oy + ON pcl.year = oy.pin_year +LEFT JOIN ohare_noise_contour_0000 AS onc0000 + ON pcl.x_3435 = onc0000.x_3435 + AND pcl.y_3435 = onc0000.y_3435 + AND pcl.year = onc0000.pin_year +LEFT JOIN ohare_noise_contour_2640 AS onc2640 + ON pcl.x_3435 = onc2640.x_3435 + AND pcl.y_3435 = onc2640.y_3435 + AND pcl.year = onc2640.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-other.sql b/aws-athena/ctas/location-other.sql index 68a4b941d..06209ab03 100644 --- a/aws-athena/ctas/location-other.sql +++ b/aws-athena/ctas/location-other.sql @@ -7,69 +7,65 @@ ) }} -WITH other AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT '2021' AS year - FROM {{ source('spatial', 'subdivision') }} - UNION ALL - SELECT DISTINCT '2014' AS year - FROM {{ source('spatial', 'enterprise_zone') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT '2021' AS year + FROM {{ source('spatial', 'subdivision') }} + UNION ALL + SELECT DISTINCT '2014' AS year + FROM {{ source('spatial', 'enterprise_zone') }} +), - subdivision AS ( +subdivision AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.pagesubref) AS misc_subdivision_id, + MAX(cprod.year) AS misc_subdivision_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.pagesubref) AS misc_subdivision_id, - MAX(cprod.year) AS misc_subdivision_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( 
SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'subdivision') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'subdivision') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) - - SELECT - pcl.pin10, - sub.misc_subdivision_id, - sub.misc_subdivision_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN subdivision AS sub - ON pcl.x_3435 = sub.x_3435 - AND pcl.y_3435 = sub.y_3435 - AND pcl.year = sub.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'subdivision') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'subdivision') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM other +SELECT + pcl.pin10, + sub.misc_subdivision_id, + sub.misc_subdivision_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN subdivision AS sub + ON pcl.x_3435 = sub.x_3435 + AND pcl.y_3435 = sub.y_3435 + AND pcl.year = sub.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-political.sql b/aws-athena/ctas/location-political.sql index 825fd3050..65522639d 100644 --- a/aws-athena/ctas/location-political.sql +++ b/aws-athena/ctas/location-political.sql @@ -7,260 +7,256 @@ ) }} -WITH political AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM 
{{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'board_of_review_district') }} - UNION ALL - SELECT DISTINCT year - FROM {{ source('spatial', 'commissioner_district') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'judicial_district') }} - UNION ALL - SELECT DISTINCT year FROM {{ source('spatial', 'ward') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'board_of_review_district') }} + UNION ALL + SELECT DISTINCT year + FROM {{ source('spatial', 'commissioner_district') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 'judicial_district') }} + UNION ALL + SELECT DISTINCT year FROM {{ source('spatial', 'ward') }} +), - board_of_review_district AS ( - SELECT - dp.x_3435, - dp.y_3435, - MAX( +board_of_review_district AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX( + CAST( CAST( - CAST( - cprod.board_of_review_district_num AS INTEGER - ) AS VARCHAR - ) - ) AS cook_board_of_review_district_num, - MAX(cprod.year) AS cook_board_of_review_district_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'board_of_review_district') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'board_of_review_district') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) + cprod.board_of_review_district_num AS INTEGER + ) 
AS VARCHAR ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), - - commissioner_district AS ( + ) AS cook_board_of_review_district_num, + MAX(cprod.year) AS cook_board_of_review_district_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX( - CAST( - CAST(cprod.commissioner_district_num AS INTEGER) AS VARCHAR - ) - ) AS cook_commissioner_district_num, - MAX(cprod.year) AS cook_commissioner_district_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'commissioner_district') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN - {{ source('spatial', 'commissioner_district') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'board_of_review_district') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'board_of_review_district') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - judicial_district AS ( +commissioner_district AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX( + CAST( + CAST(cprod.commissioner_district_num AS INTEGER) AS VARCHAR + ) + ) AS cook_commissioner_district_num, + MAX(cprod.year) AS cook_commissioner_district_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - 
MAX(CAST(CAST(cprod.judicial_district_num AS INTEGER) AS VARCHAR)) - AS cook_judicial_district_num, - MAX(cprod.year) AS cook_judicial_district_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'judicial_district') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'judicial_district') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'commissioner_district') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN + {{ source('spatial', 'commissioner_district') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - ward_chicago AS ( +judicial_district AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(CAST(CAST(cprod.judicial_district_num AS INTEGER) AS VARCHAR)) + AS cook_judicial_district_num, + MAX(cprod.year) AS cook_judicial_district_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.ward_num) AS ward_num, - MAX(cprod.ward_name) AS ward_name, - MAX( - CASE - WHEN - SUBSTR(cprod.ward_name, 1, 1) = 'c' - THEN cprod.year - END - ) AS ward_chicago_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - 
MAX( - CASE - WHEN - SUBSTR(df.ward_name, 1, 1) = 'c' - THEN df.year - END - ) AS fill_year - FROM {{ source('spatial', 'ward') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'ward') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'judicial_district') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'judicial_district') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), - ward_evanston AS ( +ward_chicago AS ( + SELECT + dp.x_3435, + dp.y_3435, + MAX(cprod.ward_num) AS ward_num, + MAX(cprod.ward_name) AS ward_name, + MAX( + CASE + WHEN + SUBSTR(cprod.ward_name, 1, 1) = 'c' + THEN cprod.year + END + ) AS ward_chicago_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( SELECT - dp.x_3435, - dp.y_3435, - MAX(cprod.ward_num) AS ward_num, - MAX(cprod.ward_name) AS ward_name, - MAX( - CASE - WHEN - SUBSTR(cprod.ward_name, 1, 1) = 'e' - THEN cprod.year - END - ) AS ward_evanston_data_year, - cprod.pin_year - FROM distinct_pins AS dp - LEFT JOIN ( + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX( - CASE - WHEN - SUBSTR(df.ward_name, 1, 1) = 'e' - THEN df.year - END - ) AS fill_year - FROM {{ source('spatial', 'ward') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'ward') }} AS fill_data - 
ON fill_years.fill_year = fill_data.year - ) AS cprod - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(cprod.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year - ) + dy.year AS pin_year, + MAX( + CASE + WHEN + SUBSTR(df.ward_name, 1, 1) = 'c' + THEN df.year + END + ) AS fill_year + FROM {{ source('spatial', 'ward') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'ward') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year +), +ward_evanston AS ( SELECT - pcl.pin10, - brd.cook_board_of_review_district_num, - brd.cook_board_of_review_district_data_year, - cd.cook_commissioner_district_num, - cd.cook_commissioner_district_data_year, - jd.cook_judicial_district_num, - jd.cook_judicial_district_data_year, - COALESCE(we.ward_num, wc.ward_num) AS ward_num, - COALESCE(we.ward_name, wc.ward_name) AS ward_name, - wc.ward_chicago_data_year, - we.ward_evanston_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN board_of_review_district AS brd - ON pcl.x_3435 = brd.x_3435 - AND pcl.y_3435 = brd.y_3435 - AND pcl.year = brd.pin_year - LEFT JOIN commissioner_district AS cd - ON pcl.x_3435 = cd.x_3435 - AND pcl.y_3435 = cd.y_3435 - AND pcl.year = cd.pin_year - LEFT JOIN judicial_district AS jd - ON pcl.x_3435 = jd.x_3435 - AND pcl.y_3435 = jd.y_3435 - AND pcl.year = jd.pin_year - LEFT JOIN ward_chicago AS wc - ON pcl.x_3435 = wc.x_3435 - AND pcl.y_3435 = wc.y_3435 - AND pcl.year = wc.pin_year - LEFT JOIN ward_evanston AS we - ON pcl.x_3435 = we.x_3435 - AND pcl.y_3435 = we.y_3435 - AND pcl.year = we.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dp.x_3435, + dp.y_3435, + MAX(cprod.ward_num) AS ward_num, + MAX(cprod.ward_name) AS ward_name, 
+ MAX( + CASE + WHEN + SUBSTR(cprod.ward_name, 1, 1) = 'e' + THEN cprod.year + END + ) AS ward_evanston_data_year, + cprod.pin_year + FROM distinct_pins AS dp + LEFT JOIN ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( + SELECT + dy.year AS pin_year, + MAX( + CASE + WHEN + SUBSTR(df.ward_name, 1, 1) = 'e' + THEN df.year + END + ) AS fill_year + FROM {{ source('spatial', 'ward') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'ward') }} AS fill_data + ON fill_years.fill_year = fill_data.year + ) AS cprod + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(cprod.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year ) -SELECT * FROM political +SELECT + pcl.pin10, + brd.cook_board_of_review_district_num, + brd.cook_board_of_review_district_data_year, + cd.cook_commissioner_district_num, + cd.cook_commissioner_district_data_year, + jd.cook_judicial_district_num, + jd.cook_judicial_district_data_year, + COALESCE(we.ward_num, wc.ward_num) AS ward_num, + COALESCE(we.ward_name, wc.ward_name) AS ward_name, + wc.ward_chicago_data_year, + we.ward_evanston_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN board_of_review_district AS brd + ON pcl.x_3435 = brd.x_3435 + AND pcl.y_3435 = brd.y_3435 + AND pcl.year = brd.pin_year +LEFT JOIN commissioner_district AS cd + ON pcl.x_3435 = cd.x_3435 + AND pcl.y_3435 = cd.y_3435 + AND pcl.year = cd.pin_year +LEFT JOIN judicial_district AS jd + ON pcl.x_3435 = jd.x_3435 + AND pcl.y_3435 = jd.y_3435 + AND pcl.year = jd.pin_year +LEFT JOIN ward_chicago AS wc + ON pcl.x_3435 = wc.x_3435 + AND pcl.y_3435 = wc.y_3435 + AND pcl.year = wc.pin_year +LEFT JOIN ward_evanston AS we + ON pcl.x_3435 = we.x_3435 + AND pcl.y_3435 = we.y_3435 + AND pcl.year = we.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-school.sql 
b/aws-athena/ctas/location-school.sql index 7c1da8d99..070008ce5 100644 --- a/aws-athena/ctas/location-school.sql +++ b/aws-athena/ctas/location-school.sql @@ -7,70 +7,66 @@ ) }} -WITH school AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'school_district') }} - WHERE geoid IS NOT NULL - ), - - distinct_joined AS ( - SELECT - dp.x_3435, - dp.y_3435, - MAX(CASE - WHEN school.district_type = 'elementary' THEN school.geoid - END) AS school_elementary_district_geoid, - MAX(CASE - WHEN school.district_type = 'elementary' THEN school.name - END) AS school_elementary_district_name, - MAX(CASE - WHEN school.district_type = 'secondary' THEN school.geoid - END) AS school_secondary_district_geoid, - MAX(CASE - WHEN school.district_type = 'secondary' THEN school.name - END) AS school_secondary_district_name, - MAX(CASE - WHEN school.district_type = 'unified' THEN school.geoid - END) AS school_unified_district_geoid, - MAX(CASE - WHEN school.district_type = 'unified' THEN school.name - END) AS school_unified_district_name, - school.year - FROM distinct_pins AS dp - LEFT JOIN {{ source('spatial', 'school_district') }} AS school - ON ST_WITHIN( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(school.geometry_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, school.year - ) +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'school_district') }} + WHERE geoid IS NOT NULL +), +distinct_joined AS ( SELECT - pcl.pin10, - dj.school_elementary_district_geoid, - dj.school_elementary_district_name, - dj.school_secondary_district_geoid, - dj.school_secondary_district_name, - dj.school_unified_district_geoid, - dj.school_unified_district_name, - CONCAT(CAST(CAST(dj.year AS INTEGER) - 1 AS VARCHAR), ' - ', dj.year) - AS 
school_school_year, - dj.year AS school_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN distinct_joined AS dj - ON pcl.year = dj.year - AND pcl.x_3435 = dj.x_3435 - AND pcl.y_3435 = dj.y_3435 - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dp.x_3435, + dp.y_3435, + MAX(CASE + WHEN school.district_type = 'elementary' THEN school.geoid + END) AS school_elementary_district_geoid, + MAX(CASE + WHEN school.district_type = 'elementary' THEN school.name + END) AS school_elementary_district_name, + MAX(CASE + WHEN school.district_type = 'secondary' THEN school.geoid + END) AS school_secondary_district_geoid, + MAX(CASE + WHEN school.district_type = 'secondary' THEN school.name + END) AS school_secondary_district_name, + MAX(CASE + WHEN school.district_type = 'unified' THEN school.geoid + END) AS school_unified_district_geoid, + MAX(CASE + WHEN school.district_type = 'unified' THEN school.name + END) AS school_unified_district_name, + school.year + FROM distinct_pins AS dp + LEFT JOIN {{ source('spatial', 'school_district') }} AS school + ON ST_WITHIN( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(school.geometry_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, school.year ) -SELECT * FROM school +SELECT + pcl.pin10, + dj.school_elementary_district_geoid, + dj.school_elementary_district_name, + dj.school_secondary_district_geoid, + dj.school_secondary_district_name, + dj.school_unified_district_geoid, + dj.school_unified_district_name, + CONCAT(CAST(CAST(dj.year AS INTEGER) - 1 AS VARCHAR), ' - ', dj.year) + AS school_school_year, + dj.year AS school_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN distinct_joined AS dj + ON pcl.year = dj.year + AND pcl.x_3435 = dj.x_3435 + AND pcl.y_3435 = dj.y_3435 +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/location-tax.sql b/aws-athena/ctas/location-tax.sql index 91812ccd5..4033b4ac9 100644 --- 
a/aws-athena/ctas/location-tax.sql +++ b/aws-athena/ctas/location-tax.sql @@ -7,230 +7,226 @@ ) }} -WITH tax AS ( - WITH long AS ( - SELECT - pcl.pin10, - pcl.year, - pcl.tax_code, - tc.agency_num, - ai.agency_name, - ai.major_type, - ai.minor_type, - tc.year AS tax_data_year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN {{ source('tax', 'tax_code') }} AS tc - ON pcl.tax_code = tc.tax_code_num - AND pcl.year = tc.year - LEFT JOIN {{ source('tax', 'agency_info') }} AS ai - ON tc.agency_num = ai.agency_num - WHERE ai.minor_type IN ( - 'MUNI', 'ELEMENTARY', 'SECONDARY', 'UNIFIED', 'COMM COLL', - 'FIRE', 'LIBRARY', 'PARK', 'SANITARY', 'SSA', 'TIF' - ) - ), - - wide AS ( - SELECT - pin10, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'MUNI' THEN agency_num END - ), - x -> x IS NOT NULL - ) AS tax_municipality_num, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'MUNI' THEN agency_name END - ), - x -> x IS NOT NULL - ) AS tax_municipality_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'ELEMENTARY' THEN agency_num - END - ), - x -> x IS NOT NULL - ) AS tax_school_elementary_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'ELEMENTARY' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_school_elementary_district_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'SECONDARY' THEN agency_num - END - ), - x -> x IS NOT NULL - ) AS tax_school_secondary_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'SECONDARY' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_school_secondary_district_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'UNIFIED' THEN agency_num - END - ), - x -> x IS NOT NULL - ) AS tax_school_unified_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'UNIFIED' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_school_unified_district_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'COMM COLL' THEN agency_num - END - ), - x -> x IS NOT NULL - ) 
AS tax_community_college_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'COMM COLL' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_community_college_district_name, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'FIRE' THEN agency_num END - ), - x -> x IS NOT NULL - ) AS tax_fire_protection_district_num, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'FIRE' THEN agency_name END - ), - x -> x IS NOT NULL - ) AS tax_fire_protection_district_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'LIBRARY' THEN agency_num - END - ), - x -> x IS NOT NULL - ) AS tax_library_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'LIBRARY' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_library_district_name, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'PARK' THEN agency_num END - ), - x -> x IS NOT NULL - ) AS tax_park_district_num, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'PARK' THEN agency_name END - ), - x -> x IS NOT NULL - ) AS tax_park_district_name, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'SANITARY' THEN agency_num - END - ), - x -> x IS NOT NULL - ) AS tax_sanitation_district_num, - FILTER( - ARRAY_AGG( - CASE - WHEN minor_type = 'SANITARY' THEN agency_name - END - ), - x -> x IS NOT NULL - ) AS tax_sanitation_district_name, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'SSA' THEN agency_num END - ), - x -> x IS NOT NULL - ) AS tax_special_service_area_num, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'SSA' THEN agency_name END - ), - x -> x IS NOT NULL - ) AS tax_special_service_area_name, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'TIF' THEN agency_num END - ), - x -> x IS NOT NULL - ) AS tax_tif_district_num, - FILTER( - ARRAY_AGG( - CASE WHEN minor_type = 'TIF' THEN agency_name END - ), - x -> x IS NOT NULL - ) AS tax_tif_district_name, - tax_data_year, - year - FROM long - GROUP BY pin10, year, tax_data_year - ) - +WITH long AS ( SELECT pcl.pin10, - wide.tax_municipality_num, 
- wide.tax_municipality_name, - wide.tax_school_elementary_district_num, - wide.tax_school_elementary_district_name, - wide.tax_school_secondary_district_num, - wide.tax_school_secondary_district_name, - wide.tax_school_unified_district_num, - wide.tax_school_unified_district_name, - wide.tax_community_college_district_num, - wide.tax_community_college_district_name, - wide.tax_fire_protection_district_num, - wide.tax_fire_protection_district_name, - wide.tax_library_district_num, - wide.tax_library_district_name, - wide.tax_park_district_num, - wide.tax_park_district_name, - wide.tax_sanitation_district_num, - wide.tax_sanitation_district_name, - wide.tax_special_service_area_num, - wide.tax_special_service_area_name, - wide.tax_tif_district_num, - wide.tax_tif_district_name, - wide.tax_data_year, - pcl.year + pcl.year, + pcl.tax_code, + tc.agency_num, + ai.agency_name, + ai.major_type, + ai.minor_type, + tc.year AS tax_data_year FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN wide - ON pcl.pin10 = wide.pin10 - -- Join syntax here forward fills with most recent non-null value. 
- AND ( - CASE WHEN pcl.year > (SELECT MAX(year) FROM wide) - THEN (SELECT MAX(year) FROM wide) - ELSE pcl.year - END = wide.year + INNER JOIN {{ source('tax', 'tax_code') }} AS tc + ON pcl.tax_code = tc.tax_code_num + AND pcl.year = tc.year + LEFT JOIN {{ source('tax', 'agency_info') }} AS ai + ON tc.agency_num = ai.agency_num + WHERE ai.minor_type IN ( + 'MUNI', 'ELEMENTARY', 'SECONDARY', 'UNIFIED', 'COMM COLL', + 'FIRE', 'LIBRARY', 'PARK', 'SANITARY', 'SSA', 'TIF' ) +), + +wide AS ( + SELECT + pin10, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'MUNI' THEN agency_num END + ), + x -> x IS NOT NULL + ) AS tax_municipality_num, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'MUNI' THEN agency_name END + ), + x -> x IS NOT NULL + ) AS tax_municipality_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'ELEMENTARY' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_school_elementary_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'ELEMENTARY' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_school_elementary_district_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'SECONDARY' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_school_secondary_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'SECONDARY' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_school_secondary_district_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'UNIFIED' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_school_unified_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'UNIFIED' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_school_unified_district_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'COMM COLL' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_community_college_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'COMM COLL' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_community_college_district_name, + 
FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'FIRE' THEN agency_num END + ), + x -> x IS NOT NULL + ) AS tax_fire_protection_district_num, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'FIRE' THEN agency_name END + ), + x -> x IS NOT NULL + ) AS tax_fire_protection_district_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'LIBRARY' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_library_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'LIBRARY' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_library_district_name, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'PARK' THEN agency_num END + ), + x -> x IS NOT NULL + ) AS tax_park_district_num, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'PARK' THEN agency_name END + ), + x -> x IS NOT NULL + ) AS tax_park_district_name, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'SANITARY' THEN agency_num + END + ), + x -> x IS NOT NULL + ) AS tax_sanitation_district_num, + FILTER( + ARRAY_AGG( + CASE + WHEN minor_type = 'SANITARY' THEN agency_name + END + ), + x -> x IS NOT NULL + ) AS tax_sanitation_district_name, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'SSA' THEN agency_num END + ), + x -> x IS NOT NULL + ) AS tax_special_service_area_num, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'SSA' THEN agency_name END + ), + x -> x IS NOT NULL + ) AS tax_special_service_area_name, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'TIF' THEN agency_num END + ), + x -> x IS NOT NULL + ) AS tax_tif_district_num, + FILTER( + ARRAY_AGG( + CASE WHEN minor_type = 'TIF' THEN agency_name END + ), + x -> x IS NOT NULL + ) AS tax_tif_district_name, + tax_data_year, + year + FROM long + GROUP BY pin10, year, tax_data_year ) -SELECT * FROM tax +SELECT + pcl.pin10, + wide.tax_municipality_num, + wide.tax_municipality_name, + wide.tax_school_elementary_district_num, + wide.tax_school_elementary_district_name, + wide.tax_school_secondary_district_num, + 
wide.tax_school_secondary_district_name, + wide.tax_school_unified_district_num, + wide.tax_school_unified_district_name, + wide.tax_community_college_district_num, + wide.tax_community_college_district_name, + wide.tax_fire_protection_district_num, + wide.tax_fire_protection_district_name, + wide.tax_library_district_num, + wide.tax_library_district_name, + wide.tax_park_district_num, + wide.tax_park_district_name, + wide.tax_sanitation_district_num, + wide.tax_sanitation_district_name, + wide.tax_special_service_area_num, + wide.tax_special_service_area_name, + wide.tax_tif_district_num, + wide.tax_tif_district_name, + wide.tax_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN wide + ON pcl.pin10 = wide.pin10 + -- Join syntax here forward fills with most recent non-null value. + AND ( + CASE WHEN pcl.year > (SELECT MAX(year) FROM wide) + THEN (SELECT MAX(year) FROM wide) + ELSE pcl.year + END = wide.year + ) diff --git a/aws-athena/ctas/proximity-cnt_pin_num_bus_stop.sql b/aws-athena/ctas/proximity-cnt_pin_num_bus_stop.sql index ede012e63..4f6e10449 100644 --- a/aws-athena/ctas/proximity-cnt_pin_num_bus_stop.sql +++ b/aws-athena/ctas/proximity-cnt_pin_num_bus_stop.sql @@ -9,52 +9,48 @@ ) }} -WITH cnt_pin_num_bus_stop AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'transit_stop') }} - WHERE route_type = 3 - ), +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'transit_stop') }} + WHERE route_type = 3 +), - stop_locations AS ( - SELECT * - FROM {{ source('spatial', 'transit_stop') }} - WHERE route_type = 3 - ), - - xy_stop_counts AS ( - SELECT - dp.x_3435, - dp.y_3435, - loc.year, - COUNT(*) AS num_bus_stop_in_half_mile - FROM distinct_pins AS dp - INNER JOIN 
stop_locations AS loc - ON ST_CONTAINS( - ST_BUFFER(ST_GEOMFROMBINARY(loc.geometry_3435), 2640), - ST_POINT(dp.x_3435, dp.y_3435) - ) - GROUP BY dp.x_3435, dp.y_3435, loc.year - ) +stop_locations AS ( + SELECT * + FROM {{ source('spatial', 'transit_stop') }} + WHERE route_type = 3 +), +xy_stop_counts AS ( SELECT - pcl.pin10, - COALESCE(xy.num_bus_stop_in_half_mile, 0) AS num_bus_stop_in_half_mile, - xy.year AS num_bus_stop_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN xy_stop_counts AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + dp.x_3435, + dp.y_3435, + loc.year, + COUNT(*) AS num_bus_stop_in_half_mile + FROM distinct_pins AS dp + INNER JOIN stop_locations AS loc + ON ST_CONTAINS( + ST_BUFFER(ST_GEOMFROMBINARY(loc.geometry_3435), 2640), + ST_POINT(dp.x_3435, dp.y_3435) + ) + GROUP BY dp.x_3435, dp.y_3435, loc.year ) -SELECT * FROM cnt_pin_num_bus_stop +SELECT + pcl.pin10, + COALESCE(xy.num_bus_stop_in_half_mile, 0) AS num_bus_stop_in_half_mile, + xy.year AS num_bus_stop_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN xy_stop_counts AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/proximity-cnt_pin_num_foreclosure.sql b/aws-athena/ctas/proximity-cnt_pin_num_foreclosure.sql index 6dc53d876..a17603fbf 100644 --- a/aws-athena/ctas/proximity-cnt_pin_num_foreclosure.sql +++ b/aws-athena/ctas/proximity-cnt_pin_num_foreclosure.sql @@ -9,84 +9,80 @@ ) }} -WITH cnt_pin_num_foreclosure AS ( - WITH pin_locations AS ( - SELECT - pin10, - year, - x_3435, - y_3435, - ST_POINT(x_3435, y_3435) AS point - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM pin_locations - ), - - foreclosure_locations AS ( - SELECT - *, - 
ST_BUFFER(ST_GEOMFROMBINARY(geometry_3435), 2640) AS buffer - FROM sale.foreclosure - ), +WITH pin_locations AS ( + SELECT + pin10, + year, + x_3435, + y_3435, + ST_POINT(x_3435, y_3435) AS point + FROM {{ source('spatial', 'parcel') }} +), - pins_in_buffers AS ( - SELECT - pl.pin10, - pl.year, - COUNT(*) AS num_foreclosure_in_half_mile_past_5_years - FROM pin_locations AS pl - INNER JOIN foreclosure_locations AS loc - ON YEAR(loc.foreclosure_recording_date) - BETWEEN CAST(pl.year AS INT) - 5 AND CAST(pl.year AS INT) - AND ST_CONTAINS(loc.buffer, pl.point) - GROUP BY pl.pin10, pl.year - ), +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM pin_locations +), - pin_counts_in_half_mile AS ( - SELECT - dp.x_3435, - dp.y_3435, - pl.year, - COUNT(*) AS num_pin_in_half_mile - FROM distinct_pins AS dp - INNER JOIN pin_locations AS pl - ON ST_CONTAINS( - ST_BUFFER(ST_POINT(dp.x_3435, dp.y_3435), 2640), pl.point - ) - GROUP BY dp.x_3435, dp.y_3435, pl.year - ) +foreclosure_locations AS ( + SELECT + *, + ST_BUFFER(ST_GEOMFROMBINARY(geometry_3435), 2640) AS buffer + FROM sale.foreclosure +), +pins_in_buffers AS ( SELECT pl.pin10, - COALESCE( - pib.num_foreclosure_in_half_mile_past_5_years, - 0 - ) AS num_foreclosure_in_half_mile_past_5_years, - COALESCE(pc.num_pin_in_half_mile, 1) AS num_pin_in_half_mile, - ROUND( - CAST(pib.num_foreclosure_in_half_mile_past_5_years AS DOUBLE) / ( - CAST(pc.num_pin_in_half_mile AS DOUBLE) / 1000 - ), 2 - ) AS num_foreclosure_per_1000_pin_past_5_years, - CONCAT( - CAST(CAST(pl.year AS INT) - 5 AS VARCHAR), - ' - ', - CAST(pl.year AS VARCHAR) - ) AS num_foreclosure_data_year, - pl.year + pl.year, + COUNT(*) AS num_foreclosure_in_half_mile_past_5_years FROM pin_locations AS pl - LEFT JOIN pins_in_buffers AS pib - ON pl.pin10 = pib.pin10 - AND pl.year = pib.year - LEFT JOIN pin_counts_in_half_mile AS pc - ON pl.x_3435 = pc.x_3435 - AND pl.y_3435 = pc.y_3435 - AND pl.year = pc.year + INNER JOIN foreclosure_locations AS loc + ON 
YEAR(loc.foreclosure_recording_date) + BETWEEN CAST(pl.year AS INT) - 5 AND CAST(pl.year AS INT) + AND ST_CONTAINS(loc.buffer, pl.point) + GROUP BY pl.pin10, pl.year +), + +pin_counts_in_half_mile AS ( + SELECT + dp.x_3435, + dp.y_3435, + pl.year, + COUNT(*) AS num_pin_in_half_mile + FROM distinct_pins AS dp + INNER JOIN pin_locations AS pl + ON ST_CONTAINS( + ST_BUFFER(ST_POINT(dp.x_3435, dp.y_3435), 2640), pl.point + ) + GROUP BY dp.x_3435, dp.y_3435, pl.year ) -SELECT * FROM cnt_pin_num_foreclosure +SELECT + pl.pin10, + COALESCE( + pib.num_foreclosure_in_half_mile_past_5_years, + 0 + ) AS num_foreclosure_in_half_mile_past_5_years, + COALESCE(pc.num_pin_in_half_mile, 1) AS num_pin_in_half_mile, + ROUND( + CAST(pib.num_foreclosure_in_half_mile_past_5_years AS DOUBLE) / ( + CAST(pc.num_pin_in_half_mile AS DOUBLE) / 1000 + ), 2 + ) AS num_foreclosure_per_1000_pin_past_5_years, + CONCAT( + CAST(CAST(pl.year AS INT) - 5 AS VARCHAR), + ' - ', + CAST(pl.year AS VARCHAR) + ) AS num_foreclosure_data_year, + pl.year +FROM pin_locations AS pl +LEFT JOIN pins_in_buffers AS pib + ON pl.pin10 = pib.pin10 + AND pl.year = pib.year +LEFT JOIN pin_counts_in_half_mile AS pc + ON pl.x_3435 = pc.x_3435 + AND pl.y_3435 = pc.y_3435 + AND pl.year = pc.year diff --git a/aws-athena/ctas/proximity-cnt_pin_num_school.sql b/aws-athena/ctas/proximity-cnt_pin_num_school.sql index 54bed48e6..0f0cd2f61 100644 --- a/aws-athena/ctas/proximity-cnt_pin_num_school.sql +++ b/aws-athena/ctas/proximity-cnt_pin_num_school.sql @@ -10,125 +10,121 @@ ) }} -WITH cnt_pin_num_school AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distinct_years_rhs AS ( - SELECT 
DISTINCT year - FROM {{ source('other', 'great_schools_rating') }} - ), +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('other', 'great_schools_rating') }} +), - school_locations AS ( +school_locations AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('other', 'great_schools_rating') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('other', 'great_schools_rating') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), - - school_locations_public AS ( - SELECT * - FROM school_locations - WHERE type = 'public' - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('other', 'great_schools_rating') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('other', 'great_schools_rating') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - school_locations_other AS ( - SELECT * - FROM school_locations - WHERE type != 'public' - ), +school_locations_public AS ( + SELECT * + FROM school_locations + WHERE type = 'public' +), - school_ratings AS ( - SELECT DISTINCT - dp.x_3435, - dp.y_3435, - pub.rating, - pub.pin_year, - pub.year - FROM distinct_pins AS dp - -- Keep only public schools with 1/2 mile WITHIN each PIN's district - INNER JOIN {{ source('spatial', 'school_district') }} AS dis - ON ST_CONTAINS( - ST_GEOMFROMBINARY(dis.geometry_3435), - ST_POINT(dp.x_3435, dp.y_3435) - ) - INNER JOIN school_locations_public AS pub - ON ST_CONTAINS( - ST_BUFFER(ST_GEOMFROMBINARY(pub.geometry_3435), 2640), - ST_POINT(dp.x_3435, dp.y_3435) - ) - WHERE dis.geoid = pub.district_geoid - UNION ALL - -- Any and all private schools within 1/2 mile - SELECT - dp.x_3435, - dp.y_3435, - oth.rating, - oth.pin_year, - oth.year - FROM distinct_pins AS 
dp - INNER JOIN school_locations_other AS oth - ON ST_CONTAINS( - ST_BUFFER(ST_GEOMFROMBINARY(oth.geometry_3435), 2640), - ST_POINT(dp.x_3435, dp.y_3435) - ) - ), +school_locations_other AS ( + SELECT * + FROM school_locations + WHERE type != 'public' +), - school_ratings_agg AS ( - SELECT - pin_year, - x_3435, - y_3435, - COUNT(*) AS num_school_in_half_mile, - SUM( - CASE - WHEN rating IS NOT NULL THEN 1 - ELSE 0 - END - ) AS num_school_with_rating_in_half_mile, - AVG(rating) AS avg_school_rating_in_half_mile, - MAX(year) AS num_school_data_year, - MAX(year) AS num_school_rating_data_year - FROM school_ratings - GROUP BY x_3435, y_3435, pin_year - ) +school_ratings AS ( + SELECT DISTINCT + dp.x_3435, + dp.y_3435, + pub.rating, + pub.pin_year, + pub.year + FROM distinct_pins AS dp + -- Keep only public schools with 1/2 mile WITHIN each PIN's district + INNER JOIN {{ source('spatial', 'school_district') }} AS dis + ON ST_CONTAINS( + ST_GEOMFROMBINARY(dis.geometry_3435), + ST_POINT(dp.x_3435, dp.y_3435) + ) + INNER JOIN school_locations_public AS pub + ON ST_CONTAINS( + ST_BUFFER(ST_GEOMFROMBINARY(pub.geometry_3435), 2640), + ST_POINT(dp.x_3435, dp.y_3435) + ) + WHERE dis.geoid = pub.district_geoid + UNION ALL + -- Any and all private schools within 1/2 mile + SELECT + dp.x_3435, + dp.y_3435, + oth.rating, + oth.pin_year, + oth.year + FROM distinct_pins AS dp + INNER JOIN school_locations_other AS oth + ON ST_CONTAINS( + ST_BUFFER(ST_GEOMFROMBINARY(oth.geometry_3435), 2640), + ST_POINT(dp.x_3435, dp.y_3435) + ) +), +school_ratings_agg AS ( SELECT - pcl.pin10, - COALESCE(sr.num_school_in_half_mile, 0) AS num_school_in_half_mile, - COALESCE( - sr.num_school_with_rating_in_half_mile, - 0 + pin_year, + x_3435, + y_3435, + COUNT(*) AS num_school_in_half_mile, + SUM( + CASE + WHEN rating IS NOT NULL THEN 1 + ELSE 0 + END ) AS num_school_with_rating_in_half_mile, - sr.avg_school_rating_in_half_mile, - sr.num_school_data_year, - sr.num_school_rating_data_year, - pcl.year - 
FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN school_ratings_agg AS sr - ON pcl.x_3435 = sr.x_3435 - AND pcl.y_3435 = sr.y_3435 - AND pcl.year = sr.pin_year - WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) + AVG(rating) AS avg_school_rating_in_half_mile, + MAX(year) AS num_school_data_year, + MAX(year) AS num_school_rating_data_year + FROM school_ratings + GROUP BY x_3435, y_3435, pin_year ) -SELECT * FROM cnt_pin_num_school +SELECT + pcl.pin10, + COALESCE(sr.num_school_in_half_mile, 0) AS num_school_in_half_mile, + COALESCE( + sr.num_school_with_rating_in_half_mile, + 0 + ) AS num_school_with_rating_in_half_mile, + sr.avg_school_rating_in_half_mile, + sr.num_school_data_year, + sr.num_school_rating_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +LEFT JOIN school_ratings_agg AS sr + ON pcl.x_3435 = sr.x_3435 + AND pcl.y_3435 = sr.y_3435 + AND pcl.year = sr.pin_year +WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs) diff --git a/aws-athena/ctas/proximity-crosswalk_year_fill.sql b/aws-athena/ctas/proximity-crosswalk_year_fill.sql index 27271d533..2dddfb99d 100644 --- a/aws-athena/ctas/proximity-crosswalk_year_fill.sql +++ b/aws-athena/ctas/proximity-crosswalk_year_fill.sql @@ -9,253 +9,249 @@ missing equivalent proximity data are filled thus: */ {{ config(materialized='table') }} -WITH crosswalk_year_fill AS ( - WITH unfilled AS ( - SELECT - pin.year, - MAX(cnt_pin_num_bus_stop.num_bus_stop_data_year) - AS num_bus_stop_data_year, - MAX(cnt_pin_num_foreclosure.num_foreclosure_data_year) - AS num_foreclosure_data_year, - MAX(cnt_pin_num_school.num_school_data_year) - AS num_school_data_year, - MAX(cnt_pin_num_school.num_school_rating_data_year) - AS num_school_rating_data_year, - MAX(dist_pin_to_bike_trail.nearest_bike_trail_data_year) - AS nearest_bike_trail_data_year, - MAX(dist_pin_to_cemetery.nearest_cemetery_data_year) - AS nearest_cemetery_data_year, - 
MAX(dist_pin_to_cta_route.nearest_cta_route_data_year) - AS nearest_cta_route_data_year, - MAX(dist_pin_to_cta_stop.nearest_cta_stop_data_year) - AS nearest_cta_stop_data_year, - MAX(dist_pin_to_golf_course.nearest_golf_course_data_year) - AS nearest_golf_course_data_year, - MAX(dist_pin_to_hospital.nearest_hospital_data_year) - AS nearest_hospital_data_year, - MAX(dist_pin_to_lake_michigan.lake_michigan_data_year) - AS lake_michigan_data_year, - MAX(dist_pin_to_major_road.nearest_major_road_data_year) - AS nearest_major_road_data_year, - MAX(dist_pin_to_metra_route.nearest_metra_route_data_year) - AS nearest_metra_route_data_year, - MAX(dist_pin_to_metra_stop.nearest_metra_stop_data_year) - AS nearest_metra_stop_data_year, - MAX(dist_pin_to_park.nearest_park_data_year) - AS nearest_park_data_year, - MAX(dist_pin_to_railroad.nearest_railroad_data_year) - AS nearest_railroad_data_year, - MAX(dist_pin_to_water.nearest_water_data_year) - AS nearest_water_data_year - FROM - (SELECT DISTINCT year FROM {{ source('spatial', 'parcel') }}) AS pin - LEFT JOIN ( - SELECT DISTINCT - year, - num_bus_stop_data_year - FROM {{ ref('proximity.cnt_pin_num_bus_stop') }} - ) AS cnt_pin_num_bus_stop ON pin.year = cnt_pin_num_bus_stop.year - LEFT JOIN ( - SELECT DISTINCT - year, - -- Foreclosure data year descripes a range of years, - -- which can't be used for joins - SUBSTR(num_foreclosure_data_year, 8, 11) - AS num_foreclosure_data_year - FROM {{ ref('proximity.cnt_pin_num_foreclosure') }} - ) AS cnt_pin_num_foreclosure ON pin.year = cnt_pin_num_foreclosure.year - LEFT JOIN ( - SELECT DISTINCT - year, - num_school_data_year, - num_school_rating_data_year - FROM {{ ref('proximity.cnt_pin_num_school') }} - ) AS cnt_pin_num_school ON pin.year = cnt_pin_num_school.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_bike_trail_data_year - FROM {{ ref('proximity.dist_pin_to_bike_trail') }} - ) AS dist_pin_to_bike_trail ON pin.year = dist_pin_to_bike_trail.year - LEFT JOIN ( - SELECT 
DISTINCT - year, - nearest_cemetery_data_year - FROM {{ ref('proximity.dist_pin_to_cemetery') }} - ) AS dist_pin_to_cemetery ON pin.year = dist_pin_to_cemetery.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_cta_route_data_year - FROM {{ ref('proximity.dist_pin_to_cta_route') }} - ) AS dist_pin_to_cta_route ON pin.year = dist_pin_to_cta_route.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_cta_stop_data_year - FROM {{ ref('proximity.dist_pin_to_cta_stop') }} - ) AS dist_pin_to_cta_stop ON pin.year = dist_pin_to_cta_stop.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_golf_course_data_year - FROM {{ ref('proximity.dist_pin_to_golf_course') }} - ) AS dist_pin_to_golf_course ON pin.year = dist_pin_to_golf_course.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_hospital_data_year - FROM {{ ref('proximity.dist_pin_to_hospital') }} - ) AS dist_pin_to_hospital ON pin.year = dist_pin_to_hospital.year - LEFT JOIN ( - SELECT DISTINCT - year, - lake_michigan_data_year - FROM {{ ref('proximity.dist_pin_to_lake_michigan') }} - ) AS dist_pin_to_lake_michigan - ON pin.year = dist_pin_to_lake_michigan.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_major_road_data_year - FROM {{ ref('proximity.dist_pin_to_major_road') }} - ) AS dist_pin_to_major_road ON pin.year = dist_pin_to_major_road.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_metra_route_data_year - FROM {{ ref('proximity.dist_pin_to_metra_route') }} - ) AS dist_pin_to_metra_route ON pin.year = dist_pin_to_metra_route.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_metra_stop_data_year - FROM {{ ref('proximity.dist_pin_to_metra_stop') }} - ) AS dist_pin_to_metra_stop ON pin.year = dist_pin_to_metra_stop.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_park_data_year - FROM {{ ref('proximity.dist_pin_to_park') }} - ) AS dist_pin_to_park ON pin.year = dist_pin_to_park.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_railroad_data_year - FROM {{ 
ref('proximity.dist_pin_to_railroad') }} - ) AS dist_pin_to_railroad ON pin.year = dist_pin_to_railroad.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_water_data_year - FROM {{ ref('proximity.dist_pin_to_water') }} - ) AS dist_pin_to_water ON pin.year = dist_pin_to_water.year - - GROUP BY pin.year - ) - +WITH unfilled AS ( SELECT - year, - COALESCE( - num_bus_stop_data_year, LAST_VALUE(num_bus_stop_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS num_bus_stop_data_year, - COALESCE( - num_foreclosure_data_year, - LAST_VALUE(num_foreclosure_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS num_foreclosure_data_year, - COALESCE( - num_school_data_year, LAST_VALUE(num_school_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS num_school_data_year, - COALESCE( - num_school_rating_data_year, - LAST_VALUE(num_school_rating_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS num_school_rating_data_year, - COALESCE( - nearest_bike_trail_data_year, - LAST_VALUE(nearest_bike_trail_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_bike_trail_data_year, - COALESCE( - nearest_cemetery_data_year, - LAST_VALUE(nearest_cemetery_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_cemetery_data_year, - COALESCE( - nearest_cta_route_data_year, - LAST_VALUE(nearest_cta_route_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_cta_route_data_year, - COALESCE( - nearest_cta_stop_data_year, - LAST_VALUE(nearest_cta_stop_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_cta_stop_data_year, - COALESCE( - nearest_golf_course_data_year, - LAST_VALUE(nearest_golf_course_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_golf_course_data_year, - COALESCE( - nearest_hospital_data_year, - LAST_VALUE(nearest_hospital_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_hospital_data_year, - COALESCE( - lake_michigan_data_year, 
LAST_VALUE(lake_michigan_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS lake_michigan_data_year, - COALESCE( - nearest_major_road_data_year, - LAST_VALUE(nearest_major_road_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_major_road_data_year, - COALESCE( - nearest_metra_route_data_year, - LAST_VALUE(nearest_metra_route_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_metra_route_data_year, - COALESCE( - nearest_metra_stop_data_year, - LAST_VALUE(nearest_metra_stop_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_metra_stop_data_year, - COALESCE( - nearest_park_data_year, LAST_VALUE(nearest_park_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_park_data_year, - COALESCE( - nearest_railroad_data_year, - LAST_VALUE(nearest_railroad_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_railroad_data_year, - COALESCE( - nearest_water_data_year, LAST_VALUE(nearest_water_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_water_data_year + pin.year, + MAX(cnt_pin_num_bus_stop.num_bus_stop_data_year) + AS num_bus_stop_data_year, + MAX(cnt_pin_num_foreclosure.num_foreclosure_data_year) + AS num_foreclosure_data_year, + MAX(cnt_pin_num_school.num_school_data_year) + AS num_school_data_year, + MAX(cnt_pin_num_school.num_school_rating_data_year) + AS num_school_rating_data_year, + MAX(dist_pin_to_bike_trail.nearest_bike_trail_data_year) + AS nearest_bike_trail_data_year, + MAX(dist_pin_to_cemetery.nearest_cemetery_data_year) + AS nearest_cemetery_data_year, + MAX(dist_pin_to_cta_route.nearest_cta_route_data_year) + AS nearest_cta_route_data_year, + MAX(dist_pin_to_cta_stop.nearest_cta_stop_data_year) + AS nearest_cta_stop_data_year, + MAX(dist_pin_to_golf_course.nearest_golf_course_data_year) + AS nearest_golf_course_data_year, + MAX(dist_pin_to_hospital.nearest_hospital_data_year) + AS nearest_hospital_data_year, + 
MAX(dist_pin_to_lake_michigan.lake_michigan_data_year) + AS lake_michigan_data_year, + MAX(dist_pin_to_major_road.nearest_major_road_data_year) + AS nearest_major_road_data_year, + MAX(dist_pin_to_metra_route.nearest_metra_route_data_year) + AS nearest_metra_route_data_year, + MAX(dist_pin_to_metra_stop.nearest_metra_stop_data_year) + AS nearest_metra_stop_data_year, + MAX(dist_pin_to_park.nearest_park_data_year) + AS nearest_park_data_year, + MAX(dist_pin_to_railroad.nearest_railroad_data_year) + AS nearest_railroad_data_year, + MAX(dist_pin_to_water.nearest_water_data_year) + AS nearest_water_data_year + FROM + (SELECT DISTINCT year FROM {{ source('spatial', 'parcel') }}) AS pin + LEFT JOIN ( + SELECT DISTINCT + year, + num_bus_stop_data_year + FROM {{ ref('proximity.cnt_pin_num_bus_stop') }} + ) AS cnt_pin_num_bus_stop ON pin.year = cnt_pin_num_bus_stop.year + LEFT JOIN ( + SELECT DISTINCT + year, + -- Foreclosure data year describes a range of years, + -- which can't be used for joins + SUBSTR(num_foreclosure_data_year, 8, 11) + AS num_foreclosure_data_year + FROM {{ ref('proximity.cnt_pin_num_foreclosure') }} + ) AS cnt_pin_num_foreclosure ON pin.year = cnt_pin_num_foreclosure.year + LEFT JOIN ( + SELECT DISTINCT + year, + num_school_data_year, + num_school_rating_data_year + FROM {{ ref('proximity.cnt_pin_num_school') }} + ) AS cnt_pin_num_school ON pin.year = cnt_pin_num_school.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_bike_trail_data_year + FROM {{ ref('proximity.dist_pin_to_bike_trail') }} + ) AS dist_pin_to_bike_trail ON pin.year = dist_pin_to_bike_trail.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_cemetery_data_year + FROM {{ ref('proximity.dist_pin_to_cemetery') }} + ) AS dist_pin_to_cemetery ON pin.year = dist_pin_to_cemetery.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_cta_route_data_year + FROM {{ ref('proximity.dist_pin_to_cta_route') }} + ) AS dist_pin_to_cta_route ON pin.year = dist_pin_to_cta_route.year + LEFT
JOIN ( + SELECT DISTINCT + year, + nearest_cta_stop_data_year + FROM {{ ref('proximity.dist_pin_to_cta_stop') }} + ) AS dist_pin_to_cta_stop ON pin.year = dist_pin_to_cta_stop.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_golf_course_data_year + FROM {{ ref('proximity.dist_pin_to_golf_course') }} + ) AS dist_pin_to_golf_course ON pin.year = dist_pin_to_golf_course.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_hospital_data_year + FROM {{ ref('proximity.dist_pin_to_hospital') }} + ) AS dist_pin_to_hospital ON pin.year = dist_pin_to_hospital.year + LEFT JOIN ( + SELECT DISTINCT + year, + lake_michigan_data_year + FROM {{ ref('proximity.dist_pin_to_lake_michigan') }} + ) AS dist_pin_to_lake_michigan + ON pin.year = dist_pin_to_lake_michigan.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_major_road_data_year + FROM {{ ref('proximity.dist_pin_to_major_road') }} + ) AS dist_pin_to_major_road ON pin.year = dist_pin_to_major_road.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_metra_route_data_year + FROM {{ ref('proximity.dist_pin_to_metra_route') }} + ) AS dist_pin_to_metra_route ON pin.year = dist_pin_to_metra_route.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_metra_stop_data_year + FROM {{ ref('proximity.dist_pin_to_metra_stop') }} + ) AS dist_pin_to_metra_stop ON pin.year = dist_pin_to_metra_stop.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_park_data_year + FROM {{ ref('proximity.dist_pin_to_park') }} + ) AS dist_pin_to_park ON pin.year = dist_pin_to_park.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_railroad_data_year + FROM {{ ref('proximity.dist_pin_to_railroad') }} + ) AS dist_pin_to_railroad ON pin.year = dist_pin_to_railroad.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_water_data_year + FROM {{ ref('proximity.dist_pin_to_water') }} + ) AS dist_pin_to_water ON pin.year = dist_pin_to_water.year - FROM unfilled - ORDER BY year + GROUP BY pin.year ) -SELECT * FROM crosswalk_year_fill +SELECT + 
year, + COALESCE( + num_bus_stop_data_year, LAST_VALUE(num_bus_stop_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS num_bus_stop_data_year, + COALESCE( + num_foreclosure_data_year, + LAST_VALUE(num_foreclosure_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS num_foreclosure_data_year, + COALESCE( + num_school_data_year, LAST_VALUE(num_school_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS num_school_data_year, + COALESCE( + num_school_rating_data_year, + LAST_VALUE(num_school_rating_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS num_school_rating_data_year, + COALESCE( + nearest_bike_trail_data_year, + LAST_VALUE(nearest_bike_trail_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_bike_trail_data_year, + COALESCE( + nearest_cemetery_data_year, + LAST_VALUE(nearest_cemetery_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_cemetery_data_year, + COALESCE( + nearest_cta_route_data_year, + LAST_VALUE(nearest_cta_route_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_cta_route_data_year, + COALESCE( + nearest_cta_stop_data_year, + LAST_VALUE(nearest_cta_stop_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_cta_stop_data_year, + COALESCE( + nearest_golf_course_data_year, + LAST_VALUE(nearest_golf_course_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_golf_course_data_year, + COALESCE( + nearest_hospital_data_year, + LAST_VALUE(nearest_hospital_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_hospital_data_year, + COALESCE( + lake_michigan_data_year, LAST_VALUE(lake_michigan_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS lake_michigan_data_year, + COALESCE( + nearest_major_road_data_year, + LAST_VALUE(nearest_major_road_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_major_road_data_year, + COALESCE( + nearest_metra_route_data_year, + 
LAST_VALUE(nearest_metra_route_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_metra_route_data_year, + COALESCE( + nearest_metra_stop_data_year, + LAST_VALUE(nearest_metra_stop_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_metra_stop_data_year, + COALESCE( + nearest_park_data_year, LAST_VALUE(nearest_park_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_park_data_year, + COALESCE( + nearest_railroad_data_year, + LAST_VALUE(nearest_railroad_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_railroad_data_year, + COALESCE( + nearest_water_data_year, LAST_VALUE(nearest_water_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_water_data_year + +FROM unfilled +ORDER BY year diff --git a/aws-athena/ctas/proximity-dist_pin_to_bike_trail.sql b/aws-athena/ctas/proximity-dist_pin_to_bike_trail.sql index 3c295bc8a..dede10d58 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_bike_trail.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_bike_trail.sql @@ -8,90 +8,86 @@ ) }} -WITH dist_pin_to_bike_trail AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - bike_trail_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'bike_trail') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'bike_trail') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +bike_trail_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + 
FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.name, - loc.street, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN bike_trail_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'bike_trail') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'bike_trail') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_bike_trail_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.name, - d1.street, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.name, + loc.street, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN bike_trail_location AS loc +), +xy_to_bike_trail_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.street) AS nearest_bike_trail_id, - ARBITRARY(xy.name) AS nearest_bike_trail_name, - ARBITRARY(xy.dist_ft) AS nearest_bike_trail_dist_ft, - ARBITRARY(xy.year) AS nearest_bike_trail_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_bike_trail_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.name, + d1.street, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + 
) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_bike_trail +SELECT + pcl.pin10, + ARBITRARY(xy.street) AS nearest_bike_trail_id, + ARBITRARY(xy.name) AS nearest_bike_trail_name, + ARBITRARY(xy.dist_ft) AS nearest_bike_trail_dist_ft, + ARBITRARY(xy.year) AS nearest_bike_trail_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_bike_trail_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_cemetery.sql b/aws-athena/ctas/proximity-dist_pin_to_cemetery.sql index a8865c21b..cabed33d7 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_cemetery.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_cemetery.sql @@ -8,91 +8,87 @@ ) }} -WITH dist_pin_to_cemetery AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - cemetery_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'cemetery') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'cemetery') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +cemetery_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.name, - loc.gniscode, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) 
AS distance - FROM distinct_pins AS dp - CROSS JOIN cemetery_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'cemetery') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'cemetery') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_cemetery_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.name, - d1.gniscode, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.name, + loc.gniscode, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN cemetery_location AS loc +), +xy_to_cemetery_dist AS ( SELECT - pcl.pin10, - CAST(CAST(ARBITRARY(xy.gniscode) AS BIGINT) AS VARCHAR) - AS nearest_cemetery_gnis_code, - ARBITRARY(xy.name) AS nearest_cemetery_name, - ARBITRARY(xy.dist_ft) AS nearest_cemetery_dist_ft, - ARBITRARY(xy.year) AS nearest_cemetery_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_cemetery_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.name, + d1.gniscode, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM 
dist_pin_to_cemetery +SELECT + pcl.pin10, + CAST(CAST(ARBITRARY(xy.gniscode) AS BIGINT) AS VARCHAR) + AS nearest_cemetery_gnis_code, + ARBITRARY(xy.name) AS nearest_cemetery_name, + ARBITRARY(xy.dist_ft) AS nearest_cemetery_dist_ft, + ARBITRARY(xy.year) AS nearest_cemetery_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_cemetery_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_cta_route.sql b/aws-athena/ctas/proximity-dist_pin_to_cta_route.sql index 6f1dd75ad..41b390da2 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_cta_route.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_cta_route.sql @@ -8,92 +8,88 @@ ) }} -WITH dist_pin_to_cta_route AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - cta_route_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'transit_route') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'transit_route') }} AS fill_data - ON fill_years.fill_year = fill_data.year - WHERE fill_data.agency = 'cta' - AND fill_data.route_type = 1 - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +cta_route_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.route_id, - loc.route_long_name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM 
distinct_pins AS dp - CROSS JOIN cta_route_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'transit_route') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'transit_route') }} AS fill_data + ON fill_years.fill_year = fill_data.year + WHERE fill_data.agency = 'cta' + AND fill_data.route_type = 1 +), - xy_to_cta_route_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.route_id, - d1.route_long_name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.route_id, + loc.route_long_name, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN cta_route_location AS loc +), +xy_to_cta_route_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.route_id) AS nearest_cta_route_id, - ARBITRARY(xy.route_long_name) AS nearest_cta_route_name, - ARBITRARY(xy.dist_ft) AS nearest_cta_route_dist_ft, - ARBITRARY(xy.year) AS nearest_cta_route_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_cta_route_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.route_id, + d1.route_long_name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = 
d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_cta_route +SELECT + pcl.pin10, + ARBITRARY(xy.route_id) AS nearest_cta_route_id, + ARBITRARY(xy.route_long_name) AS nearest_cta_route_name, + ARBITRARY(xy.dist_ft) AS nearest_cta_route_dist_ft, + ARBITRARY(xy.year) AS nearest_cta_route_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_cta_route_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_cta_stop.sql b/aws-athena/ctas/proximity-dist_pin_to_cta_stop.sql index 45fcf549b..c2b69c7c9 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_cta_stop.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_cta_stop.sql @@ -8,92 +8,88 @@ ) }} -WITH dist_pin_to_cta_stop AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - cta_stop_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'transit_stop') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'transit_stop') }} AS fill_data - ON fill_years.fill_year = fill_data.year - WHERE fill_data.agency = 'cta' - AND fill_data.route_type = 1 - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +cta_stop_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.stop_id, - loc.stop_name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) 
AS distance - FROM distinct_pins AS dp - CROSS JOIN cta_stop_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'transit_stop') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'transit_stop') }} AS fill_data + ON fill_years.fill_year = fill_data.year + WHERE fill_data.agency = 'cta' + AND fill_data.route_type = 1 +), - xy_to_cta_stop_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.stop_id, - d1.stop_name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.stop_id, + loc.stop_name, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN cta_stop_location AS loc +), +xy_to_cta_stop_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.stop_id) AS nearest_cta_stop_id, - ARBITRARY(xy.stop_name) AS nearest_cta_stop_name, - ARBITRARY(xy.dist_ft) AS nearest_cta_stop_dist_ft, - ARBITRARY(xy.year) AS nearest_cta_stop_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_cta_stop_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.stop_id, + d1.stop_name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND 
d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_cta_stop +SELECT + pcl.pin10, + ARBITRARY(xy.stop_id) AS nearest_cta_stop_id, + ARBITRARY(xy.stop_name) AS nearest_cta_stop_name, + ARBITRARY(xy.dist_ft) AS nearest_cta_stop_dist_ft, + ARBITRARY(xy.year) AS nearest_cta_stop_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_cta_stop_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_golf_course.sql b/aws-athena/ctas/proximity-dist_pin_to_golf_course.sql index 2c59be813..cb31c24f4 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_golf_course.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_golf_course.sql @@ -8,87 +8,83 @@ ) }} -WITH dist_pin_to_golf_course AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - golf_course_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'golf_course') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'golf_course') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +golf_course_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.id, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN golf_course_location AS loc - ), + dy.year AS 
pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'golf_course') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'golf_course') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_golf_course_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.id, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.id, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN golf_course_location AS loc +), +xy_to_golf_course_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.id) AS nearest_golf_course_id, - ARBITRARY(xy.dist_ft) AS nearest_golf_course_dist_ft, - ARBITRARY(xy.year) AS nearest_golf_course_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_golf_course_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.id, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_golf_course +SELECT + pcl.pin10, + ARBITRARY(xy.id) AS nearest_golf_course_id, + ARBITRARY(xy.dist_ft) AS nearest_golf_course_dist_ft, + ARBITRARY(xy.year) AS nearest_golf_course_data_year, + pcl.year +FROM {{ 
source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_golf_course_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_hospital.sql b/aws-athena/ctas/proximity-dist_pin_to_hospital.sql index d82692554..7c46118b8 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_hospital.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_hospital.sql @@ -8,91 +8,87 @@ ) }} -WITH dist_pin_to_hospital AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - hospital_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'hospital') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'hospital') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +hospital_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.name, - loc.gniscode, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN hospital_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'hospital') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'hospital') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_hospital_dist AS 
( - SELECT - d1.x_3435, - d1.y_3435, - d1.name, - d1.gniscode, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.name, + loc.gniscode, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN hospital_location AS loc +), +xy_to_hospital_dist AS ( SELECT - pcl.pin10, - CAST(CAST(ARBITRARY(xy.gniscode) AS BIGINT) AS VARCHAR) - AS nearest_hospital_gnis_code, - ARBITRARY(xy.name) AS nearest_hospital_name, - ARBITRARY(xy.dist_ft) AS nearest_hospital_dist_ft, - ARBITRARY(xy.year) AS nearest_hospital_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_hospital_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.name, + d1.gniscode, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_hospital +SELECT + pcl.pin10, + CAST(CAST(ARBITRARY(xy.gniscode) AS BIGINT) AS VARCHAR) + AS nearest_hospital_gnis_code, + ARBITRARY(xy.name) AS nearest_hospital_name, + ARBITRARY(xy.dist_ft) AS nearest_hospital_dist_ft, + ARBITRARY(xy.year) AS nearest_hospital_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_hospital_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = 
xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_lake_michigan.sql b/aws-athena/ctas/proximity-dist_pin_to_lake_michigan.sql index e61b79834..ee1aa6d22 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_lake_michigan.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_lake_michigan.sql @@ -9,84 +9,80 @@ ) }} -WITH dist_pin_to_lake_michigan AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - lake_michigan_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'coastline') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'coastline') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +lake_michigan_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN lake_michigan_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'coastline') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'coastline') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_lake_michigan_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - 
INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN lake_michigan_location AS loc +), +xy_to_lake_michigan_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.dist_ft) AS lake_michigan_dist_ft, - ARBITRARY(xy.year) AS lake_michigan_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_lake_michigan_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_lake_michigan +SELECT + pcl.pin10, + ARBITRARY(xy.dist_ft) AS lake_michigan_dist_ft, + ARBITRARY(xy.year) AS lake_michigan_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_lake_michigan_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_major_road.sql b/aws-athena/ctas/proximity-dist_pin_to_major_road.sql index 718b973bd..af2a46ea4 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_major_road.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_major_road.sql @@ -8,90 +8,86 @@ ) }} -WITH dist_pin_to_major_road AS ( - WITH distinct_pins AS ( - SELECT DISTINCT 
- x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - major_road_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'major_road') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'major_road') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +major_road_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.osm_id, - loc.name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN major_road_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'major_road') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'major_road') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_major_road_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.osm_id, - d1.name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.osm_id, + loc.name, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), 
+ ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN major_road_location AS loc +), +xy_to_major_road_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.osm_id) AS nearest_major_road_osm_id, - ARBITRARY(xy.name) AS nearest_major_road_name, - ARBITRARY(xy.dist_ft) AS nearest_major_road_dist_ft, - ARBITRARY(xy.year) AS nearest_major_road_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_major_road_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.osm_id, + d1.name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_major_road +SELECT + pcl.pin10, + ARBITRARY(xy.osm_id) AS nearest_major_road_osm_id, + ARBITRARY(xy.name) AS nearest_major_road_name, + ARBITRARY(xy.dist_ft) AS nearest_major_road_dist_ft, + ARBITRARY(xy.year) AS nearest_major_road_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_major_road_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_metra_route.sql b/aws-athena/ctas/proximity-dist_pin_to_metra_route.sql index e5e0b3576..fb109511c 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_metra_route.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_metra_route.sql @@ -8,92 +8,88 @@ ) }} -WITH dist_pin_to_metra_route AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - 
), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - metra_route_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'transit_route') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'transit_route') }} AS fill_data - ON fill_years.fill_year = fill_data.year - WHERE fill_data.agency = 'metra' - AND fill_data.route_type = 2 - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +metra_route_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.route_id, - loc.route_long_name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN metra_route_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'transit_route') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'transit_route') }} AS fill_data + ON fill_years.fill_year = fill_data.year + WHERE fill_data.agency = 'metra' + AND fill_data.route_type = 2 +), - xy_to_metra_route_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.route_id, - d1.route_long_name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.route_id, + loc.route_long_name, + loc.pin_year, + loc.year, + ST_DISTANCE( + 
ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN metra_route_location AS loc +), +xy_to_metra_route_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.route_id) AS nearest_metra_route_id, - ARBITRARY(xy.route_long_name) AS nearest_metra_route_name, - ARBITRARY(xy.dist_ft) AS nearest_metra_route_dist_ft, - ARBITRARY(xy.year) AS nearest_metra_route_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_metra_route_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.route_id, + d1.route_long_name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_metra_route +SELECT + pcl.pin10, + ARBITRARY(xy.route_id) AS nearest_metra_route_id, + ARBITRARY(xy.route_long_name) AS nearest_metra_route_name, + ARBITRARY(xy.dist_ft) AS nearest_metra_route_dist_ft, + ARBITRARY(xy.year) AS nearest_metra_route_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_metra_route_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_metra_stop.sql b/aws-athena/ctas/proximity-dist_pin_to_metra_stop.sql index be90f354f..d25c0c645 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_metra_stop.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_metra_stop.sql @@ -8,92 +8,88 @@ ) }} -WITH dist_pin_to_metra_stop AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years 
AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - metra_stop_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'transit_stop') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'transit_stop') }} AS fill_data - ON fill_years.fill_year = fill_data.year - WHERE fill_data.agency = 'metra' - AND fill_data.route_type = 2 - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +metra_stop_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.stop_id, - loc.stop_name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN metra_stop_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'transit_stop') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'transit_stop') }} AS fill_data + ON fill_years.fill_year = fill_data.year + WHERE fill_data.agency = 'metra' + AND fill_data.route_type = 2 +), - xy_to_metra_stop_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.stop_id, - d1.stop_name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.stop_id, + loc.stop_name, + 
loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN metra_stop_location AS loc +), +xy_to_metra_stop_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.stop_id) AS nearest_metra_stop_id, - ARBITRARY(xy.stop_name) AS nearest_metra_stop_name, - ARBITRARY(xy.dist_ft) AS nearest_metra_stop_dist_ft, - ARBITRARY(xy.year) AS nearest_metra_stop_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_metra_stop_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.stop_id, + d1.stop_name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_metra_stop +SELECT + pcl.pin10, + ARBITRARY(xy.stop_id) AS nearest_metra_stop_id, + ARBITRARY(xy.stop_name) AS nearest_metra_stop_name, + ARBITRARY(xy.dist_ft) AS nearest_metra_stop_dist_ft, + ARBITRARY(xy.year) AS nearest_metra_stop_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_metra_stop_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_park.sql b/aws-athena/ctas/proximity-dist_pin_to_park.sql index 089e0d419..75cb91710 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_park.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_park.sql @@ -8,91 +8,87 @@ ) }} -WITH dist_pin_to_park AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT 
year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - park_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'park') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'park') }} AS fill_data - ON fill_years.fill_year = fill_data.year - WHERE ST_AREA(ST_GEOMFROMBINARY(fill_data.geometry_3435)) > 87120 - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +park_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.osm_id, - loc.name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN park_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'park') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'park') }} AS fill_data + ON fill_years.fill_year = fill_data.year + WHERE ST_AREA(ST_GEOMFROMBINARY(fill_data.geometry_3435)) > 87120 +), - xy_to_park_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.osm_id, - d1.name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.osm_id, + loc.name, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + 
ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN park_location AS loc +), +xy_to_park_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.osm_id) AS nearest_park_osm_id, - ARBITRARY(xy.name) AS nearest_park_name, - ARBITRARY(xy.dist_ft) AS nearest_park_dist_ft, - ARBITRARY(xy.year) AS nearest_park_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_park_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.osm_id, + d1.name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_park +SELECT + pcl.pin10, + ARBITRARY(xy.osm_id) AS nearest_park_osm_id, + ARBITRARY(xy.name) AS nearest_park_name, + ARBITRARY(xy.dist_ft) AS nearest_park_dist_ft, + ARBITRARY(xy.year) AS nearest_park_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_park_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_railroad.sql b/aws-athena/ctas/proximity-dist_pin_to_railroad.sql index 0576d5adc..b7957a7d6 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_railroad.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_railroad.sql @@ -8,90 +8,86 @@ ) }} -WITH dist_pin_to_railroad AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 
'parcel') }} +), - railroad_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'railroad') }} AS df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'railroad') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +railroad_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.name_id, - loc.name_anno, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN railroad_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'railroad') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'railroad') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_railroad_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.name_id, - d1.name_anno, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.name_id, + loc.name_anno, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN railroad_location AS loc +), +xy_to_railroad_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.name_id) AS nearest_railroad_id, - ARBITRARY(xy.name_anno) AS 
nearest_railroad_name, - ARBITRARY(xy.dist_ft) AS nearest_railroad_dist_ft, - ARBITRARY(xy.year) AS nearest_railroad_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_railroad_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.name_id, + d1.name_anno, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_railroad +SELECT + pcl.pin10, + ARBITRARY(xy.name_id) AS nearest_railroad_id, + ARBITRARY(xy.name_anno) AS nearest_railroad_name, + ARBITRARY(xy.dist_ft) AS nearest_railroad_dist_ft, + ARBITRARY(xy.year) AS nearest_railroad_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_railroad_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/aws-athena/ctas/proximity-dist_pin_to_water.sql b/aws-athena/ctas/proximity-dist_pin_to_water.sql index 23dd0c100..f8a4fa1c0 100644 --- a/aws-athena/ctas/proximity-dist_pin_to_water.sql +++ b/aws-athena/ctas/proximity-dist_pin_to_water.sql @@ -8,90 +8,86 @@ ) }} -WITH dist_pin_to_water AS ( - WITH distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435 - FROM {{ source('spatial', 'parcel') }} - ), - - distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'parcel') }} - ), +WITH distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435 + FROM {{ source('spatial', 'parcel') }} +), - water_location AS ( - SELECT - fill_years.pin_year, - fill_data.* - FROM ( - SELECT - dy.year AS pin_year, - MAX(df.year) AS fill_year - FROM {{ source('spatial', 'hydrology') }} AS 
df - CROSS JOIN distinct_years AS dy - WHERE dy.year >= df.year - GROUP BY dy.year - ) AS fill_years - LEFT JOIN {{ source('spatial', 'hydrology') }} AS fill_data - ON fill_years.fill_year = fill_data.year - ), +distinct_years AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'parcel') }} +), - distances AS ( +water_location AS ( + SELECT + fill_years.pin_year, + fill_data.* + FROM ( SELECT - dp.x_3435, - dp.y_3435, - loc.id, - loc.name, - loc.pin_year, - loc.year, - ST_DISTANCE( - ST_POINT(dp.x_3435, dp.y_3435), - ST_GEOMFROMBINARY(loc.geometry_3435) - ) AS distance - FROM distinct_pins AS dp - CROSS JOIN water_location AS loc - ), + dy.year AS pin_year, + MAX(df.year) AS fill_year + FROM {{ source('spatial', 'hydrology') }} AS df + CROSS JOIN distinct_years AS dy + WHERE dy.year >= df.year + GROUP BY dy.year + ) AS fill_years + LEFT JOIN {{ source('spatial', 'hydrology') }} AS fill_data + ON fill_years.fill_year = fill_data.year +), - xy_to_water_dist AS ( - SELECT - d1.x_3435, - d1.y_3435, - d1.id, - d1.name, - d1.pin_year, - d1.year, - d2.dist_ft - FROM distances AS d1 - INNER JOIN ( - SELECT - x_3435, - y_3435, - pin_year, - MIN(distance) AS dist_ft - FROM distances - GROUP BY x_3435, y_3435, pin_year - ) AS d2 - ON d1.x_3435 = d2.x_3435 - AND d1.y_3435 = d2.y_3435 - AND d1.pin_year = d2.pin_year - AND d1.distance = d2.dist_ft - ) +distances AS ( + SELECT + dp.x_3435, + dp.y_3435, + loc.id, + loc.name, + loc.pin_year, + loc.year, + ST_DISTANCE( + ST_POINT(dp.x_3435, dp.y_3435), + ST_GEOMFROMBINARY(loc.geometry_3435) + ) AS distance + FROM distinct_pins AS dp + CROSS JOIN water_location AS loc +), +xy_to_water_dist AS ( SELECT - pcl.pin10, - ARBITRARY(xy.id) AS nearest_water_id, - ARBITRARY(xy.name) AS nearest_water_name, - ARBITRARY(xy.dist_ft) AS nearest_water_dist_ft, - ARBITRARY(xy.year) AS nearest_water_data_year, - pcl.year - FROM {{ source('spatial', 'parcel') }} AS pcl - INNER JOIN xy_to_water_dist AS xy - ON pcl.x_3435 = xy.x_3435 - AND 
pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + d1.x_3435, + d1.y_3435, + d1.id, + d1.name, + d1.pin_year, + d1.year, + d2.dist_ft + FROM distances AS d1 + INNER JOIN ( + SELECT + x_3435, + y_3435, + pin_year, + MIN(distance) AS dist_ft + FROM distances + GROUP BY x_3435, y_3435, pin_year + ) AS d2 + ON d1.x_3435 = d2.x_3435 + AND d1.y_3435 = d2.y_3435 + AND d1.pin_year = d2.pin_year + AND d1.distance = d2.dist_ft ) -SELECT * FROM dist_pin_to_water +SELECT + pcl.pin10, + ARBITRARY(xy.id) AS nearest_water_id, + ARBITRARY(xy.name) AS nearest_water_name, + ARBITRARY(xy.dist_ft) AS nearest_water_dist_ft, + ARBITRARY(xy.year) AS nearest_water_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN xy_to_water_dist AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year