Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove unnecessary subqueries in CTAS models #137

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 57 additions & 61 deletions aws-athena/ctas/location-access.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,71 +7,67 @@
)
}}

-- NOTE(review): the "WITH access AS (" wrapper and the first copy of distinct_pins
-- look like the pre-change side of the diff; the second copy looks like the
-- top-level CTE that replaces it. Confirm against the merged file.
WITH access AS (
-- Unique (x_3435, y_3435) coordinate pairs taken from the spatial.parcel source.
WITH distinct_pins AS (
SELECT DISTINCT
x_3435,
y_3435
FROM {{ source('spatial', 'parcel') }}
),
-- Same CTE text again: unique coordinate pairs from spatial.parcel.
WITH distinct_pins AS (
SELECT DISTINCT
x_3435,
y_3435
FROM {{ source('spatial', 'parcel') }}
),

-- Distinct year values present in the spatial.parcel source. Cross-joined with the
-- walkability source further down to pick a fill year for each parcel year.
distinct_years AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'parcel') }}
),
-- Second copy of the same CTE (the two copies are textually identical).
distinct_years AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'parcel') }}
),

-- Distinct year values present in the spatial.walkability source. The final SELECT
-- takes MIN(year) from this CTE as the lower bound on parcel years.
distinct_years_rhs AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'walkability') }}
),
-- Second copy of the same CTE (the two copies are textually identical).
distinct_years_rhs AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'walkability') }}
),

-- walkability: for each distinct parcel point and pin_year, aggregates (MAX) the
-- walkability attributes of the rows whose geometry contains the point.
-- NOTE(review): this span appears to interleave pre-change and post-change diff
-- lines (the header and several SELECT fragments occur twice), so it is not a single
-- runnable statement as shown. Confirm statement order against the merged file.
walkability AS (
walkability AS (
SELECT
dp.x_3435,
dp.y_3435,
-- walk_num is cast to BIGINT first and then to VARCHAR to form the id.
CAST(CAST(MAX(cprod.walk_num) AS BIGINT) AS VARCHAR)
AS access_cmap_walk_id,
MAX(cprod.nta_score) AS access_cmap_walk_nta_score,
MAX(cprod.total_score) AS access_cmap_walk_total_score,
MAX(cprod.year) AS access_cmap_walk_data_year,
cprod.pin_year
FROM distinct_pins AS dp
LEFT JOIN (
SELECT
dp.x_3435,
dp.y_3435,
CAST(CAST(MAX(cprod.walk_num) AS BIGINT) AS VARCHAR)
AS access_cmap_walk_id,
MAX(cprod.nta_score) AS access_cmap_walk_nta_score,
MAX(cprod.total_score) AS access_cmap_walk_total_score,
MAX(cprod.year) AS access_cmap_walk_data_year,
cprod.pin_year
FROM distinct_pins AS dp
LEFT JOIN (
fill_years.pin_year,
fill_data.*
FROM (
SELECT
fill_years.pin_year,
fill_data.*
FROM (
SELECT
-- fill_years: for each parcel year, the latest walkability year that is not
-- after it (dy.year >= df.year, then MAX(df.year) per dy.year).
dy.year AS pin_year,
MAX(df.year) AS fill_year
FROM {{ source('spatial', 'walkability') }} AS df
CROSS JOIN distinct_years AS dy
WHERE dy.year >= df.year
GROUP BY dy.year
) AS fill_years
-- Pull the walkability rows belonging to the chosen fill year.
LEFT JOIN {{ source('spatial', 'walkability') }} AS fill_data
ON fill_years.fill_year = fill_data.year
) AS cprod
-- Spatial match: the parcel point must lie within the walkability geometry.
ON ST_WITHIN(
ST_POINT(dp.x_3435, dp.y_3435),
ST_GEOMFROMBINARY(cprod.geometry_3435)
)
GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year
)

-- Final projection: one row per spatial.parcel row (pin10, year) with the
-- walkability attributes attached by matching coordinates and year. The LEFT JOIN
-- keeps parcel rows that have no walkability match.
-- NOTE(review): this looks like the pre-change copy of the final SELECT (the same
-- text appears again further down) — confirm against the merged file.
SELECT
pcl.pin10,
walk.access_cmap_walk_id,
walk.access_cmap_walk_nta_score,
walk.access_cmap_walk_total_score,
walk.access_cmap_walk_data_year,
pcl.year
FROM {{ source('spatial', 'parcel') }} AS pcl
LEFT JOIN walkability AS walk
ON pcl.x_3435 = walk.x_3435
AND pcl.y_3435 = walk.y_3435
AND pcl.year = walk.pin_year
-- Keep only parcel years at or after the earliest walkability year.
WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs)
-- NOTE(review): this span starts mid-statement; it appears to be the post-change
-- tail of the walkability CTE (fill_years subquery, join and grouping).
-- fill_years: for each parcel year, the latest walkability year that is not after
-- it (dy.year >= df.year, then MAX(df.year) per dy.year).
dy.year AS pin_year,
MAX(df.year) AS fill_year
FROM {{ source('spatial', 'walkability') }} AS df
CROSS JOIN distinct_years AS dy
WHERE dy.year >= df.year
GROUP BY dy.year
) AS fill_years
-- Pull the walkability rows belonging to the chosen fill year.
LEFT JOIN {{ source('spatial', 'walkability') }} AS fill_data
ON fill_years.fill_year = fill_data.year
) AS cprod
-- Spatial match: the parcel point must lie within the walkability geometry.
ON ST_WITHIN(
ST_POINT(dp.x_3435, dp.y_3435),
ST_GEOMFROMBINARY(cprod.geometry_3435)
)
GROUP BY dp.x_3435, dp.y_3435, cprod.pin_year
)

-- NOTE(review): "SELECT * FROM access" reads from the wrapper CTE named access and
-- looks like the pre-change final statement; the explicit SELECT after it looks
-- like its replacement — confirm against the merged file.
SELECT * FROM access
-- Final projection: one row per spatial.parcel row (pin10, year) with the
-- walkability attributes attached by matching coordinates and year. The LEFT JOIN
-- keeps parcel rows that have no walkability match.
SELECT
pcl.pin10,
walk.access_cmap_walk_id,
walk.access_cmap_walk_nta_score,
walk.access_cmap_walk_total_score,
walk.access_cmap_walk_data_year,
pcl.year
FROM {{ source('spatial', 'parcel') }} AS pcl
LEFT JOIN walkability AS walk
ON pcl.x_3435 = walk.x_3435
AND pcl.y_3435 = walk.y_3435
AND pcl.year = walk.pin_year
-- Keep only parcel years at or after the earliest walkability year.
WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs)
170 changes: 83 additions & 87 deletions aws-athena/ctas/location-census.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,95 +7,91 @@
)
}}

-- NOTE(review): the "WITH census AS (" wrapper and the first copy of distinct_pins
-- look like the pre-change side of the diff; the second copy looks like the
-- top-level CTE that replaces it. Confirm against the merged file.
WITH census AS (
-- Unique (x_3435, y_3435) coordinate pairs taken from the spatial.parcel source.
WITH distinct_pins AS (
SELECT DISTINCT
x_3435,
y_3435
FROM {{ source('spatial', 'parcel') }}
),
-- Same CTE text again: unique coordinate pairs from spatial.parcel.
WITH distinct_pins AS (
SELECT DISTINCT
x_3435,
y_3435
FROM {{ source('spatial', 'parcel') }}
),

-- Distinct year values present in the spatial.census source. The final SELECT
-- takes MIN(year) from this CTE as the lower bound on parcel years.
distinct_years_rhs AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'census') }}
),

-- distinct_joined: one row per distinct parcel point and census year, with one
-- GEOID column per census geography type. Each column takes MAX(geoid) over the
-- joined census rows of that geography; the CASE has no ELSE, so rows of other
-- geographies contribute NULL and are ignored by MAX.
distinct_joined AS (
SELECT
dp.x_3435,
dp.y_3435,
MAX(CASE
WHEN cen.geography = 'block_group' THEN cen.geoid
END) AS census_block_group_geoid,
MAX(CASE
WHEN cen.geography = 'block' THEN cen.geoid
END) AS census_block_geoid,
MAX(CASE
WHEN cen.geography = 'congressional_district' THEN cen.geoid
END) AS census_congressional_district_geoid,
MAX(CASE
WHEN cen.geography = 'county_subdivision' THEN cen.geoid
END) AS census_county_subdivision_geoid,
MAX(CASE
WHEN cen.geography = 'place' THEN cen.geoid
END) AS census_place_geoid,
MAX(CASE
WHEN cen.geography = 'puma' THEN cen.geoid
END) AS census_puma_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_elementary' THEN cen.geoid
END) AS census_school_district_elementary_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_secondary' THEN cen.geoid
END) AS census_school_district_secondary_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_unified' THEN cen.geoid
END) AS census_school_district_unified_geoid,
MAX(CASE
WHEN cen.geography = 'state_representative' THEN cen.geoid
END) AS census_state_representative_geoid,
MAX(CASE
WHEN cen.geography = 'state_senate' THEN cen.geoid
END) AS census_state_senate_geoid,
MAX(CASE
WHEN cen.geography = 'tract' THEN cen.geoid
END) AS census_tract_geoid,
MAX(CASE
WHEN cen.geography = 'zcta' THEN cen.geoid
END) AS census_zcta_geoid,
cen.year
FROM distinct_pins AS dp
-- Spatial match: the parcel point must lie within the census geometry.
LEFT JOIN {{ source('spatial', 'census') }} AS cen
ON ST_WITHIN(
ST_POINT(dp.x_3435, dp.y_3435),
ST_GEOMFROMBINARY(cen.geometry_3435)
)
GROUP BY dp.x_3435, dp.y_3435, cen.year
)
-- Distinct year values present in the spatial.census source. The final SELECT
-- takes MIN(year) from this CTE as the lower bound on parcel years.
distinct_years_rhs AS (
SELECT DISTINCT year
FROM {{ source('spatial', 'census') }}
),

-- NOTE(review): this span appears to interleave diff lines. The CTE header below
-- matches the rewritten model, while the column list after SELECT matches the
-- final projection (it reads from pcl and dj, not from the census source).
-- Confirm statement order against the merged file.
distinct_joined AS (
SELECT
pcl.pin10,
dj.census_block_group_geoid,
dj.census_block_geoid,
dj.census_congressional_district_geoid,
dj.census_county_subdivision_geoid,
dj.census_place_geoid,
dj.census_puma_geoid,
dj.census_school_district_elementary_geoid,
dj.census_school_district_secondary_geoid,
dj.census_school_district_unified_geoid,
dj.census_state_representative_geoid,
dj.census_state_senate_geoid,
dj.census_tract_geoid,
dj.census_zcta_geoid,
-- The census row's year is exposed as census_data_year, alongside the parcel year.
dj.year AS census_data_year,
pcl.year
FROM {{ source('spatial', 'parcel') }} AS pcl
-- Match on year and coordinates; LEFT JOIN keeps parcel rows with no census match.
LEFT JOIN distinct_joined AS dj
ON pcl.year = dj.year
AND pcl.x_3435 = dj.x_3435
AND pcl.y_3435 = dj.y_3435
-- Keep only parcel years at or after the earliest census year.
WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs)
-- NOTE(review): this span starts mid-statement; it appears to be the post-change
-- body of the distinct_joined CTE. Each GEOID column takes MAX(geoid) over the
-- joined census rows of one geography type; the CASE has no ELSE, so rows of other
-- geographies contribute NULL and are ignored by MAX.
dp.x_3435,
dp.y_3435,
MAX(CASE
WHEN cen.geography = 'block_group' THEN cen.geoid
END) AS census_block_group_geoid,
MAX(CASE
WHEN cen.geography = 'block' THEN cen.geoid
END) AS census_block_geoid,
MAX(CASE
WHEN cen.geography = 'congressional_district' THEN cen.geoid
END) AS census_congressional_district_geoid,
MAX(CASE
WHEN cen.geography = 'county_subdivision' THEN cen.geoid
END) AS census_county_subdivision_geoid,
MAX(CASE
WHEN cen.geography = 'place' THEN cen.geoid
END) AS census_place_geoid,
MAX(CASE
WHEN cen.geography = 'puma' THEN cen.geoid
END) AS census_puma_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_elementary' THEN cen.geoid
END) AS census_school_district_elementary_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_secondary' THEN cen.geoid
END) AS census_school_district_secondary_geoid,
MAX(CASE
WHEN cen.geography = 'school_district_unified' THEN cen.geoid
END) AS census_school_district_unified_geoid,
MAX(CASE
WHEN cen.geography = 'state_representative' THEN cen.geoid
END) AS census_state_representative_geoid,
MAX(CASE
WHEN cen.geography = 'state_senate' THEN cen.geoid
END) AS census_state_senate_geoid,
MAX(CASE
WHEN cen.geography = 'tract' THEN cen.geoid
END) AS census_tract_geoid,
MAX(CASE
WHEN cen.geography = 'zcta' THEN cen.geoid
END) AS census_zcta_geoid,
cen.year
FROM distinct_pins AS dp
-- Spatial match: the parcel point must lie within the census geometry.
LEFT JOIN {{ source('spatial', 'census') }} AS cen
ON ST_WITHIN(
ST_POINT(dp.x_3435, dp.y_3435),
ST_GEOMFROMBINARY(cen.geometry_3435)
)
GROUP BY dp.x_3435, dp.y_3435, cen.year
)

-- NOTE(review): "SELECT * FROM census" reads from the wrapper CTE named census and
-- looks like the pre-change final statement; the explicit SELECT after it looks
-- like its replacement — confirm against the merged file.
SELECT * FROM census
-- Final projection: one row per spatial.parcel row (pin10, year) with the census
-- GEOID columns from distinct_joined attached.
SELECT
pcl.pin10,
dj.census_block_group_geoid,
dj.census_block_geoid,
dj.census_congressional_district_geoid,
dj.census_county_subdivision_geoid,
dj.census_place_geoid,
dj.census_puma_geoid,
dj.census_school_district_elementary_geoid,
dj.census_school_district_secondary_geoid,
dj.census_school_district_unified_geoid,
dj.census_state_representative_geoid,
dj.census_state_senate_geoid,
dj.census_tract_geoid,
dj.census_zcta_geoid,
-- The census row's year is exposed as census_data_year, alongside the parcel year.
dj.year AS census_data_year,
pcl.year
FROM {{ source('spatial', 'parcel') }} AS pcl
-- Match on year and coordinates; LEFT JOIN keeps parcel rows with no census match.
LEFT JOIN distinct_joined AS dj
ON pcl.year = dj.year
AND pcl.x_3435 = dj.x_3435
AND pcl.y_3435 = dj.y_3435
-- Keep only parcel years at or after the earliest census year.
WHERE pcl.year >= (SELECT MIN(year) FROM distinct_years_rhs)
Loading