From 574d69d1ba8cbab3b130a5db3d0bf20cfaed6f15 Mon Sep 17 00:00:00 2001
From: Jean Cochrane
Date: Mon, 11 Sep 2023 21:31:07 +0000
Subject: [PATCH] Revert dist_pin_to_pin to a source

---
Notes:
  * Every statement in proximity-dist_pin_to_pin.sql is terminated with `;`
    (the first CTAS and both DROP TABLE statements) so the script parses as
    four separate statements.
  * proximity.dist_pin_to_pin_temp2 is read and dropped by this script but
    is not created in any hunk of this patch; presumably it is produced by
    a separate run of the first statement. TODO: confirm.
  * Context-line whitespace was restored by hand, including one
    whitespace-only context line in the first schema.yml hunk that its
    `-1,6` header implies. Verify against the target files before applying.

 aws-athena/ctas/proximity-dist_pin_to_pin.sql | 62 ++++++++++++++-----
 dbt/models/proximity/schema.yml               |  6 +-
 2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/aws-athena/ctas/proximity-dist_pin_to_pin.sql b/aws-athena/ctas/proximity-dist_pin_to_pin.sql
index 8e7c39cc1..1ec34ac97 100644
--- a/aws-athena/ctas/proximity-dist_pin_to_pin.sql
+++ b/aws-athena/ctas/proximity-dist_pin_to_pin.sql
@@ -1,14 +1,12 @@
 -- CTAS to find the 3 nearest neighbor PINs for every PIN for every year
-{{
-    config(
-        materialized='table',
-        partitioned_by=['year'],
-        bucketed_by=['pin10'],
-        bucket_count=1
-    )
-}}
-
-WITH dist_pin_to_pin AS (
+CREATE TABLE IF NOT EXISTS proximity.dist_pin_to_pin_temp
+WITH (
+    FORMAT = 'Parquet',
+    WRITE_COMPRESSION = 'SNAPPY',
+    EXTERNAL_LOCATION
+    = 's3://ccao-athena-results-us-east-1/dist_pin_to_pin_temp',
+    PARTITIONED_BY = ARRAY['year']
+) AS (
     WITH pin_locations AS (
         SELECT
             pin10,
@@ -16,7 +14,7 @@ WITH dist_pin_to_pin AS (
             x_3435,
             y_3435,
             ST_POINT(x_3435, y_3435) AS point
-        FROM {{ source('spatial', 'parcel') }}
+        FROM spatial.parcel
     ),
 
     most_recent_pins AS (
@@ -27,7 +25,7 @@ WITH dist_pin_to_pin AS (
             x_3435,
             y_3435,
             RANK() OVER (PARTITION BY pin10 ORDER BY year DESC) AS r
-        FROM {{ source('spatial', 'parcel') }}
+        FROM spatial.parcel
     ),
 
     distinct_pins AS (
@@ -89,7 +87,7 @@ WITH dist_pin_to_pin AS (
             WHEN pd.row_num = 4 THEN pd.dist
         END) AS nearest_neighbor_3_dist_ft,
         pcl.year
-    FROM {{ source('spatial', 'parcel') }} AS pcl
+    FROM spatial.parcel AS pcl
     INNER JOIN pin_dists AS pd
         ON pcl.x_3435 = pd.x_3435
         AND pcl.y_3435 = pd.y_3435
@@ -101,4 +99,38 @@ WITH dist_pin_to_pin AS (
         AND nearest_neighbor_3_pin10 IS NOT NULL
-)
+);
 
-SELECT * FROM dist_pin_to_pin
+-- Consolidate unbucketed files into single files and delete temp table
+CREATE TABLE IF NOT EXISTS proximity.dist_pin_to_pin
+WITH (
+    format='Parquet',
+    write_compression = 'SNAPPY',
+    external_location='s3://ccao-athena-ctas-us-east-1/proximity/dist_pin_to_pin',
+    partitioned_by = ARRAY['year'],
+    bucketed_by = ARRAY['pin10'],
+    bucket_count = 1
+) AS (
+    SELECT
+        pin10,
+        nearest_neighbor_1_pin10,
+        nearest_neighbor_1_dist_ft,
+        nearest_neighbor_2_pin10,
+        nearest_neighbor_2_dist_ft,
+        nearest_neighbor_3_pin10,
+        nearest_neighbor_3_dist_ft,
+        year
+    FROM proximity.dist_pin_to_pin_temp
+    UNION
+    SELECT
+        pin10,
+        nearest_neighbor_1_pin10,
+        nearest_neighbor_1_dist_ft,
+        nearest_neighbor_2_pin10,
+        nearest_neighbor_2_dist_ft,
+        nearest_neighbor_3_pin10,
+        nearest_neighbor_3_dist_ft,
+        year
+    FROM proximity.dist_pin_to_pin_temp2
+);
+
+DROP TABLE IF EXISTS proximity.dist_pin_to_pin_temp;
+DROP TABLE IF EXISTS proximity.dist_pin_to_pin_temp2;
diff --git a/dbt/models/proximity/schema.yml b/dbt/models/proximity/schema.yml
index f435a48f6..9159ade85 100644
--- a/dbt/models/proximity/schema.yml
+++ b/dbt/models/proximity/schema.yml
@@ -1,6 +1,11 @@
 version: 2
 
 
+sources:
+  - name: proximity
+    tables:
+      - name: dist_pin_to_pin
+
 models:
   - name: proximity.cnt_pin_num_bus_stop
   - name: proximity.cnt_pin_num_foreclosure
@@ -17,7 +22,6 @@ models:
   - name: proximity.dist_pin_to_metra_route
   - name: proximity.dist_pin_to_metra_stop
   - name: proximity.dist_pin_to_park
-  - name: proximity.dist_pin_to_pin
   - name: proximity.dist_pin_to_railroad
   - name: proximity.dist_pin_to_water
   - name: proximity.vw_pin10_proximity