Skip to content

Commit

Permalink
Revert dist_pin_to_pin to a source
Browse files Browse the repository at this point in the history
  • Loading branch information
Jean Cochrane committed Sep 11, 2023
1 parent 33e8fa8 commit 574d69d
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 15 deletions.
60 changes: 46 additions & 14 deletions aws-athena/ctas/proximity-dist_pin_to_pin.sql
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
-- CTAS to find the 3 nearest neighbor PINs for every PIN for every year
{{
config(
materialized='table',
partitioned_by=['year'],
bucketed_by=['pin10'],
bucket_count=1
)
}}

WITH dist_pin_to_pin AS (
CREATE TABLE IF NOT EXISTS proximity.dist_pin_to_pin_temp
WITH (
FORMAT = 'Parquet',
WRITE_COMPRESSION = 'SNAPPY',
EXTERNAL_LOCATION
= 's3://ccao-athena-results-us-east-1/dist_pin_to_pin_temp',
PARTITIONED_BY = ARRAY['year']
) AS (
WITH pin_locations AS (
SELECT
pin10,
year,
x_3435,
y_3435,
ST_POINT(x_3435, y_3435) AS point
FROM {{ source('spatial', 'parcel') }}
FROM spatial.parcel
),

most_recent_pins AS (
Expand All @@ -27,7 +25,7 @@ WITH dist_pin_to_pin AS (
x_3435,
y_3435,
RANK() OVER (PARTITION BY pin10 ORDER BY year DESC) AS r
FROM {{ source('spatial', 'parcel') }}
FROM spatial.parcel
),

distinct_pins AS (
Expand Down Expand Up @@ -89,7 +87,7 @@ WITH dist_pin_to_pin AS (
WHEN pd.row_num = 4 THEN pd.dist
END) AS nearest_neighbor_3_dist_ft,
pcl.year
FROM {{ source('spatial', 'parcel') }} AS pcl
FROM spatial.parcel AS pcl
INNER JOIN pin_dists AS pd
ON pcl.x_3435 = pd.x_3435
AND pcl.y_3435 = pd.y_3435
Expand All @@ -101,4 +99,38 @@ WITH dist_pin_to_pin AS (
AND nearest_neighbor_3_pin10 IS NOT NULL
)

SELECT * FROM dist_pin_to_pin
-- Consolidate unbucketed files into single files and delete temp table
CREATE TABLE IF NOT EXISTS proximity.dist_pin_to_pin
WITH (
format='Parquet',
write_compression = 'SNAPPY',
external_location='s3://ccao-athena-ctas-us-east-1/proximity/dist_pin_to_pin',
partitioned_by = ARRAY['year'],
bucketed_by = ARRAY['pin10'],
bucket_count = 1
) AS (
SELECT
pin10,
nearest_neighbor_1_pin10,
nearest_neighbor_1_dist_ft,
nearest_neighbor_2_pin10,
nearest_neighbor_2_dist_ft,
nearest_neighbor_3_pin10,
nearest_neighbor_3_dist_ft,
year
FROM proximity.dist_pin_to_pin_temp
UNION
SELECT
pin10,
nearest_neighbor_1_pin10,
nearest_neighbor_1_dist_ft,
nearest_neighbor_2_pin10,
nearest_neighbor_2_dist_ft,
nearest_neighbor_3_pin10,
nearest_neighbor_3_dist_ft,
year
FROM proximity.dist_pin_to_pin_temp2
);

DROP TABLE IF EXISTS proximity.dist_pin_to_pin_temp
DROP TABLE IF EXISTS proximity.dist_pin_to_pin_temp2
6 changes: 5 additions & 1 deletion dbt/models/proximity/schema.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
version: 2


sources:
- name: proximity
tables:
- name: dist_pin_to_pin

models:
- name: proximity.cnt_pin_num_bus_stop
- name: proximity.cnt_pin_num_foreclosure
Expand All @@ -17,7 +22,6 @@ models:
- name: proximity.dist_pin_to_metra_route
- name: proximity.dist_pin_to_metra_stop
- name: proximity.dist_pin_to_park
- name: proximity.dist_pin_to_pin
- name: proximity.dist_pin_to_railroad
- name: proximity.dist_pin_to_water
- name: proximity.vw_pin10_proximity
Expand Down

0 comments on commit 574d69d

Please sign in to comment.