From 1bf7a957a3fd98809425030734eca99349b721e4 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 17:05:30 +0000 Subject: [PATCH 001/126] Try adding new view --- .../default/default.vw_experimental_sales.sql | 227 ++++++++++++++++++ dbt/models/default/docs.md | 8 +- .../schema/default.vw_experimental_sales.yml | 3 + 3 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 dbt/models/default/default.vw_experimental_sales.sql create mode 100644 dbt/models/default/schema/default.vw_experimental_sales.yml diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql new file mode 100644 index 000000000..fe916a13b --- /dev/null +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -0,0 +1,227 @@ +-- View containing unique, filtered sales +-- Class and township of associated PIN +WITH town_class AS ( + SELECT + par.parid, + REGEXP_REPLACE(par.class, '[^[:alnum:]]', '') AS class, + par.taxyr, + leg.user1 AS township_code, + CONCAT( + leg.user1, SUBSTR(REGEXP_REPLACE(par.nbhd, '([^0-9])', ''), 3, 3) + ) AS nbhd + FROM {{ source('iasworld', 'pardat') }} AS par + LEFT JOIN {{ source('iasworld', 'legdat') }} AS leg + ON par.parid = leg.parid + AND par.taxyr = leg.taxyr + AND leg.cur = 'Y' + AND leg.deactivat IS NULL + WHERE par.cur = 'Y' + AND par.deactivat IS NULL +), + +-- "nopar" isn't entirely accurate for sales associated with only one parcel, +-- so we create our own counter +calculated AS ( + SELECT + instruno, + COUNT(*) AS nopar_calculated + FROM ( + SELECT DISTINCT + parid, + NULLIF(REPLACE(instruno, 'D', ''), '') AS instruno + FROM {{ source('iasworld', 'sales') }} + WHERE deactivat IS NULL + AND cur = 'Y' + ) + GROUP BY instruno +), + +unique_sales AS ( + SELECT + *, + -- Historically, this view excluded sales for a given pin if it had sold + -- within the last 12 months for the same price. This filter allows us + -- to filter out those sales. 
+ COALESCE( + EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365 + FROM ( + SELECT + sales.parid AS pin, + SUBSTR(sales.saledt, 1, 4) AS year, + tc.township_code, + tc.nbhd, + tc.class, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, + CAST(sales.price AS BIGINT) AS sale_price, + sales.salekey AS sale_key, + NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, + NULLIF(sales.instrtyp, '') AS deed_type, + -- "nopar" is number of parcels sold + COALESCE( + sales.nopar > 1 OR calculated.nopar_calculated > 1, + FALSE + ) AS is_multisale, + CASE + WHEN sales.nopar > 1 THEN sales.nopar ELSE + calculated.nopar_calculated + END AS num_parcels_sale, + CASE WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') + THEN NULL + ELSE sales.oldown + END AS seller_name, + CASE WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') + THEN NULL + ELSE sales.own1 + END AS buyer_name, + CASE + WHEN sales.saletype = '0' THEN 'LAND' + WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' + END AS sale_type, + -- Sales are not entirely unique by pin/date so we group all + -- sales by pin/date, then order by descending price + -- and give the top observation a value of 1 for "max_price". + -- We need to order by salekey as well in case of any ties within + -- price, date, and pin. + ROW_NUMBER() OVER ( + PARTITION BY + sales.parid, + sales.saledt, + sales.instrtyp NOT IN ('03', '04', '06') + ORDER BY sales.price DESC, sales.salekey ASC + ) AS max_price, + -- We remove the letter 'D' that trails some document numbers in + -- iasworld.sales since it prevents us from joining to mydec sales. 
+ -- This creates one instance where we have duplicate document + -- numbers, so we sort by sale date (specifically to avoid conflicts + -- with detecting the easliest duplicate sale when there are + -- multiple within one document number, within a year) within the + -- new doument number to identify and remove the sale causing the + -- duplicate document number. + ROW_NUMBER() OVER ( + PARTITION BY + NULLIF(REPLACE(sales.instruno, 'D', ''), ''), + sales.instrtyp NOT IN ('03', '04', '06'), + sales.price > 10000 + ORDER BY sales.saledt ASC, sales.salekey ASC + ) AS bad_doc_no, + -- Some pins sell for the exact same price a few months after + -- they're sold (we need to make sure to only include deed types we + -- want). These sales are unecessary for modeling and may be + -- duplicates. We need to order by salekey as well in case of any + -- ties within price, date, and pin. + LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( + PARTITION BY + sales.parid, + sales.price, + sales.instrtyp NOT IN ('03', '04', '06') + ORDER BY sales.saledt ASC, sales.salekey ASC + ) AS same_price_earlier_date, + -- Historically, this view filtered out sales less than $10k and + -- as well as quit claims, executor deeds, beneficial interests, + -- and NULL deed types. Now we create "legacy" filter columns so + -- that this filtering can reproduced while still allowing all sales + -- into the view. 
+ sales.price <= 10000 AS sale_filter_less_than_10k, + COALESCE( + sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, + FALSE + ) AS sale_filter_deed_type + FROM {{ source('iasworld', 'sales') }} AS sales + LEFT JOIN calculated + ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') + = calculated.instruno + LEFT JOIN + town_class AS tc + ON sales.parid = tc.parid + AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr + WHERE sales.instruno IS NOT NULL + -- Indicates whether a record has been deactivated + AND sales.deactivat IS NULL + AND sales.cur = 'Y' + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( + CURRENT_DATE + ) + AND tc.township_code IS NOT NULL + AND sales.price IS NOT NULL + ) + -- Only use max price by pin/sale date + WHERE max_price = 1 + AND (bad_doc_no = 1 OR is_multisale = TRUE) +), + +mydec_sales AS ( + SELECT * FROM ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + year_of_sale + FROM {{ source('sale', 'mydec') }} + WHERE line_2_total_parcels = 1 -- Remove multisales + ) + /* Some sales in mydec have multiple rows for one pin on a given sale date. + Sometimes they have different dates than iasworld prior to 2021 and when + joined back onto unique_sales will create duplicates by pin/sale date. 
*/ + WHERE num_single_day_sales = 1 + OR (YEAR(mydec_date) > 2020) +), +*/ +max_version_flag AS ( + SELECT + meta_sale_document_num, + MAX(version) AS max_version + FROM {{ source('sale', 'flag') }} + GROUP BY meta_sale_document_num +) + +SELECT + unique_sales.pin, + -- In the past, mydec sale dates were more precise than iasworld dates + -- which had been truncated + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != unique_sales.sale_date + THEN mydec_sales.year_of_sale + ELSE unique_sales.year + END AS year, + unique_sales.township_code, + unique_sales.nbhd, + unique_sales.class, + -- In the past, mydec sale dates were more precise than iasworld dates + -- which had been truncated + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != unique_sales.sale_date + THEN mydec_sales.mydec_date + ELSE unique_sales.sale_date + END AS sale_date, + -- From 2021 on iasWorld uses precise MyDec dates + COALESCE( + mydec_sales.mydec_date IS NOT NULL + OR YEAR(unique_sales.sale_date) >= 2021, + FALSE + ) AS is_mydec_date, + unique_sales.sale_price, + unique_sales.sale_key, + unique_sales.doc_no, + unique_sales.deed_type, + COALESCE(unique_sales.seller_name, mydec_sales.seller_name) AS seller_name, + unique_sales.is_multisale, + unique_sales.num_parcels_sale, + COALESCE(unique_sales.buyer_name, mydec_sales.buyer_name) AS buyer_name, + unique_sales.sale_type, + unique_sales.sale_filter_same_sale_within_365, + unique_sales.sale_filter_less_than_10k, + unique_sales.sale_filter_deed_type, + -- Our sales validation pipeline only validates sales past 2014 due to MyDec + -- limitations. Previous to that values for sv_is_outlier will be NULL, so + -- if we want to both exclude detected outliers and include sales prior to + -- 2014, we need to code everything NULL as FALSE. 
+ COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, +FROM unique_sales +LEFT JOIN mydec_sales + ON unique_sales.doc_no = mydec_sales.doc_no; \ No newline at end of file diff --git a/dbt/models/default/docs.md b/dbt/models/default/docs.md index 1e42cbd4b..b770851f5 100644 --- a/dbt/models/default/docs.md +++ b/dbt/models/default/docs.md @@ -15,6 +15,12 @@ and reporting. **Primary Key**: `year`, `pin`, `card` {% enddocs %} +# vw_experimental_sales + +{% docs experimental_sales%} + +Experimental sales view that grabs sales from iasworld and mydec + # vw_pin_address {% docs view_vw_pin_address %} @@ -146,7 +152,7 @@ Sourced from `iasworld.sales`, which is sourced from - Multicard sales are excluded from `mydec` data because they can't be joined to `iasworld.sales` (which is only parcel-level) without creating duplicates - Sales are unique by `doc_no` if multisales are excluded. When multisales are - *not* excluded, sales are unique by `doc_no` and `pin`. + _not_ excluded, sales are unique by `doc_no` and `pin`. ### Lineage diff --git a/dbt/models/default/schema/default.vw_experimental_sales.yml b/dbt/models/default/schema/default.vw_experimental_sales.yml new file mode 100644 index 000000000..13fc11163 --- /dev/null +++ b/dbt/models/default/schema/default.vw_experimental_sales.yml @@ -0,0 +1,3 @@ +models: + - name: default.experimental_sales + description: '{{ doc("experimental_sales") }}' \ No newline at end of file From 4b154ec8b98742938505725047de369e498030bb Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 17:08:55 +0000 Subject: [PATCH 002/126] Add docs --- dbt/models/default/docs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbt/models/default/docs.md b/dbt/models/default/docs.md index b770851f5..ab10327fd 100644 --- a/dbt/models/default/docs.md +++ b/dbt/models/default/docs.md @@ -21,6 +21,8 @@ and reporting. 
Experimental sales view that grabs sales from iasworld and mydec +{% enddocs %} + # vw_pin_address {% docs view_vw_pin_address %} From b984a63c9ba5d015ad6b94e694162e808f9cc958 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 18:25:30 +0000 Subject: [PATCH 003/126] Remove trailing comma --- dbt/models/default/default.vw_experimental_sales.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index fe916a13b..ea193832d 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -221,7 +221,7 @@ SELECT -- limitations. Previous to that values for sv_is_outlier will be NULL, so -- if we want to both exclude detected outliers and include sales prior to -- 2014, we need to code everything NULL as FALSE. - COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, + COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier FROM unique_sales LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no; \ No newline at end of file From 32bbb734013f52a3e36f900e1f62ca4b565069ab Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 18:28:50 +0000 Subject: [PATCH 004/126] Remove hanging comment --- dbt/models/default/default.vw_experimental_sales.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index ea193832d..4f320fb98 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -167,7 +167,6 @@ mydec_sales AS ( WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) ), -*/ max_version_flag AS ( SELECT meta_sale_document_num, From 72ba94098183cb754e5bf71eaeb89ecb382ce516 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 18:38:24 +0000 
Subject: [PATCH 005/126] remove dead references --- dbt/models/default/default.vw_experimental_sales.sql | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 4f320fb98..b3ed366c4 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -166,13 +166,6 @@ mydec_sales AS ( joined back onto unique_sales will create duplicates by pin/sale date. */ WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) -), -max_version_flag AS ( - SELECT - meta_sale_document_num, - MAX(version) AS max_version - FROM {{ source('sale', 'flag') }} - GROUP BY meta_sale_document_num ) SELECT @@ -220,7 +213,6 @@ SELECT -- limitations. Previous to that values for sv_is_outlier will be NULL, so -- if we want to both exclude detected outliers and include sales prior to -- 2014, we need to code everything NULL as FALSE. 
- COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier FROM unique_sales LEFT JOIN mydec_sales - ON unique_sales.doc_no = mydec_sales.doc_no; \ No newline at end of file + ON unique_sales.doc_no = mydec_sales.doc_no; From 229f28d090af26944520d20f99b210795975082b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 18:49:07 +0000 Subject: [PATCH 006/126] Remove comma --- dbt/models/default/default.vw_experimental_sales.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index b3ed366c4..335be1c78 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -208,7 +208,7 @@ SELECT unique_sales.sale_type, unique_sales.sale_filter_same_sale_within_365, unique_sales.sale_filter_less_than_10k, - unique_sales.sale_filter_deed_type, + unique_sales.sale_filter_deed_type -- Our sales validation pipeline only validates sales past 2014 due to MyDec -- limitations. 
Previous to that values for sv_is_outlier will be NULL, so -- if we want to both exclude detected outliers and include sales prior to From 4ccb9682bbb28d87f8d1016a2ca313e89c5ad32a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 18:59:01 +0000 Subject: [PATCH 007/126] Try to correct mydec CTA --- dbt/models/default/default.vw_experimental_sales.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 335be1c78..12c3cf78a 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -157,6 +157,9 @@ mydec_sales AS ( REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + COUNT() OVER ( + PARTITION BY line_1_primary_pin, line_4_instrument_date + ) AS num_single_day_sales, year_of_sale FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 -- Remove multisales @@ -166,8 +169,7 @@ mydec_sales AS ( joined back onto unique_sales will create duplicates by pin/sale date. 
*/ WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) -) - +), SELECT unique_sales.pin, -- In the past, mydec sale dates were more precise than iasworld dates From 325f42060faaba06968b21709418efad1924af1f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 19:33:14 +0000 Subject: [PATCH 008/126] Remove comma --- dbt/models/default/default.vw_experimental_sales.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 12c3cf78a..31b4964ca 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -169,7 +169,8 @@ mydec_sales AS ( joined back onto unique_sales will create duplicates by pin/sale date. */ WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) -), +) + SELECT unique_sales.pin, -- In the past, mydec sale dates were more precise than iasworld dates From 8e54d9a4c700e48ac7e25f6aa50906e72f737747 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 19:39:20 +0000 Subject: [PATCH 009/126] Add seller name --- dbt/models/default/default.vw_experimental_sales.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 31b4964ca..c625a5435 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -157,6 +157,8 @@ mydec_sales AS ( REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, COUNT() OVER ( PARTITION BY line_1_primary_pin, line_4_instrument_date ) AS num_single_day_sales, From 8218d6ecd1d568b99c5242d9a4267b777327b1fc Mon Sep 17 00:00:00 2001 From: 
Michael Wagner Date: Mon, 9 Sep 2024 20:03:37 +0000 Subject: [PATCH 010/126] Try some coalesces --- .../default/default.vw_experimental_sales.sql | 214 ++++++------------ 1 file changed, 73 insertions(+), 141 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index c625a5435..85528a5e0 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -36,29 +36,53 @@ calculated AS ( GROUP BY instruno ), +all_mydec_sales AS ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + CAST(line_5_sale_price AS BIGINT) AS sale_price, + year_of_sale, + line_2_total_parcels + FROM {{ source('sale', 'mydec') }} +), + unique_sales AS ( SELECT - *, - -- Historically, this view excluded sales for a given pin if it had sold - -- within the last 12 months for the same price. This filter allows us - -- to filter out those sales. 
- COALESCE( - EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365 + COALESCE(iasw.parid, mydec.pin) AS pin, + COALESCE(iasw.year, mydec.year_of_sale) AS year, + tc.township_code, + tc.nbhd, + tc.class, + COALESCE(iasw.sale_date, mydec.mydec_date) AS sale_date, + COALESCE(iasw.sale_price, mydec.sale_price) AS sale_price, + iasw.sale_key, + COALESCE(iasw.doc_no, mydec.doc_no) AS doc_no, + iasw.deed_type, + COALESCE(iasw.seller_name, mydec.seller_name) AS seller_name, + COALESCE(iasw.buyer_name, mydec.buyer_name) AS buyer_name, + COALESCE(iasw.is_multisale, mydec.line_2_total_parcels > 1) AS is_multisale, + COALESCE(iasw.num_parcels_sale, mydec.line_2_total_parcels) AS num_parcels_sale, + iasw.sale_type, + iasw.sale_filter_same_sale_within_365, + iasw.sale_filter_less_than_10k, + iasw.sale_filter_deed_type, + CASE + WHEN iasw.parid IS NULL THEN 'MyDec' + WHEN mydec.pin IS NULL THEN 'iasWorld' + ELSE 'Both' + END AS data_source FROM ( SELECT - sales.parid AS pin, + sales.parid, SUBSTR(sales.saledt, 1, 4) AS year, - tc.township_code, - tc.nbhd, - tc.class, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, NULLIF(sales.instrtyp, '') AS deed_type, - -- "nopar" is number of parcels sold COALESCE( sales.nopar > 1 OR calculated.nopar_calculated > 1, FALSE @@ -79,50 +103,13 @@ unique_sales AS ( WHEN sales.saletype = '0' THEN 'LAND' WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' END AS sale_type, - -- Sales are not entirely unique by pin/date so we group all - -- sales by pin/date, then order by descending price - -- and give the top observation a value of 1 for "max_price". - -- We need to order by salekey as well in case of any ties within - -- price, date, and pin. 
- ROW_NUMBER() OVER ( - PARTITION BY - sales.parid, - sales.saledt, - sales.instrtyp NOT IN ('03', '04', '06') - ORDER BY sales.price DESC, sales.salekey ASC - ) AS max_price, - -- We remove the letter 'D' that trails some document numbers in - -- iasworld.sales since it prevents us from joining to mydec sales. - -- This creates one instance where we have duplicate document - -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the easliest duplicate sale when there are - -- multiple within one document number, within a year) within the - -- new doument number to identify and remove the sale causing the - -- duplicate document number. - ROW_NUMBER() OVER ( - PARTITION BY - NULLIF(REPLACE(sales.instruno, 'D', ''), ''), - sales.instrtyp NOT IN ('03', '04', '06'), - sales.price > 10000 - ORDER BY sales.saledt ASC, sales.salekey ASC - ) AS bad_doc_no, - -- Some pins sell for the exact same price a few months after - -- they're sold (we need to make sure to only include deed types we - -- want). These sales are unecessary for modeling and may be - -- duplicates. We need to order by salekey as well in case of any - -- ties within price, date, and pin. - LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( - PARTITION BY - sales.parid, - sales.price, - sales.instrtyp NOT IN ('03', '04', '06') - ORDER BY sales.saledt ASC, sales.salekey ASC - ) AS same_price_earlier_date, - -- Historically, this view filtered out sales less than $10k and - -- as well as quit claims, executor deeds, beneficial interests, - -- and NULL deed types. Now we create "legacy" filter columns so - -- that this filtering can reproduced while still allowing all sales - -- into the view. 
+ COALESCE( + EXTRACT(DAY FROM sale_date - LAG(sale_date) OVER ( + PARTITION BY sales.parid, sales.price + ORDER BY sale_date + )) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365, sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, @@ -130,94 +117,39 @@ unique_sales AS ( ) AS sale_filter_deed_type FROM {{ source('iasworld', 'sales') }} AS sales LEFT JOIN calculated - ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') - = calculated.instruno - LEFT JOIN - town_class AS tc - ON sales.parid = tc.parid - AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr + ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') = calculated.instruno WHERE sales.instruno IS NOT NULL - -- Indicates whether a record has been deactivated AND sales.deactivat IS NULL AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( - CURRENT_DATE - ) - AND tc.township_code IS NOT NULL + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR(CURRENT_DATE) AND sales.price IS NOT NULL - ) - -- Only use max price by pin/sale date - WHERE max_price = 1 - AND (bad_doc_no = 1 OR is_multisale = TRUE) -), - -mydec_sales AS ( - SELECT * FROM ( - SELECT - REPLACE(document_number, 'D', '') AS doc_no, - REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - COUNT() OVER ( - PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales, - year_of_sale - FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 -- Remove multisales - ) - /* Some sales in mydec have multiple rows for one pin on a given sale date. - Sometimes they have different dates than iasworld prior to 2021 and when - joined back onto unique_sales will create duplicates by pin/sale date. 
*/ - WHERE num_single_day_sales = 1 - OR (YEAR(mydec_date) > 2020) + ) iasw + FULL OUTER JOIN all_mydec_sales mydec + ON iasw.doc_no = mydec.doc_no + LEFT JOIN town_class AS tc + ON COALESCE(iasw.parid, mydec.pin) = tc.parid + AND COALESCE(iasw.year, mydec.year_of_sale) = tc.taxyr + WHERE tc.township_code IS NOT NULL ) SELECT - unique_sales.pin, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.sale_date - THEN mydec_sales.year_of_sale - ELSE unique_sales.year - END AS year, - unique_sales.township_code, - unique_sales.nbhd, - unique_sales.class, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.sale_date - THEN mydec_sales.mydec_date - ELSE unique_sales.sale_date - END AS sale_date, - -- From 2021 on iasWorld uses precise MyDec dates - COALESCE( - mydec_sales.mydec_date IS NOT NULL - OR YEAR(unique_sales.sale_date) >= 2021, - FALSE - ) AS is_mydec_date, - unique_sales.sale_price, - unique_sales.sale_key, - unique_sales.doc_no, - unique_sales.deed_type, - COALESCE(unique_sales.seller_name, mydec_sales.seller_name) AS seller_name, - unique_sales.is_multisale, - unique_sales.num_parcels_sale, - COALESCE(unique_sales.buyer_name, mydec_sales.buyer_name) AS buyer_name, - unique_sales.sale_type, - unique_sales.sale_filter_same_sale_within_365, - unique_sales.sale_filter_less_than_10k, - unique_sales.sale_filter_deed_type - -- Our sales validation pipeline only validates sales past 2014 due to MyDec - -- limitations. Previous to that values for sv_is_outlier will be NULL, so - -- if we want to both exclude detected outliers and include sales prior to - -- 2014, we need to code everything NULL as FALSE. 
-FROM unique_sales -LEFT JOIN mydec_sales - ON unique_sales.doc_no = mydec_sales.doc_no; + pin, + year, + township_code, + nbhd, + class, + sale_date, + sale_price, + sale_key, + doc_no, + deed_type, + seller_name, + buyer_name, + is_multisale, + num_parcels_sale, + sale_type, + sale_filter_same_sale_within_365, + sale_filter_less_than_10k, + sale_filter_deed_type, + data_source +FROM unique_sales; \ No newline at end of file From 483791b575d8dbc26742a2ffe92d6e4dde098828 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 20:34:17 +0000 Subject: [PATCH 011/126] Add souurce sale info --- .../default/default.vw_experimental_sales.sql | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 85528a5e0..d5bfebdd4 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -43,7 +43,7 @@ all_mydec_sales AS ( DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, NULLIF(TRIM(seller_name), '') AS seller_name, NULLIF(TRIM(buyer_name), '') AS buyer_name, - CAST(line_5_sale_price AS BIGINT) AS sale_price, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, year_of_sale, line_2_total_parcels FROM {{ source('sale', 'mydec') }} @@ -63,8 +63,10 @@ unique_sales AS ( iasw.deed_type, COALESCE(iasw.seller_name, mydec.seller_name) AS seller_name, COALESCE(iasw.buyer_name, mydec.buyer_name) AS buyer_name, - COALESCE(iasw.is_multisale, mydec.line_2_total_parcels > 1) AS is_multisale, - COALESCE(iasw.num_parcels_sale, mydec.line_2_total_parcels) AS num_parcels_sale, + COALESCE(iasw.is_multisale, mydec.line_2_total_parcels > 1) + AS is_multisale, + COALESCE(iasw.num_parcels_sale, mydec.line_2_total_parcels) + AS num_parcels_sale, iasw.sale_type, iasw.sale_filter_same_sale_within_365, iasw.sale_filter_less_than_10k, @@ -73,7 +75,11 @@ unique_sales AS 
( WHEN iasw.parid IS NULL THEN 'MyDec' WHEN mydec.pin IS NULL THEN 'iasWorld' ELSE 'Both' - END AS data_source + END AS data_source, + CASE + WHEN iasw.doc_no IS NOT NULL THEN 'iasWorld' + ELSE 'MyDec' + END AS source_sale FROM ( SELECT sales.parid, @@ -117,14 +123,17 @@ unique_sales AS ( ) AS sale_filter_deed_type FROM {{ source('iasworld', 'sales') }} AS sales LEFT JOIN calculated - ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') = calculated.instruno + ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') + = calculated.instruno WHERE sales.instruno IS NOT NULL AND sales.deactivat IS NULL AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR(CURRENT_DATE) + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( + CURRENT_DATE + ) AND sales.price IS NOT NULL - ) iasw - FULL OUTER JOIN all_mydec_sales mydec + ) AS iasw + FULL OUTER JOIN all_mydec_sales AS mydec ON iasw.doc_no = mydec.doc_no LEFT JOIN town_class AS tc ON COALESCE(iasw.parid, mydec.pin) = tc.parid @@ -152,4 +161,4 @@ SELECT sale_filter_less_than_10k, sale_filter_deed_type, data_source -FROM unique_sales; \ No newline at end of file +FROM unique_sales; From a29fd032a681687cdc03664981b9b8dbb17dd5dc Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 9 Sep 2024 20:56:05 +0000 Subject: [PATCH 012/126] Try coalescing --- dbt/models/default/default.vw_experimental_sales.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index d5bfebdd4..8b704de3b 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -56,7 +56,7 @@ unique_sales AS ( tc.township_code, tc.nbhd, tc.class, - COALESCE(iasw.sale_date, mydec.mydec_date) AS sale_date, + COALESCE(iasw.iasw_sale_date, mydec.mydec_date) AS sale_date, COALESCE(iasw.sale_price, mydec.sale_price) AS sale_price, 
iasw.sale_key, COALESCE(iasw.doc_no, mydec.doc_no) AS doc_no, @@ -84,7 +84,7 @@ unique_sales AS ( SELECT sales.parid, SUBSTR(sales.saledt, 1, 4) AS year, - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS iasw_sale_date, CAST(sales.price AS BIGINT) AS sale_price, sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, From cc9b0cdc158bcbd1172b3c7e7dadb015191c6970 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 16:40:04 +0000 Subject: [PATCH 013/126] Update pin sale for outer join --- .../default/default.vw_experimental_sales.sql | 168 +----------------- dbt/models/default/default.vw_pin_sale.sql | 2 +- 2 files changed, 8 insertions(+), 162 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 8b704de3b..0f36ed9cd 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -1,164 +1,10 @@ --- View containing unique, filtered sales --- Class and township of associated PIN -WITH town_class AS ( +WITH mydec_sales as ( SELECT - par.parid, - REGEXP_REPLACE(par.class, '[^[:alnum:]]', '') AS class, - par.taxyr, - leg.user1 AS township_code, - CONCAT( - leg.user1, SUBSTR(REGEXP_REPLACE(par.nbhd, '([^0-9])', ''), 3, 3) - ) AS nbhd - FROM {{ source('iasworld', 'pardat') }} AS par - LEFT JOIN {{ source('iasworld', 'legdat') }} AS leg - ON par.parid = leg.parid - AND par.taxyr = leg.taxyr - AND leg.cur = 'Y' - AND leg.deactivat IS NULL - WHERE par.cur = 'Y' - AND par.deactivat IS NULL -), - --- "nopar" isn't entirely accurate for sales associated with only one parcel, --- so we create our own counter -calculated AS ( - SELECT - instruno, - COUNT(*) AS nopar_calculated - FROM ( - SELECT DISTINCT - parid, - NULLIF(REPLACE(instruno, 'D', ''), '') AS instruno - FROM {{ source('iasworld', 'sales') }} - WHERE 
deactivat IS NULL - AND cur = 'Y' - ) - GROUP BY instruno -), - -all_mydec_sales AS ( - SELECT - REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + REPLACE(document_number, 'D', '') AS doc_no, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + line_11_full_consideration as sale_price, NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - CAST(line_11_full_consideration AS BIGINT) AS sale_price, - year_of_sale, - line_2_total_parcels - FROM {{ source('sale', 'mydec') }} -), - -unique_sales AS ( - SELECT - COALESCE(iasw.parid, mydec.pin) AS pin, - COALESCE(iasw.year, mydec.year_of_sale) AS year, - tc.township_code, - tc.nbhd, - tc.class, - COALESCE(iasw.iasw_sale_date, mydec.mydec_date) AS sale_date, - COALESCE(iasw.sale_price, mydec.sale_price) AS sale_price, - iasw.sale_key, - COALESCE(iasw.doc_no, mydec.doc_no) AS doc_no, - iasw.deed_type, - COALESCE(iasw.seller_name, mydec.seller_name) AS seller_name, - COALESCE(iasw.buyer_name, mydec.buyer_name) AS buyer_name, - COALESCE(iasw.is_multisale, mydec.line_2_total_parcels > 1) - AS is_multisale, - COALESCE(iasw.num_parcels_sale, mydec.line_2_total_parcels) - AS num_parcels_sale, - iasw.sale_type, - iasw.sale_filter_same_sale_within_365, - iasw.sale_filter_less_than_10k, - iasw.sale_filter_deed_type, - CASE - WHEN iasw.parid IS NULL THEN 'MyDec' - WHEN mydec.pin IS NULL THEN 'iasWorld' - ELSE 'Both' - END AS data_source, - CASE - WHEN iasw.doc_no IS NOT NULL THEN 'iasWorld' - ELSE 'MyDec' - END AS source_sale - FROM ( - SELECT - sales.parid, - SUBSTR(sales.saledt, 1, 4) AS year, - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS iasw_sale_date, - CAST(sales.price AS BIGINT) AS sale_price, - sales.salekey AS sale_key, - NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, - NULLIF(sales.instrtyp, '') AS deed_type, - COALESCE( - sales.nopar > 1 OR 
calculated.nopar_calculated > 1, - FALSE - ) AS is_multisale, - CASE - WHEN sales.nopar > 1 THEN sales.nopar ELSE - calculated.nopar_calculated - END AS num_parcels_sale, - CASE WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') - THEN NULL - ELSE sales.oldown - END AS seller_name, - CASE WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') - THEN NULL - ELSE sales.own1 - END AS buyer_name, - CASE - WHEN sales.saletype = '0' THEN 'LAND' - WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' - END AS sale_type, - COALESCE( - EXTRACT(DAY FROM sale_date - LAG(sale_date) OVER ( - PARTITION BY sales.parid, sales.price - ORDER BY sale_date - )) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365, - sales.price <= 10000 AS sale_filter_less_than_10k, - COALESCE( - sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, - FALSE - ) AS sale_filter_deed_type - FROM {{ source('iasworld', 'sales') }} AS sales - LEFT JOIN calculated - ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') - = calculated.instruno - WHERE sales.instruno IS NOT NULL - AND sales.deactivat IS NULL - AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( - CURRENT_DATE - ) - AND sales.price IS NOT NULL - ) AS iasw - FULL OUTER JOIN all_mydec_sales AS mydec - ON iasw.doc_no = mydec.doc_no - LEFT JOIN town_class AS tc - ON COALESCE(iasw.parid, mydec.pin) = tc.parid - AND COALESCE(iasw.year, mydec.year_of_sale) = tc.taxyr - WHERE tc.township_code IS NOT NULL -) - -SELECT - pin, - year, - township_code, - nbhd, - class, - sale_date, - sale_price, - sale_key, - doc_no, - deed_type, - seller_name, - buyer_name, - is_multisale, - num_parcels_sale, - sale_type, - sale_filter_same_sale_within_365, - sale_filter_less_than_10k, - sale_filter_deed_type, - data_source -FROM unique_sales; + NULLIF(TRIM(buyer_name), '') AS buyer_name + FROM sale.mydec +) \ No newline at end of file diff --git a/dbt/models/default/default.vw_pin_sale.sql 
b/dbt/models/default/default.vw_pin_sale.sql index 6aa5e146f..d4a01930c 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -335,7 +335,7 @@ SELECT sales_val.sv_run_id, sales_val.sv_version FROM unique_sales -LEFT JOIN mydec_sales +OUTER JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val ON unique_sales.doc_no = sales_val.meta_sale_document_num; From 92e136859f8429b1bcf93aa6e99a02645cd56edb Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 18:00:16 +0000 Subject: [PATCH 014/126] Aadd full outer join --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d4a01930c..f94aba0e2 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -335,7 +335,7 @@ SELECT sales_val.sv_run_id, sales_val.sv_version FROM unique_sales -OUTER JOIN mydec_sales +FULL OUTER JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val ON unique_sales.doc_no = sales_val.meta_sale_document_num; From a0fc872d4d9591a2a433460d29f9c0d015dee410 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 18:01:00 +0000 Subject: [PATCH 015/126] Add indicator --- dbt/models/default/default.vw_pin_sale.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index f94aba0e2..60b830a70 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,6 +257,7 @@ sales_val AS ( ) SELECT + COALESCE(unique_sales.doc_no, mydec_sales.doc_no) AS doc_no, unique_sales.pin, -- In the past, mydec sale dates were more precise than iasworld dates -- which had been truncated @@ -333,9 +334,14 @@ SELECT 
sales_val.sv_outlier_reason2, sales_val.sv_outlier_reason3, sales_val.sv_run_id, - sales_val.sv_version + sales_val.sv_version, + CASE + WHEN unique_sales.doc_no IS NULL THEN 'Only in mydec_sales' + WHEN mydec_sales.doc_no IS NULL THEN 'Only in unique_sales' + ELSE 'In both' + END AS source_indicator FROM unique_sales FULL OUTER JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val - ON unique_sales.doc_no = sales_val.meta_sale_document_num; + ON COALESCE(unique_sales.doc_no, mydec_sales.doc_no) = sales_val.meta_sale_document_num; \ No newline at end of file From 236d4fddf77f99a51de92ea02714fc72d918c696 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 18:06:09 +0000 Subject: [PATCH 016/126] Try different indicator --- dbt/models/default/default.vw_pin_sale.sql | 24 ++++++---------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 60b830a70..b5d34f9d2 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,10 +257,7 @@ sales_val AS ( ) SELECT - COALESCE(unique_sales.doc_no, mydec_sales.doc_no) AS doc_no, unique_sales.pin, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -271,8 +268,6 @@ SELECT unique_sales.township_code, unique_sales.nbhd, unique_sales.class, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -280,7 +275,6 @@ SELECT THEN mydec_sales.mydec_date ELSE unique_sales.sale_date END AS sale_date, - -- From 2021 on iasWorld uses precise MyDec dates COALESCE( mydec_sales.mydec_date IS NOT NULL OR YEAR(unique_sales.sale_date) >= 2021, @@ -298,16 +292,12 @@ SELECT unique_sales.sale_filter_same_sale_within_365, unique_sales.sale_filter_less_than_10k, 
unique_sales.sale_filter_deed_type, - -- Our sales validation pipeline only validates sales past 2014 due to MyDec - -- limitations. Previous to that values for sv_is_outlier will be NULL, so - -- if we want to both exclude detected outliers and include sales prior to - -- 2014, we need to code everything NULL as FALSE. COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, mydec_sales.mydec_deed_type, mydec_sales.sale_filter_ptax_flag, mydec_sales.mydec_property_advertised, mydec_sales.mydec_is_installment_contract_fulfilled, - mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, -- noqa + mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, mydec_sales.mydec_is_transfer_of_less_than_100_percent_interest, mydec_sales.mydec_is_court_ordered_sale, mydec_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -316,7 +306,7 @@ SELECT mydec_sales.mydec_is_bank_reo_real_estate_owned, mydec_sales.mydec_is_auction_sale, mydec_sales.mydec_is_seller_buyer_a_relocation_company, - mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, -- noqa + mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, mydec_sales.mydec_is_buyer_a_real_estate_investment_trust, mydec_sales.mydec_is_buyer_a_pension_fund, mydec_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -335,13 +325,11 @@ SELECT sales_val.sv_outlier_reason3, sales_val.sv_run_id, sales_val.sv_version, - CASE - WHEN unique_sales.doc_no IS NULL THEN 'Only in mydec_sales' - WHEN mydec_sales.doc_no IS NULL THEN 'Only in unique_sales' - ELSE 'In both' - END AS source_indicator + -- Add indicator columns for null document numbers + CASE WHEN unique_sales.doc_no IS NULL THEN 1 ELSE 0 END AS is_unique_sales_null, + CASE WHEN mydec_sales.doc_no IS NULL THEN 1 ELSE 0 END AS is_mydec_sales_null FROM unique_sales FULL OUTER JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val - ON 
COALESCE(unique_sales.doc_no, mydec_sales.doc_no) = sales_val.meta_sale_document_num; \ No newline at end of file + ON unique_sales.doc_no = sales_val.meta_sale_document_num; \ No newline at end of file From 10068d8845d8c1c08f6179e7e2d6a6461cd432d6 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 20:37:52 +0000 Subject: [PATCH 017/126] Add num sales and is_multisale --- .../default/default.vw_experimental_sales.sql | 93 ++++++++++++++++++- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 0f36ed9cd..721f625c4 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -1,10 +1,97 @@ -WITH mydec_sales as ( +WITH +-- "nopar" isn't entirely accurate for sales associated with only one parcel, +-- so we create our own counter +calculated AS ( + SELECT + instruno, + COUNT(*) AS nopar_calculated + FROM ( + SELECT DISTINCT + parid, + NULLIF(REPLACE(instruno, 'D', ''), '') AS instruno + FROM {{ source('iasworld', 'sales') }} + WHERE deactivat IS NULL + AND cur = 'Y' + ) + GROUP BY instruno +), + +ias_sales AS ( + SELECT + parid as pin, + NULLIF(REPLACE(sales.instruno, 'D', ''), '') as doc_no, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, + CAST(sales.price AS BIGINT) AS sale_price, + COALESCE( + sales.nopar > 1 OR calculated.nopar_calculated > 1, + FALSE + ) AS is_multisale, + CASE + WHEN sales.nopar > 1 THEN sales.nopar ELSE + calculated.nopar_calculated + END AS num_parcels_sale, + CASE WHEN TRIM(oldown) IN ('', 'MISSING SELLER NAME') + THEN NULL + ELSE oldown + END AS seller_name, + CASE WHEN TRIM(own1) IN ('', 'MISSING BUYER NAME') + THEN NULL + ELSE own1 + END AS buyer_name + FROM iasworld.sales + WHERE + deactivat IS null + and cur = 'Y' + LEFT JOIN calculated.doc_no = calculated.instruno +), +mydec_sales AS ( SELECT 
REPLACE(line_1_primary_pin, '-', '') AS pin, REPLACE(document_number, 'D', '') AS doc_no, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, line_11_full_consideration as sale_price, NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name + NULLIF(TRIM(buyer_name), '') AS buyer_name, + COALESCE( + line_2_total_parcels > 1 + FALSE + ) AS is_multisale, + line_2_total_parcels as num_parcels_sale FROM sale.mydec -) \ No newline at end of file +), + +WITH combined_sales AS ( + -- Select all rows from ias_sales + SELECT + ias.pin, + ias.doc_no, + COALESCE(mydec.sale_date, ias.sale_date) AS sale_date, + CASE WHEN mydec.sale_date IS NOT NULL THEN TRUE ELSE FALSE END AS is_mydec_date, + ias.sale_price, + ias.seller_name, + ias.buyer_name, + ias.is_multisale + ias.num_parcels_sale + 'iasworld' AS source + FROM ias_sales ias + LEFT JOIN mydec_sales mydec ON ias.doc_no = mydec.doc_no + + UNION ALL + + -- Select rows from mydec_sales that don't exist in ias_sales + SELECT + mydec.pin, + mydec.doc_no, + mydec.sale_date, + TRUE AS is_mydec_date, + mydec.sale_price, + mydec.seller_name, + mydec.buyer_name, + mydec.num_parcels_sale + 'mydec' AS source + FROM mydec_sales mydec + LEFT JOIN ias_sales ias ON mydec.doc_no = ias.doc_no + WHERE ias.doc_no IS NULL +) + +SELECT * FROM combined_sales \ No newline at end of file From c2f99cc184813055f3114b4a594a1fb58ffed6c3 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 20:49:43 +0000 Subject: [PATCH 018/126] Fix coalesce --- .../default/default.vw_experimental_sales.sql | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 721f625c4..e8eb407e8 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -1,6 +1,4 @@ WITH --- "nopar" isn't entirely accurate for 
sales associated with only one parcel, --- so we create our own counter calculated AS ( SELECT instruno, @@ -9,40 +7,42 @@ calculated AS ( SELECT DISTINCT parid, NULLIF(REPLACE(instruno, 'D', ''), '') AS instruno - FROM {{ source('iasworld', 'sales') }} + FROM iasworld.sales WHERE deactivat IS NULL AND cur = 'Y' - ) + ) AS distinct_sales GROUP BY instruno ), ias_sales AS ( SELECT - parid as pin, - NULLIF(REPLACE(sales.instruno, 'D', ''), '') as doc_no, + sales.parid AS pin, + NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, COALESCE( - sales.nopar > 1 OR calculated.nopar_calculated > 1, + (sales.nopar > 1 OR calculated.nopar_calculated > 1), FALSE ) AS is_multisale, CASE - WHEN sales.nopar > 1 THEN sales.nopar ELSE - calculated.nopar_calculated + WHEN sales.nopar > 1 THEN sales.nopar + ELSE calculated.nopar_calculated END AS num_parcels_sale, - CASE WHEN TRIM(oldown) IN ('', 'MISSING SELLER NAME') + CASE + WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') THEN NULL - ELSE oldown + ELSE sales.oldown END AS seller_name, - CASE WHEN TRIM(own1) IN ('', 'MISSING BUYER NAME') + CASE + WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') THEN NULL - ELSE own1 + ELSE sales.own1 END AS buyer_name - FROM iasworld.sales + FROM iasworld.sales AS sales + LEFT JOIN calculated ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') WHERE - deactivat IS null - and cur = 'Y' - LEFT JOIN calculated.doc_no = calculated.instruno + sales.deactivat IS NULL + AND sales.cur = 'Y' ), mydec_sales AS ( SELECT @@ -53,14 +53,14 @@ mydec_sales AS ( NULLIF(TRIM(seller_name), '') AS seller_name, NULLIF(TRIM(buyer_name), '') AS buyer_name, COALESCE( - line_2_total_parcels > 1 + line_2_total_parcels > 1, FALSE ) AS is_multisale, line_2_total_parcels as num_parcels_sale FROM sale.mydec ), -WITH combined_sales AS ( +combined_sales AS ( -- Select all rows from ias_sales 
SELECT ias.pin, @@ -70,8 +70,8 @@ WITH combined_sales AS ( ias.sale_price, ias.seller_name, ias.buyer_name, - ias.is_multisale - ias.num_parcels_sale + ias.is_multisale, + ias.num_parcels_sale, 'iasworld' AS source FROM ias_sales ias LEFT JOIN mydec_sales mydec ON ias.doc_no = mydec.doc_no @@ -87,7 +87,8 @@ WITH combined_sales AS ( mydec.sale_price, mydec.seller_name, mydec.buyer_name, - mydec.num_parcels_sale + mydec.is_multisale, + mydec.num_parcels_sale, 'mydec' AS source FROM mydec_sales mydec LEFT JOIN ias_sales ias ON mydec.doc_no = ias.doc_no From 76411348a968b163597396f95466bb81a6781c2a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 10 Sep 2024 20:53:35 +0000 Subject: [PATCH 019/126] Revert pin sale --- dbt/models/default/default.vw_pin_sale.sql | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index b5d34f9d2..5a50b28a5 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -258,6 +258,8 @@ sales_val AS ( SELECT unique_sales.pin, + -- In the past, mydec sale dates were more precise than iasworld dates + -- which had been truncated CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -268,6 +270,8 @@ SELECT unique_sales.township_code, unique_sales.nbhd, unique_sales.class, + -- In the past, mydec sale dates were more precise than iasworld dates + -- which had been truncated CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -275,6 +279,7 @@ SELECT THEN mydec_sales.mydec_date ELSE unique_sales.sale_date END AS sale_date, + -- From 2021 on iasWorld uses precise MyDec dates COALESCE( mydec_sales.mydec_date IS NOT NULL OR YEAR(unique_sales.sale_date) >= 2021, @@ -292,12 +297,16 @@ SELECT unique_sales.sale_filter_same_sale_within_365, unique_sales.sale_filter_less_than_10k, unique_sales.sale_filter_deed_type, + -- Our sales validation pipeline only validates sales past 2014 due to 
MyDec + -- limitations. Previous to that values for sv_is_outlier will be NULL, so + -- if we want to both exclude detected outliers and include sales prior to + -- 2014, we need to code everything NULL as FALSE. COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, mydec_sales.mydec_deed_type, mydec_sales.sale_filter_ptax_flag, mydec_sales.mydec_property_advertised, mydec_sales.mydec_is_installment_contract_fulfilled, - mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, -- noqa mydec_sales.mydec_is_transfer_of_less_than_100_percent_interest, mydec_sales.mydec_is_court_ordered_sale, mydec_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -306,7 +315,7 @@ SELECT mydec_sales.mydec_is_bank_reo_real_estate_owned, mydec_sales.mydec_is_auction_sale, mydec_sales.mydec_is_seller_buyer_a_relocation_company, - mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, -- noqa mydec_sales.mydec_is_buyer_a_real_estate_investment_trust, mydec_sales.mydec_is_buyer_a_pension_fund, mydec_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -324,12 +333,9 @@ SELECT sales_val.sv_outlier_reason2, sales_val.sv_outlier_reason3, sales_val.sv_run_id, - sales_val.sv_version, - -- Add indicator columns for null document numbers - CASE WHEN unique_sales.doc_no IS NULL THEN 1 ELSE 0 END AS is_unique_sales_null, - CASE WHEN mydec_sales.doc_no IS NULL THEN 1 ELSE 0 END AS is_mydec_sales_null + sales_val.sv_version FROM unique_sales -FULL OUTER JOIN mydec_sales +LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val ON unique_sales.doc_no = sales_val.meta_sale_document_num; \ No newline at end of file From b3be3f3e241bd566e52d8a07fe532eac66d08687 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 15:52:37 +0000 Subject: [PATCH 
020/126] Add mydec filters --- .../default/default.vw_experimental_sales.sql | 68 +++++++++++-------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index e8eb407e8..ed969a74a 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -1,4 +1,4 @@ -WITH +WITH calculated AS ( SELECT instruno, @@ -25,61 +25,75 @@ ias_sales AS ( FALSE ) AS is_multisale, CASE - WHEN sales.nopar > 1 THEN sales.nopar + WHEN sales.nopar > 1 THEN sales.nopar ELSE calculated.nopar_calculated END AS num_parcels_sale, - CASE + CASE WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') - THEN NULL + THEN NULL ELSE sales.oldown END AS seller_name, - CASE + CASE WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') THEN NULL ELSE sales.own1 END AS buyer_name FROM iasworld.sales AS sales - LEFT JOIN calculated ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') + LEFT JOIN + calculated + ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') WHERE sales.deactivat IS NULL AND sales.cur = 'Y' ), + mydec_sales AS ( - SELECT - REPLACE(line_1_primary_pin, '-', '') AS pin, - REPLACE(document_number, 'D', '') AS doc_no, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - line_11_full_consideration as sale_price, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - COALESCE( - line_2_total_parcels > 1, - FALSE - ) AS is_multisale, - line_2_total_parcels as num_parcels_sale - FROM sale.mydec + SELECT * + FROM ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + line_11_full_consideration AS sale_price, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + COALESCE( + 
line_2_total_parcels > 1, + FALSE + ) AS is_multisale, + line_2_total_parcels AS num_parcels_sale, + COUNT(*) OVER ( + PARTITION BY line_1_primary_pin, line_4_instrument_date + ) AS num_single_day_sales, + year_of_sale + FROM sale.mydec + WHERE line_2_total_parcels = 1 -- Remove multisales + ) AS derived_table + WHERE num_single_day_sales = 1 + OR (YEAR(sale_date) > 2020) ), combined_sales AS ( -- Select all rows from ias_sales - SELECT + SELECT ias.pin, ias.doc_no, COALESCE(mydec.sale_date, ias.sale_date) AS sale_date, - CASE WHEN mydec.sale_date IS NOT NULL THEN TRUE ELSE FALSE END AS is_mydec_date, + COALESCE(mydec.sale_date IS NOT NULL, FALSE) + AS is_mydec_date, ias.sale_price, ias.seller_name, ias.buyer_name, ias.is_multisale, ias.num_parcels_sale, 'iasworld' AS source - FROM ias_sales ias - LEFT JOIN mydec_sales mydec ON ias.doc_no = mydec.doc_no + FROM ias_sales AS ias + LEFT JOIN mydec_sales AS mydec ON ias.doc_no = mydec.doc_no UNION ALL -- Select rows from mydec_sales that don't exist in ias_sales - SELECT + SELECT mydec.pin, mydec.doc_no, mydec.sale_date, @@ -90,9 +104,9 @@ combined_sales AS ( mydec.is_multisale, mydec.num_parcels_sale, 'mydec' AS source - FROM mydec_sales mydec - LEFT JOIN ias_sales ias ON mydec.doc_no = ias.doc_no + FROM mydec_sales AS mydec + LEFT JOIN ias_sales AS ias ON mydec.doc_no = ias.doc_no WHERE ias.doc_no IS NULL ) -SELECT * FROM combined_sales \ No newline at end of file +SELECT * FROM combined_sales From 76a4ef47b332e85e421eb58007e2c883fb7abd87 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 18:30:45 +0000 Subject: [PATCH 021/126] Add iasworld filters --- .../default/default.vw_experimental_sales.sql | 94 +++++++++++++------ 1 file changed, 64 insertions(+), 30 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index ed969a74a..8a2e37972 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ 
b/dbt/models/default/default.vw_experimental_sales.sql @@ -15,36 +15,70 @@ calculated AS ( ), ias_sales AS ( - SELECT - sales.parid AS pin, - NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, - CAST(sales.price AS BIGINT) AS sale_price, - COALESCE( - (sales.nopar > 1 OR calculated.nopar_calculated > 1), - FALSE - ) AS is_multisale, - CASE - WHEN sales.nopar > 1 THEN sales.nopar - ELSE calculated.nopar_calculated - END AS num_parcels_sale, - CASE - WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') - THEN NULL - ELSE sales.oldown - END AS seller_name, - CASE - WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') - THEN NULL - ELSE sales.own1 - END AS buyer_name - FROM iasworld.sales AS sales - LEFT JOIN - calculated - ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') - WHERE - sales.deactivat IS NULL - AND sales.cur = 'Y' + SELECT * + FROM ( + SELECT + sales.parid AS pin, + NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, + CAST(sales.price AS BIGINT) AS sale_price, + COALESCE( + (sales.nopar > 1 OR calculated.nopar_calculated > 1), + FALSE + ) AS is_multisale, + CASE + WHEN sales.nopar > 1 THEN sales.nopar + ELSE calculated.nopar_calculated + END AS num_parcels_sale, + CASE + WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') + THEN NULL + ELSE sales.oldown + END AS seller_name, + CASE + WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') + THEN NULL + ELSE sales.own1 + END AS buyer_name, + -- Sales are not entirely unique by pin/date so we group all + -- sales by pin/date, then order by descending price + -- and give the top observation a value of 1 for "max_price". + -- We need to order by salekey as well in case of any ties within + -- price, date, and pin. 
+ ROW_NUMBER() OVER ( + PARTITION BY + sales.parid, + sales.saledt, + sales.instrtyp NOT IN ('03', '04', '06') + ORDER BY sales.price DESC, sales.salekey ASC + ) AS max_price, + -- We remove the letter 'D' that trails some document numbers in + -- iasworld.sales since it prevents us from joining to mydec sales. + -- This creates one instance where we have duplicate document + -- numbers, so we sort by sale date (specifically to avoid conflicts + -- with detecting the easliest duplicate sale when there are + -- multiple within one document number, within a year) within the + -- new doument number to identify and remove the sale causing the + -- duplicate document number. + ROW_NUMBER() OVER ( + PARTITION BY + NULLIF(REPLACE(sales.instruno, 'D', ''), ''), + sales.instrtyp NOT IN ('03', '04', '06'), + sales.price > 10000 + ORDER BY sales.saledt ASC, sales.salekey ASC + ) AS bad_doc_no + FROM iasworld.sales AS sales + LEFT JOIN + calculated + ON calculated.instruno + = NULLIF(REPLACE(sales.instruno, 'D', ''), '') + WHERE + sales.deactivat IS NULL + AND sales.cur = 'Y' + ) AS subquery + -- Only use max price by pin/sale date + WHERE max_price = 1 + AND (bad_doc_no = 1 OR is_multisale = TRUE) ), mydec_sales AS ( From 30fc3a3a4c0b5aa38a41152a71b68ee214e92a65 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 18:55:42 +0000 Subject: [PATCH 022/126] Add sale_10k_filter --- .../default/default.vw_experimental_sales.sql | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 8a2e37972..eb95c4a4d 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -40,11 +40,6 @@ ias_sales AS ( THEN NULL ELSE sales.own1 END AS buyer_name, - -- Sales are not entirely unique by pin/date so we group all - -- sales by pin/date, then order by descending price - -- 
and give the top observation a value of 1 for "max_price". - -- We need to order by salekey as well in case of any ties within - -- price, date, and pin. ROW_NUMBER() OVER ( PARTITION BY sales.parid, @@ -52,26 +47,18 @@ ias_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.price DESC, sales.salekey ASC ) AS max_price, - -- We remove the letter 'D' that trails some document numbers in - -- iasworld.sales since it prevents us from joining to mydec sales. - -- This creates one instance where we have duplicate document - -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the easliest duplicate sale when there are - -- multiple within one document number, within a year) within the - -- new doument number to identify and remove the sale causing the - -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY NULLIF(REPLACE(sales.instruno, 'D', ''), ''), sales.instrtyp NOT IN ('03', '04', '06'), sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC - ) AS bad_doc_no + ) AS bad_doc_no, + sales.price <= 10000 AS sale_filter_less_than_10k FROM iasworld.sales AS sales LEFT JOIN calculated - ON calculated.instruno - = NULLIF(REPLACE(sales.instruno, 'D', ''), '') + ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') WHERE sales.deactivat IS NULL AND sales.cur = 'Y' @@ -99,7 +86,8 @@ mydec_sales AS ( COUNT(*) OVER ( PARTITION BY line_1_primary_pin, line_4_instrument_date ) AS num_single_day_sales, - year_of_sale + year_of_sale, + line_11_full_consideration <= 10000 AS sale_filter_less_than_10k FROM sale.mydec WHERE line_2_total_parcels = 1 -- Remove multisales ) AS derived_table @@ -120,6 +108,7 @@ combined_sales AS ( ias.buyer_name, ias.is_multisale, ias.num_parcels_sale, + ias.sale_filter_less_than_10k, 'iasworld' AS source FROM ias_sales AS ias LEFT JOIN mydec_sales AS mydec ON ias.doc_no = mydec.doc_no @@ -137,6 +126,7 @@ combined_sales AS ( mydec.buyer_name, mydec.is_multisale, 
mydec.num_parcels_sale, + mydec.sale_filter_less_than_10k, 'mydec' AS source FROM mydec_sales AS mydec LEFT JOIN ias_sales AS ias ON mydec.doc_no = ias.doc_no From 75e3956a8c1df00892cb852016822abd6c6ca2ae Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 20:16:37 +0000 Subject: [PATCH 023/126] Add more from pin sale --- .../default/default.vw_experimental_sales.sql | 68 +++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index eb95c4a4d..8bc2106d7 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -1,4 +1,29 @@ -WITH +-- Class and township of associated PIN +WITH town_class AS ( + SELECT + par.parid, + REGEXP_REPLACE(par.class, '[^[:alnum:]]', '') AS class, + par.taxyr, + leg.user1 AS township_code, + td.township_name, + CONCAT( + leg.user1, SUBSTR(REGEXP_REPLACE(par.nbhd, '([^0-9])', ''), 3, 3) + ) AS nbhd + FROM iasworld.pardat AS par + LEFT JOIN iasworld.legdat AS leg + ON par.parid = leg.parid + AND par.taxyr = leg.taxyr + AND leg.cur = 'Y' + AND leg.deactivat IS NULL + LEFT JOIN ( + SELECT DISTINCT township_name, township_code + FROM default.vw_pin_universe + ) AS td + ON leg.user1 = td.township_code + WHERE par.cur = 'Y' + AND par.deactivat IS NULL +), + calculated AS ( SELECT instruno, @@ -15,11 +40,20 @@ calculated AS ( ), ias_sales AS ( - SELECT * + SELECT *, + -- Historically, this view excluded sales for a given pin if it had sold + -- within the last 12 months for the same price. This filter allows us + -- to filter out those sales. 
+ COALESCE( + EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365 FROM ( SELECT sales.parid AS pin, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, + tc.class, + tc.township_code, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, COALESCE( @@ -54,14 +88,30 @@ ias_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, + LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( + PARTITION BY + sales.parid, + sales.price, + sales.instrtyp NOT IN ('03', '04', '06') + ORDER BY sales.saledt ASC, sales.salekey ASC + ) AS same_price_earlier_date, sales.price <= 10000 AS sale_filter_less_than_10k FROM iasworld.sales AS sales LEFT JOIN calculated ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') + LEFT JOIN + town_class AS tc + ON sales.parid = tc.parid + AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr WHERE sales.deactivat IS NULL AND sales.cur = 'Y' + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( + CURRENT_DATE + ) + AND tc.township_code IS NOT NULL + AND sales.price IS NOT NULL ) AS subquery -- Only use max price by pin/sale date WHERE max_price = 1 @@ -72,8 +122,10 @@ mydec_sales AS ( SELECT * FROM ( SELECT - REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, + REPLACE(document_number, 'D', '') AS doc_no, + tc.class, + tc.township_code, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, line_11_full_consideration AS sale_price, NULLIF(TRIM(seller_name), '') AS seller_name, @@ -89,6 +141,10 @@ mydec_sales AS ( year_of_sale, line_11_full_consideration <= 10000 AS sale_filter_less_than_10k FROM sale.mydec + LEFT JOIN + town_class AS tc + ON mydec.line_1_primary_pin = tc.parid + AND SUBSTR(mydec.line_4_instrument_date, 1, 4) = tc.taxyr WHERE line_2_total_parcels = 1 -- Remove multisales ) AS derived_table WHERE 
num_single_day_sales = 1 @@ -100,6 +156,8 @@ combined_sales AS ( SELECT ias.pin, ias.doc_no, + ias.township_code, + ias.class, COALESCE(mydec.sale_date, ias.sale_date) AS sale_date, COALESCE(mydec.sale_date IS NOT NULL, FALSE) AS is_mydec_date, @@ -119,6 +177,8 @@ combined_sales AS ( SELECT mydec.pin, mydec.doc_no, + mydec.township_code, + mydec.class, mydec.sale_date, TRUE AS is_mydec_date, mydec.sale_price, @@ -133,4 +193,4 @@ combined_sales AS ( WHERE ias.doc_no IS NULL ) -SELECT * FROM combined_sales +SELECT * FROM combined_sales \ No newline at end of file From bfaee097ec634643bc18d3ada9438a24ab8a26ea Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 20:56:27 +0000 Subject: [PATCH 024/126] Add year column --- dbt/models/default/default.vw_experimental_sales.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index 8bc2106d7..fc4c70e3f 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -54,6 +54,7 @@ ias_sales AS ( NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, tc.class, tc.township_code, + SUBSTR(sales.saledt, 1, 4) AS year, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, COALESCE( @@ -126,6 +127,7 @@ mydec_sales AS ( REPLACE(document_number, 'D', '') AS doc_no, tc.class, tc.township_code, + SUBSTR(line_4_instrument_date, 1, 4) AS year, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, line_11_full_consideration AS sale_price, NULLIF(TRIM(seller_name), '') AS seller_name, @@ -143,9 +145,11 @@ mydec_sales AS ( FROM sale.mydec LEFT JOIN town_class AS tc - ON mydec.line_1_primary_pin = tc.parid + ON REPLACE(line_1_primary_pin, '-', '') = tc.parid AND SUBSTR(mydec.line_4_instrument_date, 1, 4) = tc.taxyr WHERE line_2_total_parcels = 1 -- Remove multisales 
+ AND tc.township_code IS NOT NULL + AND line_11_full_consideration IS NOT NULL ) AS derived_table WHERE num_single_day_sales = 1 OR (YEAR(sale_date) > 2020) @@ -158,6 +162,7 @@ combined_sales AS ( ias.doc_no, ias.township_code, ias.class, + ias.year, COALESCE(mydec.sale_date, ias.sale_date) AS sale_date, COALESCE(mydec.sale_date IS NOT NULL, FALSE) AS is_mydec_date, @@ -179,6 +184,7 @@ combined_sales AS ( mydec.doc_no, mydec.township_code, mydec.class, + mydec.year, mydec.sale_date, TRUE AS is_mydec_date, mydec.sale_price, @@ -193,4 +199,4 @@ combined_sales AS ( WHERE ias.doc_no IS NULL ) -SELECT * FROM combined_sales \ No newline at end of file +SELECT * FROM combined_sales \ No newline at end of file From 7afdce4c16f1ebd1a7dffb75ebfa2aecd89869ee Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 11 Sep 2024 21:52:09 +0000 Subject: [PATCH 025/126] Add sales val cols --- .../default/default.vw_experimental_sales.sql | 52 +++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql index fc4c70e3f..9313e3b2b 100644 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ b/dbt/models/default/default.vw_experimental_sales.sql @@ -89,7 +89,7 @@ ias_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, - LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( + LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, sales.price, @@ -155,6 +155,32 @@ mydec_sales AS ( OR (YEAR(sale_date) > 2020) ), +max_version_flag AS ( + SELECT + meta_sale_document_num, + MAX(version) AS max_version + FROM sale.flag + GROUP BY meta_sale_document_num +), + +sales_val AS ( + SELECT + sf.meta_sale_document_num, + sf.sv_is_outlier, + sf.sv_is_ptax_outlier, + sf.sv_is_heuristic_outlier, + sf.sv_outlier_reason1, + sf.sv_outlier_reason2, + sf.sv_outlier_reason3, + sf.run_id 
AS sv_run_id, + sf.version AS sv_version + FROM + sale.flag AS sf + INNER JOIN max_version_flag AS mv + ON sf.meta_sale_document_num = mv.meta_sale_document_num + AND sf.version = mv.max_version +), + combined_sales AS ( -- Select all rows from ias_sales SELECT @@ -172,9 +198,19 @@ combined_sales AS ( ias.is_multisale, ias.num_parcels_sale, ias.sale_filter_less_than_10k, - 'iasworld' AS source + 'iasworld' AS source, + sales_val.sv_is_outlier, + sales_val.sv_is_ptax_outlier, + sales_val.sv_is_heuristic_outlier, + sales_val.sv_outlier_reason1, + sales_val.sv_outlier_reason2, + sales_val.sv_outlier_reason3, + sales_val.sv_run_id, + sales_val.sv_version FROM ias_sales AS ias LEFT JOIN mydec_sales AS mydec ON ias.doc_no = mydec.doc_no + LEFT JOIN sales_val + ON ias.doc_no = sales_val.meta_sale_document_num UNION ALL @@ -193,8 +229,18 @@ combined_sales AS ( mydec.is_multisale, mydec.num_parcels_sale, mydec.sale_filter_less_than_10k, - 'mydec' AS source + 'mydec' AS source, + sales_val.sv_is_outlier, + sales_val.sv_is_ptax_outlier, + sales_val.sv_is_heuristic_outlier, + sales_val.sv_outlier_reason1, + sales_val.sv_outlier_reason2, + sales_val.sv_outlier_reason3, + sales_val.sv_run_id, + sales_val.sv_version FROM mydec_sales AS mydec + LEFT JOIN sales_val + ON mydec.doc_no = sales_val.meta_sale_document_num LEFT JOIN ias_sales AS ias ON mydec.doc_no = ias.doc_no WHERE ias.doc_no IS NULL ) From 2a824679ab7051831e48a5a74e4954580b90b05c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 12 Sep 2024 19:05:46 +0000 Subject: [PATCH 026/126] Remove same status changes --- eda_new_sales_view.qmd | 99 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 eda_new_sales_view.qmd diff --git a/eda_new_sales_view.qmd b/eda_new_sales_view.qmd new file mode 100644 index 000000000..cc350bb7f --- /dev/null +++ b/eda_new_sales_view.qmd @@ -0,0 +1,99 @@ +--- +title: "Compare sales val specs" +execute: + echo: false + warning: false +format: + 
html: + embed-resources: true + toc: true + toc_float: true + fig-align: center + fontsize: 12pt +knitr: + opts_chunk: + out.width: "100%" +editor: source +--- + +```{r _libraries} +library(ggplot2) +library(tidyr) +library(dplyr) +library(here) +library(noctua) +``` + +```{r _data_ingest} +# Ingest data +noctua_options(cache_size = 10, unload = FALSE) + +AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena()) + +vw_pin_sales <- dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, + "select * from default.vw_pin_sale" +) + +ias_sales <- dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, + "select * from z_ci_583_create_a_sales_view_that_combines_iasworld_mydec_and_ccrd_sales_default.vw_experimental_sales + where source = 'iasworld'" +) + +mydec_sales <- dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, + "select * from z_ci_583_create_a_sales_view_that_combines_iasworld_mydec_and_ccrd_sales_default.vw_experimental_sales + where source = 'mydec'" +) + +``` + + + +```{r} +buckets_vw_pin_sales <- vw_pin_sales %>% + mutate(bucket = floor(sale_price / 50000) * 50000) %>% + group_by(bucket) %>% + summarise(count = n(), .groups = 'drop') + +buckets_ias_sales <- ias_sales %>% + mutate(bucket = floor(sale_price / 50000) * 50000) %>% + group_by(bucket) %>% + summarise(count = n(), .groups = 'drop') + +buckets_mydec_sales <- mydec_sales %>% + mutate(bucket = floor(sale_price / 50000) * 50000) %>% + group_by(bucket) %>% + summarise(count = n(), .groups = 'drop') + +``` + +```{r} +percentage_zero_sales_mydec <- mydec_sales %>% + group_by(year) %>% + summarise( + total_count = n(), + zero_count = sum(sale_price == 0), + percentage_zero = (zero_count / total_count) * 100, + .groups = 'drop' + ) + +percentage_under_10k_mydec <- mydec_sales %>% + group_by(year) %>% + summarise( + total_count = n(), + under10k_count_count = sum(sale_filter_less_than_10k == TRUE), + percentage_zero = (under10k_count_count / total_count) * 100, + .groups = 'drop' + ) + +``` + + + + + + + + From 
6eb10891ad5f0339ac340b09e65ac37b6c5b557f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 16 Sep 2024 15:50:56 +0000 Subject: [PATCH 027/126] Try new view solution --- dbt/models/default/default.vw_pin_sale.sql | 165 ++++++++++++--------- 1 file changed, 96 insertions(+), 69 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 5a50b28a5..2f896778e 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -95,9 +95,9 @@ unique_sales AS ( -- iasworld.sales since it prevents us from joining to mydec sales. -- This creates one instance where we have duplicate document -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the easliest duplicate sale when there are + -- with detecting the earliest duplicate sale when there are -- multiple within one document number, within a year) within the - -- new doument number to identify and remove the sale causing the + -- new document number to identify and remove the sale causing the -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY @@ -108,7 +108,7 @@ unique_sales AS ( ) AS bad_doc_no, -- Some pins sell for the exact same price a few months after -- they're sold (we need to make sure to only include deed types we - -- want). These sales are unecessary for modeling and may be + -- want). These sales are unnecessary for modeling and may be -- duplicates. We need to order by salekey as well in case of any -- ties within price, date, and pin. 
LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( @@ -137,16 +137,12 @@ unique_sales AS ( ON sales.parid = tc.parid AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr WHERE sales.instruno IS NOT NULL - -- Indicates whether a record has been deactivated AND sales.deactivat IS NULL AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( - CURRENT_DATE - ) + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR(CURRENT_DATE) AND tc.township_code IS NOT NULL AND sales.price IS NOT NULL ) - -- Only use max price by pin/sale date WHERE max_price = 1 AND (bad_doc_no = 1 OR is_multisale = TRUE) ), @@ -165,7 +161,7 @@ mydec_sales AS ( COALESCE(line_10a = 1, FALSE) AS mydec_is_installment_contract_fulfilled, COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, -- noqa + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, COALESCE(line_10c = 1, FALSE) AS mydec_is_transfer_of_less_than_100_percent_interest, COALESCE(line_10d = 1, FALSE) @@ -183,7 +179,7 @@ mydec_sales AS ( COALESCE(line_10j = 1, FALSE) AS mydec_is_seller_buyer_a_relocation_company, COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, -- noqa + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, COALESCE(line_10l = 1, FALSE) AS mydec_is_buyer_a_real_estate_investment_trust, COALESCE(line_10m = 1, FALSE) @@ -204,10 +200,7 @@ mydec_sales AS ( AS mydec_homestead_exemption_senior_citizens, line_10s_senior_citizens_assessment_freeze AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - -- Flag for booting outlier PTAX-203 sales from modeling and - -- reporting. 
Used in combination with sale_filter upper and lower, - -- which finds sales more than 2 SD from the year, town, and - -- class mean + -- Flag for booting outlier PTAX-203 sales from modeling and reporting ( COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) @@ -222,9 +215,6 @@ mydec_sales AS ( FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 -- Remove multisales ) - /* Some sales in mydec have multiple rows for one pin on a given sale date. - Sometimes they have different dates than iasworld prior to 2021 and when - joined back onto unique_sales will create duplicates by pin/sale date. */ WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) ), @@ -249,64 +239,100 @@ sales_val AS ( sf.run_id AS sv_run_id, sf.version AS sv_version FROM - {{ source('sale', 'flag') }} - AS sf + {{ source('sale', 'flag') }} AS sf INNER JOIN max_version_flag AS mv ON sf.meta_sale_document_num = mv.meta_sale_document_num AND sf.version = mv.max_version +), + +-- Identify extra sales from MyDec not present in unique_sales +extra_mydec_sales AS ( + SELECT + *, + -- Calculate 'same_price_earlier_date' using LAG + LAG(sale_date) OVER ( + PARTITION BY pin, sale_price + ORDER BY sale_date ASC + ) AS same_price_earlier_date + FROM ( + SELECT + REPLACE(line_1_primary_pin, '-', '') AS pin, + SUBSTR(line_4_instrument_date, 1, 4) AS year, + NULL AS township_code, + NULL AS nbhd, + NULL AS class, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, + NULL AS sale_key, + REPLACE(document_number, 'D', '') AS doc_no, + line_5_instrument_type AS deed_type, + NULLIF(TRIM(seller_name), '') AS seller_name, + FALSE AS is_multisale, + line_2_total_parcels AS num_parcels_sale, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + NULL AS sale_type, + 'mydec' AS source + FROM {{ source('sale', 'mydec') }} + WHERE line_2_total_parcels = 1 + AND REPLACE(document_number, 'D', '') NOT 
IN (SELECT doc_no FROM unique_sales) + ) +), + +-- Now compute the filters for extra MyDec sales +extra_mydec_sales_with_filters AS ( + SELECT + *, + -- Compute 'sale_filter_same_sale_within_365' as in unique_sales + COALESCE( + EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365, + -- Compute 'sale_filter_less_than_10k' + sale_price <= 10000 AS sale_filter_less_than_10k, + FALSE AS sale_filter_deed_type + FROM extra_mydec_sales +), + +-- Combine unique_sales and extra_mydec_sales_with_filters +all_sales AS ( + SELECT + unique_sales.*, + 'iasworld' AS source + FROM unique_sales + + UNION ALL + + SELECT + extra_mydec_sales_with_filters.* + FROM extra_mydec_sales_with_filters ) SELECT - unique_sales.pin, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.sale_date - THEN mydec_sales.year_of_sale - ELSE unique_sales.year - END AS year, - unique_sales.township_code, - unique_sales.nbhd, - unique_sales.class, - -- In the past, mydec sale dates were more precise than iasworld dates - -- which had been truncated - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.sale_date - THEN mydec_sales.mydec_date - ELSE unique_sales.sale_date - END AS sale_date, - -- From 2021 on iasWorld uses precise MyDec dates - COALESCE( - mydec_sales.mydec_date IS NOT NULL - OR YEAR(unique_sales.sale_date) >= 2021, - FALSE - ) AS is_mydec_date, - unique_sales.sale_price, - unique_sales.sale_key, - unique_sales.doc_no, - unique_sales.deed_type, - COALESCE(unique_sales.seller_name, mydec_sales.seller_name) AS seller_name, - unique_sales.is_multisale, - unique_sales.num_parcels_sale, - COALESCE(unique_sales.buyer_name, mydec_sales.buyer_name) AS buyer_name, - unique_sales.sale_type, - unique_sales.sale_filter_same_sale_within_365, - 
unique_sales.sale_filter_less_than_10k, - unique_sales.sale_filter_deed_type, - -- Our sales validation pipeline only validates sales past 2014 due to MyDec - -- limitations. Previous to that values for sv_is_outlier will be NULL, so - -- if we want to both exclude detected outliers and include sales prior to - -- 2014, we need to code everything NULL as FALSE. + all_sales.pin, + all_sales.year, + all_sales.township_code, + all_sales.nbhd, + all_sales.class, + all_sales.sale_date, + (all_sales.source = 'mydec' OR YEAR(all_sales.sale_date) >= 2021) AS is_mydec_date, + all_sales.sale_price, + all_sales.sale_key, + all_sales.doc_no, + all_sales.deed_type, + COALESCE(all_sales.seller_name, mydec_sales.seller_name) AS seller_name, + all_sales.is_multisale, + all_sales.num_parcels_sale, + COALESCE(all_sales.buyer_name, mydec_sales.buyer_name) AS buyer_name, + all_sales.sale_type, + all_sales.sale_filter_same_sale_within_365, + all_sales.sale_filter_less_than_10k, + all_sales.sale_filter_deed_type, COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, mydec_sales.mydec_deed_type, mydec_sales.sale_filter_ptax_flag, mydec_sales.mydec_property_advertised, mydec_sales.mydec_is_installment_contract_fulfilled, - mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, -- noqa + mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, mydec_sales.mydec_is_transfer_of_less_than_100_percent_interest, mydec_sales.mydec_is_court_ordered_sale, mydec_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -315,7 +341,7 @@ SELECT mydec_sales.mydec_is_bank_reo_real_estate_owned, mydec_sales.mydec_is_auction_sale, mydec_sales.mydec_is_seller_buyer_a_relocation_company, - mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, -- noqa + mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, mydec_sales.mydec_is_buyer_a_real_estate_investment_trust, mydec_sales.mydec_is_buyer_a_pension_fund, 
mydec_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -333,9 +359,10 @@ SELECT sales_val.sv_outlier_reason2, sales_val.sv_outlier_reason3, sales_val.sv_run_id, - sales_val.sv_version -FROM unique_sales + sales_val.sv_version, + all_sales.source +FROM all_sales LEFT JOIN mydec_sales - ON unique_sales.doc_no = mydec_sales.doc_no + ON all_sales.doc_no = mydec_sales.doc_no LEFT JOIN sales_val - ON unique_sales.doc_no = sales_val.meta_sale_document_num; \ No newline at end of file + ON all_sales.doc_no = sales_val.meta_sale_document_num; From 78b9c7922fff3d4b271be9215435207d86f152b6 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 16 Sep 2024 16:15:42 +0000 Subject: [PATCH 028/126] Add missing cols --- dbt/models/default/default.vw_pin_sale.sql | 83 ++++++++++++++++++---- 1 file changed, 70 insertions(+), 13 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 2f896778e..8d87008a4 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -248,19 +248,31 @@ sales_val AS ( -- Identify extra sales from MyDec not present in unique_sales extra_mydec_sales AS ( SELECT - *, + m.pin, + m.year, + tc.township_code, + tc.nbhd, + tc.class, + m.sale_date, + m.sale_price, + m.sale_key, + m.doc_no, + m.deed_type, + m.seller_name, + m.is_multisale, + m.num_parcels_sale, + m.buyer_name, + m.sale_type, + 'mydec' AS source, -- Calculate 'same_price_earlier_date' using LAG - LAG(sale_date) OVER ( - PARTITION BY pin, sale_price - ORDER BY sale_date ASC + LAG(m.sale_date) OVER ( + PARTITION BY m.pin, m.sale_price + ORDER BY m.sale_date ASC ) AS same_price_earlier_date FROM ( SELECT REPLACE(line_1_primary_pin, '-', '') AS pin, SUBSTR(line_4_instrument_date, 1, 4) AS year, - NULL AS township_code, - NULL AS nbhd, - NULL AS class, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, CAST(line_11_full_consideration AS BIGINT) AS sale_price, NULL AS sale_key, @@ 
-270,12 +282,14 @@ extra_mydec_sales AS ( FALSE AS is_multisale, line_2_total_parcels AS num_parcels_sale, NULLIF(TRIM(buyer_name), '') AS buyer_name, - NULL AS sale_type, - 'mydec' AS source + NULL AS sale_type FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 AND REPLACE(document_number, 'D', '') NOT IN (SELECT doc_no FROM unique_sales) - ) + ) m + LEFT JOIN town_class AS tc + ON m.pin = tc.parid + AND m.year = tc.taxyr ), -- Now compute the filters for extra MyDec sales @@ -289,21 +303,64 @@ extra_mydec_sales_with_filters AS ( ) AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' sale_price <= 10000 AS sale_filter_less_than_10k, - FALSE AS sale_filter_deed_type + FALSE AS sale_filter_deed_type, + NULL AS max_price, + NULL AS bad_doc_no FROM extra_mydec_sales ), -- Combine unique_sales and extra_mydec_sales_with_filters all_sales AS ( SELECT - unique_sales.*, + pin, + year, + township_code, + nbhd, + class, + sale_date, + sale_price, + sale_key, + doc_no, + deed_type, + seller_name, + is_multisale, + num_parcels_sale, + buyer_name, + sale_type, + max_price, + bad_doc_no, + same_price_earlier_date, + sale_filter_less_than_10k, + sale_filter_deed_type, + sale_filter_same_sale_within_365, 'iasworld' AS source FROM unique_sales UNION ALL SELECT - extra_mydec_sales_with_filters.* + pin, + year, + township_code, + nbhd, + class, + sale_date, + sale_price, + sale_key, + doc_no, + deed_type, + seller_name, + is_multisale, + num_parcels_sale, + buyer_name, + sale_type, + max_price, + bad_doc_no, + same_price_earlier_date, + sale_filter_less_than_10k, + sale_filter_deed_type, + sale_filter_same_sale_within_365, + source FROM extra_mydec_sales_with_filters ) From 777cd39543d41bbb7315429dac241ad259562792 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 16 Sep 2024 19:29:32 +0000 Subject: [PATCH 029/126] Try full outer join --- dbt/models/default/default.vw_pin_sale.sql | 267 ++++++++------------- 1 file changed, 99 
insertions(+), 168 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 8d87008a4..6b5b09036 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -152,10 +152,14 @@ mydec_sales AS ( SELECT REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + SUBSTR(line_4_instrument_date, 1, 4) AS year, line_5_instrument_type AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, NULLIF(TRIM(buyer_name), '') AS buyer_name, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, + line_2_total_parcels AS num_parcels_sale, + FALSE AS is_multisale, COALESCE(line_7_property_advertised = 1, FALSE) AS mydec_property_advertised, COALESCE(line_10a = 1, FALSE) @@ -210,13 +214,12 @@ mydec_sales AS ( ) > 0 AS sale_filter_ptax_flag, COUNT() OVER ( PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales, - year_of_sale + ) AS num_single_day_sales FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 -- Remove multisales ) WHERE num_single_day_sales = 1 - OR (YEAR(mydec_date) > 2020) + OR (YEAR(sale_date) > 2020) ), max_version_flag AS ( @@ -244,171 +247,101 @@ sales_val AS ( ON sf.meta_sale_document_num = mv.meta_sale_document_num AND sf.version = mv.max_version ), - --- Identify extra sales from MyDec not present in unique_sales -extra_mydec_sales AS ( - SELECT - m.pin, - m.year, - tc.township_code, - tc.nbhd, - tc.class, - m.sale_date, - m.sale_price, - m.sale_key, - m.doc_no, - m.deed_type, - m.seller_name, - m.is_multisale, - m.num_parcels_sale, - m.buyer_name, - m.sale_type, - 'mydec' AS source, - -- Calculate 'same_price_earlier_date' using LAG - LAG(m.sale_date) OVER ( - PARTITION BY m.pin, m.sale_price - ORDER BY m.sale_date ASC - ) AS 
same_price_earlier_date - FROM ( - SELECT - REPLACE(line_1_primary_pin, '-', '') AS pin, - SUBSTR(line_4_instrument_date, 1, 4) AS year, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - CAST(line_11_full_consideration AS BIGINT) AS sale_price, - NULL AS sale_key, - REPLACE(document_number, 'D', '') AS doc_no, - line_5_instrument_type AS deed_type, - NULLIF(TRIM(seller_name), '') AS seller_name, - FALSE AS is_multisale, - line_2_total_parcels AS num_parcels_sale, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - NULL AS sale_type - FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 - AND REPLACE(document_number, 'D', '') NOT IN (SELECT doc_no FROM unique_sales) - ) m - LEFT JOIN town_class AS tc - ON m.pin = tc.parid - AND m.year = tc.taxyr -), - --- Now compute the filters for extra MyDec sales -extra_mydec_sales_with_filters AS ( +sales_full_outer AS ( SELECT - *, - -- Compute 'sale_filter_same_sale_within_365' as in unique_sales - COALESCE( - EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365, + COALESCE(u.pin, m.pin) AS pin, + COALESCE(u.year, m.year) AS year, + COALESCE(u.township_code, tc.township_code) AS township_code, + COALESCE(u.nbhd, tc.nbhd) AS nbhd, + COALESCE(u.class, tc.class) AS class, + COALESCE(u.sale_date, m.sale_date) AS sale_date, + COALESCE(u.sale_price, m.sale_price) AS sale_price, + u.sale_key, + COALESCE(u.doc_no, m.doc_no) AS doc_no, + COALESCE(u.deed_type, m.mydec_deed_type) AS deed_type, + COALESCE(u.seller_name, m.seller_name) AS seller_name, + COALESCE(u.is_multisale, m.is_multisale) AS is_multisale, + COALESCE(u.num_parcels_sale, m.num_parcels_sale) AS num_parcels_sale, + COALESCE(u.buyer_name, m.buyer_name) AS buyer_name, + COALESCE(u.sale_type, NULL) AS sale_type, + u.max_price, + u.bad_doc_no, + -- Compute 'same_price_earlier_date' for all sales + LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( + PARTITION BY COALESCE(u.pin, m.pin), 
COALESCE(u.sale_price, m.sale_price) + ORDER BY COALESCE(u.sale_date, m.sale_date) ASC + ) AS same_price_earlier_date, -- Compute 'sale_filter_less_than_10k' - sale_price <= 10000 AS sale_filter_less_than_10k, - FALSE AS sale_filter_deed_type, - NULL AS max_price, - NULL AS bad_doc_no - FROM extra_mydec_sales -), - --- Combine unique_sales and extra_mydec_sales_with_filters -all_sales AS ( - SELECT - pin, - year, - township_code, - nbhd, - class, - sale_date, - sale_price, - sale_key, - doc_no, - deed_type, - seller_name, - is_multisale, - num_parcels_sale, - buyer_name, - sale_type, - max_price, - bad_doc_no, - same_price_earlier_date, - sale_filter_less_than_10k, - sale_filter_deed_type, - sale_filter_same_sale_within_365, - 'iasworld' AS source - FROM unique_sales - - UNION ALL - - SELECT - pin, - year, - township_code, - nbhd, - class, - sale_date, - sale_price, - sale_key, - doc_no, - deed_type, - seller_name, - is_multisale, - num_parcels_sale, - buyer_name, - sale_type, - max_price, - bad_doc_no, - same_price_earlier_date, - sale_filter_less_than_10k, - sale_filter_deed_type, - sale_filter_same_sale_within_365, - source - FROM extra_mydec_sales_with_filters + (COALESCE(u.sale_price, m.sale_price) <= 10000) AS sale_filter_less_than_10k, + -- Compute 'sale_filter_deed_type' + (COALESCE(u.deed_type, m.mydec_deed_type) IN ('03', '04', '06') OR COALESCE(u.deed_type, m.mydec_deed_type) IS NULL) AS sale_filter_deed_type, + -- Compute 'sale_filter_same_sale_within_365' + CASE + WHEN LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( + PARTITION BY COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) + ORDER BY COALESCE(u.sale_date, m.sale_date) ASC + ) IS NOT NULL THEN + EXTRACT(DAY FROM COALESCE(u.sale_date, m.sale_date) - LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( + PARTITION BY COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) + ORDER BY COALESCE(u.sale_date, m.sale_date) ASC + )) <= 365 + ELSE FALSE + END AS sale_filter_same_sale_within_365, + 
CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, + m.* + FROM unique_sales u + FULL OUTER JOIN mydec_sales m ON u.doc_no = m.doc_no + LEFT JOIN town_class tc + ON COALESCE(u.pin, m.pin) = tc.parid + AND COALESCE(u.year, m.year) = tc.taxyr ) + SELECT - all_sales.pin, - all_sales.year, - all_sales.township_code, - all_sales.nbhd, - all_sales.class, - all_sales.sale_date, - (all_sales.source = 'mydec' OR YEAR(all_sales.sale_date) >= 2021) AS is_mydec_date, - all_sales.sale_price, - all_sales.sale_key, - all_sales.doc_no, - all_sales.deed_type, - COALESCE(all_sales.seller_name, mydec_sales.seller_name) AS seller_name, - all_sales.is_multisale, - all_sales.num_parcels_sale, - COALESCE(all_sales.buyer_name, mydec_sales.buyer_name) AS buyer_name, - all_sales.sale_type, - all_sales.sale_filter_same_sale_within_365, - all_sales.sale_filter_less_than_10k, - all_sales.sale_filter_deed_type, + sales_full_outer.pin, + sales_full_outer.year, + sales_full_outer.township_code, + sales_full_outer.nbhd, + sales_full_outer.class, + sales_full_outer.sale_date, + (sales_full_outer.source = 'mydec' OR YEAR(sales_full_outer.sale_date) >= 2021) AS is_mydec_date, + sales_full_outer.sale_price, + sales_full_outer.sale_key, + sales_full_outer.doc_no, + sales_full_outer.deed_type, + sales_full_outer.seller_name, + sales_full_outer.is_multisale, + sales_full_outer.num_parcels_sale, + sales_full_outer.buyer_name, + sales_full_outer.sale_type, + sales_full_outer.sale_filter_same_sale_within_365, + sales_full_outer.sale_filter_less_than_10k, + sales_full_outer.sale_filter_deed_type, COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, - mydec_sales.mydec_deed_type, - mydec_sales.sale_filter_ptax_flag, - mydec_sales.mydec_property_advertised, - mydec_sales.mydec_is_installment_contract_fulfilled, - mydec_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, - mydec_sales.mydec_is_transfer_of_less_than_100_percent_interest, - 
mydec_sales.mydec_is_court_ordered_sale, - mydec_sales.mydec_is_sale_in_lieu_of_foreclosure, - mydec_sales.mydec_is_condemnation, - mydec_sales.mydec_is_short_sale, - mydec_sales.mydec_is_bank_reo_real_estate_owned, - mydec_sales.mydec_is_auction_sale, - mydec_sales.mydec_is_seller_buyer_a_relocation_company, - mydec_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, - mydec_sales.mydec_is_buyer_a_real_estate_investment_trust, - mydec_sales.mydec_is_buyer_a_pension_fund, - mydec_sales.mydec_is_buyer_an_adjacent_property_owner, - mydec_sales.mydec_is_buyer_exercising_an_option_to_purchase, - mydec_sales.mydec_is_simultaneous_trade_of_property, - mydec_sales.mydec_is_sale_leaseback, - mydec_sales.mydec_is_homestead_exemption, - mydec_sales.mydec_homestead_exemption_general_alternative, - mydec_sales.mydec_homestead_exemption_senior_citizens, - mydec_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, + sales_full_outer.mydec_deed_type, + sales_full_outer.sale_filter_ptax_flag, + sales_full_outer.mydec_property_advertised, + sales_full_outer.mydec_is_installment_contract_fulfilled, + sales_full_outer.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + sales_full_outer.mydec_is_transfer_of_less_than_100_percent_interest, + sales_full_outer.mydec_is_court_ordered_sale, + sales_full_outer.mydec_is_sale_in_lieu_of_foreclosure, + sales_full_outer.mydec_is_condemnation, + sales_full_outer.mydec_is_short_sale, + sales_full_outer.mydec_is_bank_reo_real_estate_owned, + sales_full_outer.mydec_is_auction_sale, + sales_full_outer.mydec_is_seller_buyer_a_relocation_company, + sales_full_outer.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + sales_full_outer.mydec_is_buyer_a_real_estate_investment_trust, + sales_full_outer.mydec_is_buyer_a_pension_fund, + sales_full_outer.mydec_is_buyer_an_adjacent_property_owner, + sales_full_outer.mydec_is_buyer_exercising_an_option_to_purchase, + 
sales_full_outer.mydec_is_simultaneous_trade_of_property, + sales_full_outer.mydec_is_sale_leaseback, + sales_full_outer.mydec_is_homestead_exemption, + sales_full_outer.mydec_homestead_exemption_general_alternative, + sales_full_outer.mydec_homestead_exemption_senior_citizens, + sales_full_outer.mydec_homestead_exemption_senior_citizens_assessment_freeze, sales_val.sv_is_outlier, sales_val.sv_is_ptax_outlier, sales_val.sv_is_heuristic_outlier, @@ -417,9 +350,7 @@ SELECT sales_val.sv_outlier_reason3, sales_val.sv_run_id, sales_val.sv_version, - all_sales.source -FROM all_sales -LEFT JOIN mydec_sales - ON all_sales.doc_no = mydec_sales.doc_no + sales_full_outer.source +FROM sales_full_outer LEFT JOIN sales_val - ON all_sales.doc_no = sales_val.meta_sale_document_num; + ON sales_full_outer.doc_no = sales_val.meta_sale_document_num; \ No newline at end of file From 1bd4834c42a1609984dafdba61027489ce323519 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 16 Sep 2024 19:51:04 +0000 Subject: [PATCH 030/126] Add m columns --- dbt/models/default/default.vw_pin_sale.sql | 29 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 6b5b09036..6aa98e4a2 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -288,7 +288,31 @@ sales_full_outer AS ( ELSE FALSE END AS sale_filter_same_sale_within_365, CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, - m.* + -- Include necessary columns from 'm' explicitly + m.mydec_deed_type, + m.sale_filter_ptax_flag, + m.mydec_property_advertised, + m.mydec_is_installment_contract_fulfilled, + m.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + m.mydec_is_transfer_of_less_than_100_percent_interest, + m.mydec_is_court_ordered_sale, + m.mydec_is_sale_in_lieu_of_foreclosure, + m.mydec_is_condemnation, + 
m.mydec_is_short_sale, + m.mydec_is_bank_reo_real_estate_owned, + m.mydec_is_auction_sale, + m.mydec_is_seller_buyer_a_relocation_company, + m.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + m.mydec_is_buyer_a_real_estate_investment_trust, + m.mydec_is_buyer_a_pension_fund, + m.mydec_is_buyer_an_adjacent_property_owner, + m.mydec_is_buyer_exercising_an_option_to_purchase, + m.mydec_is_simultaneous_trade_of_property, + m.mydec_is_sale_leaseback, + m.mydec_is_homestead_exemption, + m.mydec_homestead_exemption_general_alternative, + m.mydec_homestead_exemption_senior_citizens, + m.mydec_homestead_exemption_senior_citizens_assessment_freeze FROM unique_sales u FULL OUTER JOIN mydec_sales m ON u.doc_no = m.doc_no LEFT JOIN town_class tc @@ -296,7 +320,6 @@ sales_full_outer AS ( AND COALESCE(u.year, m.year) = tc.taxyr ) - SELECT sales_full_outer.pin, sales_full_outer.year, @@ -353,4 +376,4 @@ SELECT sales_full_outer.source FROM sales_full_outer LEFT JOIN sales_val - ON sales_full_outer.doc_no = sales_val.meta_sale_document_num; \ No newline at end of file + ON sales_full_outer.doc_no = sales_val.meta_sale_document_num; From 3d99d69731cd22c2cd9474646db180920c524f57 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 18:33:50 +0000 Subject: [PATCH 031/126] Change cte name --- dbt/models/default/default.vw_pin_sale.sql | 142 ++++++++++++--------- 1 file changed, 81 insertions(+), 61 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 6aa98e4a2..ccb2eced4 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -139,7 +139,9 @@ unique_sales AS ( WHERE sales.instruno IS NOT NULL AND sales.deactivat IS NULL AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR(CURRENT_DATE) + AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( + CURRENT_DATE + ) AND tc.township_code IS NOT 
NULL AND sales.price IS NOT NULL ) @@ -247,7 +249,8 @@ sales_val AS ( ON sf.meta_sale_document_num = mv.meta_sale_document_num AND sf.version = mv.max_version ), -sales_full_outer AS ( + +combined_sales AS ( SELECT COALESCE(u.pin, m.pin) AS pin, COALESCE(u.year, m.year) AS year, @@ -268,26 +271,40 @@ sales_full_outer AS ( u.bad_doc_no, -- Compute 'same_price_earlier_date' for all sales LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( - PARTITION BY COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) + PARTITION BY + COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) ORDER BY COALESCE(u.sale_date, m.sale_date) ASC ) AS same_price_earlier_date, -- Compute 'sale_filter_less_than_10k' - (COALESCE(u.sale_price, m.sale_price) <= 10000) AS sale_filter_less_than_10k, + (COALESCE(u.sale_price, m.sale_price) <= 10000) + AS sale_filter_less_than_10k, -- Compute 'sale_filter_deed_type' - (COALESCE(u.deed_type, m.mydec_deed_type) IN ('03', '04', '06') OR COALESCE(u.deed_type, m.mydec_deed_type) IS NULL) AS sale_filter_deed_type, + ( + COALESCE(u.deed_type, m.mydec_deed_type) IN ('03', '04', '06') + OR COALESCE(u.deed_type, m.mydec_deed_type) IS NULL + ) AS sale_filter_deed_type, -- Compute 'sale_filter_same_sale_within_365' CASE WHEN LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( - PARTITION BY COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) - ORDER BY COALESCE(u.sale_date, m.sale_date) ASC - ) IS NOT NULL THEN - EXTRACT(DAY FROM COALESCE(u.sale_date, m.sale_date) - LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( - PARTITION BY COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) + PARTITION BY + COALESCE(u.pin, m.pin), + COALESCE(u.sale_price, m.sale_price) ORDER BY COALESCE(u.sale_date, m.sale_date) ASC - )) <= 365 + ) IS NOT NULL + THEN + EXTRACT( + DAY FROM COALESCE(u.sale_date, m.sale_date) + - LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( + PARTITION BY + COALESCE(u.pin, m.pin), + COALESCE(u.sale_price, m.sale_price) + ORDER BY 
COALESCE(u.sale_date, m.sale_date) ASC + ) + ) <= 365 ELSE FALSE END AS sale_filter_same_sale_within_365, - CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, + CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END + AS source, -- Include necessary columns from 'm' explicitly m.mydec_deed_type, m.sale_filter_ptax_flag, @@ -313,58 +330,61 @@ sales_full_outer AS ( m.mydec_homestead_exemption_general_alternative, m.mydec_homestead_exemption_senior_citizens, m.mydec_homestead_exemption_senior_citizens_assessment_freeze - FROM unique_sales u - FULL OUTER JOIN mydec_sales m ON u.doc_no = m.doc_no - LEFT JOIN town_class tc + FROM unique_sales AS u + FULL OUTER JOIN mydec_sales AS m ON u.doc_no = m.doc_no + LEFT JOIN town_class AS tc ON COALESCE(u.pin, m.pin) = tc.parid AND COALESCE(u.year, m.year) = tc.taxyr ) SELECT - sales_full_outer.pin, - sales_full_outer.year, - sales_full_outer.township_code, - sales_full_outer.nbhd, - sales_full_outer.class, - sales_full_outer.sale_date, - (sales_full_outer.source = 'mydec' OR YEAR(sales_full_outer.sale_date) >= 2021) AS is_mydec_date, - sales_full_outer.sale_price, - sales_full_outer.sale_key, - sales_full_outer.doc_no, - sales_full_outer.deed_type, - sales_full_outer.seller_name, - sales_full_outer.is_multisale, - sales_full_outer.num_parcels_sale, - sales_full_outer.buyer_name, - sales_full_outer.sale_type, - sales_full_outer.sale_filter_same_sale_within_365, - sales_full_outer.sale_filter_less_than_10k, - sales_full_outer.sale_filter_deed_type, + combined_sales.pin, + combined_sales.year, + combined_sales.township_code, + combined_sales.nbhd, + combined_sales.class, + combined_sales.sale_date, + ( + combined_sales.source = 'mydec' + OR YEAR(combined_sales.sale_date) >= 2021 + ) AS is_mydec_date, + combined_sales.sale_price, + combined_sales.sale_key, + combined_sales.doc_no, + combined_sales.deed_type, + combined_sales.seller_name, + combined_sales.is_multisale, + 
combined_sales.num_parcels_sale, + combined_sales.buyer_name, + combined_sales.sale_type, + combined_sales.sale_filter_same_sale_within_365, + combined_sales.sale_filter_less_than_10k, + combined_sales.sale_filter_deed_type, COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, - sales_full_outer.mydec_deed_type, - sales_full_outer.sale_filter_ptax_flag, - sales_full_outer.mydec_property_advertised, - sales_full_outer.mydec_is_installment_contract_fulfilled, - sales_full_outer.mydec_is_sale_between_related_individuals_or_corporate_affiliates, - sales_full_outer.mydec_is_transfer_of_less_than_100_percent_interest, - sales_full_outer.mydec_is_court_ordered_sale, - sales_full_outer.mydec_is_sale_in_lieu_of_foreclosure, - sales_full_outer.mydec_is_condemnation, - sales_full_outer.mydec_is_short_sale, - sales_full_outer.mydec_is_bank_reo_real_estate_owned, - sales_full_outer.mydec_is_auction_sale, - sales_full_outer.mydec_is_seller_buyer_a_relocation_company, - sales_full_outer.mydec_is_seller_buyer_a_financial_institution_or_government_agency, - sales_full_outer.mydec_is_buyer_a_real_estate_investment_trust, - sales_full_outer.mydec_is_buyer_a_pension_fund, - sales_full_outer.mydec_is_buyer_an_adjacent_property_owner, - sales_full_outer.mydec_is_buyer_exercising_an_option_to_purchase, - sales_full_outer.mydec_is_simultaneous_trade_of_property, - sales_full_outer.mydec_is_sale_leaseback, - sales_full_outer.mydec_is_homestead_exemption, - sales_full_outer.mydec_homestead_exemption_general_alternative, - sales_full_outer.mydec_homestead_exemption_senior_citizens, - sales_full_outer.mydec_homestead_exemption_senior_citizens_assessment_freeze, + combined_sales.mydec_deed_type, + combined_sales.sale_filter_ptax_flag, + combined_sales.mydec_property_advertised, + combined_sales.mydec_is_installment_contract_fulfilled, + combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + 
combined_sales.mydec_is_transfer_of_less_than_100_percent_interest, + combined_sales.mydec_is_court_ordered_sale, + combined_sales.mydec_is_sale_in_lieu_of_foreclosure, + combined_sales.mydec_is_condemnation, + combined_sales.mydec_is_short_sale, + combined_sales.mydec_is_bank_reo_real_estate_owned, + combined_sales.mydec_is_auction_sale, + combined_sales.mydec_is_seller_buyer_a_relocation_company, + combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + combined_sales.mydec_is_buyer_a_real_estate_investment_trust, + combined_sales.mydec_is_buyer_a_pension_fund, + combined_sales.mydec_is_buyer_an_adjacent_property_owner, + combined_sales.mydec_is_buyer_exercising_an_option_to_purchase, + combined_sales.mydec_is_simultaneous_trade_of_property, + combined_sales.mydec_is_sale_leaseback, + combined_sales.mydec_is_homestead_exemption, + combined_sales.mydec_homestead_exemption_general_alternative, + combined_sales.mydec_homestead_exemption_senior_citizens, + combined_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, sales_val.sv_is_outlier, sales_val.sv_is_ptax_outlier, sales_val.sv_is_heuristic_outlier, @@ -373,7 +393,7 @@ SELECT sales_val.sv_outlier_reason3, sales_val.sv_run_id, sales_val.sv_version, - sales_full_outer.source -FROM sales_full_outer + combined_sales.source +FROM combined_sales LEFT JOIN sales_val - ON sales_full_outer.doc_no = sales_val.meta_sale_document_num; + ON combined_sales.doc_no = sales_val.meta_sale_document_num; From 50b8d22ed9dc30049cd63db97531c227b5f0b6fb Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 19:04:39 +0000 Subject: [PATCH 032/126] Remove experimental view --- .../default/default.vw_experimental_sales.sql | 248 ------------------ 1 file changed, 248 deletions(-) delete mode 100644 dbt/models/default/default.vw_experimental_sales.sql diff --git a/dbt/models/default/default.vw_experimental_sales.sql b/dbt/models/default/default.vw_experimental_sales.sql deleted file 
mode 100644 index 9313e3b2b..000000000 --- a/dbt/models/default/default.vw_experimental_sales.sql +++ /dev/null @@ -1,248 +0,0 @@ --- Class and township of associated PIN -WITH town_class AS ( - SELECT - par.parid, - REGEXP_REPLACE(par.class, '[^[:alnum:]]', '') AS class, - par.taxyr, - leg.user1 AS township_code, - td.township_name, - CONCAT( - leg.user1, SUBSTR(REGEXP_REPLACE(par.nbhd, '([^0-9])', ''), 3, 3) - ) AS nbhd - FROM iasworld.pardat AS par - LEFT JOIN iasworld.legdat AS leg - ON par.parid = leg.parid - AND par.taxyr = leg.taxyr - AND leg.cur = 'Y' - AND leg.deactivat IS NULL - LEFT JOIN ( - SELECT DISTINCT township_name, township_code - FROM default.vw_pin_universe - ) AS td - ON leg.user1 = td.township_code - WHERE par.cur = 'Y' - AND par.deactivat IS NULL -), - -calculated AS ( - SELECT - instruno, - COUNT(*) AS nopar_calculated - FROM ( - SELECT DISTINCT - parid, - NULLIF(REPLACE(instruno, 'D', ''), '') AS instruno - FROM iasworld.sales - WHERE deactivat IS NULL - AND cur = 'Y' - ) AS distinct_sales - GROUP BY instruno -), - -ias_sales AS ( - SELECT *, - -- Historically, this view excluded sales for a given pin if it had sold - -- within the last 12 months for the same price. This filter allows us - -- to filter out those sales. 
- COALESCE( - EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365 - FROM ( - SELECT - sales.parid AS pin, - NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, - tc.class, - tc.township_code, - SUBSTR(sales.saledt, 1, 4) AS year, - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, - CAST(sales.price AS BIGINT) AS sale_price, - COALESCE( - (sales.nopar > 1 OR calculated.nopar_calculated > 1), - FALSE - ) AS is_multisale, - CASE - WHEN sales.nopar > 1 THEN sales.nopar - ELSE calculated.nopar_calculated - END AS num_parcels_sale, - CASE - WHEN TRIM(sales.oldown) IN ('', 'MISSING SELLER NAME') - THEN NULL - ELSE sales.oldown - END AS seller_name, - CASE - WHEN TRIM(sales.own1) IN ('', 'MISSING BUYER NAME') - THEN NULL - ELSE sales.own1 - END AS buyer_name, - ROW_NUMBER() OVER ( - PARTITION BY - sales.parid, - sales.saledt, - sales.instrtyp NOT IN ('03', '04', '06') - ORDER BY sales.price DESC, sales.salekey ASC - ) AS max_price, - ROW_NUMBER() OVER ( - PARTITION BY - NULLIF(REPLACE(sales.instruno, 'D', ''), ''), - sales.instrtyp NOT IN ('03', '04', '06'), - sales.price > 10000 - ORDER BY sales.saledt ASC, sales.salekey ASC - ) AS bad_doc_no, - LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( - PARTITION BY - sales.parid, - sales.price, - sales.instrtyp NOT IN ('03', '04', '06') - ORDER BY sales.saledt ASC, sales.salekey ASC - ) AS same_price_earlier_date, - sales.price <= 10000 AS sale_filter_less_than_10k - FROM iasworld.sales AS sales - LEFT JOIN - calculated - ON calculated.instruno = NULLIF(REPLACE(sales.instruno, 'D', ''), '') - LEFT JOIN - town_class AS tc - ON sales.parid = tc.parid - AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr - WHERE - sales.deactivat IS NULL - AND sales.cur = 'Y' - AND CAST(SUBSTR(sales.saledt, 1, 4) AS INT) BETWEEN 1997 AND YEAR( - CURRENT_DATE - ) - AND tc.township_code IS NOT NULL - AND sales.price IS NOT NULL - ) AS subquery - -- Only use max price 
by pin/sale date - WHERE max_price = 1 - AND (bad_doc_no = 1 OR is_multisale = TRUE) -), - -mydec_sales AS ( - SELECT * - FROM ( - SELECT - REPLACE(line_1_primary_pin, '-', '') AS pin, - REPLACE(document_number, 'D', '') AS doc_no, - tc.class, - tc.township_code, - SUBSTR(line_4_instrument_date, 1, 4) AS year, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - line_11_full_consideration AS sale_price, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - COALESCE( - line_2_total_parcels > 1, - FALSE - ) AS is_multisale, - line_2_total_parcels AS num_parcels_sale, - COUNT(*) OVER ( - PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales, - year_of_sale, - line_11_full_consideration <= 10000 AS sale_filter_less_than_10k - FROM sale.mydec - LEFT JOIN - town_class AS tc - ON REPLACE(line_1_primary_pin, '-', '') = tc.parid - AND SUBSTR(mydec.line_4_instrument_date, 1, 4) = tc.taxyr - WHERE line_2_total_parcels = 1 -- Remove multisales - AND tc.township_code IS NOT NULL - AND line_11_full_consideration IS NOT NULL - ) AS derived_table - WHERE num_single_day_sales = 1 - OR (YEAR(sale_date) > 2020) -), - -max_version_flag AS ( - SELECT - meta_sale_document_num, - MAX(version) AS max_version - FROM sale.flag - GROUP BY meta_sale_document_num -), - -sales_val AS ( - SELECT - sf.meta_sale_document_num, - sf.sv_is_outlier, - sf.sv_is_ptax_outlier, - sf.sv_is_heuristic_outlier, - sf.sv_outlier_reason1, - sf.sv_outlier_reason2, - sf.sv_outlier_reason3, - sf.run_id AS sv_run_id, - sf.version AS sv_version - FROM - sale.flag AS sf - INNER JOIN max_version_flag AS mv - ON sf.meta_sale_document_num = mv.meta_sale_document_num - AND sf.version = mv.max_version -), - -combined_sales AS ( - -- Select all rows from ias_sales - SELECT - ias.pin, - ias.doc_no, - ias.township_code, - ias.class, - ias.year, - COALESCE(mydec.sale_date, ias.sale_date) AS sale_date, - COALESCE(mydec.sale_date IS NOT NULL, 
FALSE) - AS is_mydec_date, - ias.sale_price, - ias.seller_name, - ias.buyer_name, - ias.is_multisale, - ias.num_parcels_sale, - ias.sale_filter_less_than_10k, - 'iasworld' AS source, - sales_val.sv_is_outlier, - sales_val.sv_is_ptax_outlier, - sales_val.sv_is_heuristic_outlier, - sales_val.sv_outlier_reason1, - sales_val.sv_outlier_reason2, - sales_val.sv_outlier_reason3, - sales_val.sv_run_id, - sales_val.sv_version - FROM ias_sales AS ias - LEFT JOIN mydec_sales AS mydec ON ias.doc_no = mydec.doc_no - LEFT JOIN sales_val - ON ias.doc_no = sales_val.meta_sale_document_num - - UNION ALL - - -- Select rows from mydec_sales that don't exist in ias_sales - SELECT - mydec.pin, - mydec.doc_no, - mydec.township_code, - mydec.class, - mydec.year, - mydec.sale_date, - TRUE AS is_mydec_date, - mydec.sale_price, - mydec.seller_name, - mydec.buyer_name, - mydec.is_multisale, - mydec.num_parcels_sale, - mydec.sale_filter_less_than_10k, - 'mydec' AS source, - sales_val.sv_is_outlier, - sales_val.sv_is_ptax_outlier, - sales_val.sv_is_heuristic_outlier, - sales_val.sv_outlier_reason1, - sales_val.sv_outlier_reason2, - sales_val.sv_outlier_reason3, - sales_val.sv_run_id, - sales_val.sv_version - FROM mydec_sales AS mydec - LEFT JOIN sales_val - ON mydec.doc_no = sales_val.meta_sale_document_num - LEFT JOIN ias_sales AS ias ON mydec.doc_no = ias.doc_no - WHERE ias.doc_no IS NULL -) - -SELECT * FROM combined_sales \ No newline at end of file From 80113f6cfbc510887046f6336e409f6d7eed3888 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 19:36:59 +0000 Subject: [PATCH 033/126] Edit comments --- dbt/models/default/default.vw_pin_sale.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ccb2eced4..41d6fecef 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -305,7 +305,6 @@ combined_sales AS ( END AS 
sale_filter_same_sale_within_365, CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, - -- Include necessary columns from 'm' explicitly m.mydec_deed_type, m.sale_filter_ptax_flag, m.mydec_property_advertised, From 13d69cbd8c144862df1848dadb96f730c9e65d6c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 20:00:17 +0000 Subject: [PATCH 034/126] Fix aliases and add line ignores --- dbt/models/default/default.vw_pin_sale.sql | 149 +++++++++++---------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 41d6fecef..e740e34bc 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -167,7 +167,7 @@ mydec_sales AS ( COALESCE(line_10a = 1, FALSE) AS mydec_is_installment_contract_fulfilled, COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa COALESCE(line_10c = 1, FALSE) AS mydec_is_transfer_of_less_than_100_percent_interest, COALESCE(line_10d = 1, FALSE) @@ -185,7 +185,7 @@ mydec_sales AS ( COALESCE(line_10j = 1, FALSE) AS mydec_is_seller_buyer_a_relocation_company, COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa COALESCE(line_10l = 1, FALSE) AS mydec_is_buyer_a_real_estate_investment_trust, COALESCE(line_10m = 1, FALSE) @@ -206,7 +206,7 @@ mydec_sales AS ( AS mydec_homestead_exemption_senior_citizens, line_10s_senior_citizens_assessment_freeze AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - -- Flag for booting outlier PTAX-203 sales from modeling and reporting + -- Flag for booting outlier PTAX-203 sales from modeling and reporting --noqa ( COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + 
COALESCE(line_10d, 0) + COALESCE(line_10e, 0) @@ -252,88 +252,99 @@ sales_val AS ( combined_sales AS ( SELECT - COALESCE(u.pin, m.pin) AS pin, - COALESCE(u.year, m.year) AS year, - COALESCE(u.township_code, tc.township_code) AS township_code, - COALESCE(u.nbhd, tc.nbhd) AS nbhd, - COALESCE(u.class, tc.class) AS class, - COALESCE(u.sale_date, m.sale_date) AS sale_date, - COALESCE(u.sale_price, m.sale_price) AS sale_price, - u.sale_key, - COALESCE(u.doc_no, m.doc_no) AS doc_no, - COALESCE(u.deed_type, m.mydec_deed_type) AS deed_type, - COALESCE(u.seller_name, m.seller_name) AS seller_name, - COALESCE(u.is_multisale, m.is_multisale) AS is_multisale, - COALESCE(u.num_parcels_sale, m.num_parcels_sale) AS num_parcels_sale, - COALESCE(u.buyer_name, m.buyer_name) AS buyer_name, - COALESCE(u.sale_type, NULL) AS sale_type, - u.max_price, - u.bad_doc_no, + COALESCE(uq_sales.pin, md_sales.pin) AS pin, + COALESCE(uq_sales.year, md_sales.year) AS year, + COALESCE(uq_sales.township_code, tc.township_code) AS township_code, + COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd, + COALESCE(uq_sales.class, tc.class) AS class, + COALESCE(uq_sales.sale_date, md_sales.sale_date) AS sale_date, + COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, + uq_sales.sale_key, + COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no, + COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type, + COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name, + COALESCE(uq_sales.is_multisale, md_sales.is_multisale) AS is_multisale, + COALESCE(uq_sales.num_parcels_sale, md_sales.num_parcels_sale) --noqa + AS num_parcels_sale, + COALESCE(uq_sales.buyer_name, md_sales.buyer_name) AS buyer_name, + COALESCE(uq_sales.sale_type, NULL) AS sale_type, + uq_sales.max_price, + uq_sales.bad_doc_no, -- Compute 'same_price_earlier_date' for all sales - LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( - PARTITION BY - COALESCE(u.pin, m.pin), COALESCE(u.sale_price, m.sale_price) - ORDER BY 
COALESCE(u.sale_date, m.sale_date) ASC + LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) OVER ( + PARTITION BY --noqa + COALESCE(uq_sales.pin, md_sales.pin), + COALESCE(uq_sales.sale_price, md_sales.sale_price) + ORDER BY COALESCE(uq_sales.sale_date, md_sales.sale_date) ASC ) AS same_price_earlier_date, -- Compute 'sale_filter_less_than_10k' - (COALESCE(u.sale_price, m.sale_price) <= 10000) + (COALESCE(uq_sales.sale_price, md_sales.sale_price) <= 10000) AS sale_filter_less_than_10k, -- Compute 'sale_filter_deed_type' ( - COALESCE(u.deed_type, m.mydec_deed_type) IN ('03', '04', '06') - OR COALESCE(u.deed_type, m.mydec_deed_type) IS NULL + COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) IN ( + '03', '04', '06' + ) + OR COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) IS NULL ) AS sale_filter_deed_type, -- Compute 'sale_filter_same_sale_within_365' CASE - WHEN LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( + WHEN LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) OVER ( PARTITION BY - COALESCE(u.pin, m.pin), - COALESCE(u.sale_price, m.sale_price) - ORDER BY COALESCE(u.sale_date, m.sale_date) ASC + COALESCE(uq_sales.pin, md_sales.pin), + COALESCE(uq_sales.sale_price, md_sales.sale_price) + ORDER BY + COALESCE(uq_sales.sale_date, md_sales.sale_date) ASC ) IS NOT NULL THEN EXTRACT( - DAY FROM COALESCE(u.sale_date, m.sale_date) - - LAG(COALESCE(u.sale_date, m.sale_date)) OVER ( - PARTITION BY - COALESCE(u.pin, m.pin), - COALESCE(u.sale_price, m.sale_price) - ORDER BY COALESCE(u.sale_date, m.sale_date) ASC - ) + DAY FROM COALESCE(uq_sales.sale_date, md_sales.sale_date) + - LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) + OVER ( + PARTITION BY + COALESCE(uq_sales.pin, md_sales.pin), + COALESCE( + uq_sales.sale_price, md_sales.sale_price + ) + ORDER BY + COALESCE( + uq_sales.sale_date, md_sales.sale_date + ) ASC + ) ) <= 365 ELSE FALSE END AS sale_filter_same_sale_within_365, - CASE WHEN u.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END + 
CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, - m.mydec_deed_type, - m.sale_filter_ptax_flag, - m.mydec_property_advertised, - m.mydec_is_installment_contract_fulfilled, - m.mydec_is_sale_between_related_individuals_or_corporate_affiliates, - m.mydec_is_transfer_of_less_than_100_percent_interest, - m.mydec_is_court_ordered_sale, - m.mydec_is_sale_in_lieu_of_foreclosure, - m.mydec_is_condemnation, - m.mydec_is_short_sale, - m.mydec_is_bank_reo_real_estate_owned, - m.mydec_is_auction_sale, - m.mydec_is_seller_buyer_a_relocation_company, - m.mydec_is_seller_buyer_a_financial_institution_or_government_agency, - m.mydec_is_buyer_a_real_estate_investment_trust, - m.mydec_is_buyer_a_pension_fund, - m.mydec_is_buyer_an_adjacent_property_owner, - m.mydec_is_buyer_exercising_an_option_to_purchase, - m.mydec_is_simultaneous_trade_of_property, - m.mydec_is_sale_leaseback, - m.mydec_is_homestead_exemption, - m.mydec_homestead_exemption_general_alternative, - m.mydec_homestead_exemption_senior_citizens, - m.mydec_homestead_exemption_senior_citizens_assessment_freeze - FROM unique_sales AS u - FULL OUTER JOIN mydec_sales AS m ON u.doc_no = m.doc_no + md_sales.mydec_deed_type, + md_sales.sale_filter_ptax_flag, + md_sales.mydec_property_advertised, + md_sales.mydec_is_installment_contract_fulfilled, + md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + md_sales.mydec_is_transfer_of_less_than_100_percent_interest, + md_sales.mydec_is_court_ordered_sale, + md_sales.mydec_is_sale_in_lieu_of_foreclosure, + md_sales.mydec_is_condemnation, + md_sales.mydec_is_short_sale, + md_sales.mydec_is_bank_reo_real_estate_owned, + md_sales.mydec_is_auction_sale, + md_sales.mydec_is_seller_buyer_a_relocation_company, + md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + md_sales.mydec_is_buyer_a_real_estate_investment_trust, + md_sales.mydec_is_buyer_a_pension_fund, + 
md_sales.mydec_is_buyer_an_adjacent_property_owner, + md_sales.mydec_is_buyer_exercising_an_option_to_purchase, + md_sales.mydec_is_simultaneous_trade_of_property, + md_sales.mydec_is_sale_leaseback, + md_sales.mydec_is_homestead_exemption, + md_sales.mydec_homestead_exemption_general_alternative, + md_sales.mydec_homestead_exemption_senior_citizens, + md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze + FROM unique_sales AS uq_sales + FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no LEFT JOIN town_class AS tc - ON COALESCE(u.pin, m.pin) = tc.parid - AND COALESCE(u.year, m.year) = tc.taxyr + ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid + AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr ) SELECT @@ -364,7 +375,7 @@ SELECT combined_sales.sale_filter_ptax_flag, combined_sales.mydec_property_advertised, combined_sales.mydec_is_installment_contract_fulfilled, - combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa combined_sales.mydec_is_transfer_of_less_than_100_percent_interest, combined_sales.mydec_is_court_ordered_sale, combined_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -373,7 +384,7 @@ SELECT combined_sales.mydec_is_bank_reo_real_estate_owned, combined_sales.mydec_is_auction_sale, combined_sales.mydec_is_seller_buyer_a_relocation_company, - combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa combined_sales.mydec_is_buyer_a_real_estate_investment_trust, combined_sales.mydec_is_buyer_a_pension_fund, combined_sales.mydec_is_buyer_an_adjacent_property_owner, From a46ebb316553142a1f115a58978155694868220e Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 20:07:17 +0000 Subject: [PATCH 035/126] remove eda qmd --- eda_new_sales_view.qmd | 99 
------------------------------------------ 1 file changed, 99 deletions(-) delete mode 100644 eda_new_sales_view.qmd diff --git a/eda_new_sales_view.qmd b/eda_new_sales_view.qmd deleted file mode 100644 index cc350bb7f..000000000 --- a/eda_new_sales_view.qmd +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: "Compare sales val specs" -execute: - echo: false - warning: false -format: - html: - embed-resources: true - toc: true - toc_float: true - fig-align: center - fontsize: 12pt -knitr: - opts_chunk: - out.width: "100%" -editor: source ---- - -```{r _libraries} -library(ggplot2) -library(tidyr) -library(dplyr) -library(here) -library(noctua) -``` - -```{r _data_ingest} -# Ingest data -noctua_options(cache_size = 10, unload = FALSE) - -AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena()) - -vw_pin_sales <- dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, - "select * from default.vw_pin_sale" -) - -ias_sales <- dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, - "select * from z_ci_583_create_a_sales_view_that_combines_iasworld_mydec_and_ccrd_sales_default.vw_experimental_sales - where source = 'iasworld'" -) - -mydec_sales <- dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, - "select * from z_ci_583_create_a_sales_view_that_combines_iasworld_mydec_and_ccrd_sales_default.vw_experimental_sales - where source = 'mydec'" -) - -``` - - - -```{r} -buckets_vw_pin_sales <- vw_pin_sales %>% - mutate(bucket = floor(sale_price / 50000) * 50000) %>% - group_by(bucket) %>% - summarise(count = n(), .groups = 'drop') - -buckets_ias_sales <- ias_sales %>% - mutate(bucket = floor(sale_price / 50000) * 50000) %>% - group_by(bucket) %>% - summarise(count = n(), .groups = 'drop') - -buckets_mydec_sales <- mydec_sales %>% - mutate(bucket = floor(sale_price / 50000) * 50000) %>% - group_by(bucket) %>% - summarise(count = n(), .groups = 'drop') - -``` - -```{r} -percentage_zero_sales_mydec <- mydec_sales %>% - group_by(year) %>% - summarise( - total_count = n(), - zero_count = sum(sale_price == 0), - 
percentage_zero = (zero_count / total_count) * 100, - .groups = 'drop' - ) - -percentage_under_10k_mydec <- mydec_sales %>% - group_by(year) %>% - summarise( - total_count = n(), - under10k_count_count = sum(sale_filter_less_than_10k == TRUE), - percentage_zero = (under10k_count_count / total_count) * 100, - .groups = 'drop' - ) - -``` - - - - - - - - From 7c8c0846145c3cc5a7226905811bf5210e974070 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 20:13:02 +0000 Subject: [PATCH 036/126] Remove docs for experimental table --- dbt/models/default/docs.md | 8 -------- .../default/schema/default.vw_experimental_sales.yml | 3 --- 2 files changed, 11 deletions(-) delete mode 100644 dbt/models/default/schema/default.vw_experimental_sales.yml diff --git a/dbt/models/default/docs.md b/dbt/models/default/docs.md index ab10327fd..a1cc93e46 100644 --- a/dbt/models/default/docs.md +++ b/dbt/models/default/docs.md @@ -15,14 +15,6 @@ and reporting. **Primary Key**: `year`, `pin`, `card` {% enddocs %} -# vw_experimental_sales - -{% docs experimental_sales%} - -Experimental sales view that grabs sales from iasworld and mydec - -{% enddocs %} - # vw_pin_address {% docs view_vw_pin_address %} diff --git a/dbt/models/default/schema/default.vw_experimental_sales.yml b/dbt/models/default/schema/default.vw_experimental_sales.yml deleted file mode 100644 index 13fc11163..000000000 --- a/dbt/models/default/schema/default.vw_experimental_sales.yml +++ /dev/null @@ -1,3 +0,0 @@ -models: - - name: default.experimental_sales - description: '{{ doc("experimental_sales") }}' \ No newline at end of file From 5e67742556e19b46625c76baadcdf2d46af8e575 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 20:14:11 +0000 Subject: [PATCH 037/126] Add column in docs --- dbt/models/default/schema/default.vw_pin_sale.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbt/models/default/schema/default.vw_pin_sale.yml b/dbt/models/default/schema/default.vw_pin_sale.yml 
index 720cc99fd..ced82c108 100644 --- a/dbt/models/default/schema/default.vw_pin_sale.yml +++ b/dbt/models/default/schema/default.vw_pin_sale.yml @@ -39,6 +39,8 @@ models: description: '{{ doc("shared_column_sale_price") }}' - name: seller_name description: '{{ doc("shared_column_seller_name") }}' + - name: source + description: Which table the doc_no was drawn from (iasworld or mydec) - name: sv_is_heuristic_outlier description: '{{ doc("shared_column_sv_is_heuristic_outlier") }}' - name: sv_is_ptax_outlier From 9df5f665db08e4390807055b50e3d9a1d60c1541 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 17 Sep 2024 20:23:40 +0000 Subject: [PATCH 038/126] Change test limmit --- dbt/models/default/schema/default.vw_pin_sale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/schema/default.vw_pin_sale.yml b/dbt/models/default/schema/default.vw_pin_sale.yml index ced82c108..9f81cc06d 100644 --- a/dbt/models/default/schema/default.vw_pin_sale.yml +++ b/dbt/models/default/schema/default.vw_pin_sale.yml @@ -65,7 +65,7 @@ models: - year allowed_duplicates: 2 config: - error_if: ">4032" + error_if: ">5032" # No sales for same price/pin within 12 months - unique_combination_of_columns: name: default_vw_pin_sale_unique_price_pin_and_year From b3362454b047e36ada0455469fc86ddd886f17af Mon Sep 17 00:00:00 2001 From: wagnerlmichael <93889413+wagnerlmichael@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:32:38 -0500 Subject: [PATCH 039/126] Update dbt/models/default/default.vw_pin_sale.sql Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e740e34bc..9388d4c34 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -161,7 +161,7 @@ mydec_sales AS ( 
NULLIF(TRIM(buyer_name), '') AS buyer_name, CAST(line_11_full_consideration AS BIGINT) AS sale_price, line_2_total_parcels AS num_parcels_sale, - FALSE AS is_multisale, + COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, COALESCE(line_7_property_advertised = 1, FALSE) AS mydec_property_advertised, COALESCE(line_10a = 1, FALSE) From 9e3a340dfe6720167a14e559a5198be82553c16f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 18 Sep 2024 19:39:49 +0000 Subject: [PATCH 040/126] Fix sale date ranges --- dbt/models/default/default.vw_pin_sale.sql | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 9388d4c34..9fd09c774 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,7 +257,14 @@ combined_sales AS ( COALESCE(uq_sales.township_code, tc.township_code) AS township_code, COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd, COALESCE(uq_sales.class, tc.class) AS class, - COALESCE(uq_sales.sale_date, md_sales.sale_date) AS sale_date, + COALESCE( + CASE + WHEN md_sales.sale_date < '2021-01-01' THEN md_sales.sale_date + WHEN uq_sales.sale_date >= '2021-01-01' THEN uq_sales.sale_date + END, + md_sales.sale_date, + uq_sales.sale_date + ) AS sale_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, uq_sales.sale_key, COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no, From ef3e1ac2e49c0f853f7d4b3682f0d6307cfc821e Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 18 Sep 2024 20:06:43 +0000 Subject: [PATCH 041/126] Add mydec_date change --- dbt/models/default/default.vw_pin_sale.sql | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 9fd09c774..954c2f9a8 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ 
-265,6 +265,12 @@ combined_sales AS ( md_sales.sale_date, uq_sales.sale_date ) AS sale_date, + CASE + WHEN md_sales.sale_date < '2021-01-01' THEN TRUE + WHEN uq_sales.sale_date >= '2021-01-01' THEN FALSE + WHEN md_sales.sale_date IS NOT NULL THEN TRUE + WHEN uq_sales.sale_date IS NOT NULL THEN FALSE + END AS is_mydec_sale, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, uq_sales.sale_key, COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no, @@ -361,10 +367,7 @@ SELECT combined_sales.nbhd, combined_sales.class, combined_sales.sale_date, - ( - combined_sales.source = 'mydec' - OR YEAR(combined_sales.sale_date) >= 2021 - ) AS is_mydec_date, + combined_sales.is_mydec_date, combined_sales.sale_price, combined_sales.sale_key, combined_sales.doc_no, From 0ea03f478374b32ace5b94ca81321b0cb699d6ce Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 18 Sep 2024 21:08:42 +0000 Subject: [PATCH 042/126] Add better version of sale_date calc --- dbt/models/default/default.vw_pin_sale.sql | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 954c2f9a8..1b3c4daa6 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,14 +257,17 @@ combined_sales AS ( COALESCE(uq_sales.township_code, tc.township_code) AS township_code, COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd, COALESCE(uq_sales.class, tc.class) AS class, - COALESCE( - CASE - WHEN md_sales.sale_date < '2021-01-01' THEN md_sales.sale_date - WHEN uq_sales.sale_date >= '2021-01-01' THEN uq_sales.sale_date - END, - md_sales.sale_date, - uq_sales.sale_date - ) AS sale_date, + CASE + WHEN uq_sales.year < '2021' + THEN COALESCE( + md_sales.sale_date, + uq_sales.sale_date + ) + ELSE COALESCE( + uq_sales.sale_date, + md_sales.sale_date + ) + END AS sale_date, CASE WHEN md_sales.sale_date < '2021-01-01' THEN TRUE WHEN uq_sales.sale_date >= 
'2021-01-01' THEN FALSE From 7ea265f9b9532699b4407dac5ad2ee367f95d533 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 18 Sep 2024 21:13:59 +0000 Subject: [PATCH 043/126] Make is_mydec_date more readable --- dbt/models/default/default.vw_pin_sale.sql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 1b3c4daa6..53e16f171 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -269,10 +269,12 @@ combined_sales AS ( ) END AS sale_date, CASE - WHEN md_sales.sale_date < '2021-01-01' THEN TRUE - WHEN uq_sales.sale_date >= '2021-01-01' THEN FALSE - WHEN md_sales.sale_date IS NOT NULL THEN TRUE - WHEN uq_sales.sale_date IS NOT NULL THEN FALSE + WHEN (uq_sales.year < '2021' OR uq_sales.sale_date IS NULL) + AND md_sales.sale_date IS NOT NULL + THEN TRUE + WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) + AND uq_sales.sale_date IS NOT NULL + THEN FALSE END AS is_mydec_sale, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, uq_sales.sale_key, From a8398c7b857381fd14fbe81a66c7939bd1b06305 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 20 Sep 2024 17:12:31 +0000 Subject: [PATCH 044/126] Add proper 365 calculation --- dbt/models/default/default.vw_pin_sale.sql | 58 ++++++++++++++++------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 53e16f171..6cc1fc8e1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -307,29 +307,59 @@ combined_sales AS ( ) AS sale_filter_deed_type, -- Compute 'sale_filter_same_sale_within_365' CASE - WHEN LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) OVER ( + -- If there is a previous sale date within the same partition, we will perform a day difference calculation 
--noqa + WHEN LAG( + CASE + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa + ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) + END + ) OVER ( + -- Define the partition or grouping for the LAG function. + -- Partition by pin and sale price PARTITION BY COALESCE(uq_sales.pin, md_sales.pin), COALESCE(uq_sales.sale_price, md_sales.sale_price) + -- Order the sales by the sale date (using the same conditional logic as above) in ascending order --noqa ORDER BY - COALESCE(uq_sales.sale_date, md_sales.sale_date) ASC + CASE + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa + ELSE COALESCE( + uq_sales.sale_date, md_sales.sale_date + ) + END ASC ) IS NOT NULL + -- If there is a previous sale, calculate difference in days between the current sale date and the previous sale date. --noqa THEN + -- Use EXTRACT to compute the day difference between the current sale date and the previous sale date. --noqa EXTRACT( - DAY FROM COALESCE(uq_sales.sale_date, md_sales.sale_date) - - LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) - OVER ( - PARTITION BY - COALESCE(uq_sales.pin, md_sales.pin), - COALESCE( - uq_sales.sale_price, md_sales.sale_price - ) - ORDER BY - COALESCE( + DAY FROM + -- The current sale date + CASE + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa + ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) + END + -- Subtract the previous sale date, which is found using the LAG function. 
--noqa + - LAG( + CASE + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa + ELSE COALESCE( uq_sales.sale_date, md_sales.sale_date - ) ASC - ) + ) + END + ) OVER ( + PARTITION BY + COALESCE(uq_sales.pin, md_sales.pin), + COALESCE(uq_sales.sale_price, md_sales.sale_price) + ORDER BY + CASE + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa + ELSE COALESCE( + uq_sales.sale_date, md_sales.sale_date + ) + END ASC + ) ) <= 365 + -- If there is no previous sale in the same partition (first sale or no prior sale at the same price for that pin), return FALSE. --noqa ELSE FALSE END AS sale_filter_same_sale_within_365, CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END From b180593ffef3f02c30d29ac7fddf28ca484456da Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 20 Sep 2024 17:32:22 +0000 Subject: [PATCH 045/126] Remove dup test --- dbt/models/default/schema/default.vw_pin_sale.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dbt/models/default/schema/default.vw_pin_sale.yml b/dbt/models/default/schema/default.vw_pin_sale.yml index 2113faad0..6844e79cc 100644 --- a/dbt/models/default/schema/default.vw_pin_sale.yml +++ b/dbt/models/default/schema/default.vw_pin_sale.yml @@ -79,15 +79,6 @@ models: allowed_duplicates: 2 config: error_if: ">5032" - # No sales for same price/pin within 12 months - - unique_combination_of_columns: - name: default_vw_pin_sale_unique_price_pin_and_year - combination_of_columns: - - pin - - year - - sale_price - config: - where: NOT sale_filter_same_sale_within_365 AND NOT sale_filter_deed_type - row_count: name: default_vw_pin_sale_row_count above: 2477674 # as of 2023-11-22 From fb4991301f6fe86a03dc0e074c948aad24743b00 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 20 Sep 2024 17:47:55 +0000 Subject: [PATCH 046/126] Switch to is_mydec_date --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 6cc1fc8e1..906f3aa05 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -275,7 +275,7 @@ combined_sales AS ( WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) AND uq_sales.sale_date IS NOT NULL THEN FALSE - END AS is_mydec_sale, + END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, uq_sales.sale_key, COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no, From e651755af0bbb9ef64c4bea3a0f6298e0062364e Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 25 Sep 2024 16:52:43 +0000 Subject: [PATCH 047/126] Try a refactor for 365 calc --- dbt/models/default/default.vw_pin_sale.sql | 226 +++++++-------------- 1 file changed, 72 insertions(+), 154 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 906f3aa05..c74eee6f3 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -1,5 +1,5 @@ -- View containing unique, filtered sales --- Class and township of associated PIN + WITH town_class AS ( SELECT par.parid, @@ -19,8 +19,6 @@ WITH town_class AS ( AND par.deactivat IS NULL ), --- "nopar" isn't entirely accurate for sales associated with only one parcel, --- so we create our own counter calculated AS ( SELECT instruno, @@ -39,9 +37,6 @@ calculated AS ( unique_sales AS ( SELECT *, - -- Historically, this view excluded sales for a given pin if it had sold - -- within the last 12 months for the same price. This filter allows us - -- to filter out those sales. 
COALESCE( EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, FALSE @@ -58,7 +53,6 @@ unique_sales AS ( sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, NULLIF(sales.instrtyp, '') AS deed_type, - -- "nopar" is number of parcels sold COALESCE( sales.nopar > 1 OR calculated.nopar_calculated > 1, FALSE @@ -79,11 +73,6 @@ unique_sales AS ( WHEN sales.saletype = '0' THEN 'LAND' WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' END AS sale_type, - -- Sales are not entirely unique by pin/date so we group all - -- sales by pin/date, then order by descending price - -- and give the top observation a value of 1 for "max_price". - -- We need to order by salekey as well in case of any ties within - -- price, date, and pin. ROW_NUMBER() OVER ( PARTITION BY sales.parid, @@ -91,14 +80,6 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.price DESC, sales.salekey ASC ) AS max_price, - -- We remove the letter 'D' that trails some document numbers in - -- iasworld.sales since it prevents us from joining to mydec sales. - -- This creates one instance where we have duplicate document - -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the earliest duplicate sale when there are - -- multiple within one document number, within a year) within the - -- new document number to identify and remove the sale causing the - -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY NULLIF(REPLACE(sales.instruno, 'D', ''), ''), @@ -106,11 +87,6 @@ unique_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, - -- Some pins sell for the exact same price a few months after - -- they're sold (we need to make sure to only include deed types we - -- want). These sales are unnecessary for modeling and may be - -- duplicates. We need to order by salekey as well in case of any - -- ties within price, date, and pin. 
LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, @@ -118,11 +94,6 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.saledt ASC, sales.salekey ASC ) AS same_price_earlier_date, - -- Historically, this view filtered out sales less than $10k and - -- as well as quit claims, executor deeds, beneficial interests, - -- and NULL deed types. Now we create "legacy" filter columns so - -- that this filtering can reproduced while still allowing all sales - -- into the view. sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, @@ -167,7 +138,7 @@ mydec_sales AS ( COALESCE(line_10a = 1, FALSE) AS mydec_is_installment_contract_fulfilled, COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, COALESCE(line_10c = 1, FALSE) AS mydec_is_transfer_of_less_than_100_percent_interest, COALESCE(line_10d = 1, FALSE) @@ -185,7 +156,7 @@ mydec_sales AS ( COALESCE(line_10j = 1, FALSE) AS mydec_is_seller_buyer_a_relocation_company, COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, COALESCE(line_10l = 1, FALSE) AS mydec_is_buyer_a_real_estate_investment_trust, COALESCE(line_10m = 1, FALSE) @@ -206,7 +177,6 @@ mydec_sales AS ( AS mydec_homestead_exemption_senior_citizens, line_10s_senior_citizens_assessment_freeze AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - -- Flag for booting outlier PTAX-203 sales from modeling and reporting --noqa ( COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) @@ -218,7 +188,7 @@ mydec_sales AS ( PARTITION BY line_1_primary_pin, line_4_instrument_date ) AS num_single_day_sales FROM {{ source('sale', 'mydec') }} - 
WHERE line_2_total_parcels = 1 -- Remove multisales + WHERE line_2_total_parcels = 1 ) WHERE num_single_day_sales = 1 OR (YEAR(sale_date) > 2020) @@ -250,24 +220,19 @@ sales_val AS ( AND sf.version = mv.max_version ), -combined_sales AS ( +-- Introducing cte_sales to precompute the coalesced values +cte_sales AS ( SELECT - COALESCE(uq_sales.pin, md_sales.pin) AS pin, - COALESCE(uq_sales.year, md_sales.year) AS year, - COALESCE(uq_sales.township_code, tc.township_code) AS township_code, - COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd, - COALESCE(uq_sales.class, tc.class) AS class, + -- Precompute coalesced columns + COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, + COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, + COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, + COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, + COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE - WHEN uq_sales.year < '2021' - THEN COALESCE( - md_sales.sale_date, - uq_sales.sale_date - ) - ELSE COALESCE( - uq_sales.sale_date, - md_sales.sale_date - ) - END AS sale_date, + WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) + ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) + END AS sale_date_coalesced, CASE WHEN (uq_sales.year < '2021' OR uq_sales.sale_date IS NULL) AND md_sales.sale_date IS NOT NULL @@ -276,99 +241,23 @@ combined_sales AS ( AND uq_sales.sale_date IS NOT NULL THEN FALSE END AS is_mydec_date, - COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price, + COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, - COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no, - COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type, - COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name, - COALESCE(uq_sales.is_multisale, md_sales.is_multisale) AS is_multisale, - COALESCE(uq_sales.num_parcels_sale, md_sales.num_parcels_sale) --noqa - 
AS num_parcels_sale, - COALESCE(uq_sales.buyer_name, md_sales.buyer_name) AS buyer_name, - COALESCE(uq_sales.sale_type, NULL) AS sale_type, + COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, + COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, + COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, + COALESCE(uq_sales.is_multisale, md_sales.is_multisale) AS is_multisale_coalesced, + COALESCE(uq_sales.num_parcels_sale, md_sales.num_parcels_sale) AS num_parcels_sale_coalesced, + COALESCE(uq_sales.buyer_name, md_sales.buyer_name) AS buyer_name_coalesced, + COALESCE(uq_sales.sale_type, NULL) AS sale_type_coalesced, uq_sales.max_price, uq_sales.bad_doc_no, - -- Compute 'same_price_earlier_date' for all sales - LAG(COALESCE(uq_sales.sale_date, md_sales.sale_date)) OVER ( - PARTITION BY --noqa - COALESCE(uq_sales.pin, md_sales.pin), - COALESCE(uq_sales.sale_price, md_sales.sale_price) - ORDER BY COALESCE(uq_sales.sale_date, md_sales.sale_date) ASC - ) AS same_price_earlier_date, - -- Compute 'sale_filter_less_than_10k' - (COALESCE(uq_sales.sale_price, md_sales.sale_price) <= 10000) - AS sale_filter_less_than_10k, - -- Compute 'sale_filter_deed_type' - ( - COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) IN ( - '03', '04', '06' - ) - OR COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) IS NULL - ) AS sale_filter_deed_type, - -- Compute 'sale_filter_same_sale_within_365' - CASE - -- If there is a previous sale date within the same partition, we will perform a day difference calculation --noqa - WHEN LAG( - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa - ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) - END - ) OVER ( - -- Define the partition or grouping for the LAG function. 
- -- Partition by pin and sale price - PARTITION BY - COALESCE(uq_sales.pin, md_sales.pin), - COALESCE(uq_sales.sale_price, md_sales.sale_price) - -- Order the sales by the sale date (using the same conditional logic as above) in ascending order --noqa - ORDER BY - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa - ELSE COALESCE( - uq_sales.sale_date, md_sales.sale_date - ) - END ASC - ) IS NOT NULL - -- If there is a previous sale, calculate difference in days between the current sale date and the previous sale date. --noqa - THEN - -- Use EXTRACT to compute the day difference between the current sale date and the previous sale date. --noqa - EXTRACT( - DAY FROM - -- The current sale date - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa - ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) - END - -- Subtract the previous sale date, which is found using the LAG function. --noqa - - LAG( - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa - ELSE COALESCE( - uq_sales.sale_date, md_sales.sale_date - ) - END - ) OVER ( - PARTITION BY - COALESCE(uq_sales.pin, md_sales.pin), - COALESCE(uq_sales.sale_price, md_sales.sale_price) - ORDER BY - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) --noqa - ELSE COALESCE( - uq_sales.sale_date, md_sales.sale_date - ) - END ASC - ) - ) <= 365 - -- If there is no previous sale in the same partition (first sale or no prior sale at the same price for that pin), return FALSE. 
--noqa - ELSE FALSE - END AS sale_filter_same_sale_within_365, - CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END - AS source, + CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, md_sales.mydec_deed_type, md_sales.sale_filter_ptax_flag, md_sales.mydec_property_advertised, md_sales.mydec_is_installment_contract_fulfilled, - md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, md_sales.mydec_is_transfer_of_less_than_100_percent_interest, md_sales.mydec_is_court_ordered_sale, md_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -377,7 +266,7 @@ combined_sales AS ( md_sales.mydec_is_bank_reo_real_estate_owned, md_sales.mydec_is_auction_sale, md_sales.mydec_is_seller_buyer_a_relocation_company, - md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, md_sales.mydec_is_buyer_a_real_estate_investment_trust, md_sales.mydec_is_buyer_a_pension_fund, md_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -393,25 +282,54 @@ combined_sales AS ( LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr +), + +combined_sales AS ( + SELECT + cte_sales.*, + + -- Simplify 'sale_filter_same_sale_within_365' using precomputed columns + CASE + WHEN LAG(sale_date_coalesced) OVER ( + PARTITION BY pin_coalesced, sale_price_coalesced + ORDER BY sale_date_coalesced ASC + ) IS NOT NULL + THEN + (sale_date_coalesced - LAG(sale_date_coalesced) OVER ( + PARTITION BY pin_coalesced, sale_price_coalesced + ORDER BY sale_date_coalesced ASC + )) <= 365 + ELSE FALSE + END AS sale_filter_same_sale_within_365, + + -- Compute 'sale_filter_less_than_10k' + (sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, + + -- Compute 'sale_filter_deed_type' + ( + 
deed_type_coalesced IN ('03', '04', '06') + OR deed_type_coalesced IS NULL + ) AS sale_filter_deed_type + FROM cte_sales ) SELECT - combined_sales.pin, - combined_sales.year, - combined_sales.township_code, - combined_sales.nbhd, - combined_sales.class, - combined_sales.sale_date, + combined_sales.pin_coalesced AS pin, + combined_sales.year_coalesced AS year, + combined_sales.township_code_coalesced AS township_code, + combined_sales.nbhd_coalesced AS nbhd, + combined_sales.class_coalesced AS class, + combined_sales.sale_date_coalesced AS sale_date, combined_sales.is_mydec_date, - combined_sales.sale_price, + combined_sales.sale_price_coalesced AS sale_price, combined_sales.sale_key, - combined_sales.doc_no, - combined_sales.deed_type, - combined_sales.seller_name, - combined_sales.is_multisale, - combined_sales.num_parcels_sale, - combined_sales.buyer_name, - combined_sales.sale_type, + combined_sales.doc_no_coalesced AS doc_no, + combined_sales.deed_type_coalesced AS deed_type, + combined_sales.seller_name_coalesced AS seller_name, + combined_sales.is_multisale_coalesced AS is_multisale, + combined_sales.num_parcels_sale_coalesced AS num_parcels_sale, + combined_sales.buyer_name_coalesced AS buyer_name, + combined_sales.sale_type_coalesced AS sale_type, combined_sales.sale_filter_same_sale_within_365, combined_sales.sale_filter_less_than_10k, combined_sales.sale_filter_deed_type, @@ -420,7 +338,7 @@ SELECT combined_sales.sale_filter_ptax_flag, combined_sales.mydec_property_advertised, combined_sales.mydec_is_installment_contract_fulfilled, - combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, combined_sales.mydec_is_transfer_of_less_than_100_percent_interest, combined_sales.mydec_is_court_ordered_sale, combined_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -429,7 +347,7 @@ SELECT combined_sales.mydec_is_bank_reo_real_estate_owned, 
combined_sales.mydec_is_auction_sale, combined_sales.mydec_is_seller_buyer_a_relocation_company, - combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, combined_sales.mydec_is_buyer_a_real_estate_investment_trust, combined_sales.mydec_is_buyer_a_pension_fund, combined_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -451,4 +369,4 @@ SELECT combined_sales.source FROM combined_sales LEFT JOIN sales_val - ON combined_sales.doc_no = sales_val.meta_sale_document_num; + ON combined_sales.doc_no_coalesced = sales_val.meta_sale_document_num; From a41fa49d16b4c97c9e9adb8bf0241c61d00c42c4 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 25 Sep 2024 16:58:19 +0000 Subject: [PATCH 048/126] Try a refactor for 365 calc --- dbt/models/default/default.vw_pin_sale.sql | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index c74eee6f3..fa5de7b72 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -38,7 +38,11 @@ unique_sales AS ( SELECT *, COALESCE( - EXTRACT(DAY FROM sale_date - same_price_earlier_date) <= 365, + DATE_DIFF( + 'day', + same_price_earlier_date, + sale_date + ) <= 365, FALSE ) AS sale_filter_same_sale_within_365 FROM ( @@ -288,17 +292,21 @@ combined_sales AS ( SELECT cte_sales.*, - -- Simplify 'sale_filter_same_sale_within_365' using precomputed columns + -- Simplify 'sale_filter_same_sale_within_365' using DATE_DIFF CASE WHEN LAG(sale_date_coalesced) OVER ( PARTITION BY pin_coalesced, sale_price_coalesced ORDER BY sale_date_coalesced ASC ) IS NOT NULL THEN - (sale_date_coalesced - LAG(sale_date_coalesced) OVER ( - PARTITION BY pin_coalesced, sale_price_coalesced - ORDER BY sale_date_coalesced ASC - )) <= 365 + DATE_DIFF( + 'day', + LAG(sale_date_coalesced) 
OVER ( + PARTITION BY pin_coalesced, sale_price_coalesced + ORDER BY sale_date_coalesced ASC + ), + sale_date_coalesced + ) <= 365 ELSE FALSE END AS sale_filter_same_sale_within_365, From 6924b06c35e3a03b47a2f5d5883bce7c40c56606 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 25 Sep 2024 18:46:38 +0000 Subject: [PATCH 049/126] Start fixing sql format errors --- dbt/models/default/default.vw_pin_sale.sql | 193 ++++++++++++--------- 1 file changed, 115 insertions(+), 78 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index fa5de7b72..6dbf0c9c1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -84,6 +84,14 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.price DESC, sales.salekey ASC ) AS max_price, + -- We remove the letter 'D' that trails some document numbers in + -- iasworld.sales since it prevents us from joining to mydec sales. + -- This creates one instance where we have duplicate document + -- numbers, so we sort by sale date (specifically to avoid conflicts + -- with detecting the easliest duplicate sale when there are + -- multiple within one document number, within a year) within the + -- new doument number to identify and remove the sale causing the + -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY NULLIF(REPLACE(sales.instruno, 'D', ''), ''), @@ -91,6 +99,11 @@ unique_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, + -- Some pins sell for the exact same price a few months after + -- they're sold (we need to make sure to only include deed types we + -- want). These sales are unecessary for modeling and may be + -- duplicates. We need to order by salekey as well in case of any + -- ties within price, date, and pin. 
LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, @@ -98,6 +111,11 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.saledt ASC, sales.salekey ASC ) AS same_price_earlier_date, + -- Historically, this view filtered out sales less than $10k and + -- as well as quit claims, executor deeds, beneficial interests, + -- and NULL deed types. Now we create "legacy" filter columns so + -- that this filtering can reproduced while still allowing all sales + -- into the view. sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, @@ -141,8 +159,8 @@ mydec_sales AS ( AS mydec_property_advertised, COALESCE(line_10a = 1, FALSE) AS mydec_is_installment_contract_fulfilled, - COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, + COALESCE(line_10b = 1, FALSE) --noqa + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa COALESCE(line_10c = 1, FALSE) AS mydec_is_transfer_of_less_than_100_percent_interest, COALESCE(line_10d = 1, FALSE) @@ -160,7 +178,7 @@ mydec_sales AS ( COALESCE(line_10j = 1, FALSE) AS mydec_is_seller_buyer_a_relocation_company, COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa COALESCE(line_10l = 1, FALSE) AS mydec_is_buyer_a_real_estate_investment_trust, COALESCE(line_10m = 1, FALSE) @@ -194,6 +212,9 @@ mydec_sales AS ( FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 ) + /* Some sales in mydec have multiple rows for one pin on a given sale date. + Sometimes they have different dates than iasworld prior to 2021 and when + joined back onto unique_sales will create duplicates by pin/sale date. 
*/ WHERE num_single_day_sales = 1 OR (YEAR(sale_date) > 2020) ), @@ -230,11 +251,14 @@ cte_sales AS ( -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, - COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, + COALESCE(uq_sales.township_code, tc.township_code) + AS township_code_coalesced, --noqa COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, COALESCE(uq_sales.class, tc.class) AS class_coalesced, - CASE - WHEN uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) + CASE --noqa + WHEN + uq_sales.year < '2021' + THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, CASE @@ -242,26 +266,33 @@ cte_sales AS ( AND md_sales.sale_date IS NOT NULL THEN TRUE WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) - AND uq_sales.sale_date IS NOT NULL + AND uq_sales.sale_date IS NOT NULL --noqa THEN FALSE END AS is_mydec_date, - COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, + COALESCE(uq_sales.sale_price, md_sales.sale_price) + AS sale_price_coalesced, --noqa uq_sales.sale_key, - COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, - COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, - COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, - COALESCE(uq_sales.is_multisale, md_sales.is_multisale) AS is_multisale_coalesced, - COALESCE(uq_sales.num_parcels_sale, md_sales.num_parcels_sale) AS num_parcels_sale_coalesced, - COALESCE(uq_sales.buyer_name, md_sales.buyer_name) AS buyer_name_coalesced, + COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, --noqa + COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) + AS deed_type_coalesced, + COALESCE(uq_sales.seller_name, md_sales.seller_name) + AS seller_name_coalesced, + 
COALESCE(uq_sales.is_multisale, md_sales.is_multisale) + AS is_multisale_coalesced, + COALESCE(uq_sales.num_parcels_sale, md_sales.num_parcels_sale) + AS num_parcels_sale_coalesced, + COALESCE(uq_sales.buyer_name, md_sales.buyer_name) + AS buyer_name_coalesced, COALESCE(uq_sales.sale_type, NULL) AS sale_type_coalesced, uq_sales.max_price, uq_sales.bad_doc_no, - CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END AS source, + CASE WHEN uq_sales.doc_no IS NOT NULL THEN 'iasworld' ELSE 'mydec' END + AS source, md_sales.mydec_deed_type, md_sales.sale_filter_ptax_flag, md_sales.mydec_property_advertised, md_sales.mydec_is_installment_contract_fulfilled, - md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa md_sales.mydec_is_transfer_of_less_than_100_percent_interest, md_sales.mydec_is_court_ordered_sale, md_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -270,7 +301,7 @@ cte_sales AS ( md_sales.mydec_is_bank_reo_real_estate_owned, md_sales.mydec_is_auction_sale, md_sales.mydec_is_seller_buyer_a_relocation_company, - md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa md_sales.mydec_is_buyer_a_real_estate_investment_trust, md_sales.mydec_is_buyer_a_pension_fund, md_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -288,84 +319,90 @@ cte_sales AS ( AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr ), +-- Handle various filters combined_sales AS ( SELECT - cte_sales.*, + cte_s.*, - -- Simplify 'sale_filter_same_sale_within_365' using DATE_DIFF + -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF CASE - WHEN LAG(sale_date_coalesced) OVER ( - PARTITION BY pin_coalesced, sale_price_coalesced - ORDER BY sale_date_coalesced ASC - ) IS NOT NULL - THEN + WHEN LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY 
cte_s.pin_coalesced, cte_s.sale_price_coalesced + ORDER BY cte_s.sale_date_coalesced ASC + ) IS NOT NULL + THEN DATE_DIFF( 'day', - LAG(sale_date_coalesced) OVER ( - PARTITION BY pin_coalesced, sale_price_coalesced - ORDER BY sale_date_coalesced ASC + LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY + cte_s.pin_coalesced, cte_s.sale_price_coalesced + ORDER BY cte_s.sale_date_coalesced ASC ), - sale_date_coalesced + cte_s.sale_date_coalesced ) <= 365 ELSE FALSE END AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' - (sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, + (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, -- Compute 'sale_filter_deed_type' ( - deed_type_coalesced IN ('03', '04', '06') - OR deed_type_coalesced IS NULL + cte_s.deed_type_coalesced IN ('03', '04', '06') + OR cte_s.deed_type_coalesced IS NULL ) AS sale_filter_deed_type - FROM cte_sales + FROM cte_sales AS cte_s ) SELECT - combined_sales.pin_coalesced AS pin, - combined_sales.year_coalesced AS year, - combined_sales.township_code_coalesced AS township_code, - combined_sales.nbhd_coalesced AS nbhd, - combined_sales.class_coalesced AS class, - combined_sales.sale_date_coalesced AS sale_date, - combined_sales.is_mydec_date, - combined_sales.sale_price_coalesced AS sale_price, - combined_sales.sale_key, - combined_sales.doc_no_coalesced AS doc_no, - combined_sales.deed_type_coalesced AS deed_type, - combined_sales.seller_name_coalesced AS seller_name, - combined_sales.is_multisale_coalesced AS is_multisale, - combined_sales.num_parcels_sale_coalesced AS num_parcels_sale, - combined_sales.buyer_name_coalesced AS buyer_name, - combined_sales.sale_type_coalesced AS sale_type, - combined_sales.sale_filter_same_sale_within_365, - combined_sales.sale_filter_less_than_10k, - combined_sales.sale_filter_deed_type, + cs.pin_coalesced AS pin, + cs.year_coalesced AS year, + cs.township_code_coalesced AS township_code, + cs.nbhd_coalesced AS nbhd, + 
cs.class_coalesced AS class, + cs.sale_date_coalesced AS sale_date, + cs.is_mydec_date, + cs.sale_price_coalesced AS sale_price, + cs.sale_key, + cs.doc_no_coalesced AS doc_no, + cs.deed_type_coalesced AS deed_type, + cs.seller_name_coalesced AS seller_name, + cs.is_multisale_coalesced AS is_multisale, + cs.num_parcels_sale_coalesced AS num_parcels_sale, + cs.buyer_name_coalesced AS buyer_name, + cs.sale_type_coalesced AS sale_type, + cs.sale_filter_same_sale_within_365, + cs.sale_filter_less_than_10k, + cs.sale_filter_deed_type, + -- Our sales validation pipeline only validates sales past 2014 due to MyDec + -- limitations. Previous to that values for sv_is_outlier will be NULL, so + -- if we want to both exclude detected outliers and include sales prior to + -- 2014, we need to code everything NULL as FALSE. COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, - combined_sales.mydec_deed_type, - combined_sales.sale_filter_ptax_flag, - combined_sales.mydec_property_advertised, - combined_sales.mydec_is_installment_contract_fulfilled, - combined_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, - combined_sales.mydec_is_transfer_of_less_than_100_percent_interest, - combined_sales.mydec_is_court_ordered_sale, - combined_sales.mydec_is_sale_in_lieu_of_foreclosure, - combined_sales.mydec_is_condemnation, - combined_sales.mydec_is_short_sale, - combined_sales.mydec_is_bank_reo_real_estate_owned, - combined_sales.mydec_is_auction_sale, - combined_sales.mydec_is_seller_buyer_a_relocation_company, - combined_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, - combined_sales.mydec_is_buyer_a_real_estate_investment_trust, - combined_sales.mydec_is_buyer_a_pension_fund, - combined_sales.mydec_is_buyer_an_adjacent_property_owner, - combined_sales.mydec_is_buyer_exercising_an_option_to_purchase, - combined_sales.mydec_is_simultaneous_trade_of_property, - combined_sales.mydec_is_sale_leaseback, - 
combined_sales.mydec_is_homestead_exemption, - combined_sales.mydec_homestead_exemption_general_alternative, - combined_sales.mydec_homestead_exemption_senior_citizens, - combined_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, + cs.mydec_deed_type, + cs.sale_filter_ptax_flag, + cs.mydec_property_advertised, + cs.mydec_is_installment_contract_fulfilled, + cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + cs.mydec_is_transfer_of_less_than_100_percent_interest, + cs.mydec_is_court_ordered_sale, + cs.mydec_is_sale_in_lieu_of_foreclosure, + cs.mydec_is_condemnation, + cs.mydec_is_short_sale, + cs.mydec_is_bank_reo_real_estate_owned, + cs.mydec_is_auction_sale, + cs.mydec_is_seller_buyer_a_relocation_company, + cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + cs.mydec_is_buyer_a_real_estate_investment_trust, + cs.mydec_is_buyer_a_pension_fund, + cs.mydec_is_buyer_an_adjacent_property_owner, + cs.mydec_is_buyer_exercising_an_option_to_purchase, + cs.mydec_is_simultaneous_trade_of_property, + cs.mydec_is_sale_leaseback, + cs.mydec_is_homestead_exemption, + cs.mydec_homestead_exemption_general_alternative, + cs.mydec_homestead_exemption_senior_citizens, + cs.mydec_homestead_exemption_senior_citizens_assessment_freeze, sales_val.sv_is_outlier, sales_val.sv_is_ptax_outlier, sales_val.sv_is_heuristic_outlier, @@ -374,7 +411,7 @@ SELECT sales_val.sv_outlier_reason3, sales_val.sv_run_id, sales_val.sv_version, - combined_sales.source -FROM combined_sales + cs.source +FROM combined_sales AS cs LEFT JOIN sales_val - ON combined_sales.doc_no_coalesced = sales_val.meta_sale_document_num; + ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; From 9a1f8c25b5a82c59246cb705f02a125785444294 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 25 Sep 2024 18:47:41 +0000 Subject: [PATCH 050/126] Start fixing sql format errors --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 6dbf0c9c1..b3be732f3 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -334,7 +334,7 @@ combined_sales AS ( DATE_DIFF( 'day', LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY + PARTITION BY --noqa cte_s.pin_coalesced, cte_s.sale_price_coalesced ORDER BY cte_s.sale_date_coalesced ASC ), From 251bb65a0ee73e5e94b089186aa0ed08b70c8247 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 17:16:58 +0000 Subject: [PATCH 051/126] Try adding deed type partition --- dbt/models/default/default.vw_pin_sale.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index b3be732f3..48a651648 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -327,7 +327,10 @@ combined_sales AS ( -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF CASE WHEN LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY cte_s.pin_coalesced, cte_s.sale_price_coalesced + PARTITION BY + cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.instrtyp NOT IN ('03', '04', '06') ORDER BY cte_s.sale_date_coalesced ASC ) IS NOT NULL THEN From 2c8a9c8cc37fc0e8887855cacea0c0ca0bf8e06c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 18:51:56 +0000 Subject: [PATCH 052/126] Try fixing deed type --- dbt/models/default/default.vw_pin_sale.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 48a651648..ed9779a4d 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -275,6 +275,7 @@ cte_sales AS ( COALESCE(uq_sales.doc_no, md_sales.doc_no) AS 
doc_no_coalesced, --noqa COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, + uq_sales.deed_type_ias, COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, COALESCE(uq_sales.is_multisale, md_sales.is_multisale) @@ -330,7 +331,7 @@ combined_sales AS ( PARTITION BY cte_s.pin_coalesced, cte_s.sale_price_coalesced, - cte_s.instrtyp NOT IN ('03', '04', '06') + cte_s.deed_type_ias NOT IN ('03', '04', '06') ORDER BY cte_s.sale_date_coalesced ASC ) IS NOT NULL THEN @@ -338,8 +339,10 @@ combined_sales AS ( 'day', LAG(cte_s.sale_date_coalesced) OVER ( PARTITION BY --noqa - cte_s.pin_coalesced, cte_s.sale_price_coalesced - ORDER BY cte_s.sale_date_coalesced ASC + cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.instrtyp NOT IN ('03', '04', '06') + ORDER BY cte_s.deed_type_ias ASC ), cte_s.sale_date_coalesced ) <= 365 From 8fb9764cee62d14d1913539e15c0f66161cbbb05 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 19:03:38 +0000 Subject: [PATCH 053/126] Try fixing 365 --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ed9779a4d..d61acb47d 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -54,7 +54,7 @@ unique_sales AS ( tc.class, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, - sales.salekey AS sale_key, + sales.salekey AS salekey, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, NULLIF(sales.instrtyp, '') AS deed_type, COALESCE( @@ -332,7 +332,7 @@ combined_sales AS ( cte_s.pin_coalesced, cte_s.sale_price_coalesced, cte_s.deed_type_ias NOT IN ('03', '04', '06') - ORDER BY cte_s.sale_date_coalesced ASC + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC ) IS NOT NULL THEN DATE_DIFF( From 
33b5bdbfe8e38444379b7911e6cff6a80a1d3aae Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 19:10:15 +0000 Subject: [PATCH 054/126] Revert sale key change --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d61acb47d..567d592ee 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -54,7 +54,7 @@ unique_sales AS ( tc.class, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, - sales.salekey AS salekey, + sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, NULLIF(sales.instrtyp, '') AS deed_type, COALESCE( From 62bb6a1baae3e2b59d85236735c02287c85841b4 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 19:31:04 +0000 Subject: [PATCH 055/126] Revert sale key change --- dbt/models/default/default.vw_pin_sale.sql | 50 +++++++++++++--------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 567d592ee..36a0e0eee 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -327,27 +327,35 @@ combined_sales AS ( -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF CASE - WHEN LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.deed_type_ias NOT IN ('03', '04', '06') - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC - ) IS NOT NULL - THEN - DATE_DIFF( - 'day', - LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY --noqa - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.instrtyp NOT IN ('03', '04', '06') - ORDER BY cte_s.deed_type_ias ASC - ), - cte_s.sale_date_coalesced - ) <= 365 - ELSE FALSE - END AS 
sale_filter_same_sale_within_365, + WHEN cs.source = 'iasworld' THEN + CASE + WHEN LAG(cs.sale_date_coalesced) OVER ( + PARTITION BY + cs.pin_coalesced, + cs.sale_price_coalesced, + cs.deed_type_ias NOT IN ('03', '04', '06'), + cs.source + ORDER BY cs.sale_date_coalesced ASC, cs.salekey ASC + ) IS NOT NULL + THEN + DATE_DIFF( + 'day', + LAG(cs.sale_date_coalesced) OVER ( + PARTITION BY + cs.pin_coalesced, + cs.sale_price_coalesced, + cs.deed_type_ias NOT IN ('03', '04', '06'), + cs.source + ORDER BY cs.sale_date_coalesced ASC, cs.salekey ASC + ), + cs.sale_date_coalesced + ) <= 365 + ELSE FALSE + END + ELSE + -- Original calculation or appropriate logic for other sources + END AS sale_filter_same_sale_within_365 + -- Compute 'sale_filter_less_than_10k' (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, From 1a447f78fe71d1c815d41d3dbbb77582553477a3 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 19:41:37 +0000 Subject: [PATCH 056/126] Revert sale key change --- dbt/models/default/default.vw_pin_sale.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 36a0e0eee..0cd5df53c 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -354,8 +354,7 @@ combined_sales AS ( END ELSE -- Original calculation or appropriate logic for other sources - END AS sale_filter_same_sale_within_365 - + END AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, From fb481a3aeba3693ac6d05c3dc9a61d8fbece6ddf Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 19:54:57 +0000 Subject: [PATCH 057/126] Correct cte calls --- dbt/models/default/default.vw_pin_sale.sql | 34 ++++++++++++---------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git 
a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 0cd5df53c..8541da817 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -327,33 +327,34 @@ combined_sales AS ( -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF CASE - WHEN cs.source = 'iasworld' THEN + WHEN cte_s.source = 'iasworld' THEN CASE - WHEN LAG(cs.sale_date_coalesced) OVER ( + WHEN LAG(cte_s.sale_date_coalesced) OVER ( PARTITION BY - cs.pin_coalesced, - cs.sale_price_coalesced, - cs.deed_type_ias NOT IN ('03', '04', '06'), - cs.source - ORDER BY cs.sale_date_coalesced ASC, cs.salekey ASC + cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.deed_type_ias NOT IN ('03', '04', '06'), + cte_s.source + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC ) IS NOT NULL - THEN + THEN DATE_DIFF( 'day', - LAG(cs.sale_date_coalesced) OVER ( + LAG(cte_s.sale_date_coalesced) OVER ( PARTITION BY - cs.pin_coalesced, - cs.sale_price_coalesced, - cs.deed_type_ias NOT IN ('03', '04', '06'), - cs.source - ORDER BY cs.sale_date_coalesced ASC, cs.salekey ASC + cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.deed_type_ias NOT IN ('03', '04', '06'), + cte_s.source + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC ), - cs.sale_date_coalesced + cte_s.sale_date_coalesced ) <= 365 ELSE FALSE END ELSE - -- Original calculation or appropriate logic for other sources + -- For other sources, default to FALSE or use appropriate logic + FALSE END AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' @@ -367,6 +368,7 @@ combined_sales AS ( FROM cte_sales AS cte_s ) + SELECT cs.pin_coalesced AS pin, cs.year_coalesced AS year, From a5f7004b75c0c3921d599ab40a9ca25acfa77750 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 20:02:19 +0000 Subject: [PATCH 058/126] Correct column name alias --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 8541da817..e9380ecc8 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -275,7 +275,7 @@ cte_sales AS ( COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, --noqa COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, - uq_sales.deed_type_ias, + uq_sales.instrtyp as deed_type_ias, COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, COALESCE(uq_sales.is_multisale, md_sales.is_multisale) From a5d069e2d017f4d4d455b48f84d8fc44f9c0a73c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 20:11:46 +0000 Subject: [PATCH 059/126] Correct column name alias --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e9380ecc8..9f2302c7c 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -275,7 +275,7 @@ cte_sales AS ( COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, --noqa COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, - uq_sales.instrtyp as deed_type_ias, + uq_sales.deed_type as deed_type_ias, COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, COALESCE(uq_sales.is_multisale, md_sales.is_multisale) From 78fc25d3f15f62dc9638eefc2d5a9c44f2277c8f Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 20:17:32 +0000 Subject: [PATCH 060/126] Correct column name alias --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 9f2302c7c..e79e71aec 100644 --- 
a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -335,7 +335,7 @@ combined_sales AS ( cte_s.sale_price_coalesced, cte_s.deed_type_ias NOT IN ('03', '04', '06'), cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC ) IS NOT NULL THEN DATE_DIFF( @@ -346,7 +346,7 @@ combined_sales AS ( cte_s.sale_price_coalesced, cte_s.deed_type_ias NOT IN ('03', '04', '06'), cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.salekey ASC + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC ), cte_s.sale_date_coalesced ) <= 365 From 19fd594d3d021250d3a313c83d827c6c7f010a92 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 20:36:55 +0000 Subject: [PATCH 061/126] Try making less than equal to or less than --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e79e71aec..4d5457227 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,7 +257,7 @@ cte_sales AS ( COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE --noqa WHEN - uq_sales.year < '2021' + uq_sales.year <= '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, From a1227b33a623f5350d18f2ac586dd8cd1b0cc480 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 30 Sep 2024 20:43:04 +0000 Subject: [PATCH 062/126] Revert change --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 4d5457227..e79e71aec 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -257,7 
+257,7 @@ cte_sales AS ( COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE --noqa WHEN - uq_sales.year <= '2021' + uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, From a69042f5f02ac10c235a8dbab86d231e18823279 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 1 Oct 2024 18:41:31 +0000 Subject: [PATCH 063/126] Remove iasworld source filter on 365 calc --- dbt/models/default/default.vw_pin_sale.sql | 50 ++++++++++------------ 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e79e71aec..9dd854541 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -327,34 +327,28 @@ combined_sales AS ( -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF CASE - WHEN cte_s.source = 'iasworld' THEN - CASE - WHEN LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.deed_type_ias NOT IN ('03', '04', '06'), - cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC - ) IS NOT NULL - THEN - DATE_DIFF( - 'day', - LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.deed_type_ias NOT IN ('03', '04', '06'), - cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC - ), - cte_s.sale_date_coalesced - ) <= 365 - ELSE FALSE - END - ELSE - -- For other sources, default to FALSE or use appropriate logic - FALSE + WHEN LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY + cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.deed_type_ias NOT IN ('03', '04', '06'), + cte_s.source + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC + ) IS NOT NULL + THEN + DATE_DIFF( + 'day', + LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY + 
cte_s.pin_coalesced, + cte_s.sale_price_coalesced, + cte_s.deed_type_ias NOT IN ('03', '04', '06'), + cte_s.source + ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC + ), + cte_s.sale_date_coalesced + ) <= 365 + ELSE FALSE END AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' From 22254411373a2b45cac6b918668db036dd050ec6 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 1 Oct 2024 19:01:04 +0000 Subject: [PATCH 064/126] Try another strategy --- dbt/models/default/default.vw_pin_sale.sql | 99 +++++++++------------- 1 file changed, 41 insertions(+), 58 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 9dd854541..090978462 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -88,9 +88,9 @@ unique_sales AS ( -- iasworld.sales since it prevents us from joining to mydec sales. -- This creates one instance where we have duplicate document -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the easliest duplicate sale when there are + -- with detecting the earliest duplicate sale when there are -- multiple within one document number, within a year) within the - -- new doument number to identify and remove the sale causing the + -- new document number to identify and remove the sale causing the -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY @@ -101,7 +101,7 @@ unique_sales AS ( ) AS bad_doc_no, -- Some pins sell for the exact same price a few months after -- they're sold (we need to make sure to only include deed types we - -- want). These sales are unecessary for modeling and may be + -- want). These sales are unnecessary for modeling and may be -- duplicates. We need to order by salekey as well in case of any -- ties within price, date, and pin. 
LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( @@ -114,7 +114,7 @@ unique_sales AS ( -- Historically, this view filtered out sales less than $10k and -- as well as quit claims, executor deeds, beneficial interests, -- and NULL deed types. Now we create "legacy" filter columns so - -- that this filtering can reproduced while still allowing all sales + -- that this filtering can be reproduced while still allowing all sales -- into the view. sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( @@ -159,8 +159,8 @@ mydec_sales AS ( AS mydec_property_advertised, COALESCE(line_10a = 1, FALSE) AS mydec_is_installment_contract_fulfilled, - COALESCE(line_10b = 1, FALSE) --noqa - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + COALESCE(line_10b = 1, FALSE) + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, COALESCE(line_10c = 1, FALSE) AS mydec_is_transfer_of_less_than_100_percent_interest, COALESCE(line_10d = 1, FALSE) @@ -178,7 +178,7 @@ mydec_sales AS ( COALESCE(line_10j = 1, FALSE) AS mydec_is_seller_buyer_a_relocation_company, COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, COALESCE(line_10l = 1, FALSE) AS mydec_is_buyer_a_real_estate_investment_trust, COALESCE(line_10m = 1, FALSE) @@ -245,19 +245,17 @@ sales_val AS ( AND sf.version = mv.max_version ), --- Introducing cte_sales to precompute the coalesced values cte_sales AS ( SELECT -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) - AS township_code_coalesced, --noqa + AS township_code_coalesced, COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, COALESCE(uq_sales.class, tc.class) AS class_coalesced, - CASE --noqa - WHEN - uq_sales.year < '2021' + CASE + WHEN 
uq_sales.year < '2021' THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, @@ -266,16 +264,16 @@ cte_sales AS ( AND md_sales.sale_date IS NOT NULL THEN TRUE WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) - AND uq_sales.sale_date IS NOT NULL --noqa + AND uq_sales.sale_date IS NOT NULL THEN FALSE END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) - AS sale_price_coalesced, --noqa + AS sale_price_coalesced, uq_sales.sale_key, - COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, --noqa + COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, - uq_sales.deed_type as deed_type_ias, + uq_sales.deed_type AS deed_type_ias, COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, COALESCE(uq_sales.is_multisale, md_sales.is_multisale) @@ -293,7 +291,7 @@ cte_sales AS ( md_sales.sale_filter_ptax_flag, md_sales.mydec_property_advertised, md_sales.mydec_is_installment_contract_fulfilled, - md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, md_sales.mydec_is_transfer_of_less_than_100_percent_interest, md_sales.mydec_is_court_ordered_sale, md_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -302,7 +300,7 @@ cte_sales AS ( md_sales.mydec_is_bank_reo_real_estate_owned, md_sales.mydec_is_auction_sale, md_sales.mydec_is_seller_buyer_a_relocation_company, - md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, md_sales.mydec_is_buyer_a_real_estate_investment_trust, md_sales.mydec_is_buyer_a_pension_fund, md_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -312,57 +310,42 @@ cte_sales AS ( md_sales.mydec_is_homestead_exemption, 
md_sales.mydec_homestead_exemption_general_alternative, md_sales.mydec_homestead_exemption_senior_citizens, - md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze + md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, + -- Include sale_filter_same_sale_within_365 from unique_sales + uq_sales.sale_filter_same_sale_within_365, + uq_sales.sale_filter_less_than_10k, + uq_sales.sale_filter_deed_type FROM unique_sales AS uq_sales - FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no + FULL OUTER JOIN mydec_sales AS md_sales + ON uq_sales.doc_no = md_sales.doc_no LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr ), --- Handle various filters combined_sales AS ( SELECT cte_s.*, - - -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF + -- Remove recalculation of sale_filter_same_sale_within_365 + -- Keep the filters calculated in unique_sales + -- Also ensure that sale_filter_same_sale_within_365 is not NULL + COALESCE(cte_s.sale_filter_same_sale_within_365, FALSE) + AS sale_filter_same_sale_within_365, + -- Use the filters from unique_sales or calculate for MyDec sales CASE - WHEN LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.deed_type_ias NOT IN ('03', '04', '06'), - cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC - ) IS NOT NULL - THEN - DATE_DIFF( - 'day', - LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY - cte_s.pin_coalesced, - cte_s.sale_price_coalesced, - cte_s.deed_type_ias NOT IN ('03', '04', '06'), - cte_s.source - ORDER BY cte_s.sale_date_coalesced ASC, cte_s.sale_key ASC - ), - cte_s.sale_date_coalesced - ) <= 365 - ELSE FALSE - END AS sale_filter_same_sale_within_365, - - -- Compute 'sale_filter_less_than_10k' - (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, - - -- Compute 
'sale_filter_deed_type' - ( - cte_s.deed_type_coalesced IN ('03', '04', '06') - OR cte_s.deed_type_coalesced IS NULL - ) AS sale_filter_deed_type + WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_less_than_10k + ELSE (cte_s.sale_price_coalesced <= 10000) + END AS sale_filter_less_than_10k, + CASE + WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_deed_type + ELSE ( + cte_s.deed_type_coalesced IN ('03', '04', '06') + OR cte_s.deed_type_coalesced IS NULL + ) + END AS sale_filter_deed_type FROM cte_sales AS cte_s ) - SELECT cs.pin_coalesced AS pin, cs.year_coalesced AS year, @@ -392,7 +375,7 @@ SELECT cs.sale_filter_ptax_flag, cs.mydec_property_advertised, cs.mydec_is_installment_contract_fulfilled, - cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, cs.mydec_is_transfer_of_less_than_100_percent_interest, cs.mydec_is_court_ordered_sale, cs.mydec_is_sale_in_lieu_of_foreclosure, @@ -401,7 +384,7 @@ SELECT cs.mydec_is_bank_reo_real_estate_owned, cs.mydec_is_auction_sale, cs.mydec_is_seller_buyer_a_relocation_company, - cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, cs.mydec_is_buyer_a_real_estate_investment_trust, cs.mydec_is_buyer_a_pension_fund, cs.mydec_is_buyer_an_adjacent_property_owner, From 77683f740f155b9d8c5882f6ed345d2baf3741c6 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 1 Oct 2024 19:08:53 +0000 Subject: [PATCH 065/126] Try another strategy --- dbt/models/default/default.vw_pin_sale.sql | 213 ++++++++++----------- 1 file changed, 100 insertions(+), 113 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 090978462..e16e8feb9 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -84,14 +84,6 @@ unique_sales AS ( 
sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.price DESC, sales.salekey ASC ) AS max_price, - -- We remove the letter 'D' that trails some document numbers in - -- iasworld.sales since it prevents us from joining to mydec sales. - -- This creates one instance where we have duplicate document - -- numbers, so we sort by sale date (specifically to avoid conflicts - -- with detecting the earliest duplicate sale when there are - -- multiple within one document number, within a year) within the - -- new document number to identify and remove the sale causing the - -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY NULLIF(REPLACE(sales.instruno, 'D', ''), ''), @@ -99,11 +91,6 @@ unique_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, - -- Some pins sell for the exact same price a few months after - -- they're sold (we need to make sure to only include deed types we - -- want). These sales are unnecessary for modeling and may be - -- duplicates. We need to order by salekey as well in case of any - -- ties within price, date, and pin. LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, @@ -111,11 +98,6 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.saledt ASC, sales.salekey ASC ) AS same_price_earlier_date, - -- Historically, this view filtered out sales less than $10k and - -- as well as quit claims, executor deeds, beneficial interests, - -- and NULL deed types. Now we create "legacy" filter columns so - -- that this filtering can be reproduced while still allowing all sales - -- into the view. 
sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, @@ -143,80 +125,91 @@ unique_sales AS ( ), mydec_sales AS ( - SELECT * FROM ( - SELECT - REPLACE(document_number, 'D', '') AS doc_no, - REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - SUBSTR(line_4_instrument_date, 1, 4) AS year, - line_5_instrument_type AS mydec_deed_type, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - CAST(line_11_full_consideration AS BIGINT) AS sale_price, - line_2_total_parcels AS num_parcels_sale, - COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, - COALESCE(line_7_property_advertised = 1, FALSE) - AS mydec_property_advertised, - COALESCE(line_10a = 1, FALSE) - AS mydec_is_installment_contract_fulfilled, - COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, - COALESCE(line_10c = 1, FALSE) - AS mydec_is_transfer_of_less_than_100_percent_interest, - COALESCE(line_10d = 1, FALSE) - AS mydec_is_court_ordered_sale, - COALESCE(line_10e = 1, FALSE) - AS mydec_is_sale_in_lieu_of_foreclosure, - COALESCE(line_10f = 1, FALSE) - AS mydec_is_condemnation, - COALESCE(line_10g = 1, FALSE) - AS mydec_is_short_sale, - COALESCE(line_10h = 1, FALSE) - AS mydec_is_bank_reo_real_estate_owned, - COALESCE(line_10i = 1, FALSE) - AS mydec_is_auction_sale, - COALESCE(line_10j = 1, FALSE) - AS mydec_is_seller_buyer_a_relocation_company, - COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, - COALESCE(line_10l = 1, FALSE) - AS mydec_is_buyer_a_real_estate_investment_trust, - COALESCE(line_10m = 1, FALSE) - AS mydec_is_buyer_a_pension_fund, - COALESCE(line_10n = 1, FALSE) - AS mydec_is_buyer_an_adjacent_property_owner, - COALESCE(line_10o = 1, FALSE) - AS mydec_is_buyer_exercising_an_option_to_purchase, - COALESCE(line_10p = 1, 
FALSE) - AS mydec_is_simultaneous_trade_of_property, - COALESCE(line_10q = 1, FALSE) - AS mydec_is_sale_leaseback, - COALESCE(line_10s = 1, FALSE) - AS mydec_is_homestead_exemption, - line_10s_generalalternative - AS mydec_homestead_exemption_general_alternative, - line_10s_senior_citizens - AS mydec_homestead_exemption_senior_citizens, - line_10s_senior_citizens_assessment_freeze - AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - ( - COALESCE(line_10b, 0) + COALESCE(line_10c, 0) - + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) - + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) - + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) - + COALESCE(line_10k, 0) - ) > 0 AS sale_filter_ptax_flag, - COUNT() OVER ( - PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales - FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 + SELECT *, + COALESCE( + DATE_DIFF( + 'day', + LAG(sale_date) OVER ( + PARTITION BY pin + ORDER BY sale_date ASC + ), + sale_date + ) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365 + FROM ( + SELECT * FROM ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + SUBSTR(line_4_instrument_date, 1, 4) AS year, + line_5_instrument_type AS mydec_deed_type, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, + line_2_total_parcels AS num_parcels_sale, + COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, + COALESCE(line_7_property_advertised = 1, FALSE) + AS mydec_property_advertised, + COALESCE(line_10a = 1, FALSE) + AS mydec_is_installment_contract_fulfilled, + COALESCE(line_10b = 1, FALSE) + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, + COALESCE(line_10c = 1, FALSE) + AS mydec_is_transfer_of_less_than_100_percent_interest, + COALESCE(line_10d = 1, 
FALSE) + AS mydec_is_court_ordered_sale, + COALESCE(line_10e = 1, FALSE) + AS mydec_is_sale_in_lieu_of_foreclosure, + COALESCE(line_10f = 1, FALSE) + AS mydec_is_condemnation, + COALESCE(line_10g = 1, FALSE) + AS mydec_is_short_sale, + COALESCE(line_10h = 1, FALSE) + AS mydec_is_bank_reo_real_estate_owned, + COALESCE(line_10i = 1, FALSE) + AS mydec_is_auction_sale, + COALESCE(line_10j = 1, FALSE) + AS mydec_is_seller_buyer_a_relocation_company, + COALESCE(line_10k = 1, FALSE) + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, + COALESCE(line_10l = 1, FALSE) + AS mydec_is_buyer_a_real_estate_investment_trust, + COALESCE(line_10m = 1, FALSE) + AS mydec_is_buyer_a_pension_fund, + COALESCE(line_10n = 1, FALSE) + AS mydec_is_buyer_an_adjacent_property_owner, + COALESCE(line_10o = 1, FALSE) + AS mydec_is_buyer_exercising_an_option_to_purchase, + COALESCE(line_10p = 1, FALSE) + AS mydec_is_simultaneous_trade_of_property, + COALESCE(line_10q = 1, FALSE) + AS mydec_is_sale_leaseback, + COALESCE(line_10s = 1, FALSE) + AS mydec_is_homestead_exemption, + line_10s_generalalternative + AS mydec_homestead_exemption_general_alternative, + line_10s_senior_citizens + AS mydec_homestead_exemption_senior_citizens, + line_10s_senior_citizens_assessment_freeze + AS mydec_homestead_exemption_senior_citizens_assessment_freeze, + ( + COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) + + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) + + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) + + COALESCE(line_10k, 0) + ) > 0 AS sale_filter_ptax_flag, + COUNT() OVER ( + PARTITION BY line_1_primary_pin, line_4_instrument_date + ) AS num_single_day_sales + FROM {{ source('sale', 'mydec') }} + WHERE line_2_total_parcels = 1 + ) + WHERE num_single_day_sales = 1 + OR (YEAR(sale_date) > 2020) ) - /* Some sales in mydec have multiple rows for one pin on a given sale date. 
- Sometimes they have different dates than iasworld prior to 2021 and when - joined back onto unique_sales will create duplicates by pin/sale date. */ - WHERE num_single_day_sales = 1 - OR (YEAR(sale_date) > 2020) ), max_version_flag AS ( @@ -311,10 +304,21 @@ cte_sales AS ( md_sales.mydec_homestead_exemption_general_alternative, md_sales.mydec_homestead_exemption_senior_citizens, md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, - -- Include sale_filter_same_sale_within_365 from unique_sales - uq_sales.sale_filter_same_sale_within_365, - uq_sales.sale_filter_less_than_10k, - uq_sales.sale_filter_deed_type + -- Include sale_filter_same_sale_within_365 from both sources + COALESCE(uq_sales.sale_filter_same_sale_within_365, md_sales.sale_filter_same_sale_within_365, FALSE) AS sale_filter_same_sale_within_365, + -- Include sale_filter_less_than_10k and sale_filter_deed_type + -- Use appropriate values based on source + CASE + WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_less_than_10k + ELSE (md_sales.sale_price <= 10000) + END AS sale_filter_less_than_10k, + CASE + WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_deed_type + ELSE ( + md_sales.mydec_deed_type IN ('03', '04', '06') + OR md_sales.mydec_deed_type IS NULL + ) + END AS sale_filter_deed_type FROM unique_sales AS uq_sales FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no @@ -325,24 +329,7 @@ cte_sales AS ( combined_sales AS ( SELECT - cte_s.*, - -- Remove recalculation of sale_filter_same_sale_within_365 - -- Keep the filters calculated in unique_sales - -- Also ensure that sale_filter_same_sale_within_365 is not NULL - COALESCE(cte_s.sale_filter_same_sale_within_365, FALSE) - AS sale_filter_same_sale_within_365, - -- Use the filters from unique_sales or calculate for MyDec sales - CASE - WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_less_than_10k - ELSE (cte_s.sale_price_coalesced <= 10000) - END AS sale_filter_less_than_10k, - 
CASE - WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_deed_type - ELSE ( - cte_s.deed_type_coalesced IN ('03', '04', '06') - OR cte_s.deed_type_coalesced IS NULL - ) - END AS sale_filter_deed_type + cte_s.* FROM cte_sales AS cte_s ) From 377f20b53314c4b6da8f9ea68e934fdcfa5d5c75 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 2 Oct 2024 14:58:43 +0000 Subject: [PATCH 066/126] Remove redundant cte --- dbt/models/default/default.vw_pin_sale.sql | 30 ++++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e16e8feb9..f95b185ef 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -37,6 +37,9 @@ calculated AS ( unique_sales AS ( SELECT *, + -- Historically, this view excluded sales for a given pin if it had sold + -- within the last 12 months for the same price. This filter allows us + -- to filter out those sales. COALESCE( DATE_DIFF( 'day', @@ -77,6 +80,11 @@ unique_sales AS ( WHEN sales.saletype = '0' THEN 'LAND' WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' END AS sale_type, + -- Sales are not entirely unique by pin/date so we group all + -- sales by pin/date, then order by descending price + -- and give the top observation a value of 1 for "max_price". + -- We need to order by salekey as well in case of any ties within + -- price, date, and pin. ROW_NUMBER() OVER ( PARTITION BY sales.parid, @@ -84,6 +92,14 @@ unique_sales AS ( sales.instrtyp NOT IN ('03', '04', '06') ORDER BY sales.price DESC, sales.salekey ASC ) AS max_price, + -- We remove the letter 'D' that trails some document numbers in + -- iasworld.sales since it prevents us from joining to mydec sales. 
+ -- This creates one instance where we have duplicate document + -- numbers, so we sort by sale date (specifically to avoid conflicts + -- with detecting the easliest duplicate sale when there are + -- multiple within one document number, within a year) within the + -- new doument number to identify and remove the sale causing the + -- duplicate document number. ROW_NUMBER() OVER ( PARTITION BY NULLIF(REPLACE(sales.instruno, 'D', ''), ''), @@ -91,6 +107,11 @@ unique_sales AS ( sales.price > 10000 ORDER BY sales.saledt ASC, sales.salekey ASC ) AS bad_doc_no, + -- Some pins sell for the exact same price a few months after + -- they're sold (we need to make sure to only include deed types we + -- want). These sales are unecessary for modeling and may be + -- duplicates. We need to order by salekey as well in case of any + -- ties within price, date, and pin. LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, @@ -238,9 +259,8 @@ sales_val AS ( AND sf.version = mv.max_version ), -cte_sales AS ( +combined_sales AS ( SELECT - -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) @@ -325,12 +345,6 @@ cte_sales AS ( LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr -), - -combined_sales AS ( - SELECT - cte_s.* - FROM cte_sales AS cte_s ) SELECT From f8152b72a896ddebb25273f7c0ac889be3a57a44 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 19:53:57 +0000 Subject: [PATCH 067/126] Try first pass at sale filter fix --- dbt/models/default/default.vw_pin_sale.sql | 230 ++++++++++++--------- 1 file changed, 131 insertions(+), 99 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index f95b185ef..1e81534f7 100644 --- 
a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -34,6 +34,96 @@ calculated AS ( GROUP BY instruno ), + +mydec_sales AS ( + SELECT *, + COALESCE( + DATE_DIFF( + 'day', + LAG(sale_date) OVER ( + PARTITION BY pin + ORDER BY sale_date ASC + ), + sale_date + ) <= 365, + FALSE + ) AS sale_filter_same_sale_within_365 + FROM ( + SELECT * FROM ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + SUBSTR(line_4_instrument_date, 1, 4) AS year, + line_5_instrument_type AS mydec_deed_type, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, + line_2_total_parcels AS num_parcels_sale, + COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, + COALESCE(line_7_property_advertised = 1, FALSE) + AS mydec_property_advertised, + COALESCE(line_10a = 1, FALSE) + AS mydec_is_installment_contract_fulfilled, + COALESCE(line_10b = 1, FALSE) + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, + COALESCE(line_10c = 1, FALSE) + AS mydec_is_transfer_of_less_than_100_percent_interest, + COALESCE(line_10d = 1, FALSE) + AS mydec_is_court_ordered_sale, + COALESCE(line_10e = 1, FALSE) + AS mydec_is_sale_in_lieu_of_foreclosure, + COALESCE(line_10f = 1, FALSE) + AS mydec_is_condemnation, + COALESCE(line_10g = 1, FALSE) + AS mydec_is_short_sale, + COALESCE(line_10h = 1, FALSE) + AS mydec_is_bank_reo_real_estate_owned, + COALESCE(line_10i = 1, FALSE) + AS mydec_is_auction_sale, + COALESCE(line_10j = 1, FALSE) + AS mydec_is_seller_buyer_a_relocation_company, + COALESCE(line_10k = 1, FALSE) + AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, + COALESCE(line_10l = 1, FALSE) + AS mydec_is_buyer_a_real_estate_investment_trust, + COALESCE(line_10m = 1, FALSE) + AS mydec_is_buyer_a_pension_fund, + 
COALESCE(line_10n = 1, FALSE) + AS mydec_is_buyer_an_adjacent_property_owner, + COALESCE(line_10o = 1, FALSE) + AS mydec_is_buyer_exercising_an_option_to_purchase, + COALESCE(line_10p = 1, FALSE) + AS mydec_is_simultaneous_trade_of_property, + COALESCE(line_10q = 1, FALSE) + AS mydec_is_sale_leaseback, + COALESCE(line_10s = 1, FALSE) + AS mydec_is_homestead_exemption, + line_10s_generalalternative + AS mydec_homestead_exemption_general_alternative, + line_10s_senior_citizens + AS mydec_homestead_exemption_senior_citizens, + line_10s_senior_citizens_assessment_freeze + AS mydec_homestead_exemption_senior_citizens_assessment_freeze, + ( + COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) + + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) + + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) + + COALESCE(line_10k, 0) + ) > 0 AS sale_filter_ptax_flag, + COUNT() OVER ( + PARTITION BY line_1_primary_pin, line_4_instrument_date + ) AS num_single_day_sales + FROM {{ source('sale', 'mydec') }} + WHERE line_2_total_parcels = 1 + ) + WHERE num_single_day_sales = 1 + OR (YEAR(sale_date) > 2020) + ) +), + + unique_sales AS ( SELECT *, @@ -55,7 +145,14 @@ unique_sales AS ( tc.township_code, tc.nbhd, tc.class, - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, + -- Adjusted sale_date + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + THEN mydec_sales.mydec_date + ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + END AS adjusted_sale_date, CAST(sales.price AS BIGINT) AS sale_price, sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, @@ -112,12 +209,28 @@ unique_sales AS ( -- want). These sales are unecessary for modeling and may be -- duplicates. We need to order by salekey as well in case of any -- ties within price, date, and pin. 
- LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( + LAG( + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + THEN mydec_sales.mydec_date + ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + END + ) OVER ( PARTITION BY sales.parid, sales.price, sales.instrtyp NOT IN ('03', '04', '06') - ORDER BY sales.saledt ASC, sales.salekey ASC + ORDER BY + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + THEN mydec_sales.mydec_date + ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') + END ASC, + sales.salekey ASC ) AS same_price_earlier_date, sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( @@ -132,6 +245,8 @@ unique_sales AS ( town_class AS tc ON sales.parid = tc.parid AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr + LEFT JOIN mydec_sales + ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') = mydec_sales.doc_no WHERE sales.instruno IS NOT NULL AND sales.deactivat IS NULL AND sales.cur = 'Y' @@ -145,94 +260,6 @@ unique_sales AS ( AND (bad_doc_no = 1 OR is_multisale = TRUE) ), -mydec_sales AS ( - SELECT *, - COALESCE( - DATE_DIFF( - 'day', - LAG(sale_date) OVER ( - PARTITION BY pin - ORDER BY sale_date ASC - ), - sale_date - ) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365 - FROM ( - SELECT * FROM ( - SELECT - REPLACE(document_number, 'D', '') AS doc_no, - REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - SUBSTR(line_4_instrument_date, 1, 4) AS year, - line_5_instrument_type AS mydec_deed_type, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - CAST(line_11_full_consideration AS BIGINT) AS sale_price, - line_2_total_parcels AS num_parcels_sale, - COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, - COALESCE(line_7_property_advertised = 1, FALSE) - AS 
mydec_property_advertised, - COALESCE(line_10a = 1, FALSE) - AS mydec_is_installment_contract_fulfilled, - COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, - COALESCE(line_10c = 1, FALSE) - AS mydec_is_transfer_of_less_than_100_percent_interest, - COALESCE(line_10d = 1, FALSE) - AS mydec_is_court_ordered_sale, - COALESCE(line_10e = 1, FALSE) - AS mydec_is_sale_in_lieu_of_foreclosure, - COALESCE(line_10f = 1, FALSE) - AS mydec_is_condemnation, - COALESCE(line_10g = 1, FALSE) - AS mydec_is_short_sale, - COALESCE(line_10h = 1, FALSE) - AS mydec_is_bank_reo_real_estate_owned, - COALESCE(line_10i = 1, FALSE) - AS mydec_is_auction_sale, - COALESCE(line_10j = 1, FALSE) - AS mydec_is_seller_buyer_a_relocation_company, - COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, - COALESCE(line_10l = 1, FALSE) - AS mydec_is_buyer_a_real_estate_investment_trust, - COALESCE(line_10m = 1, FALSE) - AS mydec_is_buyer_a_pension_fund, - COALESCE(line_10n = 1, FALSE) - AS mydec_is_buyer_an_adjacent_property_owner, - COALESCE(line_10o = 1, FALSE) - AS mydec_is_buyer_exercising_an_option_to_purchase, - COALESCE(line_10p = 1, FALSE) - AS mydec_is_simultaneous_trade_of_property, - COALESCE(line_10q = 1, FALSE) - AS mydec_is_sale_leaseback, - COALESCE(line_10s = 1, FALSE) - AS mydec_is_homestead_exemption, - line_10s_generalalternative - AS mydec_homestead_exemption_general_alternative, - line_10s_senior_citizens - AS mydec_homestead_exemption_senior_citizens, - line_10s_senior_citizens_assessment_freeze - AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - ( - COALESCE(line_10b, 0) + COALESCE(line_10c, 0) - + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) - + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) - + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) - + COALESCE(line_10k, 0) - ) > 0 AS sale_filter_ptax_flag, - COUNT() OVER ( - PARTITION BY line_1_primary_pin, 
line_4_instrument_date - ) AS num_single_day_sales - FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 - ) - WHERE num_single_day_sales = 1 - OR (YEAR(sale_date) > 2020) - ) -), - max_version_flag AS ( SELECT meta_sale_document_num, @@ -262,7 +289,13 @@ sales_val AS ( combined_sales AS ( SELECT COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, - COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, + CASE + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != unique_sales.adjusted_sale_date + THEN mydec_sales.year_of_sale + ELSE unique_sales.year + END AS year, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, @@ -273,13 +306,12 @@ combined_sales AS ( ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, CASE - WHEN (uq_sales.year < '2021' OR uq_sales.sale_date IS NULL) - AND md_sales.sale_date IS NOT NULL - THEN TRUE - WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) - AND uq_sales.sale_date IS NOT NULL - THEN FALSE - END AS is_mydec_date, + WHEN + mydec_sales.mydec_date IS NOT NULL + AND mydec_sales.mydec_date != unique_sales.adjusted_sale_date + THEN mydec_sales.mydec_date + ELSE unique_sales.adjusted_sale_date + END AS sale_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From 929eae6ef4364a24ea77ef7c6356f35912a4d6b3 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:02:39 +0000 Subject: [PATCH 068/126] Fix reference for mydec date column --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 1e81534f7..29ccab2b8 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -53,7 +53,7 @@ mydec_sales AS ( SELECT 
REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, SUBSTR(line_4_instrument_date, 1, 4) AS year, line_5_instrument_type AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, From f838d8fb77118cbf24065c2f9ee71bdc2803ab6a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:12:08 +0000 Subject: [PATCH 069/126] Try fixing col refs --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 29ccab2b8..ae64e13c2 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -40,9 +40,9 @@ mydec_sales AS ( COALESCE( DATE_DIFF( 'day', - LAG(sale_date) OVER ( + LAG(mydec_date) OVER ( PARTITION BY pin - ORDER BY sale_date ASC + ORDER BY mydec_date ASC ), sale_date ) <= 365, From 629f38f3bd8d3badf5dc45ffc0d10f0e5cb9c78a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:28:29 +0000 Subject: [PATCH 070/126] Try fixing col refs --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ae64e13c2..4b0251c88 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -44,7 +44,7 @@ mydec_sales AS ( PARTITION BY pin ORDER BY mydec_date ASC ), - sale_date + mydec_date ) <= 365, FALSE ) AS sale_filter_same_sale_within_365 From 224061e313a89996233b98e6e6a57f87fb6bfd51 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:32:34 +0000 Subject: [PATCH 071/126] Try fixing col refs --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 4b0251c88..c188b6c28 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -134,7 +134,7 @@ unique_sales AS ( DATE_DIFF( 'day', same_price_earlier_date, - sale_date + adjusted_sale_date ) <= 365, FALSE ) AS sale_filter_same_sale_within_365 From e3c0e9a0f308561d4f8b25bd7802bd94ccced10b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:35:47 +0000 Subject: [PATCH 072/126] Try fixing col refs --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index c188b6c28..e46178fd3 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -119,7 +119,7 @@ mydec_sales AS ( WHERE line_2_total_parcels = 1 ) WHERE num_single_day_sales = 1 - OR (YEAR(sale_date) > 2020) + OR (YEAR(mydec_date) > 2020) ) ), From 05fe0c74bd6f4e922735c1c3d5582568732a0abd Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:44:34 +0000 Subject: [PATCH 073/126] Correct mydec alias --- dbt/models/default/default.vw_pin_sale.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e46178fd3..5ae07ed1d 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -291,9 +291,9 @@ combined_sales AS ( COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, CASE WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.adjusted_sale_date - THEN mydec_sales.year_of_sale + md_sales.mydec_date IS NOT NULL + AND md_sales.mydec_date != unique_sales.adjusted_sale_date + THEN md_sales.year_of_sale ELSE unique_sales.year END AS 
year, COALESCE(uq_sales.township_code, tc.township_code) @@ -307,9 +307,9 @@ combined_sales AS ( END AS sale_date_coalesced, CASE WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != unique_sales.adjusted_sale_date - THEN mydec_sales.mydec_date + md_sales.mydec_date IS NOT NULL + AND md_sales.mydec_date != unique_sales.adjusted_sale_date + THEN md_sales.mydec_date ELSE unique_sales.adjusted_sale_date END AS sale_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) From 4c30b343141b585a79774f0e837e329da260c1d7 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:47:31 +0000 Subject: [PATCH 074/126] Correct mydec alias --- dbt/models/default/default.vw_pin_sale.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 5ae07ed1d..3a3ae4ef9 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -292,9 +292,9 @@ combined_sales AS ( CASE WHEN md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != unique_sales.adjusted_sale_date + AND md_sales.mydec_date != uq_sales.adjusted_sale_date THEN md_sales.year_of_sale - ELSE unique_sales.year + ELSE uq_sales.year END AS year, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, @@ -308,9 +308,9 @@ combined_sales AS ( CASE WHEN md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != unique_sales.adjusted_sale_date + AND md_sales.mydec_date != uq_sales.adjusted_sale_date THEN md_sales.mydec_date - ELSE unique_sales.adjusted_sale_date + ELSE uq_sales.adjusted_sale_date END AS sale_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, From 6de42f5e916f2e1c7969bce5e7955bd16937ce26 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:52:02 +0000 Subject: [PATCH 075/126] Fix year col name --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 3a3ae4ef9..41bb8cfda 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -293,7 +293,7 @@ combined_sales AS ( WHEN md_sales.mydec_date IS NOT NULL AND md_sales.mydec_date != uq_sales.adjusted_sale_date - THEN md_sales.year_of_sale + THEN md_sales.year ELSE uq_sales.year END AS year, COALESCE(uq_sales.township_code, tc.township_code) From 95ad519e853514c094393b58243ff562e2fb5ca8 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:55:06 +0000 Subject: [PATCH 076/126] Fix year col name --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 41bb8cfda..d67f3ce76 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -302,8 +302,8 @@ combined_sales AS ( COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE WHEN uq_sales.year < '2021' - THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) - ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) + THEN COALESCE(md_sales.mydec_date, uq_sales.sale_date) + ELSE COALESCE(uq_sales.sale_date, md_sales.mydec_date) END AS sale_date_coalesced, CASE WHEN From b9a6186d3d14e59969bd7761b9a89d61bcac1d2d Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 20:59:12 +0000 Subject: [PATCH 077/126] Fix year col name --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d67f3ce76..928dbdbf5 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -302,8 +302,8 @@ combined_sales AS ( 
COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE WHEN uq_sales.year < '2021' - THEN COALESCE(md_sales.mydec_date, uq_sales.sale_date) - ELSE COALESCE(uq_sales.sale_date, md_sales.mydec_date) + THEN COALESCE(md_sales.mydec_date, uq_sales.adjusted_sale_date) + ELSE COALESCE(uq_sales.adjusted_sale_date, md_sales.mydec_date) END AS sale_date_coalesced, CASE WHEN From ac8268add0d912368571e14a461e2cac2d1b4cb8 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 21:03:36 +0000 Subject: [PATCH 078/126] Fix year ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 928dbdbf5..4e6b9cc4a 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -381,7 +381,7 @@ combined_sales AS ( SELECT cs.pin_coalesced AS pin, - cs.year_coalesced AS year, + cs.year, cs.township_code_coalesced AS township_code, cs.nbhd_coalesced AS nbhd, cs.class_coalesced AS class, From 60f4cf78eaefd9913d0d3448da554fe3abfe7139 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 8 Oct 2024 21:12:29 +0000 Subject: [PATCH 079/126] Fix mydec date --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 4e6b9cc4a..db0914339 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -311,7 +311,7 @@ combined_sales AS ( AND md_sales.mydec_date != uq_sales.adjusted_sale_date THEN md_sales.mydec_date ELSE uq_sales.adjusted_sale_date - END AS sale_date, + END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From c644766987bc688207d685fd2c5d651a34f144a2 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 
15:29:32 +0000 Subject: [PATCH 080/126] Fix mydec col ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index db0914339..d1e03ffbd 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -293,7 +293,7 @@ combined_sales AS ( WHEN md_sales.mydec_date IS NOT NULL AND md_sales.mydec_date != uq_sales.adjusted_sale_date - THEN md_sales.year + THEN md_sales.year_of_sale ELSE uq_sales.year END AS year, COALESCE(uq_sales.township_code, tc.township_code) From 261557974a39542f2b3d7a7d86d2e6c705605143 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 15:37:48 +0000 Subject: [PATCH 081/126] Switch date for boolean --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d1e03ffbd..0276038d1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -309,8 +309,8 @@ combined_sales AS ( WHEN md_sales.mydec_date IS NOT NULL AND md_sales.mydec_date != uq_sales.adjusted_sale_date - THEN md_sales.mydec_date - ELSE uq_sales.adjusted_sale_date + THEN true + ELSE false END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, From 121828f4e32ead8882ae348a18291e93e0b34524 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 15:44:53 +0000 Subject: [PATCH 082/126] Revert year ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 0276038d1..a3bf1c437 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ 
-293,7 +293,7 @@ combined_sales AS ( WHEN md_sales.mydec_date IS NOT NULL AND md_sales.mydec_date != uq_sales.adjusted_sale_date - THEN md_sales.year_of_sale + THEN md_sales.year ELSE uq_sales.year END AS year, COALESCE(uq_sales.township_code, tc.township_code) From 4155f106f6bb2e8b7691c3bf04c1afefbf402b77 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 15:56:09 +0000 Subject: [PATCH 083/126] Try separate ias date ref --- dbt/models/default/default.vw_pin_sale.sql | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index a3bf1c437..965f3dd45 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -145,7 +145,9 @@ unique_sales AS ( tc.township_code, tc.nbhd, tc.class, - -- Adjusted sale_date + -- Ias only sale date for calculation later + tc.ias_sale_date, + -- Adjusted sale_date that captrues nuance CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -292,7 +294,7 @@ combined_sales AS ( CASE WHEN md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != uq_sales.adjusted_sale_date + AND md_sales.mydec_date != uq_sales.ias_sale_date THEN md_sales.year ELSE uq_sales.year END AS year, @@ -302,13 +304,13 @@ combined_sales AS ( COALESCE(uq_sales.class, tc.class) AS class_coalesced, CASE WHEN uq_sales.year < '2021' - THEN COALESCE(md_sales.mydec_date, uq_sales.adjusted_sale_date) - ELSE COALESCE(uq_sales.adjusted_sale_date, md_sales.mydec_date) + THEN COALESCE(md_sales.mydec_date, uq_sales.ias_sale_date) + ELSE COALESCE(uq_sales.ias_sale_date, md_sales.mydec_date) END AS sale_date_coalesced, CASE WHEN md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != uq_sales.adjusted_sale_date + AND md_sales.mydec_date != uq_sales.ias_sale_date THEN true ELSE false END AS is_mydec_date, From 64d9977d636d21db0578d493db611f0c9583c39a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 
2024 16:18:14 +0000 Subject: [PATCH 084/126] Correct table ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 965f3dd45..8950b5962 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -146,7 +146,7 @@ unique_sales AS ( tc.nbhd, tc.class, -- Ias only sale date for calculation later - tc.ias_sale_date, + sales.ias_sale_date, -- Adjusted sale_date that captrues nuance CASE WHEN From eec996c57249783d340ef4621b52affbd72dd74e Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 16:21:43 +0000 Subject: [PATCH 085/126] Fix sale_date calc --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 8950b5962..b59003b62 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -146,7 +146,7 @@ unique_sales AS ( tc.nbhd, tc.class, -- Ias only sale date for calculation later - sales.ias_sale_date, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS ias_sale_date, -- Adjusted sale_date that captrues nuance CASE WHEN From f8d08dc7528576249d3311db7549ba50aaae156c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 16:39:03 +0000 Subject: [PATCH 086/126] Try old logic to fix is_mydec ind --- dbt/models/default/default.vw_pin_sale.sql | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index b59003b62..791e07833 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -308,11 +308,12 @@ combined_sales AS ( ELSE COALESCE(uq_sales.ias_sale_date, md_sales.mydec_date) END AS 
sale_date_coalesced, CASE - WHEN - md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != uq_sales.ias_sale_date - THEN true - ELSE false + WHEN (uq_sales.year < '2021' OR uq_sales.ias_sale_date IS NULL) + AND md_sales.mydec_date IS NOT NULL + THEN TRUE + WHEN (uq_sales.year >= '2021' OR md_sales.ias_sale_date IS NULL) + AND uq_sales.ias_sale_date IS NOT NULL + THEN FALSE END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, From 20bb738dfb50cddfc7baaf8c3d9ef4b49a787a52 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 16:41:44 +0000 Subject: [PATCH 087/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 791e07833..4398bc619 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -311,7 +311,7 @@ combined_sales AS ( WHEN (uq_sales.year < '2021' OR uq_sales.ias_sale_date IS NULL) AND md_sales.mydec_date IS NOT NULL THEN TRUE - WHEN (uq_sales.year >= '2021' OR md_sales.ias_sale_date IS NULL) + WHEN (uq_sales.year >= '2021' OR md_sales.mydec_sale_date IS NULL) AND uq_sales.ias_sale_date IS NOT NULL THEN FALSE END AS is_mydec_date, From c5d01b8ad7ae665d6202bf14bb13d472ba346172 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 16:44:11 +0000 Subject: [PATCH 088/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 4398bc619..ca924980b 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -311,7 +311,7 @@ combined_sales AS ( WHEN (uq_sales.year < '2021' OR uq_sales.ias_sale_date IS NULL) AND md_sales.mydec_date IS NOT NULL THEN TRUE - WHEN (uq_sales.year 
>= '2021' OR md_sales.mydec_sale_date IS NULL) + WHEN (uq_sales.year >= '2021' OR md_sales.mydec_date IS NULL) AND uq_sales.ias_sale_date IS NOT NULL THEN FALSE END AS is_mydec_date, From 14fddb2dff465822fa1a70a02f2f6824d288c964 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 17:59:48 +0000 Subject: [PATCH 089/126] Try fusion of the logics --- dbt/models/default/default.vw_pin_sale.sql | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ca924980b..e64a88a15 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -308,13 +308,20 @@ combined_sales AS ( ELSE COALESCE(uq_sales.ias_sale_date, md_sales.mydec_date) END AS sale_date_coalesced, CASE - WHEN (uq_sales.year < '2021' OR uq_sales.ias_sale_date IS NULL) - AND md_sales.mydec_date IS NOT NULL - THEN TRUE - WHEN (uq_sales.year >= '2021' OR md_sales.mydec_date IS NULL) - AND uq_sales.ias_sale_date IS NOT NULL - THEN FALSE - END AS is_mydec_date, + -- If uq_sales.doc_no is not NULL, apply the COALESCE logic + WHEN uq_sales.doc_no IS NOT NULL THEN + CASE + WHEN COALESCE( + mydec_sales.mydec_date IS NOT NULL + OR YEAR(unique_sales.sale_date) >= 2021, + FALSE + ) THEN TRUE + ELSE FALSE + END + -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE + ELSE + TRUE + END AS is_mydec_date COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From 6aee2e62a7ac2b7eb105df185843052778e0fb08 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:02:53 +0000 Subject: [PATCH 090/126] Add missing comma --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e64a88a15..40ec3047b 100644 --- 
a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -321,7 +321,7 @@ combined_sales AS ( -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE ELSE TRUE - END AS is_mydec_date + END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From bc1b6e6e2bc8831c8d94acc63b4d6d0d1d540fe2 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:07:07 +0000 Subject: [PATCH 091/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 40ec3047b..c66c2df9a 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -312,7 +312,7 @@ combined_sales AS ( WHEN uq_sales.doc_no IS NOT NULL THEN CASE WHEN COALESCE( - mydec_sales.mydec_date IS NOT NULL + md_sales.mydec_date IS NOT NULL OR YEAR(unique_sales.sale_date) >= 2021, FALSE ) THEN TRUE @@ -321,7 +321,7 @@ combined_sales AS ( -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE ELSE TRUE - END AS is_mydec_date, + END AS is_mydec_date COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From e0e01a9e0b7808ec92ad0b144e311ccd8d86d695 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:09:12 +0000 Subject: [PATCH 092/126] Add comma --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index c66c2df9a..8a825c911 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -321,7 +321,7 @@ combined_sales AS ( -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE ELSE TRUE - END AS is_mydec_date + END AS is_mydec_date, COALESCE(uq_sales.sale_price, 
md_sales.sale_price) AS sale_price_coalesced, uq_sales.sale_key, From 146931009081d68bbd7b64a98239478cc4756b9a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:11:19 +0000 Subject: [PATCH 093/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 8a825c911..449357df0 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -313,7 +313,7 @@ combined_sales AS ( CASE WHEN COALESCE( md_sales.mydec_date IS NOT NULL - OR YEAR(unique_sales.sale_date) >= 2021, + OR YEAR(uq_sales.sale_date) >= 2021, FALSE ) THEN TRUE ELSE FALSE From bb1ddd142607aac1d0bb29b66e8d79dd56764822 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:13:45 +0000 Subject: [PATCH 094/126] Fix date ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 449357df0..70372be91 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -313,7 +313,7 @@ combined_sales AS ( CASE WHEN COALESCE( md_sales.mydec_date IS NOT NULL - OR YEAR(uq_sales.sale_date) >= 2021, + OR YEAR(uq_sales.ias_sale_date) >= 2021, FALSE ) THEN TRUE ELSE FALSE From 7007318e616a8d652b349567439c72deb338b009 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 18:54:42 +0000 Subject: [PATCH 095/126] Attempt year fix --- dbt/models/default/default.vw_pin_sale.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 70372be91..9ee0d5467 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -292,10 +292,9 @@ 
combined_sales AS ( SELECT COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, CASE - WHEN - md_sales.mydec_date IS NOT NULL - AND md_sales.mydec_date != uq_sales.ias_sale_date - THEN md_sales.year + WHEN md_sales.mydec_date IS NOT NULL + AND (uq_sales.ias_sale_date IS NULL OR md_sales.mydec_date != uq_sales.ias_sale_date) + THEN md_sales.year ELSE uq_sales.year END AS year, COALESCE(uq_sales.township_code, tc.township_code) From f13dfe4fd618cfe461dc1e8a184b1cf7275df933 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 9 Oct 2024 19:09:17 +0000 Subject: [PATCH 096/126] Add some documentation --- dbt/models/default/default.vw_pin_sale.sql | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 9ee0d5467..510d1729b 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -145,9 +145,11 @@ unique_sales AS ( tc.township_code, tc.nbhd, tc.class, - -- Ias only sale date for calculation later + -- Ias only sale date for various mydec calculations later DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS ias_sale_date, - -- Adjusted sale_date that captrues nuance + -- Adjusted sale_date is added here specifically so we can calculate + -- sale_filter_same_sale_within_365 correctly. This allows us to + -- use sale dates from either mydec or iasworld in the calculation CASE WHEN mydec_sales.mydec_date IS NOT NULL @@ -210,7 +212,9 @@ unique_sales AS ( -- they're sold (we need to make sure to only include deed types we -- want). These sales are unecessary for modeling and may be -- duplicates. We need to order by salekey as well in case of any - -- ties within price, date, and pin. + -- ties within price, date, and pin. 
This LAG calculation + -- grabs the previous sale dynamically depending on which source table + -- we are using for a given sale date LAG( CASE WHEN @@ -307,7 +311,6 @@ combined_sales AS ( ELSE COALESCE(uq_sales.ias_sale_date, md_sales.mydec_date) END AS sale_date_coalesced, CASE - -- If uq_sales.doc_no is not NULL, apply the COALESCE logic WHEN uq_sales.doc_no IS NOT NULL THEN CASE WHEN COALESCE( @@ -317,7 +320,6 @@ combined_sales AS ( ) THEN TRUE ELSE FALSE END - -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE ELSE TRUE END AS is_mydec_date, From 21472dce7563901d0f839ec01113966cad84ecaf Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 19:21:28 +0000 Subject: [PATCH 097/126] Attempt deed type recode --- dbt/models/default/default.vw_pin_sale.sql | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 510d1729b..c1f6e9b93 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -55,7 +55,15 @@ mydec_sales AS ( REPLACE(line_1_primary_pin, '-', '') AS pin, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, SUBSTR(line_4_instrument_date, 1, 4) AS year, - line_5_instrument_type AS mydec_deed_type, + CASE line_5_instrument_type + WHEN '01' THEN 'Warranty' + WHEN '02' THEN 'Trustee' + WHEN '03' THEN 'Quit claim' + WHEN '04' THEN 'Executor' + WHEN '05' THEN 'Other' + WHEN '06' THEN 'Beneficiary' + ELSE line_5_instrument_type -- or NULL, depending on your preference + END AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, NULLIF(TRIM(buyer_name), '') AS buyer_name, CAST(line_11_full_consideration AS BIGINT) AS sale_price, From 095863855215f9c001bdf71ffdbc71003424dc82 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 19:24:30 +0000 Subject: [PATCH 098/126] Attempt deed type recode --- dbt/models/default/default.vw_pin_sale.sql | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index c1f6e9b93..e5fe3a7d7 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -56,12 +56,12 @@ mydec_sales AS ( DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, SUBSTR(line_4_instrument_date, 1, 4) AS year, CASE line_5_instrument_type - WHEN '01' THEN 'Warranty' - WHEN '02' THEN 'Trustee' - WHEN '03' THEN 'Quit claim' - WHEN '04' THEN 'Executor' - WHEN '05' THEN 'Other' - WHEN '06' THEN 'Beneficiary' + WHEN 'Warranty' THEN '01' + WHEN 'Trustee' THEN '02' + WHEN 'Quit claim' THEN '03' + WHEN 'Executor' THEN '04' + WHEN 'Other' THEN '05' + WHEN 'Beneficiary' THEN '06' ELSE line_5_instrument_type -- or NULL, depending on your preference END AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, From 56a2279bc5a58cd57354b313a348ba7635512b64 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 19:38:20 +0000 Subject: [PATCH 099/126] Attempt accurate recode --- dbt/models/default/default.vw_pin_sale.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e5fe3a7d7..5dfbb327c 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -56,12 +56,12 @@ mydec_sales AS ( DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, SUBSTR(line_4_instrument_date, 1, 4) AS year, CASE line_5_instrument_type - WHEN 'Warranty' THEN '01' - WHEN 'Trustee' THEN '02' - WHEN 'Quit claim' THEN '03' - WHEN 'Executor' THEN '04' + WHEN 'Warranty Deed' THEN '01' + WHEN 'Trustee Deed' THEN '02' + WHEN 'Quit Claim Deed' THEN '03' + WHEN 'Executor Deed' THEN '04' WHEN 'Other' THEN '05' - WHEN 'Beneficiary' THEN '06' + WHEN 'Beneficial interest' THEN '06' ELSE 
line_5_instrument_type -- or NULL, depending on your preference END AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, From f8cef15cb4795216bf1f9f88acb2a6f8d597119c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 20:36:45 +0000 Subject: [PATCH 100/126] Simplify filter --- dbt/models/default/default.vw_pin_sale.sql | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 5dfbb327c..598674d89 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -223,29 +223,18 @@ unique_sales AS ( -- ties within price, date, and pin. This LAG calculation -- grabs the previous sale dynamically depending on which source table -- we are using for a given sale date + --TODO: not about ias integrity LAG( - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - THEN mydec_sales.mydec_date - ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - END + COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ) OVER ( PARTITION BY sales.parid, sales.price, - sales.instrtyp NOT IN ('03', '04', '06') + CASE WHEN sales.instrtyp NOT IN ('03', '04', '06') THEN 1 ELSE 0 END ORDER BY - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - THEN mydec_sales.mydec_date - ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - END ASC, + COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, sales.salekey ASC - ) AS same_price_earlier_date, + ) AS same_price_earlier_date sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, From 8d59340c9f0d4058e17513b3d3b4715ece1e7868 Mon Sep 17 00:00:00 
2001 From: Michael Wagner Date: Thu, 10 Oct 2024 20:38:47 +0000 Subject: [PATCH 101/126] Add comma --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 598674d89..f6a4dea19 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -234,7 +234,7 @@ unique_sales AS ( ORDER BY COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, sales.salekey ASC - ) AS same_price_earlier_date + ) AS same_price_earlier_date, sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, From d1e5d34ff8561eb84cad80ce7d69728fca06ea25 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 20:47:58 +0000 Subject: [PATCH 102/126] Add deed type coalesce --- dbt/models/default/default.vw_pin_sale.sql | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index f6a4dea19..7b74af942 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -36,19 +36,6 @@ calculated AS ( mydec_sales AS ( - SELECT *, - COALESCE( - DATE_DIFF( - 'day', - LAG(mydec_date) OVER ( - PARTITION BY pin - ORDER BY mydec_date ASC - ), - mydec_date - ) <= 365, - FALSE - ) AS sale_filter_same_sale_within_365 - FROM ( SELECT * FROM ( SELECT REPLACE(document_number, 'D', '') AS doc_no, @@ -230,7 +217,7 @@ unique_sales AS ( PARTITION BY sales.parid, sales.price, - CASE WHEN sales.instrtyp NOT IN ('03', '04', '06') THEN 1 ELSE 0 END + CASE WHEN COALESCE(mydec_sales.instrtyp, mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END ORDER BY COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, sales.salekey ASC 
From f63cfdd6fe57c1cfde7684393e90e1c51302ce78 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 20:52:27 +0000 Subject: [PATCH 103/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 7b74af942..d74db7699 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -217,7 +217,7 @@ unique_sales AS ( PARTITION BY sales.parid, sales.price, - CASE WHEN COALESCE(mydec_sales.instrtyp, mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END + CASE WHEN COALESCE(mydec_sales.instrtyp, mydec_sales.mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END ORDER BY COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, sales.salekey ASC From 27d8ac2d795361b6ec5c6d11b4640f5ac842e027 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 20:56:56 +0000 Subject: [PATCH 104/126] Remove hanging parentheses --- dbt/models/default/default.vw_pin_sale.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d74db7699..abc635e00 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -115,7 +115,6 @@ mydec_sales AS ( ) WHERE num_single_day_sales = 1 OR (YEAR(mydec_date) > 2020) - ) ), From ce5adccf587957c45092fbfa312e8c90ba1f5109 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 21:01:05 +0000 Subject: [PATCH 105/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index abc635e00..50f226e95 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ 
b/dbt/models/default/default.vw_pin_sale.sql @@ -216,7 +216,7 @@ unique_sales AS ( PARTITION BY sales.parid, sales.price, - CASE WHEN COALESCE(mydec_sales.instrtyp, mydec_sales.mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END + CASE WHEN COALESCE(sales.instrtyp, mydec_sales.mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END ORDER BY COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, sales.salekey ASC From 0e05f7c4d18106faec70c7da01bc4fe2fe01a4f5 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 21:03:50 +0000 Subject: [PATCH 106/126] Remove final 365 coalesce --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 50f226e95..787057b74 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -351,7 +351,7 @@ combined_sales AS ( md_sales.mydec_homestead_exemption_senior_citizens, md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, -- Include sale_filter_same_sale_within_365 from both sources - COALESCE(uq_sales.sale_filter_same_sale_within_365, md_sales.sale_filter_same_sale_within_365, FALSE) AS sale_filter_same_sale_within_365, + uq_sales.sale_filter_same_sale_within_365, -- Include sale_filter_less_than_10k and sale_filter_deed_type -- Use appropriate values based on source CASE From befe90ff6e03c73af472bb16b1c1cc0ba940cedc Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 10 Oct 2024 21:20:09 +0000 Subject: [PATCH 107/126] Try reversion to older state --- dbt/models/default/default.vw_pin_sale.sql | 319 ++++++++++----------- 1 file changed, 144 insertions(+), 175 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 787057b74..a22a931a4 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ 
b/dbt/models/default/default.vw_pin_sale.sql @@ -1,5 +1,4 @@ -- View containing unique, filtered sales - WITH town_class AS ( SELECT par.parid, @@ -18,7 +17,6 @@ WITH town_class AS ( WHERE par.cur = 'Y' AND par.deactivat IS NULL ), - calculated AS ( SELECT instruno, @@ -33,102 +31,14 @@ calculated AS ( ) GROUP BY instruno ), - - -mydec_sales AS ( - SELECT * FROM ( - SELECT - REPLACE(document_number, 'D', '') AS doc_no, - REPLACE(line_1_primary_pin, '-', '') AS pin, - DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS mydec_date, - SUBSTR(line_4_instrument_date, 1, 4) AS year, - CASE line_5_instrument_type - WHEN 'Warranty Deed' THEN '01' - WHEN 'Trustee Deed' THEN '02' - WHEN 'Quit Claim Deed' THEN '03' - WHEN 'Executor Deed' THEN '04' - WHEN 'Other' THEN '05' - WHEN 'Beneficial interest' THEN '06' - ELSE line_5_instrument_type -- or NULL, depending on your preference - END AS mydec_deed_type, - NULLIF(TRIM(seller_name), '') AS seller_name, - NULLIF(TRIM(buyer_name), '') AS buyer_name, - CAST(line_11_full_consideration AS BIGINT) AS sale_price, - line_2_total_parcels AS num_parcels_sale, - COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, - COALESCE(line_7_property_advertised = 1, FALSE) - AS mydec_property_advertised, - COALESCE(line_10a = 1, FALSE) - AS mydec_is_installment_contract_fulfilled, - COALESCE(line_10b = 1, FALSE) - AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, - COALESCE(line_10c = 1, FALSE) - AS mydec_is_transfer_of_less_than_100_percent_interest, - COALESCE(line_10d = 1, FALSE) - AS mydec_is_court_ordered_sale, - COALESCE(line_10e = 1, FALSE) - AS mydec_is_sale_in_lieu_of_foreclosure, - COALESCE(line_10f = 1, FALSE) - AS mydec_is_condemnation, - COALESCE(line_10g = 1, FALSE) - AS mydec_is_short_sale, - COALESCE(line_10h = 1, FALSE) - AS mydec_is_bank_reo_real_estate_owned, - COALESCE(line_10i = 1, FALSE) - AS mydec_is_auction_sale, - COALESCE(line_10j = 1, FALSE) - AS mydec_is_seller_buyer_a_relocation_company, - 
COALESCE(line_10k = 1, FALSE) - AS mydec_is_seller_buyer_a_financial_institution_or_government_agency, - COALESCE(line_10l = 1, FALSE) - AS mydec_is_buyer_a_real_estate_investment_trust, - COALESCE(line_10m = 1, FALSE) - AS mydec_is_buyer_a_pension_fund, - COALESCE(line_10n = 1, FALSE) - AS mydec_is_buyer_an_adjacent_property_owner, - COALESCE(line_10o = 1, FALSE) - AS mydec_is_buyer_exercising_an_option_to_purchase, - COALESCE(line_10p = 1, FALSE) - AS mydec_is_simultaneous_trade_of_property, - COALESCE(line_10q = 1, FALSE) - AS mydec_is_sale_leaseback, - COALESCE(line_10s = 1, FALSE) - AS mydec_is_homestead_exemption, - line_10s_generalalternative - AS mydec_homestead_exemption_general_alternative, - line_10s_senior_citizens - AS mydec_homestead_exemption_senior_citizens, - line_10s_senior_citizens_assessment_freeze - AS mydec_homestead_exemption_senior_citizens_assessment_freeze, - ( - COALESCE(line_10b, 0) + COALESCE(line_10c, 0) - + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) - + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) - + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) - + COALESCE(line_10k, 0) - ) > 0 AS sale_filter_ptax_flag, - COUNT() OVER ( - PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales - FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 - ) - WHERE num_single_day_sales = 1 - OR (YEAR(mydec_date) > 2020) -), - - unique_sales AS ( SELECT *, - -- Historically, this view excluded sales for a given pin if it had sold - -- within the last 12 months for the same price. This filter allows us - -- to filter out those sales. 
COALESCE( DATE_DIFF( 'day', same_price_earlier_date, - adjusted_sale_date + sale_date ) <= 365, FALSE ) AS sale_filter_same_sale_within_365 @@ -139,18 +49,7 @@ unique_sales AS ( tc.township_code, tc.nbhd, tc.class, - -- Ias only sale date for various mydec calculations later - DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS ias_sale_date, - -- Adjusted sale_date is added here specifically so we can calculate - -- sale_filter_same_sale_within_365 correctly. This allows us to - -- use sale dates from either mydec or iasworld in the calculation - CASE - WHEN - mydec_sales.mydec_date IS NOT NULL - AND mydec_sales.mydec_date != DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - THEN mydec_sales.mydec_date - ELSE DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') - END AS adjusted_sale_date, + DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d') AS sale_date, CAST(sales.price AS BIGINT) AS sale_price, sales.salekey AS sale_key, NULLIF(REPLACE(sales.instruno, 'D', ''), '') AS doc_no, @@ -175,11 +74,6 @@ unique_sales AS ( WHEN sales.saletype = '0' THEN 'LAND' WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' END AS sale_type, - -- Sales are not entirely unique by pin/date so we group all - -- sales by pin/date, then order by descending price - -- and give the top observation a value of 1 for "max_price". - -- We need to order by salekey as well in case of any ties within - -- price, date, and pin. ROW_NUMBER() OVER ( PARTITION BY sales.parid, @@ -206,21 +100,19 @@ unique_sales AS ( -- they're sold (we need to make sure to only include deed types we -- want). These sales are unecessary for modeling and may be -- duplicates. We need to order by salekey as well in case of any - -- ties within price, date, and pin. 
This LAG calculation - -- grabs the previous sale dynamically depending on which source table - -- we are using for a given sale date - --TODO: not about ias integrity - LAG( - COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) - ) OVER ( + -- ties within price, date, and pin. + LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER ( PARTITION BY sales.parid, sales.price, - CASE WHEN COALESCE(sales.instrtyp, mydec_sales.mydec_deed_type) NOT IN ('03', '04', '06') THEN 1 ELSE 0 END - ORDER BY - COALESCE(mydec_sales.mydec_date, DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) ASC, - sales.salekey ASC + sales.instrtyp NOT IN ('03', '04', '06') + ORDER BY sales.saledt ASC, sales.salekey ASC ) AS same_price_earlier_date, + -- Historically, this view filtered out sales less than $10k and + -- as well as quit claims, executor deeds, beneficial interests, + -- and NULL deed types. Now we create "legacy" filter columns so + -- that this filtering can reproduced while still allowing all sales + -- into the view. 
sales.price <= 10000 AS sale_filter_less_than_10k, COALESCE( sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL, @@ -234,8 +126,6 @@ unique_sales AS ( town_class AS tc ON sales.parid = tc.parid AND SUBSTR(sales.saledt, 1, 4) = tc.taxyr - LEFT JOIN mydec_sales - ON NULLIF(REPLACE(sales.instruno, 'D', ''), '') = mydec_sales.doc_no WHERE sales.instruno IS NOT NULL AND sales.deactivat IS NULL AND sales.cur = 'Y' @@ -248,7 +138,82 @@ unique_sales AS ( WHERE max_price = 1 AND (bad_doc_no = 1 OR is_multisale = TRUE) ), - +mydec_sales AS ( + SELECT * FROM ( + SELECT + REPLACE(document_number, 'D', '') AS doc_no, + REPLACE(line_1_primary_pin, '-', '') AS pin, + DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, + SUBSTR(line_4_instrument_date, 1, 4) AS year, + line_5_instrument_type AS mydec_deed_type, + NULLIF(TRIM(seller_name), '') AS seller_name, + NULLIF(TRIM(buyer_name), '') AS buyer_name, + CAST(line_11_full_consideration AS BIGINT) AS sale_price, + line_2_total_parcels AS num_parcels_sale, + COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale, + COALESCE(line_7_property_advertised = 1, FALSE) + AS mydec_property_advertised, + COALESCE(line_10a = 1, FALSE) + AS mydec_is_installment_contract_fulfilled, + COALESCE(line_10b = 1, FALSE) --noqa + AS mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + COALESCE(line_10c = 1, FALSE) + AS mydec_is_transfer_of_less_than_100_percent_interest, + COALESCE(line_10d = 1, FALSE) + AS mydec_is_court_ordered_sale, + COALESCE(line_10e = 1, FALSE) + AS mydec_is_sale_in_lieu_of_foreclosure, + COALESCE(line_10f = 1, FALSE) + AS mydec_is_condemnation, + COALESCE(line_10g = 1, FALSE) + AS mydec_is_short_sale, + COALESCE(line_10h = 1, FALSE) + AS mydec_is_bank_reo_real_estate_owned, + COALESCE(line_10i = 1, FALSE) + AS mydec_is_auction_sale, + COALESCE(line_10j = 1, FALSE) + AS mydec_is_seller_buyer_a_relocation_company, + COALESCE(line_10k = 1, FALSE) + AS 
mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + COALESCE(line_10l = 1, FALSE) + AS mydec_is_buyer_a_real_estate_investment_trust, + COALESCE(line_10m = 1, FALSE) + AS mydec_is_buyer_a_pension_fund, + COALESCE(line_10n = 1, FALSE) + AS mydec_is_buyer_an_adjacent_property_owner, + COALESCE(line_10o = 1, FALSE) + AS mydec_is_buyer_exercising_an_option_to_purchase, + COALESCE(line_10p = 1, FALSE) + AS mydec_is_simultaneous_trade_of_property, + COALESCE(line_10q = 1, FALSE) + AS mydec_is_sale_leaseback, + COALESCE(line_10s = 1, FALSE) + AS mydec_is_homestead_exemption, + line_10s_generalalternative + AS mydec_homestead_exemption_general_alternative, + line_10s_senior_citizens + AS mydec_homestead_exemption_senior_citizens, + line_10s_senior_citizens_assessment_freeze + AS mydec_homestead_exemption_senior_citizens_assessment_freeze, + ( + COALESCE(line_10b, 0) + COALESCE(line_10c, 0) + + COALESCE(line_10d, 0) + COALESCE(line_10e, 0) + + COALESCE(line_10f, 0) + COALESCE(line_10g, 0) + + COALESCE(line_10h, 0) + COALESCE(line_10i, 0) + + COALESCE(line_10k, 0) + ) > 0 AS sale_filter_ptax_flag, + COUNT() OVER ( + PARTITION BY line_1_primary_pin, line_4_instrument_date + ) AS num_single_day_sales + FROM {{ source('sale', 'mydec') }} + WHERE line_2_total_parcels = 1 + ) + /* Some sales in mydec have multiple rows for one pin on a given sale date. + Sometimes they have different dates than iasworld prior to 2021 and when + joined back onto unique_sales will create duplicates by pin/sale date. 
*/ + WHERE num_single_day_sales = 1 + OR (YEAR(sale_date) > 2020) +), max_version_flag AS ( SELECT meta_sale_document_num, @@ -256,7 +221,6 @@ max_version_flag AS ( FROM {{ source('sale', 'flag') }} GROUP BY meta_sale_document_num ), - sales_val AS ( SELECT sf.meta_sale_document_num, @@ -274,45 +238,36 @@ sales_val AS ( ON sf.meta_sale_document_num = mv.meta_sale_document_num AND sf.version = mv.max_version ), - -combined_sales AS ( +-- Introducing cte_sales to precompute the coalesced values +cte_sales AS ( SELECT + -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, - CASE - WHEN md_sales.mydec_date IS NOT NULL - AND (uq_sales.ias_sale_date IS NULL OR md_sales.mydec_date != uq_sales.ias_sale_date) - THEN md_sales.year - ELSE uq_sales.year - END AS year, + COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) - AS township_code_coalesced, + AS township_code_coalesced, --noqa COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, COALESCE(uq_sales.class, tc.class) AS class_coalesced, - CASE - WHEN uq_sales.year < '2021' - THEN COALESCE(md_sales.mydec_date, uq_sales.ias_sale_date) - ELSE COALESCE(uq_sales.ias_sale_date, md_sales.mydec_date) + CASE --noqa + WHEN + uq_sales.year < '2021' + THEN COALESCE(md_sales.sale_date, uq_sales.sale_date) + ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, CASE - WHEN uq_sales.doc_no IS NOT NULL THEN - CASE - WHEN COALESCE( - md_sales.mydec_date IS NOT NULL - OR YEAR(uq_sales.ias_sale_date) >= 2021, - FALSE - ) THEN TRUE - ELSE FALSE - END - ELSE - TRUE + WHEN (uq_sales.year < '2021' OR uq_sales.sale_date IS NULL) + AND md_sales.sale_date IS NOT NULL + THEN TRUE + WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) + AND uq_sales.sale_date IS NOT NULL --noqa + THEN FALSE END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) - AS sale_price_coalesced, + AS sale_price_coalesced, --noqa 
uq_sales.sale_key, - COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, + COALESCE(uq_sales.doc_no, md_sales.doc_no) AS doc_no_coalesced, --noqa COALESCE(uq_sales.deed_type, md_sales.mydec_deed_type) AS deed_type_coalesced, - uq_sales.deed_type AS deed_type_ias, COALESCE(uq_sales.seller_name, md_sales.seller_name) AS seller_name_coalesced, COALESCE(uq_sales.is_multisale, md_sales.is_multisale) @@ -330,7 +285,7 @@ combined_sales AS ( md_sales.sale_filter_ptax_flag, md_sales.mydec_property_advertised, md_sales.mydec_is_installment_contract_fulfilled, - md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + md_sales.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa md_sales.mydec_is_transfer_of_less_than_100_percent_interest, md_sales.mydec_is_court_ordered_sale, md_sales.mydec_is_sale_in_lieu_of_foreclosure, @@ -339,7 +294,7 @@ combined_sales AS ( md_sales.mydec_is_bank_reo_real_estate_owned, md_sales.mydec_is_auction_sale, md_sales.mydec_is_seller_buyer_a_relocation_company, - md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + md_sales.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa md_sales.mydec_is_buyer_a_real_estate_investment_trust, md_sales.mydec_is_buyer_a_pension_fund, md_sales.mydec_is_buyer_an_adjacent_property_owner, @@ -349,33 +304,47 @@ combined_sales AS ( md_sales.mydec_is_homestead_exemption, md_sales.mydec_homestead_exemption_general_alternative, md_sales.mydec_homestead_exemption_senior_citizens, - md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze, - -- Include sale_filter_same_sale_within_365 from both sources - uq_sales.sale_filter_same_sale_within_365, - -- Include sale_filter_less_than_10k and sale_filter_deed_type - -- Use appropriate values based on source - CASE - WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_less_than_10k - ELSE (md_sales.sale_price <= 10000) - END AS sale_filter_less_than_10k, - 
CASE - WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_deed_type - ELSE ( - md_sales.mydec_deed_type IN ('03', '04', '06') - OR md_sales.mydec_deed_type IS NULL - ) - END AS sale_filter_deed_type + md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze FROM unique_sales AS uq_sales - FULL OUTER JOIN mydec_sales AS md_sales - ON uq_sales.doc_no = md_sales.doc_no + FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr +), +-- Handle various filters +combined_sales AS ( + SELECT + cte_s.*, + -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF + CASE + WHEN LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY cte_s.pin_coalesced, cte_s.sale_price_coalesced + ORDER BY cte_s.sale_date_coalesced ASC + ) IS NOT NULL + THEN + DATE_DIFF( + 'day', + LAG(cte_s.sale_date_coalesced) OVER ( + PARTITION BY + cte_s.pin_coalesced, cte_s.sale_price_coalesced + ORDER BY cte_s.sale_date_coalesced ASC + ), + cte_s.sale_date_coalesced + ) <= 365 + ELSE FALSE + END AS sale_filter_same_sale_within_365, + -- Compute 'sale_filter_less_than_10k' + (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, + -- Compute 'sale_filter_deed_type' + ( + cte_s.deed_type_coalesced IN ('03', '04', '06') + OR cte_s.deed_type_coalesced IS NULL + ) AS sale_filter_deed_type + FROM cte_sales AS cte_s ) - SELECT cs.pin_coalesced AS pin, - cs.year, + cs.year_coalesced AS year, cs.township_code_coalesced AS township_code, cs.nbhd_coalesced AS nbhd, cs.class_coalesced AS class, @@ -402,7 +371,7 @@ SELECT cs.sale_filter_ptax_flag, cs.mydec_property_advertised, cs.mydec_is_installment_contract_fulfilled, - cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, + cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa cs.mydec_is_transfer_of_less_than_100_percent_interest, 
cs.mydec_is_court_ordered_sale, cs.mydec_is_sale_in_lieu_of_foreclosure, @@ -411,7 +380,7 @@ SELECT cs.mydec_is_bank_reo_real_estate_owned, cs.mydec_is_auction_sale, cs.mydec_is_seller_buyer_a_relocation_company, - cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, + cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa cs.mydec_is_buyer_a_real_estate_investment_trust, cs.mydec_is_buyer_a_pension_fund, cs.mydec_is_buyer_an_adjacent_property_owner, @@ -433,4 +402,4 @@ SELECT cs.source FROM combined_sales AS cs LEFT JOIN sales_val - ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; + ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; \ No newline at end of file From 5b1c3929686eb959a1d5c92ef483a3a76f189de7 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 14:54:57 +0000 Subject: [PATCH 108/126] Switch mydec logic --- dbt/models/default/default.vw_pin_sale.sql | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index a22a931a4..4992b1486 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -255,12 +255,19 @@ cte_sales AS ( ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, CASE - WHEN (uq_sales.year < '2021' OR uq_sales.sale_date IS NULL) - AND md_sales.sale_date IS NOT NULL - THEN TRUE - WHEN (uq_sales.year >= '2021' OR md_sales.sale_date IS NULL) - AND uq_sales.sale_date IS NOT NULL --noqa - THEN FALSE + -- If uq_sales.doc_no is not NULL, apply the COALESCE logic + WHEN uq_sales.doc_no IS NOT NULL THEN + CASE + WHEN COALESCE( + md_sales.sale_date IS NOT NULL + OR YEAR(uq_sales.sale_date) >= 2021, + FALSE + ) THEN TRUE + ELSE FALSE + END + -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE + ELSE + TRUE END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) 
AS sale_price_coalesced, --noqa @@ -342,6 +349,7 @@ combined_sales AS ( ) AS sale_filter_deed_type FROM cte_sales AS cte_s ) + SELECT cs.pin_coalesced AS pin, cs.year_coalesced AS year, From 8eccfea9482ed62f4ba300ddcec23a86285a2106 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:10:53 +0000 Subject: [PATCH 109/126] Revert year logic --- dbt/models/default/default.vw_pin_sale.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 4992b1486..f8cf5c5b1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -243,7 +243,13 @@ cte_sales AS ( SELECT -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, - COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, + CASE + WHEN md_sales.sale_date IS NOT NULL + AND (uq_sales.sale_date IS NULL OR md_sales.sale_date != uq_sales.sale_date) + THEN md_sales.year + ELSE uq_sales.year + END AS year, + --COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, --noqa COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, From cd5f435473fd632538e3c735902aa7f34053bd28 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:15:37 +0000 Subject: [PATCH 110/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index f8cf5c5b1..a5fae5dd1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -248,7 +248,7 @@ cte_sales AS ( AND (uq_sales.sale_date IS NULL OR md_sales.sale_date != uq_sales.sale_date) THEN md_sales.year ELSE uq_sales.year - END AS year, + END AS year_coaslesced, --COALESCE(uq_sales.year, md_sales.year) 
AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, --noqa From 4fd17d27015f4d23cc386b1bb2bf92c2d96e545e Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:17:43 +0000 Subject: [PATCH 111/126] Fix ref --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index a5fae5dd1..a2235aa8b 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -248,7 +248,7 @@ cte_sales AS ( AND (uq_sales.sale_date IS NULL OR md_sales.sale_date != uq_sales.sale_date) THEN md_sales.year ELSE uq_sales.year - END AS year_coaslesced, + END AS year_coalesced, --COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, --noqa From a33ac1f149b89343330b7c04728718aa12c48308 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:36:44 +0000 Subject: [PATCH 112/126] Remove comment --- dbt/models/default/default.vw_pin_sale.sql | 35 +++++++++++++--------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index a2235aa8b..e06be6a9d 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -17,6 +17,7 @@ WITH town_class AS ( WHERE par.cur = 'Y' AND par.deactivat IS NULL ), + calculated AS ( SELECT instruno, @@ -31,6 +32,7 @@ calculated AS ( ) GROUP BY instruno ), + unique_sales AS ( SELECT *, @@ -138,6 +140,7 @@ unique_sales AS ( WHERE max_price = 1 AND (bad_doc_no = 1 OR is_multisale = TRUE) ), + mydec_sales AS ( SELECT * FROM ( SELECT @@ -214,6 +217,7 @@ mydec_sales AS ( WHERE num_single_day_sales = 1 OR (YEAR(sale_date) > 2020) ), + max_version_flag AS ( SELECT meta_sale_document_num, @@ -221,6 
+225,7 @@ max_version_flag AS ( FROM {{ source('sale', 'flag') }} GROUP BY meta_sale_document_num ), + sales_val AS ( SELECT sf.meta_sale_document_num, @@ -238,15 +243,18 @@ sales_val AS ( ON sf.meta_sale_document_num = mv.meta_sale_document_num AND sf.version = mv.max_version ), + -- Introducing cte_sales to precompute the coalesced values cte_sales AS ( SELECT - -- Precompute coalesced columns COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, - CASE + CASE WHEN md_sales.sale_date IS NOT NULL - AND (uq_sales.sale_date IS NULL OR md_sales.sale_date != uq_sales.sale_date) - THEN md_sales.year + AND ( + uq_sales.sale_date IS NULL + OR md_sales.sale_date != uq_sales.sale_date + ) + THEN md_sales.year ELSE uq_sales.year END AS year_coalesced, --COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, @@ -262,15 +270,13 @@ cte_sales AS ( END AS sale_date_coalesced, CASE -- If uq_sales.doc_no is not NULL, apply the COALESCE logic - WHEN uq_sales.doc_no IS NOT NULL THEN - CASE - WHEN COALESCE( - md_sales.sale_date IS NOT NULL - OR YEAR(uq_sales.sale_date) >= 2021, - FALSE - ) THEN TRUE - ELSE FALSE - END + WHEN uq_sales.doc_no IS NOT NULL + THEN + COALESCE(COALESCE( + md_sales.sale_date IS NOT NULL + OR YEAR(uq_sales.sale_date) >= 2021, + FALSE + ), FALSE) -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE ELSE TRUE @@ -324,6 +330,7 @@ cte_sales AS ( ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid AND COALESCE(uq_sales.year, md_sales.year) = tc.taxyr ), + -- Handle various filters combined_sales AS ( SELECT @@ -416,4 +423,4 @@ SELECT cs.source FROM combined_sales AS cs LEFT JOIN sales_val - ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; \ No newline at end of file + ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; From afdc790d1c7d2e0b87eceaccf54749191a44a7b4 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:45:54 +0000 Subject: [PATCH 113/126] Add docs --- dbt/models/default/default.vw_pin_sale.sql | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index e06be6a9d..6fa56eade 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -336,6 +336,9 @@ combined_sales AS ( SELECT cte_s.*, -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF + -- Note: the sale_filter_same_sale_within_365 uses both iasworld + -- and mydec doc numbers for the calculation. So if we were to set + -- source = 'iasworld', mydec sales will still influence this filter CASE WHEN LAG(cte_s.sale_date_coalesced) OVER ( PARTITION BY cte_s.pin_coalesced, cte_s.sale_price_coalesced From 1b83450c3b425789fb1b5c5f3a5c1e1f4d457262 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 11 Oct 2024 15:50:35 +0000 Subject: [PATCH 114/126] Improve cte names --- dbt/models/default/default.vw_pin_sale.sql | 122 ++++++++++----------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 6fa56eade..ad88171fc 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -244,8 +244,8 @@ sales_val AS ( AND sf.version = mv.max_version ), --- Introducing cte_sales to precompute the coalesced values -cte_sales AS ( +-- Introducing csales to precompute the coalesced values +combined_sales AS ( SELECT COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, CASE @@ -332,89 +332,89 @@ cte_sales AS ( ), -- Handle various filters -combined_sales AS ( +add_filter_sales AS ( SELECT - cte_s.*, + cs.*, -- Calculate 'sale_filter_same_sale_within_365' using DATE_DIFF -- Note: the sale_filter_same_sale_within_365 uses both iasworld -- and mydec doc numbers for the calculation. 
So if we were to set -- source = 'iasworld', mydec sales will still influence this filter CASE - WHEN LAG(cte_s.sale_date_coalesced) OVER ( - PARTITION BY cte_s.pin_coalesced, cte_s.sale_price_coalesced - ORDER BY cte_s.sale_date_coalesced ASC + WHEN LAG(cs.sale_date_coalesced) OVER ( + PARTITION BY cs.pin_coalesced, cs.sale_price_coalesced + ORDER BY cs.sale_date_coalesced ASC ) IS NOT NULL THEN DATE_DIFF( 'day', - LAG(cte_s.sale_date_coalesced) OVER ( + LAG(cs.sale_date_coalesced) OVER ( PARTITION BY - cte_s.pin_coalesced, cte_s.sale_price_coalesced - ORDER BY cte_s.sale_date_coalesced ASC + cs.pin_coalesced, cs.sale_price_coalesced + ORDER BY cs.sale_date_coalesced ASC ), - cte_s.sale_date_coalesced + cs.sale_date_coalesced ) <= 365 ELSE FALSE END AS sale_filter_same_sale_within_365, -- Compute 'sale_filter_less_than_10k' - (cte_s.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, + (cs.sale_price_coalesced <= 10000) AS sale_filter_less_than_10k, -- Compute 'sale_filter_deed_type' ( - cte_s.deed_type_coalesced IN ('03', '04', '06') - OR cte_s.deed_type_coalesced IS NULL + cs.deed_type_coalesced IN ('03', '04', '06') + OR cs.deed_type_coalesced IS NULL ) AS sale_filter_deed_type - FROM cte_sales AS cte_s + FROM combined_sales AS cs ) SELECT - cs.pin_coalesced AS pin, - cs.year_coalesced AS year, - cs.township_code_coalesced AS township_code, - cs.nbhd_coalesced AS nbhd, - cs.class_coalesced AS class, - cs.sale_date_coalesced AS sale_date, - cs.is_mydec_date, - cs.sale_price_coalesced AS sale_price, - cs.sale_key, - cs.doc_no_coalesced AS doc_no, - cs.deed_type_coalesced AS deed_type, - cs.seller_name_coalesced AS seller_name, - cs.is_multisale_coalesced AS is_multisale, - cs.num_parcels_sale_coalesced AS num_parcels_sale, - cs.buyer_name_coalesced AS buyer_name, - cs.sale_type_coalesced AS sale_type, - cs.sale_filter_same_sale_within_365, - cs.sale_filter_less_than_10k, - cs.sale_filter_deed_type, + afs.pin_coalesced AS pin, + afs.year_coalesced AS 
year, + afs.township_code_coalesced AS township_code, + afs.nbhd_coalesced AS nbhd, + afs.class_coalesced AS class, + afs.sale_date_coalesced AS sale_date, + afs.is_mydec_date, + afs.sale_price_coalesced AS sale_price, + afs.sale_key, + afs.doc_no_coalesced AS doc_no, + afs.deed_type_coalesced AS deed_type, + afs.seller_name_coalesced AS seller_name, + afs.is_multisale_coalesced AS is_multisale, + afs.num_parcels_sale_coalesced AS num_parcels_sale, + afs.buyer_name_coalesced AS buyer_name, + afs.sale_type_coalesced AS sale_type, + afs.sale_filter_same_sale_within_365, + afs.sale_filter_less_than_10k, + afs.sale_filter_deed_type, -- Our sales validation pipeline only validates sales past 2014 due to MyDec -- limitations. Previous to that values for sv_is_outlier will be NULL, so -- if we want to both exclude detected outliers and include sales prior to -- 2014, we need to code everything NULL as FALSE. COALESCE(sales_val.sv_is_outlier, FALSE) AS sale_filter_is_outlier, - cs.mydec_deed_type, - cs.sale_filter_ptax_flag, - cs.mydec_property_advertised, - cs.mydec_is_installment_contract_fulfilled, - cs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa - cs.mydec_is_transfer_of_less_than_100_percent_interest, - cs.mydec_is_court_ordered_sale, - cs.mydec_is_sale_in_lieu_of_foreclosure, - cs.mydec_is_condemnation, - cs.mydec_is_short_sale, - cs.mydec_is_bank_reo_real_estate_owned, - cs.mydec_is_auction_sale, - cs.mydec_is_seller_buyer_a_relocation_company, - cs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa - cs.mydec_is_buyer_a_real_estate_investment_trust, - cs.mydec_is_buyer_a_pension_fund, - cs.mydec_is_buyer_an_adjacent_property_owner, - cs.mydec_is_buyer_exercising_an_option_to_purchase, - cs.mydec_is_simultaneous_trade_of_property, - cs.mydec_is_sale_leaseback, - cs.mydec_is_homestead_exemption, - cs.mydec_homestead_exemption_general_alternative, - cs.mydec_homestead_exemption_senior_citizens, - 
cs.mydec_homestead_exemption_senior_citizens_assessment_freeze, + afs.mydec_deed_type, + afs.sale_filter_ptax_flag, + afs.mydec_property_advertised, + afs.mydec_is_installment_contract_fulfilled, + afs.mydec_is_sale_between_related_individuals_or_corporate_affiliates, --noqa + afs.mydec_is_transfer_of_less_than_100_percent_interest, + afs.mydec_is_court_ordered_sale, + afs.mydec_is_sale_in_lieu_of_foreclosure, + afs.mydec_is_condemnation, + afs.mydec_is_short_sale, + afs.mydec_is_bank_reo_real_estate_owned, + afs.mydec_is_auction_sale, + afs.mydec_is_seller_buyer_a_relocation_company, + afs.mydec_is_seller_buyer_a_financial_institution_or_government_agency, --noqa + afs.mydec_is_buyer_a_real_estate_investment_trust, + afs.mydec_is_buyer_a_pension_fund, + afs.mydec_is_buyer_an_adjacent_property_owner, + afs.mydec_is_buyer_exercising_an_option_to_purchase, + afs.mydec_is_simultaneous_trade_of_property, + afs.mydec_is_sale_leaseback, + afs.mydec_is_homestead_exemption, + afs.mydec_homestead_exemption_general_alternative, + afs.mydec_homestead_exemption_senior_citizens, + afs.mydec_homestead_exemption_senior_citizens_assessment_freeze, sales_val.sv_is_outlier, sales_val.sv_is_ptax_outlier, sales_val.sv_is_heuristic_outlier, @@ -423,7 +423,7 @@ SELECT sales_val.sv_outlier_reason3, sales_val.sv_run_id, sales_val.sv_version, - cs.source -FROM combined_sales AS cs + afs.source +FROM add_filter_sales AS afs LEFT JOIN sales_val - ON cs.doc_no_coalesced = sales_val.meta_sale_document_num; + ON afs.doc_no_coalesced = sales_val.meta_sale_document_num; From 8e0f7c3fbb95faf25391d6150b44b789a9f2b4a2 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 15 Oct 2024 18:49:57 +0000 Subject: [PATCH 115/126] Add some feedback --- dbt/models/default/default.vw_pin_sale.sql | 24 ++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ad88171fc..94fd167ef 100644 
--- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -1,4 +1,5 @@ -- View containing unique, filtered sales +-- Class and township of associated PIN WITH town_class AS ( SELECT par.parid, @@ -18,6 +19,8 @@ WITH town_class AS ( AND par.deactivat IS NULL ), +-- "nopar" isn't entirely accurate for sales associated with only one parcel, +-- so we create our own counter calculated AS ( SELECT instruno, @@ -36,6 +39,9 @@ calculated AS ( unique_sales AS ( SELECT *, + -- Historically, this view excluded sales for a given pin if it had sold + -- within the last 12 months for the same price. This filter allows us + -- to filter out those sales. COALESCE( DATE_DIFF( 'day', @@ -60,6 +66,7 @@ unique_sales AS ( sales.nopar > 1 OR calculated.nopar_calculated > 1, FALSE ) AS is_multisale, + -- "nopar" is number of parcels sold CASE WHEN sales.nopar > 1 THEN sales.nopar ELSE calculated.nopar_calculated @@ -76,6 +83,11 @@ unique_sales AS ( WHEN sales.saletype = '0' THEN 'LAND' WHEN sales.saletype = '1' THEN 'LAND AND BUILDING' END AS sale_type, + -- Sales are not entirely unique by pin/date so we group all + -- sales by pin/date, then order by descending price + -- and give the top observation a value of 1 for "max_price". + -- We need to order by salekey as well in case of any ties within + -- price, date, and pin. 
ROW_NUMBER() OVER ( PARTITION BY sales.parid, @@ -147,7 +159,6 @@ mydec_sales AS ( REPLACE(document_number, 'D', '') AS doc_no, REPLACE(line_1_primary_pin, '-', '') AS pin, DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date, - SUBSTR(line_4_instrument_date, 1, 4) AS year, line_5_instrument_type AS mydec_deed_type, NULLIF(TRIM(seller_name), '') AS seller_name, NULLIF(TRIM(buyer_name), '') AS buyer_name, @@ -207,7 +218,8 @@ mydec_sales AS ( ) > 0 AS sale_filter_ptax_flag, COUNT() OVER ( PARTITION BY line_1_primary_pin, line_4_instrument_date - ) AS num_single_day_sales + ) AS num_single_day_sales, + year_of_sale as year FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 ) @@ -244,7 +256,12 @@ sales_val AS ( AND sf.version = mv.max_version ), --- Introducing csales to precompute the coalesced values +-- For many of the fields we used simple coalesce statement, +-- but some data is a bit more complicated. Prior to 2021, +-- mydec sales and iasworld sales used different sale dates. +-- We preference the mydec sale as they are believed to be more +-- accurate. As of 2021, iasworld utilizes mydec sales, which means +-- we can prioritize iasworld data instead of mydec data. 
combined_sales AS ( SELECT COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, @@ -257,7 +274,6 @@ combined_sales AS ( THEN md_sales.year ELSE uq_sales.year END AS year_coalesced, - --COALESCE(uq_sales.year, md_sales.year) AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, --noqa COALESCE(uq_sales.nbhd, tc.nbhd) AS nbhd_coalesced, From d089352f9af8119187866f5a69cf2782f40b4091 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 15 Oct 2024 19:23:56 +0000 Subject: [PATCH 116/126] Add more docs --- dbt/models/default/default.vw_pin_sale.sql | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 94fd167ef..612ca8938 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -219,7 +219,7 @@ mydec_sales AS ( COUNT() OVER ( PARTITION BY line_1_primary_pin, line_4_instrument_date ) AS num_single_day_sales, - year_of_sale as year + year_of_sale AS year FROM {{ source('sale', 'mydec') }} WHERE line_2_total_parcels = 1 ) @@ -256,15 +256,17 @@ sales_val AS ( AND sf.version = mv.max_version ), --- For many of the fields we used simple coalesce statement, --- but some data is a bit more complicated. Prior to 2021, --- mydec sales and iasworld sales used different sale dates. --- We preference the mydec sale as they are believed to be more --- accurate. As of 2021, iasworld utilizes mydec sales, which means --- we can prioritize iasworld data instead of mydec data. +-- CTE to coalesce iasworld and mydec values prior to +-- constructing filters that depend on coalesced fields combined_sales AS ( SELECT COALESCE(uq_sales.pin, md_sales.pin) AS pin_coalesced, + -- For many of the fields we used simple coalesce statement, + -- but some data is a bit more complicated. Prior to 2021, + -- mydec sales and iasworld sales used different sale dates. 
+ -- We preference the mydec sale as they are believed to be more + -- accurate. As of 2021, iasworld utilizes mydec sales, which means + -- we can prioritize iasworld data instead of mydec data. CASE WHEN md_sales.sale_date IS NOT NULL AND ( @@ -341,6 +343,9 @@ combined_sales AS ( md_sales.mydec_homestead_exemption_senior_citizens, md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze FROM unique_sales AS uq_sales + -- If a doc_no exists in iasworld and mydec, we prioritize iasworld, + -- if it only exists in mydec, we will grab the doc_no from mydec. The + -- 'source' column lets us know which table the doc_no came from. FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid From 96f6e1dd3f1b71509d2a654bf3c584cc8f865610 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 23 Oct 2024 16:39:58 +0000 Subject: [PATCH 117/126] Add PR changes --- dbt/models/default/default.vw_pin_sale.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 612ca8938..7cea6a377 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -221,13 +221,12 @@ mydec_sales AS ( ) AS num_single_day_sales, year_of_sale AS year FROM {{ source('sale', 'mydec') }} - WHERE line_2_total_parcels = 1 ) /* Some sales in mydec have multiple rows for one pin on a given sale date. Sometimes they have different dates than iasworld prior to 2021 and when joined back onto unique_sales will create duplicates by pin/sale date. 
*/ WHERE num_single_day_sales = 1 - OR (YEAR(sale_date) > 2020) + OR year > 2020 ), max_version_flag AS ( From d6579aa76740e8d625b9268a078e5dbc7d04c98c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 23 Oct 2024 16:48:41 +0000 Subject: [PATCH 118/126] Add year cast --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 7cea6a377..0c7394935 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -226,7 +226,7 @@ mydec_sales AS ( Sometimes they have different dates than iasworld prior to 2021 and when joined back onto unique_sales will create duplicates by pin/sale date. */ WHERE num_single_day_sales = 1 - OR year > 2020 + OR YEAR(year) > 2020 ), max_version_flag AS ( From ad9af37c18592c7e4d7bddc228f6c9da18665852 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 23 Oct 2024 17:17:22 +0000 Subject: [PATCH 119/126] Fix year calc --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 0c7394935..b6c8c78e0 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -226,7 +226,7 @@ mydec_sales AS ( Sometimes they have different dates than iasworld prior to 2021 and when joined back onto unique_sales will create duplicates by pin/sale date. 
*/ WHERE num_single_day_sales = 1 - OR YEAR(year) > 2020 + OR YEAR(DATE_PARSE(year, '%Y')) > 2020 ), max_version_flag AS ( From cd0d5ebfc5d4878fa2c2ed2c7e754f09d7f0df26 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 23 Oct 2024 17:24:56 +0000 Subject: [PATCH 120/126] Add documentation --- dbt/models/default/default.vw_pin_sale.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index b6c8c78e0..48121aa01 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -342,9 +342,12 @@ combined_sales AS ( md_sales.mydec_homestead_exemption_senior_citizens, md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze FROM unique_sales AS uq_sales + -- This logic brings in mydec sales that aren't in iasworld. -- If a doc_no exists in iasworld and mydec, we prioritize iasworld, -- if it only exists in mydec, we will grab the doc_no from mydec. The - -- 'source' column lets us know which table the doc_no came from. + -- 'source' column lets us know which table the doc_no came from and allows + -- us to filter for only iasworld sales or for mydec sales that aren't in + -- iasworld already. 
FULL OUTER JOIN mydec_sales AS md_sales ON uq_sales.doc_no = md_sales.doc_no LEFT JOIN town_class AS tc ON COALESCE(uq_sales.pin, md_sales.pin) = tc.parid From 7a224a58c0c8c3e907929fa6ca3b364cad837f0a Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 23 Oct 2024 17:29:07 +0000 Subject: [PATCH 121/126] Add nuance info --- dbt/models/default/docs.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt/models/default/docs.md b/dbt/models/default/docs.md index f45cd9df4..202117265 100644 --- a/dbt/models/default/docs.md +++ b/dbt/models/default/docs.md @@ -156,6 +156,9 @@ Sourced from `iasworld.sales`, which is sourced from to `iasworld.sales` (which is only parcel-level) without creating duplicates - Sales are unique by `doc_no` if multisales are excluded. When multisales are _not_ excluded, sales are unique by `doc_no` and `pin`. +- We include iasworld sales and mydec sales only if the mydec sale isn't already + present in iasworld (calculated by doc_no). This allows us to use mydec sales + for analysis or modeling if the iasworld sales ingest lags behind mydec. ### Lineage From cf96232e9879f54316cb0659d8e1b979caee75b8 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 4 Nov 2024 16:53:45 +0000 Subject: [PATCH 122/126] Standardize year logic --- dbt/models/default/default.vw_pin_sale.sql | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 48121aa01..00b2cf120 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -267,13 +267,9 @@ combined_sales AS ( -- accurate. As of 2021, iasworld utilizes mydec sales, which means -- we can prioritize iasworld data instead of mydec data. 
CASE - WHEN md_sales.sale_date IS NOT NULL - AND ( - uq_sales.sale_date IS NULL - OR md_sales.sale_date != uq_sales.sale_date - ) - THEN md_sales.year - ELSE uq_sales.year + WHEN uq_sales.year < '2021' + THEN COALESCE(md_sales.year, uq_sales.year) + ELSE COALESCE(uq_sales.year, md_sales.year) END AS year_coalesced, COALESCE(uq_sales.township_code, tc.township_code) AS township_code_coalesced, --noqa From 6afe4e37e8952ac5baae06093d77f309c362160b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 4 Nov 2024 16:59:22 +0000 Subject: [PATCH 123/126] Remove redundant coalesce --- dbt/models/default/default.vw_pin_sale.sql | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 00b2cf120..3b5e3d2c2 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -282,18 +282,15 @@ combined_sales AS ( ELSE COALESCE(uq_sales.sale_date, md_sales.sale_date) END AS sale_date_coalesced, CASE - -- If uq_sales.doc_no is not NULL, apply the COALESCE logic + -- If uq_sales.doc_no is not NULL, apply the COALESCE logic WHEN uq_sales.doc_no IS NOT NULL - THEN - COALESCE(COALESCE( - md_sales.sale_date IS NOT NULL - OR YEAR(uq_sales.sale_date) >= 2021, + THEN COALESCE( + md_sales.sale_date IS NOT NULL OR YEAR(uq_sales.sale_date) >= 2021, FALSE - ), FALSE) - -- If uq_sales.doc_no is NULL, set is_mydec_date to TRUE + ) ELSE TRUE - END AS is_mydec_date, + END AS is_mydec_date COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, --noqa uq_sales.sale_key, From 4de96f87bf50f4aea6983722409bf44e5854b40c Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 4 Nov 2024 17:03:56 +0000 Subject: [PATCH 124/126] Add missing comma --- dbt/models/default/default.vw_pin_sale.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql 
b/dbt/models/default/default.vw_pin_sale.sql index 3b5e3d2c2..004a18415 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -285,12 +285,13 @@ combined_sales AS ( -- If uq_sales.doc_no is not NULL, apply the COALESCE logic WHEN uq_sales.doc_no IS NOT NULL THEN COALESCE( - md_sales.sale_date IS NOT NULL OR YEAR(uq_sales.sale_date) >= 2021, + md_sales.sale_date IS NOT NULL + OR YEAR(uq_sales.sale_date) >= 2021, FALSE ) ELSE TRUE - END AS is_mydec_date + END AS is_mydec_date, COALESCE(uq_sales.sale_price, md_sales.sale_price) AS sale_price_coalesced, --noqa uq_sales.sale_key, From a2375c61125b79513ef6576ba2a50bc4b359652c Mon Sep 17 00:00:00 2001 From: wagnerlmichael <93889413+wagnerlmichael@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:14:36 -0600 Subject: [PATCH 125/126] Update year call dbt/models/default/default.vw_pin_sale.sql Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 004a18415..d6b353a87 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -226,7 +226,7 @@ mydec_sales AS ( Sometimes they have different dates than iasworld prior to 2021 and when joined back onto unique_sales will create duplicates by pin/sale date. 
*/ WHERE num_single_day_sales = 1 - OR YEAR(DATE_PARSE(year, '%Y')) > 2020 + OR year > 2020 ), max_version_flag AS ( From 2e7de19945ab0e2c603e6aa15c43d712cc0be3b6 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Mon, 4 Nov 2024 17:17:18 +0000 Subject: [PATCH 126/126] Try wrapping 2020 in single quotes --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index d6b353a87..1fab60b2b 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -226,7 +226,7 @@ mydec_sales AS ( Sometimes they have different dates than iasworld prior to 2021 and when joined back onto unique_sales will create duplicates by pin/sale date. */ WHERE num_single_day_sales = 1 - OR year > 2020 + OR year > '2020' ), max_version_flag AS (