Add homeowner exemption legacy extract to data lake (#611)
* Add raw OWNR data table to `ccao.legacy`

* Add print statement to OWNR transformation

* Remove unnecessary comments from ccao-legacy.R
jeancochrane authored Oct 4, 2024
1 parent f7ef7fc commit 5445b63
Showing 3 changed files with 96 additions and 0 deletions.
8 changes: 8 additions & 0 deletions dbt/models/ccao/docs.md
@@ -15,6 +15,14 @@ MV 08/18/2024.
**Primary Key**: `pin`, `year`
{% enddocs %}

# cc_pifdb_piexemptre_ownr

{% docs table_cc_pifdb_piexemptre_ownr %}
Legacy homestead exemption data pulled by BoT. Provided via MV 10/01/2024.

**Primary Key**: `pin`, `year`
{% enddocs %}

# cc_pifdb_piexemptre_sted

{% docs table_cc_pifdb_piexemptre_sted %}
3 changes: 3 additions & 0 deletions dbt/models/ccao/schema.yml
@@ -14,6 +14,9 @@ sources:
- name: cc_pifdb_piexemptre_dise
description: '{{ doc("table_cc_pifdb_piexemptre_dise") }}'

- name: cc_pifdb_piexemptre_ownr
description: '{{ doc("table_cc_pifdb_piexemptre_ownr") }}'

- name: cc_pifdb_piexemptre_sted
description: '{{ doc("table_cc_pifdb_piexemptre_sted") }}'

85 changes: 85 additions & 0 deletions etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R
@@ -262,3 +262,88 @@ cc_pifdb_piexemptre_dise %>%
hive_style = TRUE,
compression = "zstd"
)


##### CC_PIFDB_PIEXEMPTRE_OWNR #####
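# List the raw fixed-width OWNR extracts in the raw bucket, skipping
# zero-byte objects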
files_cc_pifdb_piexemptre_ownr <- aws.s3::get_bucket_df(
bucket = AWS_S3_RAW_BUCKET,
prefix = "ccao/legacy/CC_PIFDB_PIEXEMPTRE_OWNR",
max = Inf
) %>%
filter(Size > 0)

cc_pifdb_piexemptre_ownr <- map_dfr(files_cc_pifdb_piexemptre_ownr$Key, \(f) {
print(glue::glue("Transforming {f}"))
aws.s3::s3read_using(
object = f,
bucket = AWS_S3_RAW_BUCKET,
FUN = readr::read_fwf,
trim_ws = TRUE,
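# Fixed-width column positions for the OWNR record layout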
col_positions = readr::fwf_cols(
pin = c(1, 14),
year = c(16, 17),
tax_year = c(19, 20),
tax_type = c(22, 22),
segment_code = c(24, 24),
printed_indicator = c(26, 26),
response = c(28, 28),
year_applied = c(30, 33),
maintenance_indicator = c(35, 35),
proration_factor = c(37, 43),
coop_quantity = c(45, 49),
coop_status = c(51, 51),
equalized_factor = c(53, 57),
assessed_value = c(59, 67),
equalized_value = c(69, 77),
batch_number = c(79, 83),
occupancy_factor = c(85, 89),
exemption_amount = c(91, 99),
exemption_base_year = c(101, 104),
exemption_status = c(106, 107),
filler = c(109, 234)
),
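# Explicit column types: codes and identifiers as character, numeric
# fields as integer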
col_types = cols(
pin = col_character(),
year = col_character(),
tax_year = col_character(),
tax_type = col_character(),
segment_code = col_character(),
printed_indicator = col_character(),
response = col_character(),
year_applied = col_character(),
maintenance_indicator = col_character(),
proration_factor = col_integer(),
coop_quantity = col_integer(),
coop_status = col_character(),
equalized_factor = col_integer(),
assessed_value = col_integer(),
equalized_value = col_integer(),
batch_number = col_integer(),
occupancy_factor = col_integer(),
exemption_amount = col_integer(),
exemption_base_year = col_character(),
exemption_status = col_character(),
filler = col_character()
)
) %>%
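# Expand two-digit years: a leading 9 becomes 19xx, everything else 20xx;
# also record which raw extract each row came from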
mutate(
across(
c(year, tax_year),
\(x) ifelse(substr(x, 1, 1) == "9", paste0("19", x), paste0("20", x))
),
source_file = {{ f }}
) %>%
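# Drop the trailing filler column from the fixed-width layout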
select(-filler)
})

# Write the files to S3, partitioned by year
cc_pifdb_piexemptre_ownr %>%
group_by(year) %>%
arrow::write_dataset(
path = file.path(
output_bucket, "cc_pifdb_piexemptre_ownr"
),
format = "parquet",
hive_style = TRUE,
compression = "zstd"
)
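
Not part of this commit: a minimal sketch of how the new partitioned table could be spot-checked after a warehouse run, assuming the same `output_bucket` used above, the dplyr and arrow packages already attached by the script, and S3 credentials available to arrow.

# Sketch only (not in the commit): open the partitioned Parquet output and
# count rows per year partition, assuming `output_bucket` is defined as above
ownr_check <- arrow::open_dataset(
  file.path(output_bucket, "cc_pifdb_piexemptre_ownr"),
  format = "parquet"
)

ownr_check %>%
  count(year) %>%
  collect() %>%
  arrange(year)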
