From be88f45c12f3489c2f6608437605020ed3ca5444 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 25 Oct 2024 21:52:31 +0000 Subject: [PATCH] Add comment about manual cleaning to `ccao-legacy.R` script --- etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R index 80d3dc52e..84673e731 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R @@ -272,6 +272,9 @@ files_cc_pifdb_piexemptre_ownr <- aws.s3::get_bucket_df( ) %>% filter(Size > 0) +# Read the files into a single tibble. NOTE: these files are fixed-width +# and have been MANUALLY CLEANED to remove some ASCII null characters that were +# being used instead of spaces in the base year field cc_pifdb_piexemptre_ownr <- map_dfr(files_cc_pifdb_piexemptre_ownr$Key, \(f) { print(glue::glue("Transforming {f}")) aws.s3::s3read_using(