From 14d4edfacc1a7bf36f851ec595d0d1f728dd9b07 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 3 Oct 2024 21:17:35 +0000 Subject: [PATCH 01/74] Push for raw upload --- .../spatial/spatial-traffic.R | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R new file mode 100644 index 000000000..d74b02a6e --- /dev/null +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R @@ -0,0 +1,84 @@ +# Load necessary libraries +if (!requireNamespace("httr", quietly = TRUE)) install.packages("httr") +if (!requireNamespace("sf", quietly = TRUE)) install.packages("sf") +if (!requireNamespace("tools", quietly = TRUE)) install.packages("tools") +if (!requireNamespace("aws.s3", quietly = TRUE)) install.packages("aws.s3") +if (!requireNamespace("arrow", quietly = TRUE)) install.packages("arrow") + +library(aws.s3) +library(dplyr) +library(purrr) +library(sf) +library(arrow) + +# Define S3 bucket and paths +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment") +current_year <- strftime(Sys.Date(), "%Y") + +# Function to process each year and upload shapefiles for that specific year to S3 +process_shapefiles_for_year <- function(year) { + # Define the URL for the shapefile ZIP file, dynamically for each year + url <- paste0("https://apps1.dot.illinois.gov/gist2/gisdata/all", year, ".zip") + + # Create a temporary file to store the downloaded ZIP + temp_zip <- tempfile(fileext = ".zip") + temp_dir <- tempdir() + + # Use httr to download the ZIP file to a temporary location + response <- httr::GET(url) + + # Check if the request was successful + if (httr::status_code(response) == 200) { + # Save the content of the response as a ZIP file in a temporary location + writeBin(httr::content(response, "raw"), temp_zip) + message(paste("Shapefile ZIP for year", year, "downloaded successfully.")) + + # Unzip the file into a temporary directory + utils::unzip(temp_zip, exdir = temp_dir) + message(paste("Shapefile for year", year, "unzipped into temporary directory.")) + + # List files in the unzipped directory and look for the .shp files + unzipped_files <- list.files(temp_dir, recursive = TRUE, full.names = TRUE) + shp_file_for_year <- unzipped_files[grepl(paste0("T2HWY", year), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] + + # Process only the shapefile that matches the current year + if (length(shp_file_for_year) == 1) { + # Read the shapefile into the environment using sf::st_read + shapefile_data <- sf::st_read(shp_file_for_year) + + # Create a temporary file to save the shapefile as GeoParquet for S3 upload + temp_parquet <- tempfile(fileext = ".parquet") + + # Save the shapefile as a GeoParquet file + sf::write_parquet(shapefile_data, temp_parquet) + + # Define remote file path in S3 + remote_file_path <- file.path(output_bucket, paste0("T2HWY_", year, ".parquet")) + + # Upload to S3 if it doesn't already exist + if (!aws.s3::object_exists(remote_file_path)) { + message(paste("Uploading T2HWY_", year, "to S3 as Parquet...")) + put_object(file = temp_parquet, object = remote_file_path, bucket = AWS_S3_RAW_BUCKET) + + message(paste("Shapefile T2HWY", year, "uploaded to S3 at:", remote_file_path)) + } else { + message(paste("Shapefile T2HWY", year, "already exists in S3, skipping 
upload.")) + } + + # Clean up temporary files + file.remove(temp_parquet) + + } else { + message(paste("No shapefile found for year", year, ".")) + } + + } else { + message(paste("Failed to retrieve the file for year", year, ". Status code: ", httr::status_code(response))) + } +} + +# Loop through the years from 2012 to the current year and process each shapefile +for (year in 2012:current_year) { + process_shapefiles_for_year(year) +} From 2ecffe16a7059030eb549070cd5a3bfb7d823561 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 3 Oct 2024 21:25:48 +0000 Subject: [PATCH 02/74] Remove unnecessary code --- .../spatial/spatial-traffic.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R index d74b02a6e..f69f3f596 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R @@ -1,10 +1,3 @@ -# Load necessary libraries -if (!requireNamespace("httr", quietly = TRUE)) install.packages("httr") -if (!requireNamespace("sf", quietly = TRUE)) install.packages("sf") -if (!requireNamespace("tools", quietly = TRUE)) install.packages("tools") -if (!requireNamespace("aws.s3", quietly = TRUE)) install.packages("aws.s3") -if (!requireNamespace("arrow", quietly = TRUE)) install.packages("arrow") - library(aws.s3) library(dplyr) library(purrr) @@ -51,7 +44,7 @@ process_shapefiles_for_year <- function(year) { temp_parquet <- tempfile(fileext = ".parquet") # Save the shapefile as a GeoParquet file - sf::write_parquet(shapefile_data, temp_parquet) + sf::st_write_parquet(shapefile_data, temp_parquet) # Define remote file path in S3 remote_file_path <- file.path(output_bucket, paste0("T2HWY_", year, ".parquet")) From e0c05aec962037f7b2a676158a0e10f2b2265d42 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 3 Oct 2024 21:26:05 +0000 Subject: [PATCH 03/74] Testing file --- traffic.R | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 traffic.R diff --git a/traffic.R b/traffic.R new file mode 100644 index 000000000..cfa3ad30b --- /dev/null +++ b/traffic.R @@ -0,0 +1,106 @@ +library(sf) +library(DBI) +library(noctua) +library(dplyr) +library(leaflet) +library(ggplot2) + +shapefile <- read_sf("etl/scripts-ccao-data-raw-us-east-1/spatial/traffic_data.shp") + + +filtered_data <- shapefile %>% + filter(COUNTY_NAM == "COOK") %>% + st_as_sf() %>% + st_transform(crs = 4326) %>% + mutate(AADT_STRIN = as.numeric(AADT_STRIN)) + +ggplot() + + geom_sf(data = filtered_data, aes(color = AADT_STRIN), size = 1) + # Adjust size as needed + scale_color_viridis_c(option = "plasma", name = "AADT_STRIN") + # Use viridis color scale for better visualization + labs( + title = "Map of AADT_STRIN in Cook County", + subtitle = "Visualizing Traffic Data on Cook County Roads", + x = "Longitude", + y = "Latitude" + ) + + theme_minimal() + + theme( + plot.title = element_text(hjust = 0.5), + plot.subtitle = element_text(hjust = 0.5) + ) + +filtered_data <- st_zm(filtered_data) + + +pal <- colorNumeric(palette = "viridis", domain = filtered_data$AADT_STRIN) + +leaflet(filtered_data) %>% + addProviderTiles("CartoDB.Positron") %>% # Add a base map layer + addPolylines( + color = ~pal(AADT_STRIN), # Use the color palette based on AADT_STRIN + weight = 2, # Adjust line thickness + opacity = 0.7, # Adjust line transparency + popup = ~paste("AADT_STRIN:", 
AADT_STRIN) # Add popups to show AADT_STRIN values + ) %>% + addLegend( + pal = pal, + values = ~AADT_STRIN, + opacity = 0.7, + title = "AADT_STRIN", + position = "bottomright" + ) %>% + setView(lng = mean(st_coordinates(filtered_data)[, 1]), + lat = mean(st_coordinates(filtered_data)[, 2]), + zoom = 10) # Adjust zoom level and map center + +con <- dbConnect(noctua::athena()) + +# Assuming the original CRS is EPSG:3857 (Web Mercator), adjust this if necessary +secondary_roads <- dbGetQuery(con, 'SELECT * FROM "spatial"."secondary_road" WHERE CAST(year AS INTEGER) = 2023') %>% + st_as_sf() %>% + st_set_crs(3435) + + +# Leaflet map combining both filtered_data and secondary_roads + # Leaflet map combining both filtered_data and secondary_roads + leaflet() %>% + addProviderTiles("CartoDB.Positron") %>% # Base map layer + addPolylines( + data = filtered_data, + color = ~pal(AADT_STRIN), # Color based on AADT_STRIN + weight = 2, # Adjust line thickness + opacity = 0.7, # Adjust transparency + popup = ~paste("AADT_STRIN:", AADT_STRIN) # Popup for AADT_STRIN + ) %>% + addPolylines( + data = secondary_roads, + color = "blue", # Color for secondary roads + weight = 1, # Adjust line thickness for secondary roads + opacity = 0.6, # Adjust transparency for secondary roads + popup = ~paste("Road Name:", name) # Add popups for secondary roads + ) %>% + addLegend( + pal = pal, + values = filtered_data$AADT_STRIN, + opacity = 0.7, + title = "AADT_STRIN", + position = "bottomright" + ) %>% + setView( + lng = mean(st_coordinates(filtered_data)[, 1]), + lat = mean(st_coordinates(filtered_data)[, 2]), + zoom = 10 # Adjust zoom level + ) + + + # Step 2: Buffer the geometries by 50 feet (around filtered_data) + filtered_data_buffer <- st_buffer(filtered_data, dist = 50) + + # Step 3: Spatial join to find intersections within 50 feet + joined_data <- st_join(secondary_roads, filtered_data_buffer, join = st_intersects) + + # Step 4: Optionally, filter for rows where intersections occurred + joined_data_within_50ft <- joined_data %>% + filter(!is.na(AADT_STRIN)) # AADT_STRIN is from filtered_data, so this filters where the join occurred + + From d145bae6f98b9d87005e8995959bab548b99a7a5 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 3 Oct 2024 21:57:35 +0000 Subject: [PATCH 04/74] Modify HWY so it looks back to 2012 --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R index f69f3f596..ef5d9cbcf 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R @@ -33,7 +33,7 @@ process_shapefiles_for_year <- function(year) { # List files in the unzipped directory and look for the .shp files unzipped_files <- list.files(temp_dir, recursive = TRUE, full.names = TRUE) - shp_file_for_year <- unzipped_files[grepl(paste0("T2HWY", year), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] + shp_file_for_year <- unzipped_files[grepl(paste0("HWY", year), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] # Process only the shapefile that matches the current year if (length(shp_file_for_year) == 1) { @@ -44,7 +44,7 @@ process_shapefiles_for_year <- function(year) { temp_parquet <- tempfile(fileext = ".parquet") # Save the shapefile as a GeoParquet file - sf::st_write_parquet(shapefile_data, 
temp_parquet) + geoarrow::write_geoparquet(shapefile_data, temp_parquet) # Define remote file path in S3 remote_file_path <- file.path(output_bucket, paste0("T2HWY_", year, ".parquet")) From b749f3ad027b16411e7830427eb5807adebb641f Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Fri, 4 Oct 2024 15:49:52 +0000 Subject: [PATCH 05/74] Minor simplifications --- etl/renv.lock | 36 ++++++++- .../spatial/spatial-traffic.R | 80 ++++++++----------- 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/etl/renv.lock b/etl/renv.lock index 35498d897..80382383c 100644 --- a/etl/renv.lock +++ b/etl/renv.lock @@ -142,6 +142,20 @@ ], "Hash": "ae4a925e0f6bb1b7e5fa96b739c5221a" }, + "RSocrata": { + "Package": "RSocrata", + "Version": "1.7.15-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "httr", + "jsonlite", + "mime", + "plyr" + ], + "Hash": "435ebea3fa736ab1317c79a5fa34fa55" + }, "Rcpp": { "Package": "Rcpp", "Version": "1.0.12", @@ -1926,8 +1940,13 @@ "noctua": { "Package": "noctua", "Version": "2.6.2", - "Source": "Repository", - "Repository": "CRAN", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteUsername": "DyfanJones", + "RemoteRepo": "noctua", + "RemoteRef": "master", + "RemoteSha": "23a4cfbf537407c7a1547fc13ba771ba2eb098e0", "Requirements": [ "DBI", "R", @@ -1938,7 +1957,7 @@ "utils", "uuid" ], - "Hash": "c03d73125d695e80b35b4bb3eacf0358" + "Hash": "a48e1decdd027c44ea6b97b0fe0950cb" }, "numDeriv": { "Package": "numDeriv", @@ -2276,6 +2295,17 @@ "Repository": "CRAN", "Hash": "09eb987710984fc2905c7129c7d85e65" }, + "plyr": { + "Package": "plyr", + "Version": "1.8.9", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp" + ], + "Hash": "6b8177fd19982f0020743fadbfdbd933" + }, "png": { "Package": "png", "Version": "0.1-8", diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R index ef5d9cbcf..a0bbf3a8a 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R @@ -1,77 +1,67 @@ library(aws.s3) library(dplyr) +library(httr) +library(lubridate) library(purrr) library(sf) library(arrow) # Define S3 bucket and paths AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment") +output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment", "traffic") current_year <- strftime(Sys.Date(), "%Y") +# Get list of available files +years <- map(2012:year(Sys.Date()), \(x){ + if (HEAD(paste0( + "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip" + ))$status_code == 200) { + x + } +}) %>% + unlist() + # Function to process each year and upload shapefiles for that specific year to S3 -process_shapefiles_for_year <- function(year) { - # Define the URL for the shapefile ZIP file, dynamically for each year - url <- paste0("https://apps1.dot.illinois.gov/gist2/gisdata/all", year, ".zip") +process_shapefiles_for_year <- map(years, \(x) { + + remote_file_path <- file.path(output_bucket, paste0(x, ".parquet")) - # Create a temporary file to store the downloaded ZIP - temp_zip <- tempfile(fileext = ".zip") - temp_dir <- tempdir() + # Skip everything if file already exists + if (!object_exists(remote_file_path)) { + # Define the URL for the shapefile ZIP file, dynamically for each year + url <- paste0("https://apps1.dot.illinois.gov/gist2/gisdata/all", x, 
".zip") - # Use httr to download the ZIP file to a temporary location - response <- httr::GET(url) + # Create a temporary file to store the downloaded ZIP + temp_zip <- tempfile(fileext = ".zip") + temp_dir <- tempdir() - # Check if the request was successful - if (httr::status_code(response) == 200) { - # Save the content of the response as a ZIP file in a temporary location - writeBin(httr::content(response, "raw"), temp_zip) - message(paste("Shapefile ZIP for year", year, "downloaded successfully.")) + # Download the ZIP file to a temporary location + download.file(url = url, destfile = temp_zip) + + message(paste("Shapefile ZIP for year", x, "downloaded successfully.")) # Unzip the file into a temporary directory - utils::unzip(temp_zip, exdir = temp_dir) - message(paste("Shapefile for year", year, "unzipped into temporary directory.")) + unzip(temp_zip, exdir = temp_dir) + message(paste("Shapefile for year", x, "unzipped into temporary directory.")) # List files in the unzipped directory and look for the .shp files unzipped_files <- list.files(temp_dir, recursive = TRUE, full.names = TRUE) - shp_file_for_year <- unzipped_files[grepl(paste0("HWY", year), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] + shp_file_for_year <- unzipped_files[grepl(paste0("HWY", x), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] # Process only the shapefile that matches the current year if (length(shp_file_for_year) == 1) { # Read the shapefile into the environment using sf::st_read shapefile_data <- sf::st_read(shp_file_for_year) - # Create a temporary file to save the shapefile as GeoParquet for S3 upload - temp_parquet <- tempfile(fileext = ".parquet") - # Save the shapefile as a GeoParquet file - geoarrow::write_geoparquet(shapefile_data, temp_parquet) - - # Define remote file path in S3 - remote_file_path <- file.path(output_bucket, paste0("T2HWY_", year, ".parquet")) - - # Upload to S3 if it doesn't already exist - if (!aws.s3::object_exists(remote_file_path)) { - message(paste("Uploading T2HWY_", year, "to S3 as Parquet...")) - put_object(file = temp_parquet, object = remote_file_path, bucket = AWS_S3_RAW_BUCKET) - - message(paste("Shapefile T2HWY", year, "uploaded to S3 at:", remote_file_path)) - } else { - message(paste("Shapefile T2HWY", year, "already exists in S3, skipping upload.")) - } - - # Clean up temporary files - file.remove(temp_parquet) + geoarrow::write_geoparquet(shapefile_data, remote_file_path) } else { - message(paste("No shapefile found for year", year, ".")) + message(paste("No shapefile found for year", x, ".")) } - } else { - message(paste("Failed to retrieve the file for year", year, ". 
Status code: ", httr::status_code(response)))
   }
-}
 
-# Loop through the years from 2012 to the current year and process each shapefile
-for (year in 2012:current_year) {
-  process_shapefiles_for_year(year)
-}
+})
+
+unlink(temp_dir, recursive = TRUE)

From 2bfc81397c3dfbb70007f12b3f2b62de86b829bf Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Fri, 4 Oct 2024 18:09:27 +0000
Subject: [PATCH 06/74] Add cleaning script

---
 ...raffic.R => spatial-environment-traffic.R} |  0
 .../spatial/spatial-environment-traffic.R     | 52 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 rename etl/scripts-ccao-data-raw-us-east-1/spatial/{spatial-traffic.R => spatial-environment-traffic.R} (100%)
 create mode 100644 etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R

diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
similarity index 100%
rename from etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-traffic.R
rename to etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
new file mode 100644
index 000000000..cce13e99c
--- /dev/null
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -0,0 +1,52 @@
+# Load required libraries
+library(aws.s3)
+library(dplyr)
+library(purrr)
+library(sf)
+
+# Define S3 bucket and paths
+AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1"
+AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET")
+raw_bucket_prefix <- "spatial/environment/traffic/"
+warehouse_bucket_path <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic")
+
+# List files from the raw bucket
+raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = raw_bucket_prefix)
+
+
+process_files_from_raw_bucket <- map(raw_files$Key, \(file_key) {
+
+  # Skip if the file is not a .parquet file
+  if (!grepl("\\.parquet$", file_key)) {
+    message(paste("Skipping non-parquet file:", file_key))
+    return(NULL)
+  }
+
+  # Download the file locally for inspection
+  local_parquet_file <- tempfile(fileext = ".parquet")
+
+  # Corrected: Pass only the bucket name and file key
+  save_object(file = local_parquet_file, object = file_key, bucket = AWS_S3_RAW_BUCKET)
+
+  # Read the parquet file using geoarrow
+  shapefile_data <- geoarrow::read_geoparquet(local_parquet_file)
+
+  # Define the columns you want to select. These change over time, so a strict select isn't great.
+  # But all columns are present from 2014 on.
+  required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR",
+                        "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY")
+
+  # Select only the columns that exist in the dataset
+  existing_columns <- intersect(required_columns, colnames(shapefile_data))
+  selected_columns <- shapefile_data %>%
+    select(all_of(existing_columns))
+
+  # Show the first few rows of the selected columns for inspection
+  print(paste("File:", file_key))
+  print(head(selected_columns))
+
+  # Clean up the temporary local file
+  unlink(local_parquet_file)
+
+})
+
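The cleaning script added above guards its select() with intersect() because the IDOT layer's schema drifts across vintages (per the comment, the full column list is only guaranteed from 2014 on). One way to see that drift before hard-coding a column list is to read just the schema of each year's file and reduce to the columns shared by every vintage. The sketch below is illustrative only: it assumes a hypothetical local mirror of the per-year GeoParquet files in a traffic/ directory rather than the S3 bucket.

library(arrow)
library(purrr)

# Hypothetical local copies of the raw files, e.g. traffic/2012.parquet
files <- list.files("traffic", pattern = "\\.parquet$", full.names = TRUE)

# open_dataset() only reads metadata, so this is cheap even for large files
cols_by_year <- map(set_names(files, basename(files)), function(f) {
  names(open_dataset(f)$schema)
})

# Columns present in every vintage are safe to select strictly; anything
# else needs the intersect()-style guard used in the script above
common_cols <- reduce(cols_by_year, intersect)
print(common_cols)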
From da0791d2b83eaa7c5954c4452d8686dcbe195ff5 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Fri, 4 Oct 2024 18:15:14 +0000
Subject: [PATCH 07/74] Quick edit

---
 .../spatial/spatial-environment-traffic.R | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
index cce13e99c..0f8f9fe9e 100644
--- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -7,21 +7,14 @@ library(sf)
 # Define S3 bucket and paths
 AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1"
 AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET")
-raw_bucket_prefix <- "spatial/environment/traffic/"
 warehouse_bucket_path <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic")
 
 # List files from the raw bucket
-raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = raw_bucket_prefix)
+raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = warehouse_bucket_path)
 
 
 process_files_from_raw_bucket <- map(raw_files$Key, \(file_key) {
 
-  # Skip if the file is not a .parquet file
-  if (!grepl("\\.parquet$", file_key)) {
-    message(paste("Skipping non-parquet file:", file_key))
-    return(NULL)
-  }
-
   # Download the file locally for inspection
   local_parquet_file <- tempfile(fileext = ".parquet")

From b15eafa24b8876f5c7484575cb314fa514c63ee8 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Fri, 4 Oct 2024 18:21:25 +0000
Subject: [PATCH 08/74] Push correct version

---
 .../spatial/spatial-environment-traffic.R | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
index 0f8f9fe9e..c46ad48bd 100644
--- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -3,16 +3,20 @@ library(aws.s3)
 library(dplyr)
 library(purrr)
 library(sf)
+library(geoarrow)
 
-# Define S3 bucket and paths
-AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1"
+# Define S3 bucket and paths for raw and warehouse
+AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET")
 AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET")
+
+# Paths to raw and warehouse buckets
+raw_bucket_path <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment", "traffic")
 warehouse_bucket_path <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic")
 
 # List files from the raw bucket
-raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = warehouse_bucket_path)
+raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = "spatial/environment/traffic/")
 
+# 
Process each file from the raw bucket process_files_from_raw_bucket <- map(raw_files$Key, \(file_key) { # Download the file locally for inspection @@ -34,12 +38,11 @@ process_files_from_raw_bucket <- map(raw_files$Key, \(file_key) { selected_columns <- shapefile_data %>% select(all_of(existing_columns)) - # Show the first few rows of the selected columns for inspection - print(paste("File:", file_key)) - print(head(selected_columns)) - # Clean up the temporary local file unlink(local_parquet_file) -}) + # Optionally, write processed data back to warehouse bucket + output_file <- file.path(warehouse_bucket_path, file_key) + geoarrow::write_geoparquet(selected_columns, output_file) +}) From bc1070c4054428be3be9c5e6cf595634664941e9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 7 Oct 2024 17:09:20 +0000 Subject: [PATCH 09/74] Remove old file --- traffic.R | 106 ------------------------------------------------------ 1 file changed, 106 deletions(-) delete mode 100644 traffic.R diff --git a/traffic.R b/traffic.R deleted file mode 100644 index cfa3ad30b..000000000 --- a/traffic.R +++ /dev/null @@ -1,106 +0,0 @@ -library(sf) -library(DBI) -library(noctua) -library(dplyr) -library(leaflet) -library(ggplot2) - -shapefile <- read_sf("etl/scripts-ccao-data-raw-us-east-1/spatial/traffic_data.shp") - - -filtered_data <- shapefile %>% - filter(COUNTY_NAM == "COOK") %>% - st_as_sf() %>% - st_transform(crs = 4326) %>% - mutate(AADT_STRIN = as.numeric(AADT_STRIN)) - -ggplot() + - geom_sf(data = filtered_data, aes(color = AADT_STRIN), size = 1) + # Adjust size as needed - scale_color_viridis_c(option = "plasma", name = "AADT_STRIN") + # Use viridis color scale for better visualization - labs( - title = "Map of AADT_STRIN in Cook County", - subtitle = "Visualizing Traffic Data on Cook County Roads", - x = "Longitude", - y = "Latitude" - ) + - theme_minimal() + - theme( - plot.title = element_text(hjust = 0.5), - plot.subtitle = element_text(hjust = 0.5) - ) - -filtered_data <- st_zm(filtered_data) - - -pal <- colorNumeric(palette = "viridis", domain = filtered_data$AADT_STRIN) - -leaflet(filtered_data) %>% - addProviderTiles("CartoDB.Positron") %>% # Add a base map layer - addPolylines( - color = ~pal(AADT_STRIN), # Use the color palette based on AADT_STRIN - weight = 2, # Adjust line thickness - opacity = 0.7, # Adjust line transparency - popup = ~paste("AADT_STRIN:", AADT_STRIN) # Add popups to show AADT_STRIN values - ) %>% - addLegend( - pal = pal, - values = ~AADT_STRIN, - opacity = 0.7, - title = "AADT_STRIN", - position = "bottomright" - ) %>% - setView(lng = mean(st_coordinates(filtered_data)[, 1]), - lat = mean(st_coordinates(filtered_data)[, 2]), - zoom = 10) # Adjust zoom level and map center - -con <- dbConnect(noctua::athena()) - -# Assuming the original CRS is EPSG:3857 (Web Mercator), adjust this if necessary -secondary_roads <- dbGetQuery(con, 'SELECT * FROM "spatial"."secondary_road" WHERE CAST(year AS INTEGER) = 2023') %>% - st_as_sf() %>% - st_set_crs(3435) - - -# Leaflet map combining both filtered_data and secondary_roads - # Leaflet map combining both filtered_data and secondary_roads - leaflet() %>% - addProviderTiles("CartoDB.Positron") %>% # Base map layer - addPolylines( - data = filtered_data, - color = ~pal(AADT_STRIN), # Color based on AADT_STRIN - weight = 2, # Adjust line thickness - opacity = 0.7, # Adjust transparency - popup = ~paste("AADT_STRIN:", AADT_STRIN) # Popup for AADT_STRIN - ) %>% - addPolylines( - data = secondary_roads, - color = "blue", # Color for 
secondary roads - weight = 1, # Adjust line thickness for secondary roads - opacity = 0.6, # Adjust transparency for secondary roads - popup = ~paste("Road Name:", name) # Add popups for secondary roads - ) %>% - addLegend( - pal = pal, - values = filtered_data$AADT_STRIN, - opacity = 0.7, - title = "AADT_STRIN", - position = "bottomright" - ) %>% - setView( - lng = mean(st_coordinates(filtered_data)[, 1]), - lat = mean(st_coordinates(filtered_data)[, 2]), - zoom = 10 # Adjust zoom level - ) - - - # Step 2: Buffer the geometries by 50 feet (around filtered_data) - filtered_data_buffer <- st_buffer(filtered_data, dist = 50) - - # Step 3: Spatial join to find intersections within 50 feet - joined_data <- st_join(secondary_roads, filtered_data_buffer, join = st_intersects) - - # Step 4: Optionally, filter for rows where intersections occurred - joined_data_within_50ft <- joined_data %>% - filter(!is.na(AADT_STRIN)) # AADT_STRIN is from filtered_data, so this filters where the join occurred - - From 917c48d118244058a425a3848a67a494d54e2826 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 7 Oct 2024 19:42:40 +0000 Subject: [PATCH 10/74] Working script --- .../spatial/spatial-environment-traffic.R | 58 +++++-------------- 1 file changed, 15 insertions(+), 43 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index c46ad48bd..dcc3f18f4 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -1,48 +1,20 @@ -# Load required libraries library(aws.s3) library(dplyr) -library(purrr) library(sf) library(geoarrow) -# Define S3 bucket and paths for raw and warehouse -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") - -# Paths to raw and warehouse buckets -raw_bucket_path <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment", "traffic") -warehouse_bucket_path <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic") - -# List files from the raw bucket -raw_files <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = "spatial/environment/traffic/") - -# Process each file from the raw bucket -process_files_from_raw_bucket <- map(raw_files$Key, \(file_key) { - - # Download the file locally for inspection - local_parquet_file <- tempfile(fileext = ".parquet") - - # Corrected: Pass only the bucket name and file key - save_object(file = local_parquet_file, object = file_key, bucket = AWS_S3_RAW_BUCKET) - - # Read the parquet file using geoarrow - shapefile_data <- geoarrow::read_geoparquet(local_parquet_file) - - # Define the columns you want to select. These change over time, so a strict select isn't great. - # But all columns are present from 2014 on. 
- required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY") - - # Select only the columns that exist in the dataset - existing_columns <- intersect(required_columns, colnames(shapefile_data)) - selected_columns <- shapefile_data %>% - select(all_of(existing_columns)) - - # Clean up the temporary local file - unlink(local_parquet_file) - - # Optionally, write processed data back to warehouse bucket - output_file <- file.path(warehouse_bucket_path, file_key) - geoarrow::write_geoparquet(selected_columns, output_file) - -}) +# Define the S3 bucket and file path +AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1" +file_key <- "spatial/environment/traffic/2023.parquet" + +# Pipeline: download, read, and process the data with lowercase column names +shapefile_data <- tempfile(fileext = ".parquet") %>% + {save_object(object = file_key, bucket = AWS_S3_RAW_BUCKET, file = .); .} %>% + geoarrow::read_geoparquet() %>% + mutate(geometry = st_as_sfc(geometry)) %>% + st_as_sf() %>% + st_transform(4326) %>% + mutate(geometry_3435 = st_transform(geometry, 3435)) %>% + select(all_of(intersect(c("lns", "surf_typ", "surf_wth", "srf_yr", "aadt", "crs_with", "crs_opp", "crs_yr", + "road_name", "dtress_wth", "dtress_opp", "sp_lim", "inventory", "geometry_3435"), + tolower(colnames(.))))) From 936b972803b0aa09498f14578d846dd6e2a14ed7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 7 Oct 2024 20:36:36 +0000 Subject: [PATCH 11/74] Working loop --- .../spatial/spatial-environment-traffic.R | 60 ++++++++++++++----- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index dcc3f18f4..8c8ce0a65 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -3,18 +3,50 @@ library(dplyr) library(sf) library(geoarrow) -# Define the S3 bucket and file path +# Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1" -file_key <- "spatial/environment/traffic/2023.parquet" - -# Pipeline: download, read, and process the data with lowercase column names -shapefile_data <- tempfile(fileext = ".parquet") %>% - {save_object(object = file_key, bucket = AWS_S3_RAW_BUCKET, file = .); .} %>% - geoarrow::read_geoparquet() %>% - mutate(geometry = st_as_sfc(geometry)) %>% - st_as_sf() %>% - st_transform(4326) %>% - mutate(geometry_3435 = st_transform(geometry, 3435)) %>% - select(all_of(intersect(c("lns", "surf_typ", "surf_wth", "srf_yr", "aadt", "crs_with", "crs_opp", "crs_yr", - "road_name", "dtress_wth", "dtress_opp", "sp_lim", "inventory", "geometry_3435"), - tolower(colnames(.))))) +AWS_S3_WAREHOUSE_BUCKET <- "ccao-data-warehouse-us-east-1" +s3_folder <- "spatial/environment/traffic/" +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic") + +# List all the files in the S3 folder +files_in_s3 <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) + +# Filter for files that match a 'parquet' pattern +parquet_files <- files_in_s3 %>% + filter(grepl("\\.parquet$", Key)) %>% + pull(Key) + +# Loop through each parquet file and process it +for (file_key in parquet_files) { + message("Processing file: ", file_key) + + # Download the file from S3 as a raw connection into 
a temporary file + temp_file <- tempfile(fileext = ".parquet") + save_object(object = file_key, bucket = AWS_S3_RAW_BUCKET, file = temp_file) + + # Read the downloaded file using geoarrow into the R environment + shapefile_data <- geoarrow::read_geoparquet(temp_file) + + # Ensure geometry column is in 'sf' format + shapefile_data$geometry <- st_as_sfc(shapefile_data$geometry) + + shapefile_data <- shapefile_data %>% + st_as_sf() %>% + st_transform(4326) %>% + mutate(geometry_3435 = st_transform(geometry, 3435)) + + # Define the columns you want to select + required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", + "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY", "geometry_3435") + + # Select only the non-geometry columns that exist in the dataset + existing_columns <- intersect(required_columns, colnames(shapefile_data)) + selected_columns <- shapefile_data %>% + select(all_of(existing_columns)) + + # Clean up the temporary file + unlink(temp_file) +} + +message("Processing completed for all files.") From 1bbcce48f9b961e832ebbd90858a8438283d90db Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 7 Oct 2024 20:53:41 +0000 Subject: [PATCH 12/74] Billy edits --- .../spatial/spatial-environment-traffic.R | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 8c8ce0a65..80b55a264 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -12,21 +12,18 @@ output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "t # List all the files in the S3 folder files_in_s3 <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) -# Filter for files that match a 'parquet' pattern +# Get the 'Key' (file path) for all files parquet_files <- files_in_s3 %>% - filter(grepl("\\.parquet$", Key)) %>% pull(Key) # Loop through each parquet file and process it for (file_key in parquet_files) { - message("Processing file: ", file_key) - # Download the file from S3 as a raw connection into a temporary file - temp_file <- tempfile(fileext = ".parquet") - save_object(object = file_key, bucket = AWS_S3_RAW_BUCKET, file = temp_file) + # Read the parquet file directly from S3 using aws.s3 functions + obj <- get_object(object = file_key, bucket = AWS_S3_RAW_BUCKET) - # Read the downloaded file using geoarrow into the R environment - shapefile_data <- geoarrow::read_geoparquet(temp_file) + # Convert the S3 object into raw data and read using geoarrow + shapefile_data <- geoarrow::read_geoparquet(rawConnection(obj)) # Ensure geometry column is in 'sf' format shapefile_data$geometry <- st_as_sfc(shapefile_data$geometry) @@ -45,8 +42,18 @@ for (file_key in parquet_files) { selected_columns <- shapefile_data %>% select(all_of(existing_columns)) - # Clean up the temporary file - unlink(temp_file) -} + # Create a temporary file for saving the processed data + output_file <- tempfile(fileext = ".parquet") + + # Write the selected columns to a new parquet file + geoarrow::write_geoparquet(selected_columns, output_file) + + # Define the output file path in the S3 bucket + output_key <- file.path(output_bucket, basename(file_key)) -message("Processing completed for all files.") + # Upload the processed file to the S3 output 
bucket
+  put_object(file = output_file, object = output_key, bucket = AWS_S3_WAREHOUSE_BUCKET)
+
+  # Clean up the temporary files
+  unlink(output_file)
+}
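The loop body above repeats a write-to-tempfile, put_object(), unlink() round trip for every file. That round trip can be factored into a small helper. The function below is a sketch, not part of the repo; it assumes only geoarrow::write_geoparquet() and aws.s3::put_object(), both already used in this patch.

library(aws.s3)

# Write an sf object to S3 as GeoParquet via a temporary local file.
# on.exit() removes the tempfile even if the upload throws an error.
put_geoparquet <- function(data, object, bucket) {
  tmp <- tempfile(fileext = ".parquet")
  on.exit(unlink(tmp), add = TRUE)
  geoarrow::write_geoparquet(data, tmp)
  put_object(file = tmp, object = object, bucket = bucket)
}

# Usage, mirroring the loop body above:
# put_geoparquet(selected_columns, output_key, AWS_S3_WAREHOUSE_BUCKET)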
From 1e4475199712bee29b1ebef0d08053fb662bfff0 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Mon, 7 Oct 2024 20:54:46 +0000
Subject: [PATCH 13/74] text edits

---
 .../spatial/spatial-environment-traffic.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
index 80b55a264..728c8e746 100644
--- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -9,10 +9,9 @@ AWS_S3_WAREHOUSE_BUCKET <- "ccao-data-warehouse-us-east-1"
 s3_folder <- "spatial/environment/traffic/"
 output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic")
 
-# List all the files in the S3 folder
 files_in_s3 <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder)
 
-# Get the 'Key' (file path) for all files
+# Get the 'Key'
 parquet_files <- files_in_s3 %>%
   pull(Key)
 
@@ -25,7 +24,7 @@ for (file_key in parquet_files) {
   # Convert the S3 object into raw data and read using geoarrow
   shapefile_data <- geoarrow::read_geoparquet(rawConnection(obj))
 
-  # Ensure geometry column is in 'sf' format
+  # Convert geometry column to 'sf' format
   shapefile_data$geometry <- st_as_sfc(shapefile_data$geometry)
 
   shapefile_data <- shapefile_data %>%
@@ -33,7 +32,8 @@ for (file_key in parquet_files) {
     st_transform(4326) %>%
     mutate(geometry_3435 = st_transform(geometry, 3435))
 
-  # Define the columns you want to select
+  # Define the columns you want to select. We do this because some columns are not present in older
+  # versions of the data
   required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR",

From 21aa4382520ff844c8f24efe3bb7b3c6272134e0 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Mon, 7 Oct 2024 20:56:49 +0000
Subject: [PATCH 14/74] Use correct buckets

---
 .../spatial/spatial-environment-traffic.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
index 728c8e746..7e5fc9fff 100644
--- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -4,8 +4,8 @@ library(sf)
 library(geoarrow)
 
 # Define the S3 bucket and folder path
-AWS_S3_RAW_BUCKET <- "ccao-data-raw-us-east-1"
-AWS_S3_WAREHOUSE_BUCKET <- "ccao-data-warehouse-us-east-1"
+AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET")
+AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET")
 s3_folder <- "spatial/environment/traffic/"
 output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic")

From 83a0e4e4038b78a7f17efd5bde1e44f682ab995d Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Mon, 7 Oct 2024 20:57:59 +0000
Subject: [PATCH 15/74] Text edits

---
 .../spatial/spatial-environment-traffic.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
index 7e5fc9fff..86cae8342 100644
--- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R
@@ -32,8 +32,7 @@ for (file_key in parquet_files) {
     st_transform(4326) %>%
     mutate(geometry_3435 = st_transform(geometry, 3435))
 
-  # Define the columns you want to select. 
We do this because some columns are not present in older - # versions of the data + # We do this because some columns are not present in older versions of the data required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY", "geometry_3435") From 32065710bc023332235a595d902340f9698dce53 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 15:01:03 +0000 Subject: [PATCH 16/74] lintr --- .../spatial/spatial-environment-traffic.R | 16 +++++++++----- .../spatial/spatial-environment-traffic.R | 21 ++++++++++++------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index a0bbf3a8a..204e44d33 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -8,20 +8,23 @@ library(arrow) # Define S3 bucket and paths AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "environment", "traffic") +output_bucket <- file.path(AWS_S3_RAW_BUCKET, + "spatial", "environment", "traffic") current_year <- strftime(Sys.Date(), "%Y") # Get list of available files years <- map(2012:year(Sys.Date()), \(x){ if (HEAD(paste0( - "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip" + "https://apps1.dot.illinois.gov/gist2/gisdata/all", + x, ".zip" ))$status_code == 200) { x } }) %>% unlist() -# Function to process each year and upload shapefiles for that specific year to S3 +# Function to process each year and upload shapefiles for +# that specific year to S3 process_shapefiles_for_year <- map(years, \(x) { remote_file_path <- file.path(output_bucket, paste0(x, ".parquet")) @@ -29,7 +32,8 @@ process_shapefiles_for_year <- map(years, \(x) { # Skip everything if file already exists if (!object_exists(remote_file_path)) { # Define the URL for the shapefile ZIP file, dynamically for each year - url <- paste0("https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip") + url <- paste0( + "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip") # Create a temporary file to store the downloaded ZIP temp_zip <- tempfile(fileext = ".zip") @@ -46,7 +50,9 @@ process_shapefiles_for_year <- map(years, \(x) { # List files in the unzipped directory and look for the .shp files unzipped_files <- list.files(temp_dir, recursive = TRUE, full.names = TRUE) - shp_file_for_year <- unzipped_files[grepl(paste0("HWY", x), unzipped_files, ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] + shp_file_for_year <- unzipped_files[grepl(paste0("HWY", x), + unzipped_files, ignore.case = TRUE) + & grepl("\\.shp$", unzipped_files)] # Process only the shapefile that matches the current year if (length(shp_file_for_year) == 1) { diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 86cae8342..ccf0ef236 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -7,9 +7,11 @@ library(geoarrow) AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic/" 
-output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic") +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, + "spatial", "environment", "traffic") -files_in_s3 <- get_bucket_df(bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) +files_in_s3 <- get_bucket_df( + bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) # Get the 'Key' parquet_files <- files_in_s3 %>% @@ -32,12 +34,16 @@ for (file_key in parquet_files) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # We do this because some columns are not present in older versions of the data - required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY", "geometry_3435") + # We do this because some columns are not present in + # older versions of the data + required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", + "CRS_WITH", "CRS_OPP", "CRS_YR", + "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY", "geometry_3435") # Select only the non-geometry columns that exist in the dataset - existing_columns <- intersect(required_columns, colnames(shapefile_data)) + existing_columns <- intersect(required_columns, + colnames(shapefile_data)) selected_columns <- shapefile_data %>% select(all_of(existing_columns)) @@ -51,7 +57,8 @@ for (file_key in parquet_files) { output_key <- file.path(output_bucket, basename(file_key)) # Upload the processed file to the S3 output bucket - put_object(file = output_file, object = output_key, bucket = AWS_S3_WAREHOUSE_BUCKET) + put_object(file = output_file, object = output_key, + bucket = AWS_S3_WAREHOUSE_BUCKET) # Clean up the temporary files unlink(output_file) From 59c180de3e0d82f01917245411f2d0220ed0394c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 15:04:56 +0000 Subject: [PATCH 17/74] lintr --- .../spatial/spatial-environment-traffic.R | 13 ++++++++----- .../spatial/spatial-environment-traffic.R | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 204e44d33..bd5a9328e 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -13,7 +13,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, current_year <- strftime(Sys.Date(), "%Y") # Get list of available files -years <- map(2012:year(Sys.Date()), \(x){ +years <- map(2012:year(Sys.Date()), \(x) { if (HEAD(paste0( "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip" @@ -33,7 +33,7 @@ process_shapefiles_for_year <- map(years, \(x) { if (!object_exists(remote_file_path)) { # Define the URL for the shapefile ZIP file, dynamically for each year url <- paste0( - "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip") + "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip") # Create a temporary file to store the downloaded ZIP temp_zip <- tempfile(fileext = ".zip") @@ -46,12 +46,15 @@ process_shapefiles_for_year <- map(years, \(x) { # Unzip the file into a temporary directory unzip(temp_zip, exdir = temp_dir) - message(paste("Shapefile for year", x, "unzipped into temporary directory.")) + message(paste("Shapefile for year", x, + "unzipped into temporary directory.")) # List files in the unzipped directory and look for the .shp files unzipped_files <- 
list.files(temp_dir, recursive = TRUE, full.names = TRUE) - shp_file_for_year <- unzipped_files[grepl(paste0("HWY", x), - unzipped_files, ignore.case = TRUE) + shp_file_for_year <- unzipped_files[grepl(paste0("HWY", + x), + unzipped_files, + ignore.case = TRUE) & grepl("\\.shp$", unzipped_files)] # Process only the shapefile that matches the current year diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index ccf0ef236..8081f6ccb 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -11,7 +11,7 @@ output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic") files_in_s3 <- get_bucket_df( - bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) + bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) # Get the 'Key' parquet_files <- files_in_s3 %>% From de905ef91d4ea5d12d47e8474aa668e8384e82eb Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 8 Oct 2024 15:48:08 +0000 Subject: [PATCH 18/74] Change geoparquet function --- etl/renv.lock | 4 +- .../spatial/spatial-environment-traffic.R | 76 ++++++++----------- 2 files changed, 34 insertions(+), 46 deletions(-) diff --git a/etl/renv.lock b/etl/renv.lock index 80382383c..2bbbc4f11 100644 --- a/etl/renv.lock +++ b/etl/renv.lock @@ -1943,7 +1943,7 @@ "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", - "RemoteUsername": "DyfanJones", + "RemoteUsername": "dyfanjones", "RemoteRepo": "noctua", "RemoteRef": "master", "RemoteSha": "23a4cfbf537407c7a1547fc13ba771ba2eb098e0", @@ -1957,7 +1957,7 @@ "utils", "uuid" ], - "Hash": "a48e1decdd027c44ea6b97b0fe0950cb" + "Hash": "b3fc482d0ae2f51ed324fd3da66471b4" }, "numDeriv": { "Package": "numDeriv", diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 8081f6ccb..f1b09c4db 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -1,65 +1,53 @@ library(aws.s3) library(dplyr) +library(purrr) library(sf) library(geoarrow) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/traffic/" -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, - "spatial", "environment", "traffic") - -files_in_s3 <- get_bucket_df( - bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder) +output_bucket <- file.path( + AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic" + ) # Get the 'Key' -parquet_files <- files_in_s3 %>% +parquet_files <- get_bucket_df( + bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder + ) %>% pull(Key) # Loop through each parquet file and process it -for (file_key in parquet_files) { - - # Read the parquet file directly from S3 using aws.s3 functions - obj <- get_object(object = file_key, bucket = AWS_S3_RAW_BUCKET) - - # Convert the S3 object into raw data and read using geoarrow - shapefile_data <- geoarrow::read_geoparquet(rawConnection(obj)) - - # Convert geometry column to 'sf' format - shapefile_data$geometry <- st_as_sfc(shapefile_data$geometry) +walk(parquet_files, \(file_key) { - shapefile_data <- shapefile_data %>% - st_as_sf() %>% 
- st_transform(4326) %>% - mutate(geometry_3435 = st_transform(geometry, 3435)) + if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, file_key))) { - # We do this because some columns are not present in - # older versions of the data - required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", - "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435") + print(paste("Cleaning", file_key)) - # Select only the non-geometry columns that exist in the dataset - existing_columns <- intersect(required_columns, - colnames(shapefile_data)) - selected_columns <- shapefile_data %>% - select(all_of(existing_columns)) + # Convert the S3 object into raw data and read using geoarrow + shapefile_data <- geoarrow::read_geoparquet_sf( + file.path(AWS_S3_RAW_BUCKET, file_key) + ) %>% + st_transform(4326) %>% + mutate(geometry_3435 = st_transform(geometry, 3435)) - # Create a temporary file for saving the processed data - output_file <- tempfile(fileext = ".parquet") + # We do this because some columns are not present in + # older versions of the data + required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", + "CRS_WITH", "CRS_OPP", "CRS_YR", + "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY", "geometry_3435") - # Write the selected columns to a new parquet file - geoarrow::write_geoparquet(selected_columns, output_file) + # Select only the non-geometry columns that exist in the dataset + existing_columns <- intersect(required_columns, colnames(shapefile_data)) + shapefile_data %>% + select(all_of(existing_columns)) %>% + geoarrow::write_geoparquet( + file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) + ) - # Define the output file path in the S3 bucket - output_key <- file.path(output_bucket, basename(file_key)) + print(paste(file_key, "cleaned and uploaded.")) - # Upload the processed file to the S3 output bucket - put_object(file = output_file, object = output_key, - bucket = AWS_S3_WAREHOUSE_BUCKET) + } - # Clean up the temporary files - unlink(output_file) -} +}) From 418ef5e970464470110e38f93922483961dbf2a7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 16:22:24 +0000 Subject: [PATCH 19/74] Add filter for Cook County --- .../spatial/spatial-environment-traffic.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index bd5a9328e..94c26051e 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -60,7 +60,8 @@ process_shapefiles_for_year <- map(years, \(x) { # Process only the shapefile that matches the current year if (length(shp_file_for_year) == 1) { # Read the shapefile into the environment using sf::st_read - shapefile_data <- sf::st_read(shp_file_for_year) + shapefile_data <- sf::st_read(shp_file_for_year) %>% + filter(INV_CO == '016') # Save the shapefile as a GeoParquet file geoarrow::write_geoparquet(shapefile_data, remote_file_path) From 45b7929a07a46686b507bba5f2348f98abbaed89 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 16:23:07 +0000 Subject: [PATCH 20/74] Comment --- .../spatial/spatial-environment-traffic.R | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R 
b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
index 94c26051e..1e1483467 100644
--- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
@@ -61,6 +61,7 @@ process_shapefiles_for_year <- map(years, \(x) {
   if (length(shp_file_for_year) == 1) {
     # Read the shapefile into the environment using sf::st_read
     shapefile_data <- sf::st_read(shp_file_for_year) %>%
+      # Add filter for Cook County
       filter(INV_CO == '016')
 
     # Save the shapefile as a GeoParquet file

From c6961d08baecce5226492ba38c8b65212a97d5f5 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Tue, 8 Oct 2024 16:39:44 +0000
Subject: [PATCH 21/74] Add if-else statement for County

---
 .../spatial/spatial-environment-traffic.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
index 1e1483467..13202a921 100644
--- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
+++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R
@@ -61,8 +61,9 @@ process_shapefiles_for_year <- map(years, \(x) {
   if (length(shp_file_for_year) == 1) {
     # Read the shapefile into the environment using sf::st_read
     shapefile_data <- sf::st_read(shp_file_for_year) %>%
-      # Add filter for Cook County
-      filter(INV_CO == '016')
+      # Add filter for Cook County. The name changes in different years
+      filter(if ("COUNTY" %in% names(df))
+        COUNTY == '016' else INV_CO == '016')
 
     # Save the shapefile as a GeoParquet file
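This commit and the three that follow iterate on the same filter, because the county-code column is named COUNTY in some vintages and INV_CO in others, and because names(df) here refers to a df object that is never defined in the script. Below is a sketch of an alternative that sidesteps the placeholder question by renaming first; it assumes at most one of the two columns exists in any given vintage, and the normalized county_code name is illustrative rather than from the repo.

library(dplyr)

# Normalize whichever county column a vintage has to one name, then filter.
# rename(any_of(...)) silently skips entries whose old name is absent.
filter_cook <- function(shp) {
  shp %>%
    rename(any_of(c(county_code = "COUNTY", county_code = "INV_CO"))) %>%
    filter(county_code == "016")
}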
The name changes in different years - filter(if ("COUNTY" %in% names(df)) + filter(if ("COUNTY" %in% names(shapefile_data)) COUNTY == '016' else INV_CO == '016') # Save the shapefile as a GeoParquet file From 266ba86dfd0cb3f8262e4ae5f282d59e628dfded Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 16:44:23 +0000 Subject: [PATCH 23/74] Include period instead of named file --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index a7fdf4d3f..9b4a98631 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -62,7 +62,7 @@ process_shapefiles_for_year <- map(years, \(x) { # Read the shapefile into the environment using sf::st_read shapefile_data <- sf::st_read(shp_file_for_year) %>% # Add filter for Cook County. The name changes in different years - filter(if ("COUNTY" %in% names(shapefile_data)) + filter(if ("COUNTY" %in% names(.)) COUNTY == '016' else INV_CO == '016') # Save the shapefile as a GeoParquet file From cc5a18f05d2b0d9cffed97ee3a0fd1876262237f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 16:45:14 +0000 Subject: [PATCH 24/74] period --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 9b4a98631..9a308f74b 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -61,7 +61,7 @@ process_shapefiles_for_year <- map(years, \(x) { if (length(shp_file_for_year) == 1) { # Read the shapefile into the environment using sf::st_read shapefile_data <- sf::st_read(shp_file_for_year) %>% - # Add filter for Cook County. The name changes in different years + # Add filter for Cook County. The name changes in different years. 
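
A note on the pattern patches 21-24 converge on: the conditional inside `filter()` works, but the same idea can be pulled into a small helper so the raw script stays readable. A minimal sketch assuming only the column names and county code visible in the diffs above; the helper name is hypothetical, not something in the repo:

library(dplyr)

# Keep only Cook County rows, whichever county column this vintage uses
filter_cook_county <- function(df, cook_code = "016") {
  county_col <- intersect(c("COUNTY", "INV_CO"), names(df))
  stopifnot(length(county_col) >= 1)
  df %>% filter(.data[[county_col[[1]]]] == cook_code)
}

# Usage inside the pipeline:
# shapefile_data <- sf::st_read(shp_file_for_year) %>% filter_cook_county()
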
filter(if ("COUNTY" %in% names(.)) COUNTY == '016' else INV_CO == '016') From 2cb1e0c35d7607f0715f52060216a273cf69c44f Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 8 Oct 2024 18:21:32 +0000 Subject: [PATCH 25/74] Fix s3 pathing --- .../spatial/spatial-environment-traffic.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index f1b09c4db..f5e57db6c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -7,9 +7,8 @@ library(geoarrow) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -output_bucket <- file.path( - AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "traffic" - ) +s3_folder <- "spatial/environment/traffic/" +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) # Get the 'Key' parquet_files <- get_bucket_df( From 683a5519432d007823ae53234e5176430d88a341 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 18:35:54 +0000 Subject: [PATCH 26/74] Start dbt schema --- ...ximity.dist_pin_to_traffic_speed_limit.sql | 24 +++++++++++++++++++ dbt/models/spatial/docs.md | 8 +++++++ dbt/models/spatial/schema.yml | 3 +++ 3 files changed, 35 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql new file mode 100644 index 000000000..f71306592 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql @@ -0,0 +1,24 @@ +-- CTAS to create a table of distance to the nearest rail tracks for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +SELECT + pcl.pin10, + ARBITRARY(xy.name_id) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_speed_limit_dist_ft, + ARBITRARY(xy.year) AS nearest_speed_limit_data_year, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN + ( {{ dist_to_nearest_geometry(source('spatial', 'traffic')) }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + AND xy.sp_lim > 0 +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 096221bb8..2a1463741 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -493,6 +493,14 @@ Includes townships within the City of Chicago, which are technically defunct. 
**Geometry:** `MULTIPOLYGON` {% enddocs %} +# traffic + +{% docs table_traffic %} +Locations of roads derived from the Illinois Department of Transportation website + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # transit_dict {% docs table_transit_dict %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 0982b8106..513900073 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,6 +174,9 @@ sources: - name: township description: '{{ doc("table_township") }}' + - name: traffic + description: '{{ doc("table_traffic") }}' + - name: transit_dict description: '{{ doc("table_transit_dict") }}' From 76bfe5bc09b942100a85c133427135268ac6055a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 19:33:03 +0000 Subject: [PATCH 27/74] Add year --- .../spatial/spatial-environment-traffic.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 9a308f74b..67a5fff62 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -63,7 +63,8 @@ process_shapefiles_for_year <- map(years, \(x) { shapefile_data <- sf::st_read(shp_file_for_year) %>% # Add filter for Cook County. The name changes in different years. filter(if ("COUNTY" %in% names(.)) - COUNTY == '016' else INV_CO == '016') + COUNTY == '016' else INV_CO == '016') %>% + mutate(year = x) # Save the shapefile as a GeoParquet file geoarrow::write_geoparquet(shapefile_data, remote_file_path) From c1d57e8d7ec5eab99b306259d9823ee6807982fa Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 19:49:55 +0000 Subject: [PATCH 28/74] Fix path --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index f5e57db6c..67962f2df 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -19,7 +19,7 @@ parquet_files <- get_bucket_df( # Loop through each parquet file and process it walk(parquet_files, \(file_key) { - if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, file_key))) { + if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder, file_key))) { print(paste("Cleaning", file_key)) From 89c69e72f0aca8aed2d772b86b7ced2fcf63492c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 8 Oct 2024 20:30:33 +0000 Subject: [PATCH 29/74] Add renaming --- .../spatial/spatial-environment-traffic.R | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 67962f2df..8b55b5ddc 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -32,15 +32,34 @@ walk(parquet_files, \(file_key) { # We do this because some columns are not present in # older versions of the data - required_columns <- c("LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", +
required_columns <- c("FCNAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435") + "SP_LIM", "INVENTORY", "geometry_3435", "year") # Select only the non-geometry columns that exist in the dataset existing_columns <- intersect(required_columns, colnames(shapefile_data)) - shapefile_data %>% + shapefile_data_test <- shapefile_data %>% select(all_of(existing_columns)) %>% + mutate( + road_type = if ("FCNAME" %in% colnames(.)) FCNAME else NA, + lanes = if ("LNS" %in% colnames(.)) LNS else NA, + surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, + surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, + surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, + annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, + condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, + condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, + condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, + road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, + distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, + distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, + speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, + inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA + ) %>% + select(-one_of(c("FCNAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", + "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY"))) %>% geoarrow::write_geoparquet( file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) ) From cb883e4071f3b7548acefcb09921b667eae1b83a Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 9 Oct 2024 15:21:51 +0000 Subject: [PATCH 30/74] Remove unnecessary code --- .../spatial/spatial-environment-traffic.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 67a5fff62..6b7b82178 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -76,5 +76,3 @@ process_shapefiles_for_year <- map(years, \(x) { } }) - -unlink(temp_dir, recursive = TRUE) From 2014cbeda72a975519756c89373e61ed2e3b433f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 15:38:22 +0000 Subject: [PATCH 31/74] Delete file --- ...ximity.dist_pin_to_traffic_speed_limit.sql | 24 ------------------- 1 file changed, 24 deletions(-) delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql deleted file mode 100644 index f71306592..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_speed_limit.sql +++ /dev/null @@ -1,24 +0,0 @@ --- CTAS to create a table of distance to the nearest rail tracks for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -SELECT - pcl.pin10, - ARBITRARY(xy.name_id) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_speed_limit_dist_ft, - ARBITRARY(xy.year) AS nearest_speed_limit_data_year, - pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl -INNER JOIN - ( {{
dist_to_nearest_geometry(source('spatial', 'traffic')) }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - AND xy.sp_lim > 0 -GROUP BY pcl.pin10, pcl.year From 102b721b1a86e83de76cf493273769aae920da88 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 15:39:30 +0000 Subject: [PATCH 32/74] Remove docs --- dbt/models/spatial/docs.md | 8 -------- dbt/models/spatial/schema.yml | 3 --- 2 files changed, 11 deletions(-) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 2a1463741..096221bb8 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -493,14 +493,6 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} -# traffic - -{% docs table_traffic %} -Locations of roads derived from the Illinois Department of Transportation website - -**Geometry:** `MULTILINESTRING` -{% enddocs %} - # transit_dict {% docs table_transit_dict %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 513900073..0982b8106 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,9 +174,6 @@ sources: - name: township description: '{{ doc("table_township") }}' - - name: traffic - description: '{{ doc("table_traffic") }}' - - name: transit_dict description: '{{ doc("table_transit_dict") }}' From ad6ed4626ea73990669e950b5311bb738c060e3a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 16:10:11 +0000 Subject: [PATCH 33/74] Use FC_NAME and FCNAME --- .../spatial/spatial-environment-traffic.R | 73 +++++++++++-------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 8b55b5ddc..7dd12bcd4 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -30,42 +30,51 @@ walk(parquet_files, \(file_key) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # We do this because some columns are not present in - # older versions of the data - required_columns <- c("FCNAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", - "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435", "year") - - # Select only the non-geometry columns that exist in the dataset - existing_columns <- intersect(required_columns, colnames(shapefile_data)) - shapefile_data_test <- shapefile_data %>% - select(all_of(existing_columns)) %>% - mutate( - road_type = if ("FCNAME" %in% colnames(.)) FCNAME else NA, - lanes = if ("LNS" %in% colnames(.)) LNS else NA, - surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, - surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, - surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, - annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, - condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, - condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, - condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, - road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, - distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, - distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, -
speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, - inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA + # Convert the S3 object into raw data and read using geoarrow + shapefile_data <- geoarrow::read_geoparquet_sf( + file.path(AWS_S3_RAW_BUCKET, file_key) ) %>% - select(-one_of(c("FCNAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", - "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY"))) %>% - geoarrow::write_geoparquet( - file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) - ) + st_transform(4326) %>% + mutate(geometry_3435 = st_transform(geometry, 3435)) + + + # We do this because some columns are not present in + # older versions of the data + required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", + "CRS_WITH", "CRS_OPP", "CRS_YR", + "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY", "geometry_3435", "year") + + # Select only the non-geometry columns that exist in the dataset + existing_columns <- intersect(required_columns, colnames(shapefile_data)) + shapefile_data %>% + select(all_of(existing_columns)) %>% + mutate( + road_type = if ("FCNAME" %in% colnames(.)) FCNAME else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, + lanes = if ("LNS" %in% colnames(.)) LNS else NA, + surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, + surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, + surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, + annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, + condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, + condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, + condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, + road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, + distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, + distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, + speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, + inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA + ) %>% + select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", + "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY"))) %>% + geoarrow::write_geoparquet( + file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) + ) print(paste(file_key, "cleaned and uploaded.")) + } } }) From 47b04698ba30e58edfe506204d031332b3445b88 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 16:11:02 +0000 Subject: [PATCH 34/74] Fix brackets --- .../spatial/spatial-environment-traffic.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 7dd12bcd4..6e3ee51b0 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -76,5 +76,4 @@ walk(parquet_files, \(file_key) { } } - -}) +) From 20b9c4c9d602ad7ad4580fca9845035951f94d54 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 9 Oct 2024 18:11:57 +0000 Subject: [PATCH 35/74] Get back to running --- .../spatial/spatial-environment-traffic.R | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 6e3ee51b0..4e7b042d3 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -13,13 +13,13 @@ output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) # Get the 'Key' parquet_files <- get_bucket_df( bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder - ) %>% +) %>% pull(Key) # Loop through each parquet file and process it walk(parquet_files, \(file_key) { - if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder, file_key))) { + if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, file_key))) { print(paste("Cleaning", file_key)) @@ -30,50 +30,50 @@ walk(parquet_files, \(file_key) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # Convert the S3 object into raw data and read using geoarrow - shapefile_data <- geoarrow::read_geoparquet_sf( - file.path(AWS_S3_RAW_BUCKET, file_key) - ) %>% - st_transform(4326) %>% - mutate(geometry_3435 = st_transform(geometry, 3435)) + # Convert the S3 object into raw data and read using geoarrow + # shapefile_data <- geoarrow::read_geoparquet_sf( + # file.path(AWS_S3_RAW_BUCKET, file_key) + # ) %>% + # st_transform(4326) %>% + # mutate(geometry_3435 = st_transform(geometry, 3435)) - # We do this because some columns are not present in - # older versions of the data - required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", - "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435", "year") + # We do this because some columns are not present in + # older versions of the data + required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", + "CRS_WITH", "CRS_OPP", "CRS_YR", + "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY", "geometry_3435", "year") - # Select only the non-geometry columns that exist in the dataset - existing_columns <- intersect(required_columns, colnames(shapefile_data)) - shapefile_data %>% - select(all_of(existing_columns)) %>% - mutate( - road_type = if ("FCNAME" %in% colnames(.)) FCNAME else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, - lanes = if ("LNS" %in% colnames(.)) LNS else NA, - surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, - surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, - surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, - annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, - condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, - condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, - condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, - road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, - distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, - distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, - speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, - inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA - ) %>% - select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", - "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY"))) %>% - geoarrow::write_geoparquet( - 
file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) - ) + # Select only the non-geometry columns that exist in the dataset + existing_columns <- intersect(required_columns, colnames(shapefile_data)) + shapefile_data %>% + select(all_of(existing_columns)) %>% + mutate( + road_type = if ("FCNAME" %in% colnames(.)) FCNAME else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, + lanes = if ("LNS" %in% colnames(.)) LNS else NA, + surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, + surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, + surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, + annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, + condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, + condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, + condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, + road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, + distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, + distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, + speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, + inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA + ) %>% + select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", + "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY"))) %>% + geoarrow::write_geoparquet( + file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) + ) print(paste(file_key, "cleaned and uploaded.")) - } } +} ) From 369e228f9606401c43ee6f46ee11b4ab54c3c40c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 18:43:24 +0000 Subject: [PATCH 36/74] Make year a character --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 6b7b82178..0891a53ba 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -64,7 +64,7 @@ process_shapefiles_for_year <- map(years, \(x) { # Add filter for Cook County. The name changes in different years. 
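
Patch 36 just below switches `year` from the loop integer to character. A likely motivation: the yearly GeoParquet files are later read side by side, and parquet schemas must agree, so a file written with an integer `year` clashes with one written with a string `year`. A toy illustration of the failure mode, using throwaway file names:

library(arrow)

write_parquet(data.frame(year = 2012L), "t_2012.parquet")
write_parquet(data.frame(year = "2013"), "t_2013.parquet")

# Treating the two files as one dataset trips over int32 vs. string;
# writing year as character in every file avoids the mismatch
open_dataset(c("t_2012.parquet", "t_2013.parquet"), unify_schemas = TRUE)
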
filter(if ("COUNTY" %in% names(.)) COUNTY == '016' else INV_CO == '016') %>% - mutate(year = x) + mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file geoarrow::write_geoparquet(shapefile_data, remote_file_path) From f07d62f08d3189c7adfe0a5565d9a91f617e0977 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 18:08:20 +0000 Subject: [PATCH 37/74] Add NA handeling --- .../spatial/spatial-environment-traffic.R | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 4e7b042d3..984700209 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -10,6 +10,55 @@ AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic/" output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) +# Recoding of road data +road_codes <- c( + "762" = "Reinforced over PCC - Reinforcement unknown", + "765" = "Non-Reinforced over PCC - No reinforcement", + "767" = "Reinforced over PCC - No reinforcement", + "770" = "Non-Reinforced over PCC - Partial reinforcement", + "772" = "Reinforced over PCC - Partial reinforcement", + "775" = "Non-Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", + "777" = "Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", + "780" = "Non-Reinforced over PCC - Full reinforcement", + "782" = "Reinforced over PCC - Full reinforcement", + "790" = "Non-Reinforced over PCC - Continuous reinforcement", + "792" = "Reinforced over PCC - Continuous reinforcement", + "600" = "Over PCC - Reinforcement unknown", + "610" = "Over PCC - No reinforcement", + "615" = "Over PCC - No reinforcement but having short panels and dowels", + "620" = "Over PCC - Partial reinforcement", + "625" = "Over PCC - With No or Partial Reinforcement - But having Hinged Joints", + "630" = "Over PCC - Full reinforcement", + "640" = "Over PCC - Continuous reinforcement", + "650" = "Over Brick, Block, Steel, or similar material", + "700" = "Reinforcement unknown", + "710" = "No reinforcement", + "720" = "Partial reinforcement", + "725" = "With No or Partial reinforcement but having Hinged Joints", + "730" = "Full reinforcement", + "740" = "Continuous reinforcement", + "760" = "Non-Reinforced over PCC - Reinforcement unknown", + "400" = "Mixed Bituminous (low type bituminous)", + "410" = "Bituminous Penetration (low type bituminous)", + "500" = "Bituminous Surface Treated – Mixed bituminous", + "501" = "Over PCC - Rubblized - Reinforcement unknown", + "510" = "Over PCC - Rubblized - No reinforcement", + "520" = "Over PCC - Rubblized - Partial reinforcement", + "525" = "Over PCC - Rubblized - With No or Partial Reinforcement - But having Hinged Joints", + "530" = "Over PCC - Rubblized - Full reinforcement", + "540" = "Over PCC - Rubblized - Continuous reinforcement", + "550" = "Bituminous Concrete (other than Class I)", + "560" = "Bituminous Concrete Pavement (Full-Depth)", + "100" = "Without dust palliative treatment", + "110" = "With dust palliative (oiled)", + "200" = "Without dust palliative treatment", + "210" = "With dust palliative treatment", + "300" = "Bituminous Surface-Treated (low type bituminous)", + "010" = "Unimproved", + "020" = "Graded and Drained" +) + + # Get the 'Key' 
parquet_files <- get_bucket_df( bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder @@ -65,9 +114,14 @@ walk(parquet_files, \(file_key) { speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% + # Recode surface_type based on road codes + mutate(surface_type = road_codes[as.character(surface_type)]) %>% + # Select and remove unnecessary columns select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY"))) %>% + # Replace all 0 values with NA, excluding the geometry column + mutate(across(-geometry, ~replace(., . %in% c(0, "0000"), NA))) %>% geoarrow::write_geoparquet( file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) ) From 6ec7267101746366661cc141503fe43bb9b5d035 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 18:14:11 +0000 Subject: [PATCH 38/74] Reorder columns --- .../spatial/spatial-environment-traffic.R | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 984700209..10ae58dfc 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -12,17 +12,24 @@ output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) # Recoding of road data road_codes <- c( - "762" = "Reinforced over PCC - Reinforcement unknown", - "765" = "Non-Reinforced over PCC - No reinforcement", - "767" = "Reinforced over PCC - No reinforcement", - "770" = "Non-Reinforced over PCC - Partial reinforcement", - "772" = "Reinforced over PCC - Partial reinforcement", - "775" = "Non-Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", - "777" = "Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", - "780" = "Non-Reinforced over PCC - Full reinforcement", - "782" = "Reinforced over PCC - Full reinforcement", - "790" = "Non-Reinforced over PCC - Continuous reinforcement", - "792" = "Reinforced over PCC - Continuous reinforcement", + "010" = "Unimproved", + "020" = "Graded and Drained", + "100" = "Without dust palliative treatment", + "110" = "With dust palliative (oiled)", + "200" = "Without dust palliative treatment", + "210" = "With dust palliative treatment", + "300" = "Bituminous Surface-Treated (low type bituminous)", + "400" = "Mixed Bituminous (low type bituminous)", + "410" = "Bituminous Penetration (low type bituminous)", + "500" = "Bituminous Surface Treated – Mixed bituminous", + "501" = "Over PCC - Rubblized - Reinforcement unknown", + "510" = "Over PCC - Rubblized - No reinforcement", + "520" = "Over PCC - Rubblized - Partial reinforcement", + "525" = "Over PCC - Rubblized - With No or Partial Reinforcement - But having Hinged Joints", + "530" = "Over PCC - Rubblized - Full reinforcement", + "540" = "Over PCC - Rubblized - Continuous reinforcement", + "550" = "Bituminous Concrete (other than Class I)", + "560" = "Bituminous Concrete Pavement (Full-Depth)", "600" = "Over PCC - Reinforcement unknown", "610" = "Over PCC - No reinforcement", "615" = "Over PCC - No reinforcement but having short panels and dowels", @@ -38,24 +45,18 @@ road_codes <- c( "730" = "Full reinforcement", "740" = "Continuous reinforcement", "760" = 
"Non-Reinforced over PCC - Reinforcement unknown", - "400" = "Mixed Bituminous (low type bituminous)", - "410" = "Bituminous Penetration (low type bituminous)", - "500" = "Bituminous Surface Treated – Mixed bituminous", - "501" = "Over PCC - Rubblized - Reinforcement unknown", - "510" = "Over PCC - Rubblized - No reinforcement", - "520" = "Over PCC - Rubblized - Partial reinforcement", - "525" = "Over PCC - Rubblized - With No or Partial Reinforcement - But having Hinged Joints", - "530" = "Over PCC - Rubblized - Full reinforcement", - "540" = "Over PCC - Rubblized - Continuous reinforcement", - "550" = "Bituminous Concrete (other than Class I)", - "560" = "Bituminous Concrete Pavement (Full-Depth)", - "100" = "Without dust palliative treatment", - "110" = "With dust palliative (oiled)", - "200" = "Without dust palliative treatment", - "210" = "With dust palliative treatment", - "300" = "Bituminous Surface-Treated (low type bituminous)", - "010" = "Unimproved", - "020" = "Graded and Drained" + "762" = "Reinforced over PCC - Reinforcement unknown", + "765" = "Non-Reinforced over PCC - No reinforcement", + "767" = "Reinforced over PCC - No reinforcement", + "770" = "Non-Reinforced over PCC - Partial reinforcement", + "772" = "Reinforced over PCC - Partial reinforcement", + "775" = "Non-Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", + "777" = "Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", + "780" = "Non-Reinforced over PCC - Full reinforcement", + "782" = "Reinforced over PCC - Full reinforcement", + "790" = "Non-Reinforced over PCC - Continuous reinforcement", + "792" = "Reinforced over PCC - Continuous reinforcement", + "800" = "800 Brick, Block or Other" ) From b402b7709dbecd045fe286b265e4716f960b039f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 18:20:35 +0000 Subject: [PATCH 39/74] Add docs --- dbt/models/spatial/docs.md | 11 +++++++++++ dbt/models/spatial/schema.yml | 3 +++ 2 files changed, 14 insertions(+) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 096221bb8..c4fa435f2 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -493,6 +493,17 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} +# traffic + +{% docs table_traffic %} + +Illinois Department of Transportation data source from +[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). 
+ + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # transit_dict {% docs table_transit_dict %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 0982b8106..513900073 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,6 +174,9 @@ sources: - name: township description: '{{ doc("table_township") }}' + - name: traffic + description: '{{ doc("table_traffic") }}' + - name: transit_dict description: '{{ doc("table_transit_dict") }}' From 1dfd84b39cec378c580b08faff231870a68c4b95 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 18:21:10 +0000 Subject: [PATCH 40/74] better commenting --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 10ae58dfc..4780f7136 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -10,7 +10,7 @@ AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic/" output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) -# Recoding of road data +# Recoding of road type road_codes <- c( "010" = "Unimproved", "020" = "Graded and Drained", From 22741ab49d571d7a6f94b676ab2897fbfa7527f2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 19:31:44 +0000 Subject: [PATCH 41/74] Remove duplicated code --- .../spatial/spatial-environment-traffic.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 4780f7136..4154ddd26 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -80,14 +80,6 @@ walk(parquet_files, \(file_key) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # Convert the S3 object into raw data and read using geoarrow - # shapefile_data <- geoarrow::read_geoparquet_sf( - # file.path(AWS_S3_RAW_BUCKET, file_key) - # ) %>% - # st_transform(4326) %>% - # mutate(geometry_3435 = st_transform(geometry, 3435)) - - # We do this because some columns are not present in # older versions of the data required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", From c0450fffbb6dbac26e4a1ea706d5e4478655dd27 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 19:36:11 +0000 Subject: [PATCH 42/74] Rename SURF_YR --- .../spatial/spatial-environment-traffic.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 4154ddd26..64d0361cd 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -82,7 +82,7 @@ walk(parquet_files, \(file_key) { # We do this because some columns are not present in # older versions of the data - required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", + 
required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY", "geometry_3435", "year") @@ -96,7 +96,7 @@ walk(parquet_files, \(file_key) { lanes = if ("LNS" %in% colnames(.)) LNS else NA, surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, - surface_year = if ("SRF_YR" %in% colnames(.)) SRF_YR else NA, + surface_year = if ("SURF_YR" %in% colnames(.)) SURF_YR else NA, annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, @@ -110,7 +110,7 @@ walk(parquet_files, \(file_key) { # Recode surface_type based on road codes mutate(surface_type = road_codes[as.character(surface_type)]) %>% # Select and remove unnecessary columns - select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SRF_YR", "AADT", "CRS_WITH", + select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", "SP_LIM", "INVENTORY"))) %>% # Replace all 0 values with NA, excluding the geometry column From cfafd128dbbfaee76dc9851f1d17d53aea11c09a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 19:46:03 +0000 Subject: [PATCH 43/74] Better renaming --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 64d0361cd..8cbcd5065 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -56,7 +56,7 @@ road_codes <- c( "782" = "Reinforced over PCC - Full reinforcement", "790" = "Non-Reinforced over PCC - Continuous reinforcement", "792" = "Reinforced over PCC - Continuous reinforcement", - "800" = "800 Brick, Block or Other" + "800" = "Brick, Block or Other" ) From 8af8f076a090bdd6cbbb5a6bd10a179820ccf419 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 20:48:26 +0000 Subject: [PATCH 44/74] rename traffic, fix surface_year --- .../spatial/spatial-environment-traffic.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 8cbcd5065..322e106ed 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -97,7 +97,7 @@ walk(parquet_files, \(file_key) { surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, surface_year = if ("SURF_YR" %in% colnames(.)) SURF_YR else NA, - annual_traffic = if ("AADT" %in% colnames(.)) AADT else NA, + daily_traffic = if ("AADT" %in% colnames(.)) AADT else NA, condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, @@ -115,6 +115,7 @@ walk(parquet_files, \(file_key) { 
"SP_LIM", "INVENTORY"))) %>% # Replace all 0 values with NA, excluding the geometry column mutate(across(-geometry, ~replace(., . %in% c(0, "0000"), NA))) %>% + mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% geoarrow::write_geoparquet( file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) ) From 75b1bbb2fd4202e909293b26a6c8543cbe85b1ac Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 15:50:35 +0000 Subject: [PATCH 45/74] Add mean values --- .../spatial/spatial-environment-traffic.R | 127 +++++++++++++++--- 1 file changed, 105 insertions(+), 22 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 322e106ed..246186141 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -59,7 +59,6 @@ road_codes <- c( "800" = "Brick, Block or Other" ) - # Get the 'Key' parquet_files <- get_bucket_df( bucket = AWS_S3_RAW_BUCKET, prefix = s3_folder @@ -80,16 +79,14 @@ walk(parquet_files, \(file_key) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # We do this because some columns are not present in - # older versions of the data - required_columns <- c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", - "CRS_WITH", "CRS_OPP", "CRS_YR", - "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435", "year") + required_columns <- c( + "FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", + "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", + "SP_LIM", "INVENTORY", "geometry_3435", "year" + ) - # Select only the non-geometry columns that exist in the dataset existing_columns <- intersect(required_columns, colnames(shapefile_data)) - shapefile_data %>% + shapefile_data <- shapefile_data %>% select(all_of(existing_columns)) %>% mutate( road_type = if ("FCNAME" %in% colnames(.)) FCNAME else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, @@ -107,21 +104,107 @@ walk(parquet_files, \(file_key) { speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% - # Recode surface_type based on road codes - mutate(surface_type = road_codes[as.character(surface_type)]) %>% - # Select and remove unnecessary columns - select(-one_of(c("FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", "CRS_WITH", - "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY"))) %>% - # Replace all 0 values with NA, excluding the geometry column + mutate(surface_type = road_codes[as.character(surface_type)], + speed_limit = as.numeric(speed_limit)) %>% + select(-one_of(required_columns)) %>% mutate(across(-geometry, ~replace(., . 
%in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% - geoarrow::write_geoparquet( - file.path(AWS_S3_WAREHOUSE_BUCKET, file_key) - ) + group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% + summarize(geometry = st_union(geometry)) %>% + ungroup() - print(paste(file_key, "cleaned and uploaded.")) + # Function to create the intersection matrix and compute average traffic + calculate_traffic_averages <- function(shapefile_data) { + # Create an intersection matrix for averages + intersection_matrix <- st_intersects(shapefile_data) + + # Create intersecting pairs + intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { + data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) + })) %>% + filter(polygon_1 != polygon_2) # Remove self-matches + + # Add polygon ID and relevant columns to shapefile data + shapefile_with_ids <- shapefile_data %>% + mutate(polygon_id = row_number()) %>% + select(polygon_id, road_name, daily_traffic, speed_limit, lanes) + + # Join intersecting pairs with matching street names + averages <- intersecting_pairs %>% + left_join( + shapefile_with_ids %>% + rename( + road_name_1 = road_name, + daily_traffic_1 = daily_traffic, + speed_limit_1 = speed_limit, + lanes_1 = lanes + ), + by = c("polygon_1" = "polygon_id") + ) %>% + left_join( + shapefile_with_ids %>% + rename( + road_name_2 = road_name, + daily_traffic_2 = daily_traffic, + speed_limit_2 = speed_limit, + lanes_2 = lanes + ), + by = c("polygon_2" = "polygon_id") + ) %>% + filter(road_name_1 == road_name_2) %>% # Keep only matching road names + group_by(polygon_1) %>% + summarize( + average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), + average_speed_limit = mean(speed_limit_2, na.rm = TRUE), + average_lanes = mean(lanes_2, na.rm = TRUE), + .groups = 'drop' + ) + + # Update traffic, speed limit, and lanes with averages if needed + shapefile_data <- shapefile_data %>% + mutate(polygon_id = row_number()) %>% + left_join(averages, by = c("polygon_id" = "polygon_1")) %>% + mutate( + daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), + speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), + num_lanes = if_else(is.na(lanes), average_lanes, lanes) + ) + + return(shapefile_data) + } + + + # Loop until no changes are made + shapefile_data_final <- shapefile_data + calculate_traffic_with_loop <- function(shapefile_data) { + # Initialize final shapefile data + shapefile_data_final <- shapefile_data + repeat { + # Save current values to compare changes + previous_traffic <- shapefile_data_final$daily_traffic + previous_speed <- shapefile_data_final$speed_limit + previous_lanes <- shapefile_data_final$num_lanes + + # Recalculate averages and update shapefile data + shapefile_data_final <- calculate_traffic_averages(shapefile_data_final) + + # Check if all values remain unchanged + if (all(previous_traffic == shapefile_data_final$daily_traffic, na.rm = TRUE) && + all(previous_speed == shapefile_data_final$speed_limit, na.rm = TRUE) && + all(previous_lanes == shapefile_data_final$num_lanes, na.rm = TRUE)) { + break # Exit loop if no changes were made + } + } + + return(shapefile_data_final) + } + + + output_path <- file.path(output_bucket, basename(file_key)) + geoarrow::write_geoparquet(shapefile_data_final, output_path) + + print(paste(file_key, "cleaned and uploaded.")) } -} -) +}) + From a86203cc7e20415ed7ba1367fa033f5671ef445d Mon Sep 17 00:00:00 2001 
From: Damonamajor Date: Thu, 17 Oct 2024 15:53:11 +0000 Subject: [PATCH 46/74] Update commenting --- .../spatial/spatial-environment-traffic.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 246186141..baa8d17c6 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -124,15 +124,17 @@ walk(parquet_files, \(file_key) { })) %>% filter(polygon_1 != polygon_2) # Remove self-matches - # Add polygon ID and relevant columns to shapefile data + # Add polygon ID and relevant columns to shapefile data. This allows us to later merge + # data with the intersection pairs above. shapefile_with_ids <- shapefile_data %>% mutate(polygon_id = row_number()) %>% select(polygon_id, road_name, daily_traffic, speed_limit, lanes) - # Join intersecting pairs with matching street names + # Join intersecting pairs with matching street IDs averages <- intersecting_pairs %>% left_join( shapefile_with_ids %>% + # Create IDs for the "home" street rename( road_name_1 = road_name, daily_traffic_1 = daily_traffic, @@ -143,6 +145,7 @@ walk(parquet_files, \(file_key) { ) %>% left_join( shapefile_with_ids %>% + # Create IDs for the neighboring streets rename( road_name_2 = road_name, daily_traffic_2 = daily_traffic, @@ -153,6 +156,7 @@ walk(parquet_files, \(file_key) { ) %>% filter(road_name_1 == road_name_2) %>% # Keep only matching road names group_by(polygon_1) %>% + # Create averages summarize( average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), average_speed_limit = mean(speed_limit_2, na.rm = TRUE), @@ -160,7 +164,7 @@ walk(parquet_files, \(file_key) { .groups = 'drop' ) - # Update traffic, speed limit, and lanes with averages if needed + # Update traffic, speed limit, and lanes with averages shapefile_data <- shapefile_data %>% mutate(polygon_id = row_number()) %>% left_join(averages, by = c("polygon_id" = "polygon_1")) %>% @@ -200,7 +204,6 @@ walk(parquet_files, \(file_key) { return(shapefile_data_final) } - output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data_final, output_path) From 184b0e2fd11ecfade00ce9a7c61de09e0c683209 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 16:01:27 +0000 Subject: [PATCH 47/74] Remove extra line --- .../spatial/spatial-environment-traffic.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index baa8d17c6..9a6d9fd42 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -5,7 +5,7 @@ library(sf) library(geoarrow) # Define the S3 bucket and folder path -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +AWS_S3_RAW_BUCKET <- "s3://ccao-data-raw-us-east-1" AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic/" output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) @@ -205,7 +205,7 @@ walk(parquet_files, \(file_key) { } output_path <- file.path(output_bucket, basename(file_key)) - geoarrow::write_geoparquet(shapefile_data_final, 
output_path) + # geoarrow::write_geoparquet(shapefile_data_final, output_path) print(paste(file_key, "cleaned and uploaded.")) } From b074eeec7f3ef0740621289c983879598f0b3d7a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 16:09:01 +0000 Subject: [PATCH 48/74] Run function --- .../spatial/spatial-environment-traffic.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 9a6d9fd42..4a49f0383 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -204,6 +204,8 @@ walk(parquet_files, \(file_key) { return(shapefile_data_final) } + calculate_traffic_with_loop(shapefile_data) + output_path <- file.path(output_bucket, basename(file_key)) # geoarrow::write_geoparquet(shapefile_data_final, output_path) From 4eb17feb1e037a90f364763b018ec9b835a080d2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 16:09:58 +0000 Subject: [PATCH 49/74] Revert num_lanes --- .../spatial/spatial-environment-traffic.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 4a49f0383..bc5cc4fc4 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -171,7 +171,7 @@ walk(parquet_files, \(file_key) { mutate( daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), - num_lanes = if_else(is.na(lanes), average_lanes, lanes) + lanes = if_else(is.na(lanes), average_lanes, lanes) ) return(shapefile_data) @@ -188,7 +188,7 @@ walk(parquet_files, \(file_key) { # Save current values to compare changes previous_traffic <- shapefile_data_final$daily_traffic previous_speed <- shapefile_data_final$speed_limit - previous_lanes <- shapefile_data_final$num_lanes + previous_lanes <- shapefile_data_final$lanes # Recalculate averages and update shapefile data shapefile_data_final <- calculate_traffic_averages(shapefile_data_final) From 6840eb35b7b76bfd847bc4f129c889a72acdc6a7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 17:19:16 +0000 Subject: [PATCH 50/74] Get loop working --- .../spatial/spatial-environment-traffic.R | 176 +++++++++--------- 1 file changed, 93 insertions(+), 83 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index bc5cc4fc4..087b0d496 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -5,7 +5,7 @@ library(sf) library(geoarrow) # Define the S3 bucket and folder path -AWS_S3_RAW_BUCKET <- "s3://ccao-data-raw-us-east-1" +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic/" output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) @@ -105,109 +105,119 @@ walk(parquet_files, \(file_key) { inventory_id = 
if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% mutate(surface_type = road_codes[as.character(surface_type)], - speed_limit = as.numeric(speed_limit)) %>% - select(-one_of(required_columns)) %>% + speed_limit = as.numeric(speed_limit), + road_name = str_to_lower(road_name), # Convert to lowercase + road_name = gsub("[[:punct:]]", "", road_name)) %>% # Remove punctuation like . / etc. + select(-one_of(required_columns)) %>% mutate(across(-geometry, ~replace(., . %in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% summarize(geometry = st_union(geometry)) %>% ungroup() - # Function to create the intersection matrix and compute average traffic - calculate_traffic_averages <- function(shapefile_data) { - # Create an intersection matrix for averages - intersection_matrix <- st_intersects(shapefile_data) - - # Create intersecting pairs - intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { - data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) - })) %>% - filter(polygon_1 != polygon_2) # Remove self-matches - - # Add polygon ID and relevant columns to shapefile data. This allows us to later merge - # data with the intersection pairs above. - shapefile_with_ids <- shapefile_data %>% - mutate(polygon_id = row_number()) %>% - select(polygon_id, road_name, daily_traffic, speed_limit, lanes) - - # Join intersecting pairs with matching street IDs - averages <- intersecting_pairs %>% - left_join( - shapefile_with_ids %>% - # Create IDs for the "home" street - rename( - road_name_1 = road_name, - daily_traffic_1 = daily_traffic, - speed_limit_1 = speed_limit, - lanes_1 = lanes - ), - by = c("polygon_1" = "polygon_id") - ) %>% - left_join( - shapefile_with_ids %>% - # Create IDs for the neighboring streets - rename( - road_name_2 = road_name, - daily_traffic_2 = daily_traffic, - speed_limit_2 = speed_limit, - lanes_2 = lanes - ), - by = c("polygon_2" = "polygon_id") - ) %>% - filter(road_name_1 == road_name_2) %>% # Keep only matching road names - group_by(polygon_1) %>% - # Create averages - summarize( - average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), - average_speed_limit = mean(speed_limit_2, na.rm = TRUE), - average_lanes = mean(lanes_2, na.rm = TRUE), - .groups = 'drop' - ) - - # Update traffic, speed limit, and lanes with averages - shapefile_data <- shapefile_data %>% - mutate(polygon_id = row_number()) %>% - left_join(averages, by = c("polygon_id" = "polygon_1")) %>% - mutate( - daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), - speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), - lanes = if_else(is.na(lanes), average_lanes, lanes) - ) - - return(shapefile_data) - } - + # Function to compute traffic averages with a loop until no changes are made + calculate_traffic_data <- function(shapefile_data) { + # Helper function to calculate averages based on intersections + calculate_traffic_averages <- function(data) { + # Create an intersection matrix + intersection_matrix <- st_intersects(data) + + # Create intersecting pairs + intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { + data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) + })) %>% + filter(polygon_1 != polygon_2) # Remove self-matches + + # Add polygon IDs and relevant columns for merging + data_with_ids <- data %>% + mutate(polygon_id = 
row_number()) %>% + select(polygon_id, road_name, daily_traffic, speed_limit, lanes) + + # Join intersecting pairs with their respective polygon data + averages <- intersecting_pairs %>% + left_join( + data_with_ids %>% + rename( + road_name_1 = road_name, + daily_traffic_1 = daily_traffic, + speed_limit_1 = speed_limit, + lanes_1 = lanes + ), + by = c("polygon_1" = "polygon_id") + ) %>% + left_join( + data_with_ids %>% + rename( + road_name_2 = road_name, + daily_traffic_2 = daily_traffic, + speed_limit_2 = speed_limit, + lanes_2 = lanes + ), + by = c("polygon_2" = "polygon_id") + ) %>% + filter(road_name_1 == road_name_2) %>% # Keep only matching road names + group_by(polygon_1) %>% + summarize( + average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), + average_speed_limit = mean(speed_limit_2, na.rm = TRUE), + average_lanes = mean(lanes_2, na.rm = TRUE), + .groups = 'drop' + ) + + # Update the original data with averages where needed + updated_data <- data %>% + mutate(polygon_id = row_number()) %>% + left_join(averages, by = c("polygon_id" = "polygon_1")) %>% + mutate( + daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), + speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), + lanes = if_else(is.na(lanes), average_lanes, lanes) + ) + + return(updated_data) + } - # Loop until no changes are made - shapefile_data_final <- shapefile_data - calculate_traffic_with_loop <- function(shapefile_data) { - # Initialize final shapefile data + # Initialize loop shapefile_data_final <- shapefile_data - repeat { - # Save current values to compare changes + # Save current NA counts to compare changes + previous_na_traffic <- sum(is.na(shapefile_data_final$daily_traffic)) + previous_na_speed <- sum(is.na(shapefile_data_final$speed_limit)) + previous_na_lanes <- sum(is.na(shapefile_data_final$lanes)) + + # Save the current state to track changes previous_traffic <- shapefile_data_final$daily_traffic previous_speed <- shapefile_data_final$speed_limit previous_lanes <- shapefile_data_final$lanes - # Recalculate averages and update shapefile data - shapefile_data_final <- calculate_traffic_averages(shapefile_data_final) - - # Check if all values remain unchanged - if (all(previous_traffic == shapefile_data_final$daily_traffic, na.rm = TRUE) && - all(previous_speed == shapefile_data_final$speed_limit, na.rm = TRUE) && - all(previous_lanes == shapefile_data_final$num_lanes, na.rm = TRUE)) { - break # Exit loop if no changes were made + # Recalculate averages and update the data + shapefile_data_final <- calculate_traffic_averages(shapefile_data_final) %>% + select(-average_intersect_value) + + # Calculate current NA counts after updating + current_na_traffic <- sum(is.na(shapefile_data_final$daily_traffic)) + current_na_speed <- sum(is.na(shapefile_data_final$speed_limit)) + current_na_lanes <- sum(is.na(shapefile_data_final$lanes)) + + # Exit loop if no changes in NA counts are detected + if (current_na_traffic >= previous_na_traffic && + current_na_speed >= previous_na_speed && + current_na_lanes >= previous_na_lanes) { + cat("No reduction in NA counts detected. 
Exiting loop.\n") + break } } + return(shapefile_data_final) } - calculate_traffic_with_loop(shapefile_data) + # Run the function + calculate_traffic_data(shapefile_data) + output_path <- file.path(output_bucket, basename(file_key)) - # geoarrow::write_geoparquet(shapefile_data_final, output_path) + geoarrow::write_geoparquet(shapefile_data_final, output_path) print(paste(file_key, "cleaned and uploaded.")) } From fb86cc6944cec35784a2f96a1f6fc8691dbef266 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 21:00:46 +0000 Subject: [PATCH 51/74] Wrapup --- .../spatial/spatial-environment-traffic.R | 103 +++++++++--------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 087b0d496..ea090ac40 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -76,8 +76,7 @@ walk(parquet_files, \(file_key) { shapefile_data <- geoarrow::read_geoparquet_sf( file.path(AWS_S3_RAW_BUCKET, file_key) ) %>% - st_transform(4326) %>% - mutate(geometry_3435 = st_transform(geometry, 3435)) + st_transform(4326) required_columns <- c( "FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", @@ -104,19 +103,33 @@ walk(parquet_files, \(file_key) { speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% - mutate(surface_type = road_codes[as.character(surface_type)], - speed_limit = as.numeric(speed_limit), - road_name = str_to_lower(road_name), # Convert to lowercase - road_name = gsub("[[:punct:]]", "", road_name)) %>% # Remove punctuation like . / etc. - select(-one_of(required_columns)) %>% - mutate(across(-geometry, ~replace(., . %in% c(0, "0000"), NA))) %>% - mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% + mutate( + surface_type = road_codes[as.character(surface_type)], + speed_limit = as.numeric(speed_limit), + road_name = str_to_lower(road_name), # Convert to lowercase + road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation like . / etc. + + # Replace full street name words with abbreviations + road_name = gsub("\\bavenue\\b", "ave", road_name), + road_name = gsub("\\bav\\b", "ave", road_name), + road_name = gsub("\\bstreet\\b", "st", road_name), + road_name = gsub("\\bcourt\\b", "ct", road_name), + road_name = gsub("\\broad\\b", "rd", road_name), + road_name = gsub("\\bdrive\\b", "dr", road_name), + road_name = gsub("\\bplace\\b", "pl", road_name), + road_name = gsub("\\blane\\b", "ln", road_name), + road_name = gsub("\\btrail\\b", "trl", road_name), + road_name = gsub("\\bparkway\\b", "pkwy", road_name), + road_name = gsub("\\bhighway\\b", "hwy", road_name), + road_name = gsub("\\bexpressway\\b", "expy", road_name) + ) %>% + select(-one_of(required_columns)) %>% # Drop unnecessary columns + mutate(across(-geometry, ~replace(., . 
%in% c(0, "0000"), NA))) %>% # Replace 0 and '0000' with NA + mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Replace 9999 with NA group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% - summarize(geometry = st_union(geometry)) %>% - ungroup() + summarize(geometry = st_union(geometry), .groups = "drop") %>% + mutate(geometry_3435 = st_transform(geometry, 3435)) - # Function to compute traffic averages with a loop until no changes are made - calculate_traffic_data <- function(shapefile_data) { # Helper function to calculate averages based on intersections calculate_traffic_averages <- function(data) { # Create an intersection matrix @@ -165,57 +178,47 @@ walk(parquet_files, \(file_key) { ) # Update the original data with averages where needed - updated_data <- data %>% + shapefile_data_final <- data %>% mutate(polygon_id = row_number()) %>% left_join(averages, by = c("polygon_id" = "polygon_1")) %>% mutate( daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), lanes = if_else(is.na(lanes), average_lanes, lanes) - ) + ) %>% + select(-c(average_daily_traffic, average_speed_limit, average_lanes, polygon_id)) - return(updated_data) + return(shapefile_data_final) } - # Initialize loop - shapefile_data_final <- shapefile_data + # Run the function + # Initialize with placeholder to ensure the first iteration runs + previous_na_counts <- list( + daily_traffic_na = -1, # Placeholder different from any real NA count + speed_limit_na = -1 # Same here + ) + + # Loop until no changes in NA counts repeat { - # Save current NA counts to compare changes - previous_na_traffic <- sum(is.na(shapefile_data_final$daily_traffic)) - previous_na_speed <- sum(is.na(shapefile_data_final$speed_limit)) - previous_na_lanes <- sum(is.na(shapefile_data_final$lanes)) - - # Save the current state to track changes - previous_traffic <- shapefile_data_final$daily_traffic - previous_speed <- shapefile_data_final$speed_limit - previous_lanes <- shapefile_data_final$lanes - - # Recalculate averages and update the data - shapefile_data_final <- calculate_traffic_averages(shapefile_data_final) %>% - select(-average_intersect_value) - - # Calculate current NA counts after updating - current_na_traffic <- sum(is.na(shapefile_data_final$daily_traffic)) - current_na_speed <- sum(is.na(shapefile_data_final$speed_limit)) - current_na_lanes <- sum(is.na(shapefile_data_final$lanes)) - - # Exit loop if no changes in NA counts are detected - if (current_na_traffic >= previous_na_traffic && - current_na_speed >= previous_na_speed && - current_na_lanes >= previous_na_lanes) { - cat("No reduction in NA counts detected. 
Exiting loop.\n") + # Calculate current NA counts + current_na_counts <- list( + daily_traffic_na = sum(is.na(shapefile_data$daily_traffic)), + speed_limit_na = sum(is.na(shapefile_data$speed_limit)) + ) + + # Check if NA counts have changed + if (!identical(current_na_counts, previous_na_counts)) { + print("NA values have changed, recalculating traffic averages.") + shapefile_data <- calculate_traffic_averages(shapefile_data) + + # Update previous NA counts for the next iteration + previous_na_counts <- current_na_counts + } else { + print("No further NA changes detected, stopping recalculation.") break } } - - return(shapefile_data_final) - } - - # Run the function - calculate_traffic_data(shapefile_data) - - output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data_final, output_path) From 540efdc582f03ea808e0de6920e6726cdab1cf9a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 21:10:46 +0000 Subject: [PATCH 52/74] Wrapup --- .../spatial/spatial-environment-traffic.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index ea090ac40..5f57f7179 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -109,6 +109,9 @@ walk(parquet_files, \(file_key) { road_name = str_to_lower(road_name), # Convert to lowercase road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation like . / etc. + # Remove standalone directional indicators (N, S, E, W) + road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), + # Replace full street name words with abbreviations road_name = gsub("\\bavenue\\b", "ave", road_name), road_name = gsub("\\bav\\b", "ave", road_name), @@ -121,7 +124,10 @@ walk(parquet_files, \(file_key) { road_name = gsub("\\btrail\\b", "trl", road_name), road_name = gsub("\\bparkway\\b", "pkwy", road_name), road_name = gsub("\\bhighway\\b", "hwy", road_name), - road_name = gsub("\\bexpressway\\b", "expy", road_name) + road_name = gsub("\\bexpressway\\b", "expy", road_name), + + # Remove extra spaces that may result from replacements + road_name = str_trim(road_name) ) %>% select(-one_of(required_columns)) %>% # Drop unnecessary columns mutate(across(-geometry, ~replace(., . 
%in% c(0, "0000"), NA))) %>% # Replace 0 and '0000' with NA From 81cfdf66779f16e4918be1ccc9eda5bde0be58e2 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 17 Oct 2024 21:52:04 +0000 Subject: [PATCH 53/74] Linting --- .../spatial/spatial-environment-traffic.R | 40 ++-- .../spatial/spatial-environment-traffic.R | 177 +++++++++--------- 2 files changed, 111 insertions(+), 106 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 0891a53ba..6f72d9cd8 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -8,8 +8,10 @@ library(arrow) # Define S3 bucket and paths AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -output_bucket <- file.path(AWS_S3_RAW_BUCKET, - "spatial", "environment", "traffic") +output_bucket <- file.path( + AWS_S3_RAW_BUCKET, + "spatial", "environment", "traffic" +) current_year <- strftime(Sys.Date(), "%Y") # Get list of available files @@ -26,14 +28,14 @@ years <- map(2012:year(Sys.Date()), \(x) { # Function to process each year and upload shapefiles for # that specific year to S3 process_shapefiles_for_year <- map(years, \(x) { - remote_file_path <- file.path(output_bucket, paste0(x, ".parquet")) # Skip everything if file already exists if (!object_exists(remote_file_path)) { # Define the URL for the shapefile ZIP file, dynamically for each year url <- paste0( - "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip") + "https://apps1.dot.illinois.gov/gist2/gisdata/all", x, ".zip" + ) # Create a temporary file to store the downloaded ZIP temp_zip <- tempfile(fileext = ".zip") @@ -46,33 +48,39 @@ process_shapefiles_for_year <- map(years, \(x) { # Unzip the file into a temporary directory unzip(temp_zip, exdir = temp_dir) - message(paste("Shapefile for year", x, - "unzipped into temporary directory.")) + message(paste( + "Shapefile for year", x, + "unzipped into temporary directory." + )) # List files in the unzipped directory and look for the .shp files unzipped_files <- list.files(temp_dir, recursive = TRUE, full.names = TRUE) - shp_file_for_year <- unzipped_files[grepl(paste0("HWY", - x), - unzipped_files, - ignore.case = TRUE) - & grepl("\\.shp$", unzipped_files)] + shp_file_for_year <- unzipped_files[grepl( + paste0( + "HWY", + x + ), + unzipped_files, + ignore.case = TRUE + ) & + grepl("\\.shp$", unzipped_files)] # Process only the shapefile that matches the current year if (length(shp_file_for_year) == 1) { # Read the shapefile into the environment using sf::st_read shapefile_data <- sf::st_read(shp_file_for_year) %>% # Add filter for Cook County. The name changes in different years. 
- filter(if ("COUNTY" %in% names(.)) - COUNTY == '016' else INV_CO == '016') %>% + filter(if ("COUNTY" %in% names(.)) { + COUNTY == "016" + } else { + INV_CO == "016" + }) %>% mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file geoarrow::write_geoparquet(shapefile_data, remote_file_path) - } else { message(paste("No shapefile found for year", x, ".")) } - } - }) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 5f57f7179..b382a8a54 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -67,9 +67,7 @@ parquet_files <- get_bucket_df( # Loop through each parquet file and process it walk(parquet_files, \(file_key) { - if (!aws.s3::object_exists(file.path(AWS_S3_WAREHOUSE_BUCKET, file_key))) { - print(paste("Cleaning", file_key)) # Convert the S3 object into raw data and read using geoarrow @@ -106,7 +104,7 @@ walk(parquet_files, \(file_key) { mutate( surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), - road_name = str_to_lower(road_name), # Convert to lowercase + road_name = str_to_lower(road_name), # Convert to lowercase road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation like . / etc. # Remove standalone directional indicators (N, S, E, W) @@ -129,101 +127,101 @@ walk(parquet_files, \(file_key) { # Remove extra spaces that may result from replacements road_name = str_trim(road_name) ) %>% - select(-one_of(required_columns)) %>% # Drop unnecessary columns - mutate(across(-geometry, ~replace(., . %in% c(0, "0000"), NA))) %>% # Replace 0 and '0000' with NA - mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Replace 9999 with NA + select(-one_of(required_columns)) %>% # Drop unnecessary columns + mutate(across(-geometry, ~ replace(., . 
%in% c(0, "0000"), NA))) %>% # Replace 0 and '0000' with NA + mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Replace 9999 with NA group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% summarize(geometry = st_union(geometry), .groups = "drop") %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - # Helper function to calculate averages based on intersections - calculate_traffic_averages <- function(data) { - # Create an intersection matrix - intersection_matrix <- st_intersects(data) - - # Create intersecting pairs - intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { - data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) - })) %>% - filter(polygon_1 != polygon_2) # Remove self-matches - - # Add polygon IDs and relevant columns for merging - data_with_ids <- data %>% - mutate(polygon_id = row_number()) %>% - select(polygon_id, road_name, daily_traffic, speed_limit, lanes) - - # Join intersecting pairs with their respective polygon data - averages <- intersecting_pairs %>% - left_join( - data_with_ids %>% - rename( - road_name_1 = road_name, - daily_traffic_1 = daily_traffic, - speed_limit_1 = speed_limit, - lanes_1 = lanes - ), - by = c("polygon_1" = "polygon_id") - ) %>% - left_join( - data_with_ids %>% - rename( - road_name_2 = road_name, - daily_traffic_2 = daily_traffic, - speed_limit_2 = speed_limit, - lanes_2 = lanes - ), - by = c("polygon_2" = "polygon_id") - ) %>% - filter(road_name_1 == road_name_2) %>% # Keep only matching road names - group_by(polygon_1) %>% - summarize( - average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), - average_speed_limit = mean(speed_limit_2, na.rm = TRUE), - average_lanes = mean(lanes_2, na.rm = TRUE), - .groups = 'drop' - ) - - # Update the original data with averages where needed - shapefile_data_final <- data %>% - mutate(polygon_id = row_number()) %>% - left_join(averages, by = c("polygon_id" = "polygon_1")) %>% - mutate( - daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), - speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), - lanes = if_else(is.na(lanes), average_lanes, lanes) - ) %>% - select(-c(average_daily_traffic, average_speed_limit, average_lanes, polygon_id)) - - return(shapefile_data_final) - } + # Helper function to calculate averages based on intersections + calculate_traffic_averages <- function(data) { + # Create an intersection matrix + intersection_matrix <- st_intersects(data) + + # Create intersecting pairs + intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { + data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) + })) %>% + filter(polygon_1 != polygon_2) # Remove self-matches + + # Add polygon IDs and relevant columns for merging + data_with_ids <- data %>% + mutate(polygon_id = row_number()) %>% + select(polygon_id, road_name, daily_traffic, speed_limit, lanes) + + # Join intersecting pairs with their respective polygon data + averages <- intersecting_pairs %>% + left_join( + data_with_ids %>% + rename( + road_name_1 = road_name, + daily_traffic_1 = daily_traffic, + speed_limit_1 = speed_limit, + lanes_1 = lanes + ), + by = c("polygon_1" = "polygon_id") + ) %>% + left_join( + data_with_ids %>% + rename( + road_name_2 = road_name, + daily_traffic_2 = daily_traffic, + speed_limit_2 = speed_limit, + lanes_2 = lanes + ), + by = c("polygon_2" = "polygon_id") + ) %>% + filter(road_name_1 == road_name_2) %>% # Keep only 
matching road names + group_by(polygon_1) %>% + summarize( + average_daily_traffic = mean(daily_traffic_2, na.rm = TRUE), + average_speed_limit = mean(speed_limit_2, na.rm = TRUE), + average_lanes = mean(lanes_2, na.rm = TRUE), + .groups = "drop" + ) - # Run the function - # Initialize with placeholder to ensure the first iteration runs - previous_na_counts <- list( - daily_traffic_na = -1, # Placeholder different from any real NA count - speed_limit_na = -1 # Same here + # Update the original data with averages where needed + shapefile_data_final <- data %>% + mutate(polygon_id = row_number()) %>% + left_join(averages, by = c("polygon_id" = "polygon_1")) %>% + mutate( + daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), + speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), + lanes = if_else(is.na(lanes), average_lanes, lanes) + ) %>% + select(-c(average_daily_traffic, average_speed_limit, average_lanes, polygon_id)) + + return(shapefile_data_final) + } + + # Run the function + # Initialize with placeholder to ensure the first iteration runs + previous_na_counts <- list( + daily_traffic_na = -1, # Placeholder different from any real NA count + speed_limit_na = -1 # Same here + ) + + # Loop until no changes in NA counts + repeat { + # Calculate current NA counts + current_na_counts <- list( + daily_traffic_na = sum(is.na(shapefile_data$daily_traffic)), + speed_limit_na = sum(is.na(shapefile_data$speed_limit)) ) - # Loop until no changes in NA counts - repeat { - # Calculate current NA counts - current_na_counts <- list( - daily_traffic_na = sum(is.na(shapefile_data$daily_traffic)), - speed_limit_na = sum(is.na(shapefile_data$speed_limit)) - ) + # Check if NA counts have changed + if (!identical(current_na_counts, previous_na_counts)) { + print("NA values have changed, recalculating traffic averages.") + shapefile_data <- calculate_traffic_averages(shapefile_data) - # Check if NA counts have changed - if (!identical(current_na_counts, previous_na_counts)) { - print("NA values have changed, recalculating traffic averages.") - shapefile_data <- calculate_traffic_averages(shapefile_data) - - # Update previous NA counts for the next iteration - previous_na_counts <- current_na_counts - } else { - print("No further NA changes detected, stopping recalculation.") - break - } + # Update previous NA counts for the next iteration + previous_na_counts <- current_na_counts + } else { + print("No further NA changes detected, stopping recalculation.") + break } + } output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data_final, output_path) @@ -231,4 +229,3 @@ walk(parquet_files, \(file_key) { print(paste(file_key, "cleaned and uploaded.")) } }) - From 7f1e4c1f05da138479d812f964a3515b70bbed25 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 21:59:18 +0000 Subject: [PATCH 54/74] linting --- .../spatial/spatial-environment-traffic.R | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index b382a8a54..17bd258ad 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -25,16 +25,25 @@ road_codes <- c( "501" = "Over PCC - Rubblized - Reinforcement unknown", "510" = "Over PCC 
- Rubblized - No reinforcement", "520" = "Over PCC - Rubblized - Partial reinforcement", - "525" = "Over PCC - Rubblized - With No or Partial Reinforcement - But having Hinged Joints", + "525" = paste( + "Over PCC - Rubblized - With No or Partial Reinforcement -", + "But having Hinged Joints" + ), "530" = "Over PCC - Rubblized - Full reinforcement", "540" = "Over PCC - Rubblized - Continuous reinforcement", "550" = "Bituminous Concrete (other than Class I)", "560" = "Bituminous Concrete Pavement (Full-Depth)", "600" = "Over PCC - Reinforcement unknown", "610" = "Over PCC - No reinforcement", - "615" = "Over PCC - No reinforcement but having short panels and dowels", + "615" = paste( + "Over PCC - No reinforcement but having short panels", + "and dowels" + ), "620" = "Over PCC - Partial reinforcement", - "625" = "Over PCC - With No or Partial Reinforcement - But having Hinged Joints", + "625" = paste( + "Over PCC - With No or Partial Reinforcement -", + "But having Hinged Joints" + ), "630" = "Over PCC - Full reinforcement", "640" = "Over PCC - Continuous reinforcement", "650" = "Over Brick, Block, Steel, or similar material", @@ -50,8 +59,14 @@ road_codes <- c( "767" = "Reinforced over PCC - No reinforcement", "770" = "Non-Reinforced over PCC - Partial reinforcement", "772" = "Reinforced over PCC - Partial reinforcement", - "775" = "Non-Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", - "777" = "Reinforced over PCC - With No or Partial reinforcement but having Hinged Joints", + "775" = paste( + "Non-Reinforced over PCC - With No or Partial reinforcement", + "but having Hinged Joints" + ), + "777" = paste( + "Reinforced over PCC - With No or Partial reinforcement", + "but having Hinged Joints" + ), "780" = "Non-Reinforced over PCC - Full reinforcement", "782" = "Reinforced over PCC - Full reinforcement", "790" = "Non-Reinforced over PCC - Continuous reinforcement", From fa29c882b2d79a1ce7905e12e02620bafe3f4d84 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 17 Oct 2024 22:03:29 +0000 Subject: [PATCH 55/74] linting --- .../spatial/spatial-environment-traffic.R | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 17bd258ad..441785ab3 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -101,7 +101,8 @@ walk(parquet_files, \(file_key) { shapefile_data <- shapefile_data %>% select(all_of(existing_columns)) %>% mutate( - road_type = if ("FCNAME" %in% colnames(.)) FCNAME else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, + road_type = if ("FCNAME" %in% colnames(.)) FCNAME + else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, lanes = if ("LNS" %in% colnames(.)) LNS else NA, surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, @@ -112,7 +113,8 @@ walk(parquet_files, \(file_key) { condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, - distress_opposing = if ("DTRESS_OPP" %in% colnames(.)) DTRESS_OPP else NA, + distress_opposing = if ("DTRESS_OPP" %in% + colnames(.)) DTRESS_OPP else NA, speed_limit = if 
("SP_LIM" %in% colnames(.)) SP_LIM else NA, inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% @@ -120,7 +122,7 @@ walk(parquet_files, \(file_key) { surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), road_name = str_to_lower(road_name), # Convert to lowercase - road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation like . / etc. + road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation # Remove standalone directional indicators (N, S, E, W) road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), @@ -143,8 +145,8 @@ walk(parquet_files, \(file_key) { road_name = str_trim(road_name) ) %>% select(-one_of(required_columns)) %>% # Drop unnecessary columns - mutate(across(-geometry, ~ replace(., . %in% c(0, "0000"), NA))) %>% # Replace 0 and '0000' with NA - mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Replace 9999 with NA + mutate(across(-geometry, ~ replace(., . %in% c(0, "0000"), NA))) %>% + mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% summarize(geometry = st_union(geometry), .groups = "drop") %>% mutate(geometry_3435 = st_transform(geometry, 3435)) @@ -155,11 +157,16 @@ walk(parquet_files, \(file_key) { intersection_matrix <- st_intersects(data) # Create intersecting pairs - intersecting_pairs <- do.call(rbind, lapply(seq_along(intersection_matrix), function(i) { - data.frame(polygon_1 = i, polygon_2 = intersection_matrix[[i]]) - })) %>% - filter(polygon_1 != polygon_2) # Remove self-matches - + intersecting_pairs <- do.call( + rbind, + lapply(seq_along(intersection_matrix), function(i) { + data.frame( + polygon_1 = i, + polygon_2 = intersection_matrix[[i]] + ) + }) + ) %>% + filter(polygon_1 != polygon_2) # Remove self-matches # Add polygon IDs and relevant columns for merging data_with_ids <- data %>% mutate(polygon_id = row_number()) %>% @@ -201,11 +208,14 @@ walk(parquet_files, \(file_key) { mutate(polygon_id = row_number()) %>% left_join(averages, by = c("polygon_id" = "polygon_1")) %>% mutate( - daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, daily_traffic), - speed_limit = if_else(is.na(speed_limit), average_speed_limit, speed_limit), + daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, + daily_traffic), + speed_limit = if_else(is.na(speed_limit), average_speed_limit, + speed_limit), lanes = if_else(is.na(lanes), average_lanes, lanes) ) %>% - select(-c(average_daily_traffic, average_speed_limit, average_lanes, polygon_id)) + select(-c(average_daily_traffic, average_speed_limit, + average_lanes, polygon_id)) return(shapefile_data_final) } From 5a285fde0524845c5835f324927b892c9c0ba5d1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 21 Oct 2024 16:06:34 +0000 Subject: [PATCH 56/74] Rename to shapefile_data --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 441785ab3..48768e0c7 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -249,7 +249,7 @@ walk(parquet_files, \(file_key) { } output_path <- file.path(output_bucket, basename(file_key)) - 
geoarrow::write_geoparquet(shapefile_data_final, output_path) + geoarrow::write_geoparquet(shapefile_data, output_path) print(paste(file_key, "cleaned and uploaded.")) } From 6734b5d690ebaf8ac9037761a6afca9d5cec4b2b Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 21 Oct 2024 18:20:56 +0000 Subject: [PATCH 57/74] Add commented text --- .../spatial/spatial-environment-traffic.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 48768e0c7..8bdb21aa0 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -91,6 +91,8 @@ walk(parquet_files, \(file_key) { ) %>% st_transform(4326) + # Because column names change, we can't just select, but create an intersection + # of columns we want and the renamed columns. required_columns <- c( "FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", @@ -147,11 +149,15 @@ walk(parquet_files, \(file_key) { select(-one_of(required_columns)) %>% # Drop unnecessary columns mutate(across(-geometry, ~ replace(., . %in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% + # Group by the characteristics that we want group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% + # Create a union of the streets based on the summarized features summarize(geometry = st_union(geometry), .groups = "drop") %>% - mutate(geometry_3435 = st_transform(geometry, 3435)) + mutate(geometry_3435 = st_transform(geometry, 3435)) %>% + ungroup() - # Helper function to calculate averages based on intersections + # Helper function to calculate averages based on intersections of streets with the + # same name and overlapping spatial features. 
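(Aside: a self-contained sketch of what this helper does, run on invented geometry rather than IDOT data. st_intersects() returns, for each segment, the indices of the segments it touches; keeping only touching segments that share a road_name and averaging their values fills the missing entry. The segment names, coordinates, and the neighbor_mean variable are illustrative, not from the script.)

library(sf)
library(dplyr)
library(purrr)

# Three toy segments: the two "north ave" pieces touch end to end at (1, 0)
segments <- st_sf(
  road_name = c("north ave", "north ave", "clark st"),
  speed_limit = c(30, NA, 25),
  geometry = st_sfc(
    st_linestring(rbind(c(0, 0), c(1, 0))),
    st_linestring(rbind(c(1, 0), c(2, 0))),
    st_linestring(rbind(c(5, 5), c(6, 5)))
  )
)

# For each segment, the indices of touching segments (self included)
neighbors <- st_intersects(segments)

# Mean value over touching segments that share a road name
neighbor_mean <- map_dbl(seq_len(nrow(segments)), \(i) {
  js <- setdiff(neighbors[[i]], i)
  js <- js[segments$road_name[js] == segments$road_name[i]]
  mean(segments$speed_limit[js], na.rm = TRUE)
})

segments <- segments %>%
  mutate(speed_limit = coalesce(speed_limit, neighbor_mean))

segments$speed_limit # 30, 30, 25: the NA segment inherits its neighbor's value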
calculate_traffic_averages <- function(data) { # Create an intersection matrix intersection_matrix <- st_intersects(data) From bc37c55723a0e65d6deac7a22aeb8d8d403bb537 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 21 Oct 2024 19:08:13 +0000 Subject: [PATCH 58/74] remove slash --- .../spatial/spatial-environment-traffic.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 8bdb21aa0..57238014b 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -1,13 +1,14 @@ library(aws.s3) library(dplyr) +library(geoarrow) library(purrr) library(sf) -library(geoarrow) +library(stringr) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/traffic/" +s3_folder <- "spatial/environment/traffic" output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) # Recoding of road type From 91fab65174bf9fdd03e08c0529967d0c65d1ff8e Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 21 Oct 2024 20:09:17 +0000 Subject: [PATCH 59/74] remove hash --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 57238014b..6007a55a6 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -9,7 +9,7 @@ library(stringr) AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic" -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder) +output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) # Recoding of road type road_codes <- c( From 9eb0fe3748fcabbdaba96c53da67577649d83bbc Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 22 Oct 2024 18:48:54 +0000 Subject: [PATCH 60/74] Updates after viewing output --- .../spatial/spatial-environment-traffic.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 6007a55a6..a4171cb14 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -11,7 +11,7 @@ AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) -# Recoding of road type +# Re-coding of road type road_codes <- c( "010" = "Unimproved", "020" = "Graded and Drained", @@ -124,10 +124,16 @@ walk(parquet_files, \(file_key) { mutate( surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), + # For testing + road_name_preserved = road_name, road_name = str_to_lower(road_name), # Convert to lowercase road_name = 
gsub("[[:punct:]]", "", road_name), # Remove punctuation # Remove standalone directional indicators (N, S, E, W) + # I wouldn't remove North South East west, so that streets like North + # Ave become empty. I also discovered that TH is not universally applied. + # For example, you can see 100TH st. I don't think the added value + # of removing TH is worth the risk of complicating valid street names. road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), # Replace full street name words with abbreviations @@ -147,11 +153,12 @@ walk(parquet_files, \(file_key) { # Remove extra spaces that may result from replacements road_name = str_trim(road_name) ) %>% - select(-one_of(required_columns)) %>% # Drop unnecessary columns + # Remove duplicated columns except for year + select(-one_of(required_columns[required_columns != "year"])) %>% mutate(across(-geometry, ~ replace(., . %in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Group by the characteristics that we want - group_by(road_name, speed_limit, lanes, surface_type, daily_traffic) %>% + group_by(road_name, speed_limit, lanes, surface_type, daily_traffic, year, road_type) %>% # Create a union of the streets based on the summarized features summarize(geometry = st_union(geometry), .groups = "drop") %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% @@ -255,6 +262,9 @@ walk(parquet_files, \(file_key) { } } + shapefile_data <- shapefile_data %>% + mutate(across(-geometry, ~ ifelse(is.nan(.), NA, .))) + output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data, output_path) From 8237ace255ab977d3780d837b762c2102ef807fb Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 23 Oct 2024 16:20:58 +0000 Subject: [PATCH 61/74] Remove additional geom column from mutate --- .../spatial/spatial-environment-traffic.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index a4171cb14..beda69115 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -263,11 +263,12 @@ walk(parquet_files, \(file_key) { } shapefile_data <- shapefile_data %>% - mutate(across(-geometry, ~ ifelse(is.nan(.), NA, .))) + mutate(across(-c(geometry, geometry_3435), ~ ifelse(is.nan(.), NA, .))) output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data, output_path) print(paste(file_key, "cleaned and uploaded.")) } -}) +}, .progress = TRUE) + From c87c58c4fc1373bad36e31c73caa461c8fef17f3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 1 Nov 2024 20:44:47 +0000 Subject: [PATCH 62/74] lintr --- .../spatial/spatial-environment-traffic.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index a4171cb14..725e77c04 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -92,8 +92,9 @@ walk(parquet_files, \(file_key) { ) %>% st_transform(4326) - # Because column names change, we can't just select, but create an 
intersection - # of columns we want and the renamed columns. + # Because column names change, we can't just select, + # but create an intersection of columns we want + # and the renamed columns. required_columns <- c( "FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", @@ -131,8 +132,10 @@ walk(parquet_files, \(file_key) { # Remove standalone directional indicators (N, S, E, W) # I wouldn't remove North South East west, so that streets like North - # Ave become empty. I also discovered that TH is not universally applied. - # For example, you can see 100TH st. I don't think the added value + # Ave become empty. I also discovered that + # TH is not universally applied. + # For example, you can look at 100TH st. + # I don't think the added value # of removing TH is worth the risk of complicating valid street names. road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), @@ -158,14 +161,16 @@ walk(parquet_files, \(file_key) { mutate(across(-geometry, ~ replace(., . %in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Group by the characteristics that we want - group_by(road_name, speed_limit, lanes, surface_type, daily_traffic, year, road_type) %>% + group_by(road_name, speed_limit, lanes, + surface_type, daily_traffic, year, road_type) %>% # Create a union of the streets based on the summarized features summarize(geometry = st_union(geometry), .groups = "drop") %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% ungroup() - # Helper function to calculate averages based on intersections of streets with the - # same name and overlapping spatial features. + # Helper function to calculate averages based on intersections + # of streets with the same name + # and overlapping spatial features. 
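(Aside: the script runs this helper repeatedly because one fill pass can turn a formerly missing segment into a donor for the next pass. Stripped down to its fixed-point skeleton, and assuming a calculate_traffic_averages() defined as in these hunks, the control flow is roughly:)

fill_until_stable <- function(data) {
  previous_na <- -1 # placeholder so the first pass always runs
  repeat {
    current_na <- sum(is.na(data$daily_traffic)) +
      sum(is.na(data$speed_limit)) +
      sum(is.na(data$lanes))
    # The fill step never un-fills a value, so the NA count is non-increasing
    # and the loop is guaranteed to terminate
    if (current_na == previous_na) break
    previous_na <- current_na
    data <- calculate_traffic_averages(data)
  }
  data
}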
calculate_traffic_averages <- function(data) { # Create an intersection matrix intersection_matrix <- st_intersects(data) From 9f067ee9896364e5cbfdd3096857531340e2f09a Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Sun, 3 Nov 2024 23:48:48 +0000 Subject: [PATCH 63/74] Linting --- .../spatial/spatial-environment-traffic.R | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index b8e7e52ca..854e6bd91 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -105,8 +105,9 @@ walk(parquet_files, \(file_key) { shapefile_data <- shapefile_data %>% select(all_of(existing_columns)) %>% mutate( - road_type = if ("FCNAME" %in% colnames(.)) FCNAME - else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, + road_type = if ("FCNAME" %in% colnames(.)) { + FCNAME + } else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, lanes = if ("LNS" %in% colnames(.)) LNS else NA, surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, @@ -118,7 +119,11 @@ walk(parquet_files, \(file_key) { road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, distress_opposing = if ("DTRESS_OPP" %in% - colnames(.)) DTRESS_OPP else NA, + colnames(.)) { + DTRESS_OPP + } else { + NA + }, speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA ) %>% @@ -161,8 +166,10 @@ walk(parquet_files, \(file_key) { mutate(across(-geometry, ~ replace(., . 
%in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Group by the characteristics that we want - group_by(road_name, speed_limit, lanes, - surface_type, daily_traffic, year, road_type) %>% + group_by( + road_name, speed_limit, lanes, + surface_type, daily_traffic, year, road_type + ) %>% # Create a union of the streets based on the summarized features summarize(geometry = st_union(geometry), .groups = "drop") %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% @@ -185,7 +192,7 @@ walk(parquet_files, \(file_key) { ) }) ) %>% - filter(polygon_1 != polygon_2) # Remove self-matches + filter(polygon_1 != polygon_2) # Remove self-matches # Add polygon IDs and relevant columns for merging data_with_ids <- data %>% mutate(polygon_id = row_number()) %>% @@ -228,13 +235,17 @@ walk(parquet_files, \(file_key) { left_join(averages, by = c("polygon_id" = "polygon_1")) %>% mutate( daily_traffic = if_else(is.na(daily_traffic), average_daily_traffic, - daily_traffic), + daily_traffic + ), speed_limit = if_else(is.na(speed_limit), average_speed_limit, - speed_limit), + speed_limit + ), lanes = if_else(is.na(lanes), average_lanes, lanes) ) %>% - select(-c(average_daily_traffic, average_speed_limit, - average_lanes, polygon_id)) + select(-c( + average_daily_traffic, average_speed_limit, + average_lanes, polygon_id + )) return(shapefile_data_final) } @@ -276,4 +287,3 @@ walk(parquet_files, \(file_key) { print(paste(file_key, "cleaned and uploaded.")) } }, .progress = TRUE) - From 2fb559a916c34cfc4670b67448fb06627c003026 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Mon, 4 Nov 2024 08:59:49 -0600 Subject: [PATCH 64/74] Update dbt/models/spatial/docs.md Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- dbt/models/spatial/docs.md | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index c4fa435f2..86271fdae 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -500,7 +500,6 @@ Includes townships within the City of Chicago, which are technically defunct. Illinois Department of Transportation data source from [https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). 
- **Geometry:** `MULTILINESTRING` {% enddocs %} From fd1945bf59ae4d8e5a10c73920b61a92b024cebf Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:00:04 -0600 Subject: [PATCH 65/74] Update etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- .../spatial/spatial-environment-traffic.R | 1 - 1 file changed, 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R index 6f72d9cd8..aff491495 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R @@ -12,7 +12,6 @@ output_bucket <- file.path( AWS_S3_RAW_BUCKET, "spatial", "environment", "traffic" ) -current_year <- strftime(Sys.Date(), "%Y") # Get list of available files years <- map(2012:year(Sys.Date()), \(x) { From bc045b002e134bb6b60cc5500caf1e8167bc867f Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Mon, 4 Nov 2024 09:00:33 -0600 Subject: [PATCH 66/74] Update etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- .../spatial/spatial-environment-traffic.R | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 854e6bd91..3b91bd3bf 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -145,18 +145,22 @@ walk(parquet_files, \(file_key) { road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), # Replace full street name words with abbreviations - road_name = gsub("\\bavenue\\b", "ave", road_name), - road_name = gsub("\\bav\\b", "ave", road_name), - road_name = gsub("\\bstreet\\b", "st", road_name), - road_name = gsub("\\bcourt\\b", "ct", road_name), - road_name = gsub("\\broad\\b", "rd", road_name), - road_name = gsub("\\bdrive\\b", "dr", road_name), - road_name = gsub("\\bplace\\b", "pl", road_name), - road_name = gsub("\\blane\\b", "ln", road_name), - road_name = gsub("\\btrail\\b", "trl", road_name), - road_name = gsub("\\bparkway\\b", "pkwy", road_name), - road_name = gsub("\\bhighway\\b", "hwy", road_name), - road_name = gsub("\\bexpressway\\b", "expy", road_name), +road_name = str_replace_all( + road_name, + c("\\bavenue\\b" = "ave", + "\\bav\\b" = "ave", + "\\bstreet\\b" = "st", + "\\bcourt\\b" = "ct", + "\\broad\\b" = "rd", + "\\bdrive\\b" = "dr", + "\\bplace\\b" = "pl", + "\\blane\\b" = "ln", + "\\btrail\\b" = "trl", + "\\bparkway\\b" = "pkwy", + "\\bhighway\\b" = "hwy", + "\\bexpressway\\b" = "expy" + ) + ), # Remove extra spaces that may result from replacements road_name = str_trim(road_name) From 9810b7e3b11209292e5c9dde9d976c6f4d64e55a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 16:00:46 +0000 Subject: [PATCH 67/74] Billy changes --- .../spatial/spatial-environment-traffic.R | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R 
b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 3b91bd3bf..0c4aeb62c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -6,7 +6,7 @@ library(sf) library(stringr) # Define the S3 bucket and folder path -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +AWS_S3_RAW_BUCKET <- "s3://ccao-data-raw-us-east-1" AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) @@ -101,40 +101,36 @@ walk(parquet_files, \(file_key) { "SP_LIM", "INVENTORY", "geometry_3435", "year" ) + # Define the renaming mapping + renames <- c( + "FCNAME" = "road_type", + "FC_NAME" = "road_type", + "LNS" = "lanes", + "SURF_TYP" = "surface_type", + "SURF_WTH" = "surface_width", + "SURF_YR" = "surface_year", + "AADT" = "daily_traffic", + "CRS_WITH" = "condition_with", + "CRS_OPP" = "condition_opposing", + "CRS_YR" = "condition_year", + "ROAD_NAME" = "road_name", + "DTRESS_WTH" = "distress_with", + "DTRESS_OPP" = "distress_opposing" + ) + + existing_columns <- intersect(required_columns, colnames(shapefile_data)) shapefile_data <- shapefile_data %>% select(all_of(existing_columns)) %>% - mutate( - road_type = if ("FCNAME" %in% colnames(.)) { - FCNAME - } else if ("FC_NAME" %in% colnames(.)) FC_NAME else NA, - lanes = if ("LNS" %in% colnames(.)) LNS else NA, - surface_type = if ("SURF_TYP" %in% colnames(.)) SURF_TYP else NA, - surface_width = if ("SURF_WTH" %in% colnames(.)) SURF_WTH else NA, - surface_year = if ("SURF_YR" %in% colnames(.)) SURF_YR else NA, - daily_traffic = if ("AADT" %in% colnames(.)) AADT else NA, - condition_with = if ("CRS_WITH" %in% colnames(.)) CRS_WITH else NA, - condition_opposing = if ("CRS_OPP" %in% colnames(.)) CRS_OPP else NA, - condition_year = if ("CRS_YR" %in% colnames(.)) CRS_YR else NA, - road_name = if ("ROAD_NAME" %in% colnames(.)) ROAD_NAME else NA, - distress_with = if ("DTRESS_WTH" %in% colnames(.)) DTRESS_WTH else NA, - distress_opposing = if ("DTRESS_OPP" %in% - colnames(.)) { - DTRESS_OPP - } else { - NA - }, - speed_limit = if ("SP_LIM" %in% colnames(.)) SP_LIM else NA, - inventory_id = if ("INVENTORY" %in% colnames(.)) INVENTORY else NA - ) %>% - mutate( + # Dynamically rename and select columns based on the existing names in the dataset + rename_with(~ renames[.x], .cols = intersect(names(.), names(renames))) %>% + mutate(across(all_of(renames), ~ ifelse(is.na(.), NA, .)), surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), # For testing road_name_preserved = road_name, road_name = str_to_lower(road_name), # Convert to lowercase road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation - # Remove standalone directional indicators (N, S, E, W) # I wouldn't remove North South East west, so that streets like North # Ave become empty. 
I also discovered that
@@ -256,13 +252,14 @@ road_name = str_replace_all(
 
   # Run the function
   # Initialize with placeholder to ensure the first iteration runs
+  # Initialize previous NA counts with values that differ from any real NA count
   previous_na_counts <- list(
-    daily_traffic_na = -1, # Placeholder different from any real NA count
-    speed_limit_na = -1 # Same here
+    daily_traffic_na = -1,
+    speed_limit_na = -1
   )
 
-  # Loop until no changes in NA counts
-  repeat {
+  # Loop until there are no changes in NA counts
+  while (TRUE) {
     # Calculate current NA counts
     current_na_counts <- list(
       daily_traffic_na = sum(is.na(shapefile_data$daily_traffic)),
@@ -282,6 +279,7 @@ road_name = str_replace_all(
     }
   }
 
+
   shapefile_data <- shapefile_data %>%
     mutate(across(-c(geometry, geometry_3435), ~ ifelse(is.nan(.), NA, .)))
 

From de0526cd5382d1410705b2cf61dae01589684f83 Mon Sep 17 00:00:00 2001
From: Damonamajor
Date: Mon, 4 Nov 2024 18:13:59 +0000
Subject: [PATCH 68/74] Working file with doc updates

---
 dbt/models/spatial/docs.md                    |  6 +++
 .../spatial/spatial-environment-traffic.R     | 44 ++++++++++---------
 2 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md
index 86271fdae..95bda9c5c 100644
--- a/dbt/models/spatial/docs.md
+++ b/dbt/models/spatial/docs.md
@@ -499,6 +499,12 @@ Includes townships within the City of Chicago, which are technically defunct.
 
 Illinois Department of Transportation data source from
 [https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/).
+The data focuses on five features: lanes, speed limits, traffic count, road
+type, and surface type. Some columns are not present in all years of data (for
+example, speed limit in 2012). Values are also not universally present within a
+year, so we average numeric values for roads that overlap and share a name. For
+example, if segment B touches segments A and C with speed limits of 25 and 30,
+the speed limit for segment B will be 27.5.
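(Aside: the same arithmetic as the example just above, in plain R with the hypothetical speed limits of 25 and 30.)

speed_a <- 25 # speed limit of touching segment A
speed_c <- 30 # speed limit of touching segment C
speed_b <- mean(c(speed_a, speed_c)) # segment B gets the neighbor mean: 27.5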
**Geometry:** `MULTILINESTRING` {% enddocs %} diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 0c4aeb62c..b39ac26f4 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -101,7 +101,6 @@ walk(parquet_files, \(file_key) { "SP_LIM", "INVENTORY", "geometry_3435", "year" ) - # Define the renaming mapping renames <- c( "FCNAME" = "road_type", "FC_NAME" = "road_type", @@ -110,6 +109,7 @@ walk(parquet_files, \(file_key) { "SURF_WTH" = "surface_width", "SURF_YR" = "surface_year", "AADT" = "daily_traffic", + "SP_LIM" = "speed_limit", "CRS_WITH" = "condition_with", "CRS_OPP" = "condition_opposing", "CRS_YR" = "condition_year", @@ -118,13 +118,19 @@ walk(parquet_files, \(file_key) { "DTRESS_OPP" = "distress_opposing" ) + shapefile_data <- shapefile_data %>% + rename_with(~ renames[.x], .cols = intersect(names(.), names(renames))) + + # Create a list of required columns based on the rename mappings + required_columns <- unique(unname(renames)) + + # Identify missing renamed columns and add them with NA values + missing_columns <- setdiff(required_columns, colnames(shapefile_data)) + shapefile_data[missing_columns] <- NA + - existing_columns <- intersect(required_columns, colnames(shapefile_data)) shapefile_data <- shapefile_data %>% - select(all_of(existing_columns)) %>% - # Dynamically rename and select columns based on the existing names in the dataset - rename_with(~ renames[.x], .cols = intersect(names(.), names(renames))) %>% - mutate(across(all_of(renames), ~ ifelse(is.na(.), NA, .)), + mutate( surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), # For testing @@ -132,16 +138,16 @@ walk(parquet_files, \(file_key) { road_name = str_to_lower(road_name), # Convert to lowercase road_name = gsub("[[:punct:]]", "", road_name), # Remove punctuation # Remove standalone directional indicators (N, S, E, W) - # I wouldn't remove North South East west, so that streets like North + # I wouldn't remove North South East West, so that streets like North # Ave become empty. I also discovered that - # TH is not universally applied. - # For example, you can look at 100TH st. + # TH is not universally applied. An example is 100th St. # I don't think the added value # of removing TH is worth the risk of complicating valid street names. + # Once again, ending in th would change North Ave. road_name = gsub("\\b(n|s|e|w)\\b", "", road_name), # Replace full street name words with abbreviations -road_name = str_replace_all( + road_name = str_replace_all( road_name, c("\\bavenue\\b" = "ave", "\\bav\\b" = "ave", @@ -161,8 +167,6 @@ road_name = str_replace_all( # Remove extra spaces that may result from replacements road_name = str_trim(road_name) ) %>% - # Remove duplicated columns except for year - select(-one_of(required_columns[required_columns != "year"])) %>% mutate(across(-geometry, ~ replace(., . 
%in% c(0, "0000"), NA))) %>% mutate(surface_year = ifelse(surface_year == 9999, NA, surface_year)) %>% # Group by the characteristics that we want @@ -183,15 +187,13 @@ road_name = str_replace_all( intersection_matrix <- st_intersects(data) # Create intersecting pairs - intersecting_pairs <- do.call( - rbind, - lapply(seq_along(intersection_matrix), function(i) { - data.frame( - polygon_1 = i, - polygon_2 = intersection_matrix[[i]] - ) - }) - ) %>% + intersecting_pairs <- map(seq_along(intersection_matrix), \(x) { + data.frame( + polygon_1 = x, + polygon_2 = intersection_matrix[[x]] + ) + }) %>% + bind_rows() %>% filter(polygon_1 != polygon_2) # Remove self-matches # Add polygon IDs and relevant columns for merging data_with_ids <- data %>% From a1b8691741866c53ff1a8c200f2fba4052e85a29 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:09:21 +0000 Subject: [PATCH 69/74] Final? --- .../spatial/spatial-environment-traffic.R | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index b39ac26f4..78aa41795 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -96,9 +96,10 @@ walk(parquet_files, \(file_key) { # but create an intersection of columns we want # and the renamed columns. required_columns <- c( - "FCNAME", "FC_NAME", "LNS", "SURF_TYP", "SURF_WTH", "SURF_YR", "AADT", - "CRS_WITH", "CRS_OPP", "CRS_YR", "ROAD_NAME", "DTRESS_WTH", "DTRESS_OPP", - "SP_LIM", "INVENTORY", "geometry_3435", "year" + "road_type", "lanes", "surface_type", "surface_width", "surface_year", + "daily_traffic", "speed_limit", "condition_with", "condition_opposing", + "condition_year", "road_name", "distress_with", "distress_opposing", + "inventory", "geometry_3435", "year" ) renames <- c( @@ -119,17 +120,15 @@ walk(parquet_files, \(file_key) { ) shapefile_data <- shapefile_data %>% - rename_with(~ renames[.x], .cols = intersect(names(.), names(renames))) + rename_with(~ str_replace_all(.x, renames)) - # Create a list of required columns based on the rename mappings - required_columns <- unique(unname(renames)) + missing_columns <- setdiff(required_columns, names(shapefile_data)) - # Identify missing renamed columns and add them with NA values - missing_columns <- setdiff(required_columns, colnames(shapefile_data)) + # Add missing columns with NA values directly shapefile_data[missing_columns] <- NA - shapefile_data <- shapefile_data %>% + select(all_of(required_columns)) %>% mutate( surface_type = road_codes[as.character(surface_type)], speed_limit = as.numeric(speed_limit), @@ -283,7 +282,8 @@ walk(parquet_files, \(file_key) { shapefile_data <- shapefile_data %>% - mutate(across(-c(geometry, geometry_3435), ~ ifelse(is.nan(.), NA, .))) + mutate(across(-c(geometry, geometry_3435), ~ ifelse(is.nan(.), NA, .))) %>% + relocate(year, .after = last_col()) output_path <- file.path(output_bucket, basename(file_key)) geoarrow::write_geoparquet(shapefile_data, output_path) From 02bbdccfd2196bcb66ea658e84675ea7357a293d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:10:26 +0000 Subject: [PATCH 70/74] Remove line at end --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 78aa41795..3f6f601a3 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -290,4 +290,4 @@ walk(parquet_files, \(file_key) { print(paste(file_key, "cleaned and uploaded.")) } -}, .progress = TRUE) +} From bc72c99b9625dce265a368f858ac4c35b140a37a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:12:05 +0000 Subject: [PATCH 71/74] Rename environ --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 3f6f601a3..35154ab12 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -6,7 +6,7 @@ library(sf) library(stringr) # Define the S3 bucket and folder path -AWS_S3_RAW_BUCKET <- "s3://ccao-data-raw-us-east-1" +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") s3_folder <- "spatial/environment/traffic" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) From 87da63bb6b3a747bd9fe42ae5ab51e06539b4bfe Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:18:51 +0000 Subject: [PATCH 72/74] Add ) --- .../spatial/spatial-environment-traffic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 35154ab12..b7b4894c1 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -290,4 +290,4 @@ walk(parquet_files, \(file_key) { print(paste(file_key, "cleaned and uploaded.")) } -} +}) From a5df33168fb1dfa2c66076a83d6a2e284ad8c4fa Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:21:01 +0000 Subject: [PATCH 73/74] lintr --- .../spatial/spatial-environment-traffic.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index b7b4894c1..76799259f 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -253,7 +253,8 @@ walk(parquet_files, \(file_key) { # Run the function # Initialize with placeholder to ensure the first iteration runs - # Initialize previous NA counts with values that differ from any real NA count + # Initialize previous NA counts with values that + # differ from any real NA count previous_na_counts <- list( daily_traffic_na = -1, speed_limit_na = -1 @@ -282,7 +283,8 @@ walk(parquet_files, \(file_key) { shapefile_data <- shapefile_data %>% - mutate(across(-c(geometry, geometry_3435), ~ ifelse(is.nan(.), NA, .))) %>% + mutate(across(-c(geometry, geometry_3435), + ~ ifelse(is.nan(.), NA, .))) %>% relocate(year, .after = 
last_col()) output_path <- file.path(output_bucket, basename(file_key)) From 5c60c23c7ea360068d446545a48146cbebe7a03a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 4 Nov 2024 19:30:28 +0000 Subject: [PATCH 74/74] styler --- .../spatial/spatial-environment-traffic.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index 76799259f..d98ed2c8a 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -148,7 +148,8 @@ walk(parquet_files, \(file_key) { # Replace full street name words with abbreviations road_name = str_replace_all( road_name, - c("\\bavenue\\b" = "ave", + c( + "\\bavenue\\b" = "ave", "\\bav\\b" = "ave", "\\bstreet\\b" = "st", "\\bcourt\\b" = "ct", @@ -283,8 +284,10 @@ walk(parquet_files, \(file_key) { shapefile_data <- shapefile_data %>% - mutate(across(-c(geometry, geometry_3435), - ~ ifelse(is.nan(.), NA, .))) %>% + mutate(across( + -c(geometry, geometry_3435), + ~ ifelse(is.nan(.), NA, .) + )) %>% relocate(year, .after = last_col()) output_path <- file.path(output_bucket, basename(file_key))
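One closing note on the NA-backfill loop that several of these commits rework: the controlling pattern is simply "fill, then stop once a full pass changes nothing." A minimal sketch of that shape is below; `fill_from_neighbors()` is a hypothetical stand-in for the script's neighbor-averaging step, not an actual function in this repo.

```r
# Run-until-stable pattern: repeat the fill step until the NA counts
# stop changing between passes.
fill_until_stable <- function(df, fill_from_neighbors) {
  # Seed with counts no real data can produce so the first pass runs.
  previous_na_counts <- list(daily_traffic_na = -1, speed_limit_na = -1)
  repeat {
    current_na_counts <- list(
      daily_traffic_na = sum(is.na(df$daily_traffic)),
      speed_limit_na = sum(is.na(df$speed_limit))
    )
    # Unchanged counts mean the fill reached a fixed point: stop.
    if (identical(current_na_counts, previous_na_counts)) break
    previous_na_counts <- current_na_counts
    df <- fill_from_neighbors(df)
  }
  df
}
```

Because a fill pass can only reduce the NA counts, both counts are non-increasing and bounded below by zero, so the loop always terminates (assuming the fill step never introduces new NAs).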