ccao-data · Damonamajor · Aug 5, 2024 · Aug 5, 2024 · Aug 6, 2024 · Aug 6, 2024
@@ -0,0 +1,226 @@
+```{r}
+create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") {
+  # Filter neighborhoods that have at least one observation
+  nbhd_borders <- nbhd %>%
+    right_join(dataset, by = c("town_nbhd" = "meta_nbhd_code"))
+
+  # Create the color palette based on order_scheme
+  if (order_scheme == "low") {
+    pal <- colorNumeric(palette = "Reds", domain = dataset[[legend_value]], reverse = TRUE)
+  } else {
+    pal <- colorNumeric(palette = "Reds", domain = dataset[[legend_value]])
+  }
+
+  # Calculate the bounding box of the filtered neighborhoods
+  bbox <- st_bbox(nbhd_borders)
+
+  # Create the leaflet map
+  leaflet(dataset) %>%
+    addProviderTiles(providers$CartoDB.Positron) %>%
+    addCircleMarkers(
+      lng = ~ get(longitude),
+      lat = ~ get(latitude),
+      radius = 5,
+      color = ~ pal(dataset[[legend_value]]),
+      popup = ~ {
+        shap_values <- dataset %>%
+          select(starts_with("target_feature_shap_")) %>%
+          summarise_all(~ ifelse(!is.na(.), sprintf("SHAP: %s", scales::dollar(.)), NA)) %>%
+          apply(1, function(row) {
+            paste(na.omit(row), collapse = "<br>")
+          })
+        paste(
+          "Pin: ", meta_pin,
+          ifelse(shap_values == "", "", paste0("<br>", shap_values)),
+          "<br>", "Relative SHAP: ", scales::percent(relative_shap, accuracy = 0.01),
+          "<br>", "Feature: ", sprintf("%.2f", get(params$added_feature)),
+          "<br>", "New FMV: ", pred_pin_final_fmv_new,
+          "<br>", "Comparison FMV: ", pred_pin_final_fmv_comp,
+          "<br>", "FMV Difference: ", scales::percent(diff_pred_pin_final_fmv)
+        )
+      }
+    ) %>%
+    addPolygons(
+      data = nbhd_borders,
+      color = "black",
+      weight = 2,
+      fill = FALSE
+    ) %>%
+    addLegend(
+      "bottomright",
+      pal = pal,
+      values = dataset[[legend_value]],
+      title = legend_title
+    )
+}
+```
+
+## Highest and Lowest 100 Values
+
+Three different types of high and low values are produced; the values of the feature we are analyzing, the impact that can be determined through the SHAPs, and the largest effects in change in FMV.
+
+::: panel-tabset
+### Largest 100 Values
+
+Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation.
+
+```{r}
+highest_100 <- leaflet_data %>%
+  arrange(desc(!!sym(target_feature_value))) %>%
+  dplyr::slice(1:100)
+
+create_leaflet_map(highest_100, {{ target_feature_value }}, "Largest 100 Values")
+```
+
+### Lowest 100 Values
+
+Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation.
+
+```{r}
+lowest_100 <- leaflet_data %>%
+  distinct(meta_pin, .keep_all = TRUE) %>%
+  arrange(!!sym({{ target_feature_value }})) %>%
+  slice(1:100)
+
+create_leaflet_map(lowest_100, {{ target_feature_value }}, "Lowest 100 Values", order_scheme = "low")
+```
+
+### Highest 100 SHAP Values
+
+```{r}
+highest_100 <- leaflet_data %>%
+  arrange(desc(shap_total)) %>%
+  slice(1:100)
+
+create_leaflet_map(highest_100, "shap_total", "Highest 100 SHAPs")
+```
+
+### Lowest 100 SHAP Values
+
+```{r}
+lowest_100 <- leaflet_data %>%
+  arrange(shap_total) %>%
+  slice(1:100)
+
+create_leaflet_map(lowest_100, "shap_total", "Lowest 100 SHAPs", order_scheme = "low")
+```
+:::
+
+## Largest FMV Changes
+
+Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact.
+
+::: panel-tabset
+### 100 Largest FMV Increases
+
+```{r}
+largest_fmv_increases <- leaflet_data %>%
+  arrange(desc(diff_pred_pin_final_fmv)) %>%
+  slice(1:100)
+
+# Call the function with the pre-sliced dataset
+create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases (%)")
+```
+
+### 100 Largest FMV Decreases
+
+Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact.
+
+```{r}
+largest_fmv_decreases <- leaflet_data %>%
+  arrange(diff_pred_pin_final_fmv) %>%
+  slice(1:100)
+
+create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_final_fmv", "Largest FMV Decreases (%)", order_scheme = "low")
+```
+
+### 100 Largest FMV Initial Increases
+
+```{r}
+largest_fmv_increases <- leaflet_data %>%
+  arrange(desc(diff_pred_pin_initial_fmv)) %>%
+  slice(1:100)
+
+# Call the function with the pre-sliced dataset
+create_leaflet_map(largest_fmv_increases, "diff_pred_pin_initial_fmv", "Largest FMV Increases (%)")
+```
+
+### 100 Largest Initial FMV Decreases
+
+```{r}
+largest_fmv_decreases <- leaflet_data %>%
+  arrange(diff_pred_pin_initial_fmv) %>%
+  slice(1:100)
+
+create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_initial_fmv", "Largest FMV Decreases (%)", order_scheme = "low")
+```
+
+## Largest FMV Increases no Multicards
+
+```{r}
+largest_fmv_increases <- leaflet_data %>%
+  group_by(meta_pin) %>%
+  filter(n() == 1) %>%
+  ungroup() %>%
+  arrange(desc(diff_pred_pin_final_fmv)) %>%
+  slice(1:100)
+
+create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases")
+```
+
+## Largest FMV Decreases no Multicards
+
+```{r}
+largest_fmv_decreases <- leaflet_data %>%
+  group_by(meta_pin) %>%
+  filter(n() == 1) %>%
+  ungroup() %>%
+  arrange(diff_pred_pin_initial_fmv) %>%
+  slice(1:100)
+
+create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases (%)", order_scheme = "low")
+```
+:::
+
+## Neighborhoods with the Highest and Lowest SHAP Values
+
+These maps identify neighborhoods where the added feature is having the largest impact on SHAP values. By selecting neighborhoods with the highest mean(absolute value), you can take a closer look at how individual parcels in these neighborhoods are affected.
+
+::: panel-tabset
+```{r processing_SHAP_values}
+selected_data <- leaflet_data %>%
+  group_by(meta_nbhd_code) %>%
+  mutate(mean_value = mean(abs(shap_total)), na.rm = TRUE) %>%
+  ungroup() %>%
+  distinct(meta_nbhd_code, .keep_all = TRUE) %>%
+  arrange(mean_value)
+
+# Select top 2 and bottom 2 neighborhoods based on mean SHAP values
+selected_nbhd_codes <- selected_data %>%
+  slice(c(1:2, (n() - 1):n())) %>%
+  pull(meta_nbhd_code)
+
+
+filtered_data <- filter(leaflet_data, meta_nbhd_code %in% selected_nbhd_codes)
+
+
+# Separate high and low mean value neighborhoods
+high_mean_data <- filtered_data %>%
+  filter(meta_nbhd_code %in% selected_nbhd_codes[(length(selected_nbhd_codes) - 1):length(selected_nbhd_codes)])
+
+low_mean_data <- filtered_data %>%
+  filter(meta_nbhd_code %in% selected_nbhd_codes[1:2])
+```
+
+### 2 Highest SHAP Neighborhoods
+
+```{r}
+create_leaflet_map(high_mean_data, "shap_total", "SHAP Values")
+```
+
+### 2 Lowest SHAP Neighborhoods
+
+```{r}
+create_leaflet_map(low_mean_data, "shap_total", "SHAP Values")
+```
+:::