From 17d2b0deaeb9a2c54705d5d31cd85993105eb974 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 5 Aug 2024 19:56:13 +0000 Subject: [PATCH 1/5] Commit intiial doc --- analyses/add_leaflet_maps.qmd | 226 ++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 analyses/add_leaflet_maps.qmd diff --git a/analyses/add_leaflet_maps.qmd b/analyses/add_leaflet_maps.qmd new file mode 100644 index 00000000..4ff55412 --- /dev/null +++ b/analyses/add_leaflet_maps.qmd @@ -0,0 +1,226 @@ + +```{r} +create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") { + # Filter neighborhoods that have at least one observation + nbhd_borders <- nbhd %>% + right_join(dataset, by = c("town_nbhd" = "meta_nbhd_code")) + + # Create the color palette based on order_scheme + if (order_scheme == "low") { + pal <- colorNumeric(palette = "Reds", domain = dataset[[legend_value]], reverse = TRUE) + } else { + pal <- colorNumeric(palette = "Reds", domain = dataset[[legend_value]]) + } + + # Calculate the bounding box of the filtered neighborhoods + bbox <- st_bbox(nbhd_borders) + + # Create the leaflet map + leaflet(dataset) %>% + addProviderTiles(providers$CartoDB.Positron) %>% + addCircleMarkers( + lng = ~ get(longitude), + lat = ~ get(latitude), + radius = 5, + color = ~ pal(dataset[[legend_value]]), + popup = ~ paste( + "
", "Pin: ", dataset$meta_pin, + "
", "SHAP:", dollar(dataset[[params$added_feature_shap]]), + "
", "Relative SHAP", dataset$shap_relative, + "
", "Feature:", sprintf("%.2f", dataset[[params$added_feature]]), + "
", "New FMV:", dataset$pred_pin_final_fmv_new, + "
", "Comparison FMV: ", dataset$pred_pin_final_fmv_comparison, + "
", "FMV Difference: ", dollar(dataset$diff_pred_pin_final_fmv) + ) + ) %>% + addPolygons( + data = nbhd_borders, + color = "black", + weight = 2, + fill = FALSE + ) %>% + addLegend( + "bottomright", + pal = pal, + values = dataset[[legend_value]], + title = legend_title + ) +} +``` + + +## Highest and Lowest 100 Values +Three different types of high and low values are produced; the values of the feature we are analyzing, the impact that can be determined through the SHAPs, and the largest effects in change in FMV. + +::: panel-tabset + +### Largest 100 Values + +Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. + +```{r} +highest_100 <- leaflet_data %>% + arrange(desc(!!sym(params$added_feature))) %>% + slice(1:100) + +create_leaflet_map(highest_100, params$added_feature, "Largest 100 Values", order_scheme = "high") +``` + +### Lowest 100 Values + +Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. + +```{r} +lowest_100 <- working_data_pin %>% + distinct(meta_pin, .keep_all = TRUE) %>% + arrange(!!sym(params$added_feature)) %>% + slice(1:100) + +create_leaflet_map(lowest_100, params$added_feature, "Lowest 100 Values", order_scheme = "low") +``` + +### Highest 100 SHAP Values + +```{r} +highest_100 <- working_data_card %>% + arrange(desc(!!sym(params$added_feature_shap))) %>% + slice(1:100) + +create_leaflet_map(highest_100, params$added_feature_shap, "Highest 100 SHAPs") +``` + +### Lowest 100 SHAP Values + +```{r} +# Example usage with the dataset sliced outside the function +lowest_100 <- working_data_card %>% + arrange(!!sym((params$added_feature_shap))) %>% + slice(1:100) + +create_leaflet_map(lowest_100, params$added_feature_shap, "Lowest 100 SHAPs", order_scheme = "low") +``` + +::: + +## Largest FMV Changes + +Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. + +::: panel-tabset + +### 100 Largest FMV Increases + +```{r} +largest_fmv_increases <- working_data_pin %>% + arrange(desc(diff_pred_pin_final_fmv)) %>% + slice(1:100) + +# Call the function with the pre-sliced dataset +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases") +``` + +### 100 Largest FMV Decreases + +Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. +```{r} +largest_fmv_decreases <- working_data_pin %>% + arrange(diff_pred_pin_final_fmv) %>% + slice(1:100) + +create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_final_fmv", "Largest FMV Decreases", order_scheme = "low") +``` + +### 100 Largest FMV Initial Increases +```{r} +largest_fmv_increases <- working_data_card %>% + arrange(desc(diff_pred_pin_initial_fmv)) %>% + slice(1:100) + +# Call the function with the pre-sliced dataset +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_initial_fmv", "Largest FMV Increases") +``` + + +### 100 Largest Initial FMV Decreases + +```{r} +largest_fmv_decreases <- working_data_pin %>% + arrange(diff_pred_pin_initial_fmv) %>% + slice(1:100) + +create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_initial_fmv", "Largest FMV Decreases", order_scheme = "low") +``` +## Largest FMV Increases no Multicards + +```{r} +largest_fmv_increases <- working_data_card %>% + group_by(meta_pin) %>% + filter(n() == 1) %>% + ungroup() %>% + arrange(desc(diff_pred_pin_final_fmv)) %>% + slice(1:100) + +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases") +``` +## Largest FMV Decreases no Multicards + +```{r} +largest_fmv_decreases <- working_data_card %>% + group_by(meta_pin) %>% + filter(n() == 1) %>% + ungroup() %>% + arrange(diff_pred_pin_initial_fmv) %>% + slice(1:100) + +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases", order_scheme = "low") +``` + + + +::: + +## Neighborhoods with the Highest and Lowest SHAP Values + +These maps identify neighborhoods where the added feature is having the largest impact on SHAP values. By selecting neighborhoods with the highest mean(absolute value), you can take a closer look at how individual parcels in these neighborhoods are affected. + +::: panel-tabset +```{r processing_SHAP_values} +selected_data <- working_data_card %>% + group_by(meta_nbhd_code) %>% + mutate(mean_value = mean(abs(!!sym(paste0(params$added_feature_shap))), na.rm = TRUE)) %>% + ungroup() %>% + distinct(meta_nbhd_code, .keep_all = TRUE) %>% + arrange(mean_value) + +# Select top 2 and bottom 2 neighborhoods based on mean SHAP values +selected_nbhd_codes <- selected_data %>% + slice(c(1:2, (n() - 1):n())) %>% + pull(meta_nbhd_code) + + +# Filter working_data_card for these neighborhoods +filtered_data <- filter(working_data_card, meta_nbhd_code %in% selected_nbhd_codes) + + +# Separate high and low mean value neighborhoods +high_mean_data <- filtered_data %>% + filter(meta_nbhd_code %in% selected_nbhd_codes[(length(selected_nbhd_codes) - 1):length(selected_nbhd_codes)]) + +low_mean_data <- filtered_data %>% + filter(meta_nbhd_code %in% selected_nbhd_codes[1:2]) +``` + + +### 2 Highest SHAP Neighborhoods + +```{r} +create_leaflet_map(high_mean_data, params$added_feature_shap, "SHAP Values") +``` +### 2 Lowest SHAP Neighborhoods + + +```{r} +create_leaflet_map(low_mean_data, params$added_feature_shap, "SHAP Values") +``` + +::: From 35235d436491d5fc1d16b71cf6d453933fc449bd Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 5 Aug 2024 20:15:26 +0000 Subject: [PATCH 2/5] Try appending shap_1 _2, etc --- analyses/add_leaflet_maps.qmd | 61 ++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/analyses/add_leaflet_maps.qmd b/analyses/add_leaflet_maps.qmd index 4ff55412..c31c7556 100644 --- a/analyses/add_leaflet_maps.qmd +++ b/analyses/add_leaflet_maps.qmd @@ -1,6 +1,5 @@ - ```{r} -create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") { +create_leaflet_map <- function(dataset, legend_value, legend_title, target_feature_shap, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") { # Filter neighborhoods that have at least one observation nbhd_borders <- nbhd %>% right_join(dataset, by = c("town_nbhd" = "meta_nbhd_code")) @@ -15,6 +14,25 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme # Calculate the bounding box of the filtered neighborhoods bbox <- st_bbox(nbhd_borders) + # Function to dynamically create the SHAP values string + generate_shap_values <- function(dataset, target_feature_shap) { + shap_values <- "" + i <- 1 + while (TRUE) { + column_name <- paste0(target_feature_shap, "_", i) + if (!column_name %in% names(dataset)) { + break + } + shap_value <- dataset[[column_name]][1] # Ensure we're dealing with a single value + if (is.na(shap_value)) { + break + } + shap_values <- paste0(shap_values, "
SHAP_", i, ": ", dollar(shap_value)) + i <- i + 1 + } + return(shap_values) + } + # Create the leaflet map leaflet(dataset) %>% addProviderTiles(providers$CartoDB.Positron) %>% @@ -25,12 +43,12 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme color = ~ pal(dataset[[legend_value]]), popup = ~ paste( "
", "Pin: ", dataset$meta_pin, - "
", "SHAP:", dollar(dataset[[params$added_feature_shap]]), + generate_shap_values(dataset, target_feature_shap), "
", "Relative SHAP", dataset$shap_relative, "
", "Feature:", sprintf("%.2f", dataset[[params$added_feature]]), "
", "New FMV:", dataset$pred_pin_final_fmv_new, - "
", "Comparison FMV: ", dataset$pred_pin_final_fmv_comparison, - "
", "FMV Difference: ", dollar(dataset$diff_pred_pin_final_fmv) + "
", "Comparison FMV: ", dataset$pred_pin_final_fmv_comp, + "
", "FMV Difference: ", scales::percent(dataset$diff_pred_pin_final_fmv) ) ) %>% addPolygons( @@ -46,29 +64,30 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme title = legend_title ) } -``` +``` + ## Highest and Lowest 100 Values -Three different types of high and low values are produced; the values of the feature we are analyzing, the impact that can be determined through the SHAPs, and the largest effects in change in FMV. -::: panel-tabset +Three different types of high and low values are produced; the values of the feature we are analyzing, the impact that can be determined through the SHAPs, and the largest effects in change in FMV. +::: panel-tabset ### Largest 100 Values -Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. +Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. ```{r} highest_100 <- leaflet_data %>% arrange(desc(!!sym(params$added_feature))) %>% slice(1:100) -create_leaflet_map(highest_100, params$added_feature, "Largest 100 Values", order_scheme = "high") +create_leaflet_map(highest_100, params$added_feature, "Largest 100 Values", "target_feature_shap") ``` ### Lowest 100 Values -Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. +Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. ```{r} lowest_100 <- working_data_pin %>% @@ -82,7 +101,7 @@ create_leaflet_map(lowest_100, params$added_feature, "Lowest 100 Values", order_ ### Highest 100 SHAP Values ```{r} -highest_100 <- working_data_card %>% +highest_100 <- leaflet_data %>% arrange(desc(!!sym(params$added_feature_shap))) %>% slice(1:100) @@ -99,15 +118,13 @@ lowest_100 <- working_data_card %>% create_leaflet_map(lowest_100, params$added_feature_shap, "Lowest 100 SHAPs", order_scheme = "low") ``` - ::: ## Largest FMV Changes -Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. +Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. ::: panel-tabset - ### 100 Largest FMV Increases ```{r} @@ -121,7 +138,8 @@ create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FM ### 100 Largest FMV Decreases -Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. +Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. + ```{r} largest_fmv_decreases <- working_data_pin %>% arrange(diff_pred_pin_final_fmv) %>% @@ -131,6 +149,7 @@ create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_final_fmv", "Largest FM ``` ### 100 Largest FMV Initial Increases + ```{r} largest_fmv_increases <- working_data_card %>% arrange(desc(diff_pred_pin_initial_fmv)) %>% @@ -140,7 +159,6 @@ largest_fmv_increases <- working_data_card %>% create_leaflet_map(largest_fmv_increases, "diff_pred_pin_initial_fmv", "Largest FMV Increases") ``` - ### 100 Largest Initial FMV Decreases ```{r} @@ -150,6 +168,7 @@ largest_fmv_decreases <- working_data_pin %>% create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_initial_fmv", "Largest FMV Decreases", order_scheme = "low") ``` + ## Largest FMV Increases no Multicards ```{r} @@ -162,6 +181,7 @@ largest_fmv_increases <- working_data_card %>% create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases") ``` + ## Largest FMV Decreases no Multicards ```{r} @@ -174,9 +194,6 @@ largest_fmv_decreases <- working_data_card %>% create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases", order_scheme = "low") ``` - - - ::: ## Neighborhoods with the Highest and Lowest SHAP Values @@ -210,17 +227,15 @@ low_mean_data <- filtered_data %>% filter(meta_nbhd_code %in% selected_nbhd_codes[1:2]) ``` - ### 2 Highest SHAP Neighborhoods ```{r} create_leaflet_map(high_mean_data, params$added_feature_shap, "SHAP Values") ``` -### 2 Lowest SHAP Neighborhoods +### 2 Lowest SHAP Neighborhoods ```{r} create_leaflet_map(low_mean_data, params$added_feature_shap, "SHAP Values") ``` - ::: From b0a6444df8d84b8dc29bfc97da248b54a0597ed9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 6 Aug 2024 12:22:14 +0000 Subject: [PATCH 3/5] Update with functioning append for SHAP values --- analyses/add_leaflet_maps.qmd | 106 +++++++++++++++------------------- 1 file changed, 48 insertions(+), 58 deletions(-) diff --git a/analyses/add_leaflet_maps.qmd b/analyses/add_leaflet_maps.qmd index c31c7556..06a6a24e 100644 --- a/analyses/add_leaflet_maps.qmd +++ b/analyses/add_leaflet_maps.qmd @@ -1,5 +1,5 @@ ```{r} -create_leaflet_map <- function(dataset, legend_value, legend_title, target_feature_shap, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") { +create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme = "high", longitude = "loc_longitude", latitude = "loc_latitude") { # Filter neighborhoods that have at least one observation nbhd_borders <- nbhd %>% right_join(dataset, by = c("town_nbhd" = "meta_nbhd_code")) @@ -14,25 +14,6 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, target_featu # Calculate the bounding box of the filtered neighborhoods bbox <- st_bbox(nbhd_borders) - # Function to dynamically create the SHAP values string - generate_shap_values <- function(dataset, target_feature_shap) { - shap_values <- "" - i <- 1 - while (TRUE) { - column_name <- paste0(target_feature_shap, "_", i) - if (!column_name %in% names(dataset)) { - break - } - shap_value <- dataset[[column_name]][1] # Ensure we're dealing with a single value - if (is.na(shap_value)) { - break - } - shap_values <- paste0(shap_values, "
SHAP_", i, ": ", dollar(shap_value)) - i <- i + 1 - } - return(shap_values) - } - # Create the leaflet map leaflet(dataset) %>% addProviderTiles(providers$CartoDB.Positron) %>% @@ -41,15 +22,24 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, target_featu lat = ~ get(latitude), radius = 5, color = ~ pal(dataset[[legend_value]]), - popup = ~ paste( - "
", "Pin: ", dataset$meta_pin, - generate_shap_values(dataset, target_feature_shap), - "
", "Relative SHAP", dataset$shap_relative, - "
", "Feature:", sprintf("%.2f", dataset[[params$added_feature]]), - "
", "New FMV:", dataset$pred_pin_final_fmv_new, - "
", "Comparison FMV: ", dataset$pred_pin_final_fmv_comp, - "
", "FMV Difference: ", scales::percent(dataset$diff_pred_pin_final_fmv) - ) + popup = ~ { + shap_values <- dataset %>% + select(starts_with("target_feature_shap_")) %>% + summarise_all(~ ifelse(!is.na(.), sprintf("SHAP: %.2f", .), NA)) %>% + apply(1, function(row) { + paste(na.omit(row), collapse = "
") + }) + + paste( + "Pin: ", meta_pin, + ifelse(shap_values == "", "", paste0("
", shap_values)), + "
", "Relative SHAP: ", scales::percent(relative_shap, accuracy = 0.01), + "
", "Feature: ", sprintf("%.2f", get(params$added_feature)), + "
", "New FMV: ", pred_pin_final_fmv_new, + "
", "Comparison FMV: ", pred_pin_final_fmv_comp, + "
", "FMV Difference: ", scales::percent(diff_pred_pin_final_fmv) + ) + } ) %>% addPolygons( data = nbhd_borders, @@ -66,6 +56,8 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, target_featu } + + ``` ## Highest and Lowest 100 Values @@ -79,10 +71,10 @@ Be careful interpreting values which are the max and min of the raw value, since ```{r} highest_100 <- leaflet_data %>% - arrange(desc(!!sym(params$added_feature))) %>% - slice(1:100) + arrange(desc(!!sym(target_feature_value))) %>% + dplyr::slice(1:100) -create_leaflet_map(highest_100, params$added_feature, "Largest 100 Values", "target_feature_shap") +create_leaflet_map(highest_100, {{target_feature_value}}, "Largest 100 Values") ``` ### Lowest 100 Values @@ -90,33 +82,32 @@ create_leaflet_map(highest_100, params$added_feature, "Largest 100 Values", "tar Be careful interpreting values which are the max and min of the raw value, since ties are not accounted for. For example, if there are 10,000 parcels which are 0 feet from a newly constructed building, the map will not be a full representation. ```{r} -lowest_100 <- working_data_pin %>% +lowest_100 <- leaflet_data %>% distinct(meta_pin, .keep_all = TRUE) %>% - arrange(!!sym(params$added_feature)) %>% + arrange(!!sym({{target_feature_value}})) %>% slice(1:100) -create_leaflet_map(lowest_100, params$added_feature, "Lowest 100 Values", order_scheme = "low") +create_leaflet_map(lowest_100, {{target_feature_value}}, "Lowest 100 Values", order_scheme = "low") ``` ### Highest 100 SHAP Values ```{r} highest_100 <- leaflet_data %>% - arrange(desc(!!sym(params$added_feature_shap))) %>% + arrange(desc(shap_total)) %>% slice(1:100) -create_leaflet_map(highest_100, params$added_feature_shap, "Highest 100 SHAPs") +create_leaflet_map(highest_100, "shap_total", "Highest 100 SHAPs") ``` ### Lowest 100 SHAP Values ```{r} -# Example usage with the dataset sliced outside the function -lowest_100 <- working_data_card %>% - arrange(!!sym((params$added_feature_shap))) %>% +lowest_100 <- leaflet_data %>% + arrange(shap_total) %>% slice(1:100) -create_leaflet_map(lowest_100, params$added_feature_shap, "Lowest 100 SHAPs", order_scheme = "low") +create_leaflet_map(lowest_100, "shap_total", "Lowest 100 SHAPs", order_scheme = "low") ``` ::: @@ -128,12 +119,12 @@ Multicard parcels have heuristic which limits their change. The added feature ma ### 100 Largest FMV Increases ```{r} -largest_fmv_increases <- working_data_pin %>% +largest_fmv_increases <- leaflet_data %>% arrange(desc(diff_pred_pin_final_fmv)) %>% slice(1:100) # Call the function with the pre-sliced dataset -create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases") +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases (%)") ``` ### 100 Largest FMV Decreases @@ -141,38 +132,38 @@ create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FM Multicard parcels have heuristic which limits their change. The added feature may trigger (or not trigger it), leading to changes much larger than the added feature's impact. ```{r} -largest_fmv_decreases <- working_data_pin %>% +largest_fmv_decreases <- leaflet_data %>% arrange(diff_pred_pin_final_fmv) %>% slice(1:100) -create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_final_fmv", "Largest FMV Decreases", order_scheme = "low") +create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_final_fmv", "Largest FMV Decreases (%)", order_scheme = "low") ``` ### 100 Largest FMV Initial Increases ```{r} -largest_fmv_increases <- working_data_card %>% +largest_fmv_increases <- leaflet_data %>% arrange(desc(diff_pred_pin_initial_fmv)) %>% slice(1:100) # Call the function with the pre-sliced dataset -create_leaflet_map(largest_fmv_increases, "diff_pred_pin_initial_fmv", "Largest FMV Increases") +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_initial_fmv", "Largest FMV Increases (%)") ``` ### 100 Largest Initial FMV Decreases ```{r} -largest_fmv_decreases <- working_data_pin %>% +largest_fmv_decreases <- leaflet_data %>% arrange(diff_pred_pin_initial_fmv) %>% slice(1:100) -create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_initial_fmv", "Largest FMV Decreases", order_scheme = "low") +create_leaflet_map(largest_fmv_decreases, "diff_pred_pin_initial_fmv", "Largest FMV Decreases (%)", order_scheme = "low") ``` ## Largest FMV Increases no Multicards ```{r} -largest_fmv_increases <- working_data_card %>% +largest_fmv_increases <- leaflet_data %>% group_by(meta_pin) %>% filter(n() == 1) %>% ungroup() %>% @@ -185,14 +176,14 @@ create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FM ## Largest FMV Decreases no Multicards ```{r} -largest_fmv_decreases <- working_data_card %>% +largest_fmv_decreases <- leaflet_data %>% group_by(meta_pin) %>% filter(n() == 1) %>% ungroup() %>% arrange(diff_pred_pin_initial_fmv) %>% slice(1:100) -create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases", order_scheme = "low") +create_leaflet_map(largest_fmv_increases, "diff_pred_pin_final_fmv", "Largest FMV Increases (%)", order_scheme = "low") ``` ::: @@ -202,9 +193,9 @@ These maps identify neighborhoods where the added feature is having the largest ::: panel-tabset ```{r processing_SHAP_values} -selected_data <- working_data_card %>% +selected_data <- leaflet_data %>% group_by(meta_nbhd_code) %>% - mutate(mean_value = mean(abs(!!sym(paste0(params$added_feature_shap))), na.rm = TRUE)) %>% + mutate(mean_value = mean(abs(shap_total)), na.rm = TRUE) %>% ungroup() %>% distinct(meta_nbhd_code, .keep_all = TRUE) %>% arrange(mean_value) @@ -215,8 +206,7 @@ selected_nbhd_codes <- selected_data %>% pull(meta_nbhd_code) -# Filter working_data_card for these neighborhoods -filtered_data <- filter(working_data_card, meta_nbhd_code %in% selected_nbhd_codes) +filtered_data <- filter(leaflet_data, meta_nbhd_code %in% selected_nbhd_codes) # Separate high and low mean value neighborhoods @@ -230,12 +220,12 @@ low_mean_data <- filtered_data %>% ### 2 Highest SHAP Neighborhoods ```{r} -create_leaflet_map(high_mean_data, params$added_feature_shap, "SHAP Values") +create_leaflet_map(high_mean_data, "shap_total", "SHAP Values") ``` ### 2 Lowest SHAP Neighborhoods ```{r} -create_leaflet_map(low_mean_data, params$added_feature_shap, "SHAP Values") +create_leaflet_map(low_mean_data, "shap_total", "SHAP Values") ``` ::: From 376ab2cab53e9d7cc51233dbc09bb8a3f30a5ad7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 6 Aug 2024 12:28:37 +0000 Subject: [PATCH 4/5] Add dollar for SHAP values --- analyses/add_leaflet_maps.qmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/analyses/add_leaflet_maps.qmd b/analyses/add_leaflet_maps.qmd index 06a6a24e..0bd47c4e 100644 --- a/analyses/add_leaflet_maps.qmd +++ b/analyses/add_leaflet_maps.qmd @@ -25,11 +25,10 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme popup = ~ { shap_values <- dataset %>% select(starts_with("target_feature_shap_")) %>% - summarise_all(~ ifelse(!is.na(.), sprintf("SHAP: %.2f", .), NA)) %>% + summarise_all(~ ifelse(!is.na(.), sprintf("SHAP: %s", scales::dollar(.)), NA)) %>% apply(1, function(row) { paste(na.omit(row), collapse = "
") }) - paste( "Pin: ", meta_pin, ifelse(shap_values == "", "", paste0("
", shap_values)), From 6af437f93fc425271f01dea25e1b47c0dbf05ea4 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 6 Aug 2024 12:29:28 +0000 Subject: [PATCH 5/5] styler --- analyses/add_leaflet_maps.qmd | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/analyses/add_leaflet_maps.qmd b/analyses/add_leaflet_maps.qmd index 0bd47c4e..0562dd38 100644 --- a/analyses/add_leaflet_maps.qmd +++ b/analyses/add_leaflet_maps.qmd @@ -53,10 +53,6 @@ create_leaflet_map <- function(dataset, legend_value, legend_title, order_scheme title = legend_title ) } - - - - ``` ## Highest and Lowest 100 Values @@ -70,10 +66,10 @@ Be careful interpreting values which are the max and min of the raw value, since ```{r} highest_100 <- leaflet_data %>% - arrange(desc(!!sym(target_feature_value))) %>% - dplyr::slice(1:100) + arrange(desc(!!sym(target_feature_value))) %>% + dplyr::slice(1:100) -create_leaflet_map(highest_100, {{target_feature_value}}, "Largest 100 Values") +create_leaflet_map(highest_100, {{ target_feature_value }}, "Largest 100 Values") ``` ### Lowest 100 Values @@ -83,10 +79,10 @@ Be careful interpreting values which are the max and min of the raw value, since ```{r} lowest_100 <- leaflet_data %>% distinct(meta_pin, .keep_all = TRUE) %>% - arrange(!!sym({{target_feature_value}})) %>% + arrange(!!sym({{ target_feature_value }})) %>% slice(1:100) -create_leaflet_map(lowest_100, {{target_feature_value}}, "Lowest 100 Values", order_scheme = "low") +create_leaflet_map(lowest_100, {{ target_feature_value }}, "Lowest 100 Values", order_scheme = "low") ``` ### Highest 100 SHAP Values