Skip to content

Commit

Permalink
Add flag removal checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
wagnerlmichael committed Oct 28, 2024
1 parent 8b0e0f4 commit e9e64a0
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 101 deletions.
154 changes: 78 additions & 76 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,36 @@ stages:
outs:
- path: input/assessment_data.parquet
hash: md5
md5: 605ee612ff45dca2edf5c508993a7f56
size: 69522635
md5: 4e1ac463b4d74fb9b238fa5e2c19210a
size: 80070368
- path: input/char_data.parquet
hash: md5
md5: ed7b8f4ed02eb491d0450920874a66c3
size: 131476800
md5: ea34f077e5a26d8e23174d3f9743ec9c
size: 149213288
- path: input/condo_strata_data.parquet
hash: md5
md5: 0a7462f0afccb09bdd94c58148a3ca8d
size: 40842
md5: 413f828ff0d1a2ca4d34ae0c6ca33ca8
size: 40859
- path: input/land_nbhd_rate_data.parquet
hash: md5
md5: e508daf5790982c303d6503fe1cb8e2b
md5: 6c1baaf2acbcba9869025bb336f4ad25
size: 4413
- path: input/training_data.parquet
hash: md5
md5: 51090aa4f5b5311b1441e62b81fd3827
size: 68987740
md5: 2fe90bea662f5624408f5927374abaeb
size: 74835195
train:
cmd: Rscript pipeline/01-train.R
deps:
- path: input/training_data.parquet
hash: md5
md5: 97b6ced3adb788e20fb2fc3758cd38a2
size: 53281136
md5: 2fe90bea662f5624408f5927374abaeb
size: 74835195
params:
params.yaml:
cv:
split_prop: 0.9
num_folds: 10
fold_overlap: 9
initial_set: 20
max_iterations: 50
Expand All @@ -64,21 +65,21 @@ stages:
model.engine: lightgbm
model.hyperparameter:
default:
num_iterations: 2500
learning_rate: 0.015
max_bin: 512
num_leaves: 159
add_to_linked_depth: 1
feature_fraction: 0.688
min_gain_to_split: 5.58
num_iterations: 2275
learning_rate: 0.011
max_bin: 225
num_leaves: 200
add_to_linked_depth: 2
feature_fraction: 0.661
min_gain_to_split: 1.58
min_data_in_leaf: 44
max_cat_threshold: 228
min_data_per_group: 160
cat_smooth: 54.52
cat_l2: 0.11
lambda_l1: 0.016
lambda_l2: 2.413
neighbors: 5
max_cat_threshold: 87
min_data_per_group: 200
cat_smooth: 140.85
cat_l2: 0.017
lambda_l1: 0.697
lambda_l2: 0.002
neighbors: 15
range:
num_iterations:
- 100
Expand Down Expand Up @@ -143,12 +144,14 @@ stages:
- char_building_non_units
- char_bldg_is_mixed_use
- char_building_sf
- char_unit_sf
- char_bedrooms
- char_half_baths
- char_full_baths
- loc_longitude
- loc_latitude
- loc_env_flood_fema_sfha
- loc_census_tract_geoid
- loc_env_flood_fs_factor
- loc_env_flood_fs_risk_direction
- loc_env_airport_noise_dnl
- loc_school_elementary_district_geoid
- loc_school_secondary_district_geoid
- loc_access_cmap_walk_nta_score
Expand All @@ -158,8 +161,6 @@ stages:
- prox_num_bus_stop_in_half_mile
- prox_num_foreclosure_per_1000_pin_past_5_years
- prox_num_school_in_half_mile
- prox_num_school_with_rating_in_half_mile
- prox_avg_school_rating_in_half_mile
- prox_airport_dnl_total
- prox_nearest_bike_trail_dist_ft
- prox_nearest_cemetery_dist_ft
Expand All @@ -173,12 +174,13 @@ stages:
- prox_nearest_park_dist_ft
- prox_nearest_railroad_dist_ft
- prox_nearest_secondary_road_dist_ft
- prox_nearest_university_dist_ft
- prox_nearest_vacant_land_dist_ft
- prox_nearest_water_dist_ft
- prox_nearest_golf_course_dist_ft
- acs5_percent_age_children
- acs5_percent_age_senior
- acs5_median_age_total
- acs5_percent_mobility_no_move
- acs5_percent_mobility_moved_from_other_state
- acs5_percent_household_family_married
- acs5_percent_household_nonfamily_alone
Expand All @@ -196,8 +198,9 @@ stages:
- acs5_percent_household_total_occupied_w_sel_cond
- acs5_percent_mobility_moved_in_county
- other_tax_bill_rate
- other_school_district_elementary_avg_rating
- other_school_district_secondary_avg_rating
- ccao_is_active_exe_homeowner
- ccao_is_corner_lot
- ccao_n_years_exe_homeowner
- time_sale_year
- time_sale_day
- time_sale_quarter_of_year
Expand All @@ -211,6 +214,7 @@ stages:
categorical:
- meta_township_code
- meta_nbhd_code
- loc_census_tract_geoid
- loc_tax_municipality_name
- loc_school_elementary_district_geoid
- loc_school_secondary_district_geoid
Expand Down Expand Up @@ -260,59 +264,59 @@ stages:
outs:
- path: output/intermediate/timing/model_timing_train.parquet
hash: md5
md5: 2a1bd76cefa0e890a0c44d4c1957b728
size: 2865
md5: 66e8095fc931829e5dee80997bf73a33
size: 2872
- path: output/parameter_final/model_parameter_final.parquet
hash: md5
md5: e8bee777cc37b928818f58e5f10c30ef
md5: b234a91486b487642e8738306f87c25c
size: 8857
- path: output/parameter_range/model_parameter_range.parquet
hash: md5
md5: 3b2015c65992cfcc2a46b1c029d62212
md5: 150000269b5873fa1b3eaeeff7887ce2
size: 501
- path: output/parameter_search/model_parameter_search.parquet
hash: md5
md5: 3b2015c65992cfcc2a46b1c029d62212
md5: 150000269b5873fa1b3eaeeff7887ce2
size: 501
- path: output/test_card/model_test_card.parquet
hash: md5
md5: 0c39e69ea32a78d6ffadf87fc9eab1e0
size: 1085792
md5: e7901e36f5ce271eb0b029b22d17a4ac
size: 1398727
- path: output/workflow/fit/model_workflow_fit.zip
hash: md5
md5: d7223e5a080f2bbaaca75ab8eeddfb2b
size: 11610240
md5: e8bec722aa37a604dd068469c52f5adf
size: 11666878
- path: output/workflow/recipe/model_workflow_recipe.rds
hash: md5
md5: bef3c1299229b126404c8ac251ad981e
size: 3391336
md5: 1a995afd1800871877dbcbe3695f1032
size: 4375559
assess:
cmd: Rscript pipeline/02-assess.R
deps:
- path: input/assessment_data.parquet
hash: md5
md5: 9f1a4cb2c2b1533e568b936404913d44
size: 84715114
md5: 4e1ac463b4d74fb9b238fa5e2c19210a
size: 80070368
- path: input/condo_strata_data.parquet
hash: md5
md5: 68c07b633902d6de2b7f564ad2e5e304
size: 40750
md5: 413f828ff0d1a2ca4d34ae0c6ca33ca8
size: 40859
- path: input/land_nbhd_rate_data.parquet
hash: md5
md5: e508daf5790982c303d6503fe1cb8e2b
md5: 6c1baaf2acbcba9869025bb336f4ad25
size: 4413
- path: input/training_data.parquet
hash: md5
md5: 97b6ced3adb788e20fb2fc3758cd38a2
size: 53281136
md5: 2fe90bea662f5624408f5927374abaeb
size: 74835195
- path: output/workflow/fit/model_workflow_fit.zip
hash: md5
md5: d7223e5a080f2bbaaca75ab8eeddfb2b
size: 11610240
md5: e8bec722aa37a604dd068469c52f5adf
size: 11666878
- path: output/workflow/recipe/model_workflow_recipe.rds
hash: md5
md5: bef3c1299229b126404c8ac251ad981e
size: 3391336
md5: 1a995afd1800871877dbcbe3695f1032
size: 4375559
params:
params.yaml:
assessment:
Expand All @@ -332,12 +336,14 @@ stages:
- char_building_non_units
- char_bldg_is_mixed_use
- char_building_sf
- char_unit_sf
- char_bedrooms
- char_half_baths
- char_full_baths
- loc_longitude
- loc_latitude
- loc_env_flood_fema_sfha
- loc_census_tract_geoid
- loc_env_flood_fs_factor
- loc_env_flood_fs_risk_direction
- loc_env_airport_noise_dnl
- loc_school_elementary_district_geoid
- loc_school_secondary_district_geoid
- loc_access_cmap_walk_nta_score
Expand All @@ -347,8 +353,6 @@ stages:
- prox_num_bus_stop_in_half_mile
- prox_num_foreclosure_per_1000_pin_past_5_years
- prox_num_school_in_half_mile
- prox_num_school_with_rating_in_half_mile
- prox_avg_school_rating_in_half_mile
- prox_airport_dnl_total
- prox_nearest_bike_trail_dist_ft
- prox_nearest_cemetery_dist_ft
Expand All @@ -362,12 +366,13 @@ stages:
- prox_nearest_park_dist_ft
- prox_nearest_railroad_dist_ft
- prox_nearest_secondary_road_dist_ft
- prox_nearest_university_dist_ft
- prox_nearest_vacant_land_dist_ft
- prox_nearest_water_dist_ft
- prox_nearest_golf_course_dist_ft
- acs5_percent_age_children
- acs5_percent_age_senior
- acs5_median_age_total
- acs5_percent_mobility_no_move
- acs5_percent_mobility_moved_from_other_state
- acs5_percent_household_family_married
- acs5_percent_household_nonfamily_alone
Expand All @@ -385,8 +390,9 @@ stages:
- acs5_percent_household_total_occupied_w_sel_cond
- acs5_percent_mobility_moved_in_county
- other_tax_bill_rate
- other_school_district_elementary_avg_rating
- other_school_district_secondary_avg_rating
- ccao_is_active_exe_homeowner
- ccao_is_corner_lot
- ccao_n_years_exe_homeowner
- time_sale_year
- time_sale_day
- time_sale_quarter_of_year
Expand All @@ -398,19 +404,15 @@ stages:
- meta_strata_1
- meta_strata_2
pv:
nonlivable_threshold: 1000
nonlivable_fixed_fmv: 30000
land_pct_of_total_cap: 0.5
round_break:
- 1000
- 10000
- 100000
round_to_nearest:
- 1
- 500
- 5000
- 10000
round_type: floor
- 10
- 100
round_type: ceiling
ratio_study:
far_year: '2021'
far_stage: board
Expand All @@ -436,16 +438,16 @@ stages:
outs:
- path: output/assessment_card/model_assessment_card.parquet
hash: md5
md5: 32956ff98cb61bf379d91876075d856a
size: 46538183
md5: 7af071fdbf4ff8ba35ae158d4b6480f7
size: 39005384
- path: output/assessment_pin/model_assessment_pin.parquet
hash: md5
md5: e4b201478916e76c05281e80239a1715
size: 43587426
md5: 0305e937be3245ca7403c8d2d7b714fa
size: 41683293
- path: output/intermediate/timing/model_timing_assess.parquet
hash: md5
md5: e5aa33e79f26f4c243126e3874f8df2c
size: 2879
md5: ee8d205dec3fe1a5d77f6180557657e1
size: 2886
evaluate:
cmd: Rscript pipeline/03-evaluate.R
deps:
Expand Down
Loading

0 comments on commit e9e64a0

Please sign in to comment.