forked from SEMCOG/semcog_urbansim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
108 lines (88 loc) · 5.21 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import warnings
import numpy as np
import orca
import pandas as pd
from urbansim.utils import misc
import assumptions
# Ignore pandas' PyTables PerformanceWarning for the rest of the process.
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

# Look the "store" injectable up once; the loop below only indexes into it.
store = orca.get_injectable("store")

# Tables taken from the store and registered with orca under the same name.
for name in [
    'persons', 'parcels', 'zones', 'semmcds', 'counties', 'employment_sectors',
    'building_sqft_per_job',
    'annual_relocation_rates_for_households',
    'annual_relocation_rates_for_jobs', 'annual_employment_control_totals',
    'travel_data', 'zoning', 'large_areas', 'building_types', 'land_use_types',
    'workers_labor_participation_rates', 'workers_employment_rates_by_large_area_age',
    'workers_employment_rates_by_large_area',
    'transit_stops', 'crime_rates', 'schools', 'poi',
    'group_quarters', 'group_quarters_control_totals',
    'annual_household_control_totals',
    'events_addition', 'events_deletion', 'refiner_events', 'income_growth_rates',
]:
    orca.add_table(name, store[name])

# Tables read from CSV files; paths are relative to the working directory.
orca.add_table("remi_pop_total", pd.read_csv("data/remi_hhpop_bylarge.csv", index_col='large_area_id'))
orca.add_table('target_vacancies', pd.read_csv("data/target_vacancies.csv"))
orca.add_table('demolition_rates', pd.read_csv("data/DEMOLITION_RATES.csv", index_col='city_id'))
orca.add_table('extreme_hu_controls', pd.read_csv("data/extreme_hu_controls.csv", index_col='b_city_id'))
@orca.table(cache=True)
def buildings(store):
    """Build the ``buildings`` table from ``store['buildings']``.

    Steps, in order:

    * negative ``improvement_value`` entries are set to 0;
    * ``sqft_price_nonres`` and ``sqft_price_res`` are derived from
      ``improvement_value``; prices above 1000 or below 0 are set to 0;
    * every NaN left in the frame is replaced by 0;
    * the ``max_building_id`` injectable is registered as 10000000;
    * ``hu_filter`` is set to 1 for a random sample of the buildings that
      have residential units but do not appear in
      ``store['households'].building_id`` (90% per city for the listed city
      ids, 50% per city otherwise) and stays 0 for all other buildings.

    No random seed is set here, so the sample depends on the global random
    state at call time.

    Returns the modified frame read from the store.
    """
    bldg = store['buildings']

    def _zero_outside_0_to_1000(price):
        # NaN compares False on both sides, so it is left for fillna below;
        # +/-inf from a zero denominator falls outside the range and becomes 0.
        return price.mask((price > 1000) | (price < 0), 0)

    # Todo: combine two sqft prices into one and set non use sqft price to 0
    negative_value = bldg.improvement_value < 0
    bldg.loc[negative_value, 'improvement_value'] = 0

    # NOTE(review): the 0.7 and 1.25 factors are undocumented -- confirm their meaning.
    nonres_price = bldg.improvement_value * 1.0 / 0.7 / bldg.non_residential_sqft
    bldg['sqft_price_nonres'] = _zero_outside_0_to_1000(nonres_price)

    res_sqft = bldg.sqft_per_unit.astype(int) * bldg.residential_units
    res_price = bldg.improvement_value * 1.25 / 0.7 / res_sqft
    bldg['sqft_price_res'] = _zero_outside_0_to_1000(res_price)

    bldg.fillna(0, inplace=True)
    orca.add_injectable("max_building_id", 10000000)

    bldg['hu_filter'] = 0
    high_share_cities = (551, 1155, 1100, 3130, 6020, 6040)
    with_units = bldg[bldg.residential_units > 0]
    occupied_ids = store['households'].building_id
    candidates = with_units[~(with_units.index.isin(occupied_ids))]
    for city_id in candidates.b_city_id.unique():
        share = 0.9 if city_id in high_share_cities else 0.5
        in_city = candidates[candidates.b_city_id == city_id]
        chosen = in_city.sample(frac=share, replace=False)
        bldg.loc[chosen.index.values, 'hu_filter'] = 1
    return bldg
# Only buildings whose large_area_id is in this set may hold households or jobs.
# NOTE(review): presumably the large areas of the modelled region -- confirm.
_ELIGIBLE_LARGE_AREA_IDS = frozenset({161.0, 3.0, 5.0, 125.0, 99.0, 115.0, 147.0, 93.0})


def _assign_eligible_buildings(df, buildings):
    """Point every row of ``df`` at a building in an eligible large area.

    Shared by the ``households`` and ``jobs`` tables, which need identical
    treatment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``building_id`` column; it is modified in place.
    buildings : orca table wrapper
        Must provide ``to_frame(['large_area_id'])`` indexed by building id.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``building_id`` repaired, a ``large_area_id``
        column looked up from the assigned building, and NaN replaced by 0.
    """
    b = buildings.to_frame(['large_area_id'])
    b = b[b.large_area_id.isin(_ELIGIBLE_LARGE_AREA_IDS)]
    eligible_ids = b.index.values

    # Two separate draws, in this order and with these sizes, so the sequence
    # of random numbers consumed stays the same as before the refactor.
    # Rows with building_id == -1 get a random eligible building first ...
    unplaced = df.building_id == -1
    df.loc[unplaced, 'building_id'] = np.random.choice(eligible_ids, unplaced.sum())

    # ... then any row still pointing outside the eligible set is redrawn.
    invalid = ~df.building_id.isin(eligible_ids)
    df.loc[invalid, 'building_id'] = np.random.choice(eligible_ids, invalid.sum())

    df['large_area_id'] = misc.reindex(b.large_area_id, df.building_id)
    return df.fillna(0)


@orca.table(cache=True)
def households(store, buildings):
    """Return ``store['households']`` with every row placed in an eligible building."""
    return _assign_eligible_buildings(store['households'], buildings)


@orca.table(cache=True)
def jobs(store, buildings):
    """Return ``store['jobs']`` with every row placed in an eligible building."""
    return _assign_eligible_buildings(store['jobs'], buildings)
@orca.table(cache=True)
def base_job_space(buildings):
    """Return ``buildings.jobs_non_home_based`` as a one-column frame named ``base_job_space``."""
    job_space = buildings.jobs_non_home_based
    return job_space.to_frame("base_job_space")
# Placeholder node tables: registered empty here as dummies that last
# until accessibility runs.
for node_tbl in ('nodes', 'nodes_walk', 'nodes_drv'):
    empty_df = pd.DataFrame()  # a fresh frame for each table
    orca.add_table(node_tbl, empty_df)
# this specifies the relationships between tables
# Each entry is (cast table, onto table, onto_on column); every one of these
# is broadcast with cast_index=True.
for cast, onto, onto_col in [
    ('nodes_walk', 'buildings', 'nodeid_walk'),
    ('nodes_walk', 'parcels', 'nodeid_walk'),
    ('nodes_drv', 'buildings', 'nodeid_drv'),
    ('nodes_drv', 'parcels', 'nodeid_drv'),
    ('parcels', 'buildings', 'parcel_id'),
    ('buildings', 'households', 'building_id'),
    ('buildings', 'jobs', 'building_id'),
    ('households', 'persons', 'household_id'),
    ('building_types', 'buildings', 'building_type_id'),
    ('zones', 'parcels', 'zone_id'),
]:
    orca.broadcast(cast, onto, cast_index=True, onto_on=onto_col)

# schools is the one exception: it uses cast_on/onto_index instead
# (the schools parcel_id column and onto_index=True for parcels).
orca.broadcast('schools', 'parcels', cast_on='parcel_id', onto_index=True)