-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #172 from SANDAG/synth_estab_mgra_recode
Improved Recoding of Synthetic Establishment MGRAs
- Loading branch information
Showing
1 changed file
with
35 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,49 @@ | ||
import pandas as pd | ||
import numpy as np | ||
import sys | ||
from shutil import copy | ||
|
||
est_file = sys.argv[1] | ||
lu_file = sys.argv[2] | ||
|
||
print("Creating backup of synthetic establishments file") | ||
# Create backup file just in case something goes wrong | ||
copy(est_file, est_file.replace(".csv", "_backup.csv")) | ||
|
||
print("Reading Data") | ||
# Read in input files | ||
est = pd.read_csv(est_file) | ||
lu = pd.read_csv(lu_file) | ||
|
||
print("Recoding MGRAs") | ||
taz2mgra = lu.groupby("taz").first()["mgra"] | ||
est["MGRA"] = est["MGRA"].map(taz2mgra) | ||
# Create dictionary mapping each TAZ to the share of that TAZ's employment located in each of its MGRAs | ||
taz_map = {} | ||
for taz in est["MGRA"].value_counts().index: | ||
taz_lu = lu.query("taz == @taz") | ||
taz_map[taz] = pd.Series( | ||
(taz_lu["emp_total"] / taz_lu["emp_total"].sum()).values, | ||
index = taz_lu["mgra"].values | ||
) | ||
|
||
print("Writing Data") | ||
def select_mgra(taz):
    """
    Randomly select one of a TAZ's MGRAs, weighted by employment share.

    Looks up the module-level ``taz_map`` entry for ``taz`` (a pandas Series
    indexed by MGRA ID whose values are the selection probabilities) and
    draws a single MGRA from it with ``np.random.choice``.

    Parameters
    ----------
    taz (int):
        TAZ ID

    Returns
    -------
    mgra (int):
        Selected MGRA ID

    Raises
    ------
    KeyError
        If ``taz`` has no entry in ``taz_map``.
    ValueError
        If the TAZ has no candidate MGRAs to choose from.
    """
    # Reading a module-level name needs no ``global`` declaration.
    shares = taz_map[taz]
    mgras = np.asarray(shares.index)
    if mgras.size == 0:
        raise ValueError(f"TAZ {taz} has no MGRAs to choose from")

    probs = np.asarray(shares, dtype=float)
    # When a TAZ's total employment is zero the shares are 0/0 = NaN, which
    # np.random.choice rejects. Fall back to a uniform draw over the TAZ's
    # MGRAs so the establishment still lands inside its TAZ.
    if not np.isfinite(probs).all():
        return np.random.choice(mgras)

    return np.random.choice(mgras, p=probs)
|
||
# Select MGRA for each establishment (the MGRA field actually has the TAZ ID in the input file) | ||
est["MGRA"] = est["MGRA"].apply(select_mgra) | ||
|
||
# Write output | ||
est.to_csv(est_file, index = False) |