Skip to content

Commit

Permalink
Merge pull request #172 from SANDAG/synth_estab_mgra_recode
Browse files Browse the repository at this point in the history
Improved Recoding of Synthetic Establishment MGRAs
  • Loading branch information
bhargavasana authored Jul 19, 2024
2 parents dc97c87 + e721514 commit b9f6de9
Showing 1 changed file with 35 additions and 6 deletions.
41 changes: 35 additions & 6 deletions src/main/python/recodeSynthEstabToMGRA.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,49 @@
import pandas as pd
import numpy as np
import sys
from shutil import copy

# Command-line arguments: path to the synthetic establishments CSV (rewritten
# in place at the end of the script) and path to the land use CSV.
if len(sys.argv) < 3:
    sys.exit("Usage: recodeSynthEstabToMGRA.py <establishments_csv> <land_use_csv>")
est_file = sys.argv[1]
lu_file = sys.argv[2]

print("Creating backup of synthetic establishments file")
# Create backup file just in case something goes wrong.
# Only the trailing extension is rewritten: a plain str.replace would also
# alter any ".csv" appearing earlier in the path, and would leave the name
# unchanged (so the copy would target the source file itself) when the
# extension is spelled differently, e.g. ".CSV".
if est_file.lower().endswith(".csv"):
    backup_file = est_file[:-4] + "_backup" + est_file[-4:]
else:
    backup_file = est_file + "_backup"
copy(est_file, backup_file)

print("Reading Data")
# Read in input files
est = pd.read_csv(est_file)
lu = pd.read_csv(lu_file)

print("Recoding MGRAs")
# The MGRA column of the establishments file holds TAZ IDs at this point.
# It must not be remapped to the first MGRA of each TAZ here: doing so would
# overwrite the TAZ IDs that the employment-share lookup below is keyed on.

# TAZ IDs that actually occur in the establishments file
est_tazs = est["MGRA"].value_counts().index

# Create dictionary mapping TAZ to the share of the TAZ's employment by its MGRA.
# Each value is a Series indexed by MGRA ID holding that MGRA's share of the
# TAZ's total employment. A single groupby pass replaces one full scan of the
# land use table per TAZ.
lu_for_est = lu[lu["taz"].isin(est_tazs)]
taz_map = {}
for taz, taz_lu in lu_for_est.groupby("taz", sort=False):
    taz_emp = taz_lu["emp_total"]
    taz_map[taz] = pd.Series(
        (taz_emp / taz_emp.sum()).values,
        index=taz_lu["mgra"].values,
    )

# Fail early and clearly if an establishment refers to a TAZ that has no land
# use records, instead of failing later inside the random draw.
missing_tazs = [taz for taz in est_tazs if taz not in taz_map]
if missing_tazs:
    raise ValueError(
        "No land use records found for TAZ(s): {}".format(missing_tazs)
    )

# NOTE(review): this message is printed before the MGRA selection step; the
# file itself is only written at the end of the script.
print("Writing Data")
def select_mgra(taz, shares_by_taz=None):
    """
    Randomly selects an MGRA belonging to the input TAZ, weighted by each MGRA's share of the TAZ's employment.

    Shares that are not finite (e.g. NaN from a TAZ whose total employment is
    zero) are treated as zero weight. If no positive weight remains, the MGRA
    is drawn uniformly from the TAZ's MGRAs instead of raising an error.

    Parameters
    ----------
    taz (int):
        TAZ ID
    shares_by_taz (dict, optional):
        Mapping of TAZ ID to a pandas Series of employment shares indexed by
        MGRA ID. Defaults to the module-level ``taz_map``.

    Returns
    -------
    mgra (int):
        Selected MGRA ID

    Raises
    ------
    KeyError
        If the TAZ is not present in the lookup.
    ValueError
        If the TAZ has no MGRAs to choose from.
    """
    if shares_by_taz is None:
        # Reading a module-level name needs no ``global`` declaration.
        shares_by_taz = taz_map

    shares = shares_by_taz[taz]
    probabilities = np.asarray(shares, dtype=float)

    # np.random.choice rejects NaN probabilities, so repair them first. The
    # probabilities are left untouched when they are all finite.
    finite = np.isfinite(probabilities)
    if not finite.all():
        probabilities = np.where(finite, probabilities, 0.0)
        total = probabilities.sum()
        # p=None makes np.random.choice draw uniformly.
        probabilities = probabilities / total if total > 0 else None

    return np.random.choice(shares.index, p=probabilities)

# The input file stores each establishment's TAZ ID in its MGRA column;
# draw an actual MGRA for every establishment and store it in that column.
selected_mgras = est["MGRA"].apply(select_mgra)
est["MGRA"] = selected_mgras

# Overwrite the establishments file with the recoded table (no index column)
est.to_csv(est_file, index=False)

0 comments on commit b9f6de9

Please sign in to comment.