From f9ce0a4d68c41fe1bfa33da734a1f56665cc8a11 Mon Sep 17 00:00:00 2001
From: Arash <39972246+arashasadabadi@users.noreply.github.com>
Date: Thu, 25 Jul 2024 11:19:18 -0700
Subject: [PATCH] Move TAZ-to-district crosswalk to Canonical and update PeMS station id

1- Define taz_to_district_df in the canonical script rather than in the simulated script, so that it can also be used in the observed script.
2- Update the station field (station_id = station + "_" + direction) to be consistent with the new pems_station file.
---
 tm2py/acceptance/canonical.py | 47 ++++++++++++++++++++++++++++++++---
 tm2py/acceptance/observed.py  |  4 +--
 tm2py/acceptance/simulated.py | 40 +++--------------------------
 3 files changed, 50 insertions(+), 41 deletions(-)

diff --git a/tm2py/acceptance/canonical.py b/tm2py/acceptance/canonical.py
index 8c5509e3..7dc6decd 100644
--- a/tm2py/acceptance/canonical.py
+++ b/tm2py/acceptance/canonical.py
@@ -8,6 +8,8 @@
 class Canonical:
     canonical_dict: dict
     canonical_file: str
+    scenario_dict: dict
+    scenario_file: str
 
     census_2010_to_maz_crosswalk_df: pd.DataFrame
 
@@ -19,6 +21,7 @@ class Canonical:
 
     standard_to_emme_node_crosswalk_df: pd.DataFrame
     pems_to_link_crosswalk_df: pd.DataFrame
+    taz_to_district_df: pd.DataFrame
 
     ALL_DAY_WORD = "daily"
     WALK_ACCESS_WORD = "Walk"
@@ -64,18 +67,25 @@ def _load_configs(self):
         with open(self.canonical_file, "r", encoding="utf-8") as toml_file:
             self.canonical_dict = toml.load(toml_file)
 
+        with open(self.scenario_file, "r", encoding="utf-8") as toml_file:
+            self.scenario_dict = toml.load(toml_file)
+        
+
+
         return
 
     def __init__(
-        self, canonical_file: str, on_board_assign_summary: bool = False
+        self, canonical_file: str, scenario_file: str = None, on_board_assign_summary: bool = False
     ) -> None:
         self.canonical_file = canonical_file
+        self.scenario_file = scenario_file
         self._load_configs()
         self._make_canonical_agency_names_dict()
         self._make_canonical_station_names_dict()
         self._read_standard_to_emme_transit()
         self._make_tm2_to_gtfs_mode_crosswalk()
         self._read_standard_transit_to_survey_crosswalk()
+        self._make_simulated_maz_data()
 
         if not on_board_assign_summary:
             self._make_census_maz_crosswalk()
@@ -84,6 +94,37 @@ def __init__(
 
         return
 
+    def _make_simulated_maz_data(self):
+        """Build ``simulated_maz_data_df`` from the scenario's MAZ land use file.
+
+        Reads the CSV named by ``scenario_dict["scenario"]["maz_landuse_file"]``,
+        left-joins the ``MAZSEQ`` column from the zone sequence CSV on
+        ``MAZ_ORIGINAL`` (the sequence file's ``N`` column), and then derives
+        the TAZ-to-district crosswalk from the merged table.
+        """
+        landuse_path = self.scenario_dict["scenario"]["maz_landuse_file"]
+        landuse_df = pd.read_csv(landuse_path)
+
+        # The sequence file's N column is treated as the original MAZ id.
+        seq_path = os.path.join("inputs", "landuse", "mtc_final_network_zone_seq.csv")
+        seq_df = pd.read_csv(seq_path)
+        seq_df = seq_df.rename(columns={"N": "MAZ_ORIGINAL"})
+        maz_seq_df = seq_df[["MAZ_ORIGINAL", "MAZSEQ"]].copy()
+
+        self.simulated_maz_data_df = landuse_df.merge(
+            maz_seq_df, how="left", on="MAZ_ORIGINAL"
+        )
+
+        self._make_taz_district_crosswalk()
+        
+    def _make_taz_district_crosswalk(self):
+        """Set ``taz_to_district_df``: unique (taz, district) pairs from the MAZ data."""
+        column_names = {"TAZ_ORIGINAL": "taz", "DistID": "district"}
+        pairs_df = self.simulated_maz_data_df[list(column_names)].rename(
+            columns=column_names
+        )
+        self.taz_to_district_df = pairs_df.drop_duplicates().reset_index(drop=True)
+        
     def _make_canonical_agency_names_dict(self):
         file_root = self.canonical_dict["remote_io"]["crosswalk_folder_root"]
         in_file = self.canonical_dict["crosswalks"]["canonical_agency_names_file"]
@@ -243,8 +284,8 @@ def _read_pems_to_link_crosswalk(self) -> pd.DataFrame:
         in_file = self.canonical_dict["crosswalks"]["pems_station_to_tm2_links_file"]
 
         df = pd.read_csv(os.path.join(file_root, in_file))
-
-        df = df[["station", "A", "B"]]
+        df["station_id"] = df["station"].astype(str) + "_" + df["direction"]
+        df = df[["station_id", "A", "B"]]
 
         self.pems_to_link_crosswalk_df = df
 
diff --git a/tm2py/acceptance/observed.py b/tm2py/acceptance/observed.py
index 3ac37e98..9e7a11c2 100644
--- a/tm2py/acceptance/observed.py
+++ b/tm2py/acceptance/observed.py
@@ -519,8 +519,8 @@ def _make_district_to_district_transit_flows_by_technology(self):
         o_df = self.reduced_transit_spatial_flow_df.copy()
         o_df = o_df[o_df["time_period"] == "am"].copy()
 
-        tm2_district_dict = self.c.taz_to_district_df.set_index("taz_tm2")[
-            "district_tm2"
+        tm2_district_dict = self.c.taz_to_district_df.set_index("taz")[
+            "district"
         ].to_dict()
         o_df["orig_district"] = o_df["orig_taz"].map(tm2_district_dict)
         o_df["dest_district"] = o_df["dest_taz"].map(tm2_district_dict)
diff --git a/tm2py/acceptance/simulated.py b/tm2py/acceptance/simulated.py
index 69074946..c2169317 100644
--- a/tm2py/acceptance/simulated.py
+++ b/tm2py/acceptance/simulated.py
@@ -31,7 +31,6 @@ class Simulated:
     transit_access_mode_dict = {}
     transit_mode_dict = {}
 
-    taz_to_district_df: pd.DataFrame
 
     simulated_boardings_df: pd.DataFrame
     simulated_home_work_flows_df: pd.DataFrame
@@ -136,7 +135,6 @@ def reduce_on_board_assignment_boardings(self, time_period_list: list = ["am"]):
 
     def _validate(self):
         self._make_transit_mode_dict()
-        self._make_simulated_maz_data()
         self._read_standard_transit_stops()
         self._read_standard_transit_shapes()
         self._read_standard_transit_routes()
@@ -356,7 +354,7 @@ def _reduce_simulated_home_work_flows(self):
         b_df = (
             pd.merge(
                 df[["HHID", "HomeMGRA", "WorkLocation"]].copy(),
-                self.simulated_maz_data_df[["MAZSEQ", "CountyName"]].copy(),
+                self.c.simulated_maz_data_df[["MAZSEQ", "CountyName"]].copy(),
                 how="left",
                 left_on="HomeMGRA",
                 right_on="MAZSEQ",
@@ -368,7 +366,7 @@ def _reduce_simulated_home_work_flows(self):
         c_df = (
             pd.merge(
                 b_df,
-                self.simulated_maz_data_df[["MAZSEQ", "CountyName"]].copy(),
+                self.c.simulated_maz_data_df[["MAZSEQ", "CountyName"]].copy(),
                 how="left",
                 left_on="WorkLocation",
                 right_on="MAZSEQ",
@@ -388,37 +386,7 @@ def _reduce_simulated_home_work_flows(self):
 
         return
 
-    def _make_simulated_maz_data(self):
-        root_dir = self.scenario_dict["scenario"]["root_dir"]
-        in_file = self.scenario_dict["scenario"]["maz_landuse_file"]
-
-        df = pd.read_csv(os.path.join(root_dir, in_file))
-
-        index_file = os.path.join("inputs", "landuse", "mtc_final_network_zone_seq.csv")
-
-        index_df = pd.read_csv(index_file)
-        join_df = index_df.rename(columns={"N": "MAZ_ORIGINAL"})[
-            ["MAZ_ORIGINAL", "MAZSEQ"]
-        ].copy()
-
-        self.simulated_maz_data_df = pd.merge(
-            df,
-            join_df,
-            how="left",
-            on="MAZ_ORIGINAL",
-        )
-
-        self._make_taz_district_crosswalk()
-
-        return
     
-    def _make_taz_district_crosswalk(self):
-
-        df = self.simulated_maz_data_df[["TAZ_ORIGINAL", "DistID"]].copy()
-        df = df.rename(columns={"TAZ_ORIGINAL": "taz", "DistID": "district"})
-        self.taz_to_district_df = df.drop_duplicates().reset_index(drop=True)
-
-        return
 
     def _reduce_simulated_rail_access_summaries(self):
         if not self.transit_mode_dict:
@@ -726,7 +694,7 @@ def _reduce_simulated_zero_vehicle_households(self):
         a_df = (
             pd.merge(
                 self.simulated_zero_vehicle_hhs_df,
-                self.simulated_maz_data_df[["MAZ_ORIGINAL", "MAZSEQ"]],
+                self.c.simulated_maz_data_df[["MAZ_ORIGINAL", "MAZSEQ"]],
                 left_on="maz",
                 right_on="MAZSEQ",
                 how="left",
@@ -1075,7 +1043,7 @@ def _make_dataframe_from_omx(self, input_mtx: omx, core_name: str):
         return df
 
     def _make_district_to_district_transit_summaries(self):
-        taz_district_dict = self.taz_to_district_df.set_index("taz")[
+        taz_district_dict = self.c.taz_to_district_df.set_index("taz")[
             "district"
         ].to_dict()