From c9886313b5a6a895677b2e5a414ede769678d69e Mon Sep 17 00:00:00 2001 From: RGAlexander216 <20406603+RGAlexander216@users.noreply.github.com> Date: Sun, 15 Sep 2024 20:24:34 -0500 Subject: [PATCH] Fix pbp and pbp participation join issue (#79) Resolves an issue where the play_by_play data has incorrect old_game_id values, which broke the join with the participation data. For example, all old_game_id values in play_by_play_2023 for week 15 start with "2022", but they begin with "2023" in the pbp_participation_2023 data. The merge now joins the play-by-play game_id to the participation nflverse_game_id (together with play_id) instead of joining on old_game_id. --- nfl_data_py/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nfl_data_py/__init__.py b/nfl_data_py/__init__.py index c6a2ecc..bdacfba 100644 --- a/nfl_data_py/__init__.py +++ b/nfl_data_py/__init__.py @@ -146,7 +146,10 @@ def import_pbp_data( if all([include_participation, year >= 2016, not cache]): path = r'https://github.com/nflverse/nflverse-data/releases/download/pbp_participation/pbp_participation_{}.parquet'.format(year) partic = pandas.read_parquet(path) - raw = raw.merge(partic, how='left', on=['play_id','old_game_id']) + raw = raw.merge(partic, + how='left', + left_on=['play_id','game_id'], + right_on=['play_id','nflverse_game_id']) pbp_data.append(raw) print(str(year) + ' done.')