Skip to content

Commit

Permalink
Merge pull request #3 from lennonay/pre_processing
Browse files Browse the repository at this point in the history
added pre processing src
  • Loading branch information
lennonay authored Feb 7, 2023
2 parents 428b56d + 46179f4 commit 42c370f
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions pre_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pandas as pd

def stats_process(master):
    """Derive per-row player stats and return them in a fixed column layout.

    The input frame is not modified; all work happens on a copy.

    Steps performed:
      * any column from ``stat_list`` that is absent is created and set to 0;
      * ``EV_GF%`` is computed as ``EV_GF / (EV_GA + EV_GF)``, with undefined
        results (e.g. 0/0) replaced by 0.50;
      * ``name`` is built as ``first_name + ' ' + last_name``;
      * ``EVprimarypoints`` = ``EV_G + EV_A1``;
      * ``primarypoints`` = ``PP_G + PP_A1 + EVprimarypoints + SH_G + SH_A1``;
      * the frame is reduced to ``new_col_list`` (in that order), remaining
        missing values are replaced by 0, ``birthdate_year`` is cast to int
        and ``birthdate`` is converted to ``datetime.date`` objects.

    Parameters
    ----------
    master : pandas.DataFrame
        Must contain ``first_name``, ``last_name`` and every column in
        ``new_col_list`` that is not derived here or listed in ``stat_list``.
        ``birthdate`` must be a datetime-like column (it is accessed through
        the ``.dt`` accessor).

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the columns of ``new_col_list``.

    Raises
    ------
    KeyError
        If a required, non-defaultable column is missing from ``master``.
    """
    new_col_list = [
        'GAME_ID', 'game_number', 'H_A', 'team_name', 'player_id',
        'person_id', 'name', 'birthdate', 'birthdate_year',
        'jersey_number', 'position_str', 'shots', 'shots_on', 'goals',
        'assists', 'EVprimarypoints', 'primarypoints', 'plusminus', 'hits',
        'pim', 'EV_G', 'EV_A1', 'EV_A2', 'PP_G', 'PP_A1', 'PP_A2',
        'SH_G', 'SH_A1', 'SH_A2',
        'EV_GF', 'EV_GA', 'EV_GF%', 'faceoff_wins', 'faceoff_attempts',
    ]

    stat_list = [
        'EV_G', 'EV_A1', 'EV_A2', 'PP_G', 'PP_A1', 'PP_A2',
        'SH_G', 'SH_A1', 'SH_A2', 'EV_GF', 'EV_GA',
    ]

    # Work on a copy so the caller's frame does not silently gain columns.
    master = master.copy()

    # Default absent stat columns BEFORE anything reads them; EV_GF% below
    # depends on EV_GF / EV_GA, which may legitimately be missing.
    for column in stat_list:
        if column not in master:
            master[column] = 0

    # Share of goals-for among all goals; 0/0 yields NaN, treated as 0.50.
    master['EV_GF%'] = master['EV_GF'] / (master['EV_GA'] + master['EV_GF'])
    master['EV_GF%'] = master['EV_GF%'].fillna(0.50)

    master['name'] = master['first_name'] + ' ' + master['last_name']

    master['EVprimarypoints'] = master['EV_G'] + master['EV_A1']

    # primary points all situations
    master['primarypoints'] = (
        master['PP_G'] + master['PP_A1'] + master['EVprimarypoints']
        + master['SH_G'] + master['SH_A1']
    )

    # Fill gaps with 0 everywhere except 'birthdate': putting an integer into
    # a datetime column would turn it into object dtype and break `.dt` below.
    # A missing birthdate therefore stays missing (NaT) in the output.
    fill_values = {col: 0 for col in new_col_list if col != 'birthdate'}
    master = master[new_col_list].fillna(value=fill_values)

    master['birthdate_year'] = master['birthdate_year'].astype('int')
    master['birthdate'] = master['birthdate'].dt.date

    return master

0 comments on commit 42c370f

Please sign in to comment.