patrickramos committed on
Commit
f89cae0
·
1 Parent(s): 65fefb5

Add location stats

Browse files
Files changed (3) hide show
  1. data.py +1 -0
  2. pitch_leaderboard.py +4 -4
  3. stats.py +5 -43
data.py CHANGED
@@ -221,6 +221,7 @@ data_df = (
221
  pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
222
  pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
223
  pl.col('batLR').replace_strict(lr),
 
224
  pl.col('date').str.to_date('%Y%m%d'),
225
 
226
  pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
 
221
  pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
222
  pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
223
  pl.col('batLR').replace_strict(lr),
224
+ pl.col('pitLR').replace_strict(lr),
225
  pl.col('date').str.to_date('%Y%m%d'),
226
 
227
  pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
pitch_leaderboard.py CHANGED
@@ -8,9 +8,9 @@ from data import data_df
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
- STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
12
- PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
13
- STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
14
 
15
  todo = '''
16
  **To-do**
@@ -56,7 +56,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
56
  pl.col(stat).mul(100).round(1)
57
  for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
58
  )
59
- [['Pitcher', 'Pitch', 'Pitch (General)', 'Count', 'Usage'] + STATS_WITH_PCTLS]
60
  )
61
  return pitch_stats
62
 
 
8
  from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
9
  from convert import ball_kind
10
 
11
+ STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
12
+ PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
13
+ STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
14
 
15
  todo = '''
16
  **To-do**
 
56
  pl.col(stat).mul(100).round(1)
57
  for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
58
  )
59
+ [['Pitcher', 'Pitch', 'Pitch (General)'] + STATS]
60
  )
61
  return pitch_stats
62
 
stats.py CHANGED
@@ -52,48 +52,6 @@ def compute_team_games(data):
52
  )
53
 
54
 
55
- # def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
56
- # assert player_type in ('pitcher', 'batter')
57
- # assert pitch_class_type in ('general', 'specific')
58
- # id_col = 'pitId' if player_type == 'pitcher' else 'batId'
59
- # pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
60
- # pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
61
- # pitch_stats = (
62
- # data
63
- # .group_by(id_col, pitch_col)
64
- # .agg(
65
- # pl.first('pitcher_name'),
66
- # *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
67
- # pl.first(pitch_name_col),
68
- # pl.len().alias('count'),
69
- # pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
70
- # (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
71
- # (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
72
- # (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
73
- # )
74
- # .with_columns(
75
- # (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
76
- # (pl.col('count') >= min_pitches).alias('qualified')
77
- # )
78
- # .explode('batType')
79
- # .unnest('batType')
80
- # .pivot(on='batType', values='proportion')
81
- # .fill_null(0)
82
- # .with_columns(
83
- # (pl.col('G') + pl.col('B')).alias('GB%'),
84
- # (pl.col('F') + pl.col('P')).alias('FB%'),
85
- # pl.col('L').alias('LD%').round(2),
86
- # )
87
- # .drop('G', 'F', 'B', 'P', 'L', 'null')
88
- # .with_columns(
89
- # (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
90
- # for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
91
- # )
92
- # .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
93
- # .sort(id_col, 'count', descending=[False, True])
94
- # )
95
- # return pitch_stats
96
-
97
  def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
98
  assert player_type in ('pitcher', 'batter')
99
  assert pitch_class_type in ('general', 'specific')
@@ -109,7 +67,6 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
109
  pl.first(pitch_name_col),
110
  pl.len().alias('count'),
111
  pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
112
- (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
113
  (pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
114
  ((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
115
  ((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
@@ -119,6 +76,11 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
119
  (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
120
  (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
121
  (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
 
 
 
 
 
122
  )
123
  .with_columns(
124
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
 
52
  )
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
56
  assert player_type in ('pitcher', 'batter')
57
  assert pitch_class_type in ('general', 'specific')
 
67
  pl.first(pitch_name_col),
68
  pl.len().alias('count'),
69
  pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
 
70
  (pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
71
  ((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
72
  ((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
 
76
  (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
77
  (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
78
  (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
79
+ (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
80
+ (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
81
+ (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
82
+ (pl.col('y') > 125).mean().alias('High%'),
83
+ (pl.col('y') <= 125).mean().alias('Low%')
84
  )
85
  .with_columns(
86
  (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),