Spaces:
Running
Running
Commit
·
f89cae0
1
Parent(s):
65fefb5
Add location stats
Browse files
- data.py +1 -0
- pitch_leaderboard.py +4 -4
- stats.py +5 -43
data.py
CHANGED
@@ -221,6 +221,7 @@ data_df = (
|
|
221 |
pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
|
222 |
pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
|
223 |
pl.col('batLR').replace_strict(lr),
|
|
|
224 |
pl.col('date').str.to_date('%Y%m%d'),
|
225 |
|
226 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
|
|
221 |
pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
|
222 |
pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
|
223 |
pl.col('batLR').replace_strict(lr),
|
224 |
+
pl.col('pitLR').replace_strict(lr),
|
225 |
pl.col('date').str.to_date('%Y%m%d'),
|
226 |
|
227 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
pitch_leaderboard.py
CHANGED
@@ -8,9 +8,9 @@ from data import data_df
|
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
-
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
12 |
-
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
13 |
-
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
14 |
|
15 |
todo = '''
|
16 |
**To-do**
|
@@ -56,7 +56,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
|
|
56 |
pl.col(stat).mul(100).round(1)
|
57 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
58 |
)
|
59 |
-
[['Pitcher', 'Pitch', 'Pitch (General)'
|
60 |
)
|
61 |
return pitch_stats
|
62 |
|
|
|
8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
9 |
from convert import ball_kind
|
10 |
|
11 |
+
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
12 |
+
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
13 |
+
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
14 |
|
15 |
todo = '''
|
16 |
**To-do**
|
|
|
56 |
pl.col(stat).mul(100).round(1)
|
57 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
58 |
)
|
59 |
+
[['Pitcher', 'Pitch', 'Pitch (General)'] + STATS]
|
60 |
)
|
61 |
return pitch_stats
|
62 |
|
stats.py
CHANGED
@@ -52,48 +52,6 @@ def compute_team_games(data):
|
|
52 |
)
|
53 |
|
54 |
|
55 |
-
# def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
56 |
-
# assert player_type in ('pitcher', 'batter')
|
57 |
-
# assert pitch_class_type in ('general', 'specific')
|
58 |
-
# id_col = 'pitId' if player_type == 'pitcher' else 'batId'
|
59 |
-
# pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
60 |
-
# pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
61 |
-
# pitch_stats = (
|
62 |
-
# data
|
63 |
-
# .group_by(id_col, pitch_col)
|
64 |
-
# .agg(
|
65 |
-
# pl.first('pitcher_name'),
|
66 |
-
# *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
67 |
-
# pl.first(pitch_name_col),
|
68 |
-
# pl.len().alias('count'),
|
69 |
-
# pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
70 |
-
# (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
71 |
-
# (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
72 |
-
# (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
73 |
-
# )
|
74 |
-
# .with_columns(
|
75 |
-
# (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
76 |
-
# (pl.col('count') >= min_pitches).alias('qualified')
|
77 |
-
# )
|
78 |
-
# .explode('batType')
|
79 |
-
# .unnest('batType')
|
80 |
-
# .pivot(on='batType', values='proportion')
|
81 |
-
# .fill_null(0)
|
82 |
-
# .with_columns(
|
83 |
-
# (pl.col('G') + pl.col('B')).alias('GB%'),
|
84 |
-
# (pl.col('F') + pl.col('P')).alias('FB%'),
|
85 |
-
# pl.col('L').alias('LD%').round(2),
|
86 |
-
# )
|
87 |
-
# .drop('G', 'F', 'B', 'P', 'L', 'null')
|
88 |
-
# .with_columns(
|
89 |
-
# (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
90 |
-
# for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
91 |
-
# )
|
92 |
-
# .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
93 |
-
# .sort(id_col, 'count', descending=[False, True])
|
94 |
-
# )
|
95 |
-
# return pitch_stats
|
96 |
-
|
97 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
98 |
assert player_type in ('pitcher', 'batter')
|
99 |
assert pitch_class_type in ('general', 'specific')
|
@@ -109,7 +67,6 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
109 |
pl.first(pitch_name_col),
|
110 |
pl.len().alias('count'),
|
111 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
112 |
-
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
113 |
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
114 |
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
115 |
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
@@ -119,6 +76,11 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
119 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
120 |
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
121 |
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
|
|
|
|
|
|
|
|
|
|
122 |
)
|
123 |
.with_columns(
|
124 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
52 |
)
|
53 |
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
56 |
assert player_type in ('pitcher', 'batter')
|
57 |
assert pitch_class_type in ('general', 'specific')
|
|
|
67 |
pl.first(pitch_name_col),
|
68 |
pl.len().alias('count'),
|
69 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
|
|
70 |
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
71 |
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
72 |
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
|
|
76 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
77 |
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
78 |
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
79 |
+
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
80 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
|
81 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
|
82 |
+
(pl.col('y') > 125).mean().alias('High%'),
|
83 |
+
(pl.col('y') <= 125).mean().alias('Low%')
|
84 |
)
|
85 |
.with_columns(
|
86 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|