Skip to content

Commit 333fb04

Browse files
committed
feat(Statcast): Implement WBC search functions
1 parent b9d6ac9 commit 333fb04

9 files changed

Lines changed: 412 additions & 77 deletions

File tree

docs/wbc_statcast_search.md

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# WBC Statcast Search
2+
3+
## `wbc_statcast_search`
4+
5+
Function to search for WBC Statcast pitch-level data with custom filters based on Baseball Savant's [WBC Statcast Search](https://baseballsavant.mlb.com/statcast-search-world-baseball-classic).
6+
7+
**Note:** If the search range is too wide, the request may take a very long time to respond.
8+
9+
**WBC data availability**
10+
11+
> From Baseball Savant:
12+
> World Baseball Classic pitch-level Statcast data is available beginning with the 2023 tournament. Bat tracking data will additionally be available beginning with the 2026 tournament.
13+
14+
**Examples**
15+
16+
```python
17+
from baseball_stats_python import wbc_statcast_search
18+
19+
# Get all pitch data in 2023 WBC
20+
wbc_statcast_search(
21+
season="2023"
22+
)
23+
24+
# Get all pitch data in 2026 Pool Play
25+
wbc_statcast_search(
26+
game_type="F"
27+
)
28+
```
29+
30+
**Arguments**
31+
32+
| Argument | Data Type | Description | Default |
33+
| --------------- | -------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- |
34+
| season | `str` or `list[str]` | The season(s) to search for. | Current season |
35+
| player_type | `str` | Player type for search result. Currently only supports `pitcher` and `batter`. | "pitcher" |
36+
| game_type | `str` or `WbcGameType` or `list[str or WbcGameType]` | Game type (`F`, `CL`, `CD`, `CW`). Also supports `all` to select all options. See the enum [WbcGameType](../enums/wbc.py) for the available values. | `R` |
37+
| pitchers_lookup | `str` or `list[str]` | MLBAM ID(s) of the pitcher(s) to search for. MLBAM IDs can be found on Savant's WBC Gameday pages. | "" |
38+
| batters_lookup | `str` or `list[str]` | MLBAM ID(s) of the batter(s) to search for. MLBAM IDs can be found on Savant's WBC Gameday pages. | "" |
39+
| debug | `bool` | Whether to print debug information | False |
40+
41+
**Use Enums**
42+
43+
```python
44+
from baseball_stats_python.enums.wbc import WbcGameType
45+
46+
# Get Semi-Finals data
47+
wbc_statcast_search(
48+
game_type=WbcGameType.SEMI_FINALS
49+
)
50+
51+
```
52+
53+
**Return**
54+
55+
A DataFrame whose columns are described in Baseball Savant's [CSV Docs](https://baseballsavant.mlb.com/csv-docs).
56+
57+
## `wbc_statcast_pitcher_search`
58+
59+
Based on `wbc_statcast_search`, but only returns pitcher data.
60+
61+
**Examples**
62+
63+
```python
64+
from baseball_stats_python import wbc_statcast_pitcher_search
65+
66+
# Get all pitch data of a specific pitcher
67+
wbc_statcast_pitcher_search(
68+
pitchers_lookup="830717"
69+
)
70+
```
71+
72+
**Arguments**
73+
74+
Same as `wbc_statcast_search`, but only the `pitchers_lookup` player filter can be used. If `pitchers_lookup` is not provided, an error is raised.
75+
76+
## `wbc_statcast_batter_search`
77+
78+
Based on `wbc_statcast_search`, but only returns pitches that the target batter faced.
79+
80+
**Examples**
81+
82+
```python
83+
from baseball_stats_python import wbc_statcast_batter_search
84+
85+
# Get all pitch data of a specific batter
86+
wbc_statcast_batter_search(
87+
batters_lookup="838360"
88+
)
89+
```
90+
91+
**Arguments**
92+
93+
Same as `wbc_statcast_search`, but only the `batters_lookup` player filter can be used. If `batters_lookup` is not provided, an error is raised.

example.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
minor_statcast_search,
55
mlbam_id_search,
66
statcast_search,
7+
wbc_statcast_search,
78
)
89
from src.baseball_stats_python.enums.minor import MinorGameType
910
from src.baseball_stats_python.enums.statcast import GameType, MlbTeam, Month
1011

1112

1213
def example():
1314
df = statcast_search(
14-
season='2023',
15-
pitchers_lookup='477132',
16-
game_type=[GameType.PLAYOFFS, 'R'],
15+
season="2023",
16+
pitchers_lookup="477132",
17+
game_type=[GameType.PLAYOFFS, "R"],
1718
opponent=MlbTeam.PADRES,
1819
month=Month.JUNE,
1920
)
@@ -22,22 +23,29 @@ def example():
2223

2324
def minor_example():
2425
df = minor_statcast_search(
25-
season='2023', game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup='678906'
26+
season="2023", game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup="678906"
2627
)
2728
print(df)
2829

2930

3031
def mlbam_id_example():
31-
df = mlbam_id_search('Reynolds')
32+
df = mlbam_id_search("Lin")
3233
print(df)
3334

3435

3536
def spring_training_example():
3637
df = statcast_search(
37-
season='2025',
38-
start_dt='2025-02-20',
39-
end_dt='2025-02-20',
40-
game_type='S',
38+
season="2025",
39+
start_dt="2025-02-20",
40+
end_dt="2025-02-20",
41+
game_type="S",
42+
)
43+
print(df)
44+
45+
46+
def wbc_example():
47+
df = wbc_statcast_search(
48+
batters_lookup="838360",
4149
)
4250
print(df)
4351

@@ -46,3 +54,4 @@ def spring_training_example():
4654
# minor_example()
4755
# mlbam_id_example()
4856
# spring_training_example()
57+
# wbc_example()

src/baseball_stats_python/__init__.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,24 @@
1212
statcast_pitcher_search,
1313
statcast_search,
1414
)
15+
from .statcast.wbc_statcast_search import (
16+
wbc_statcast_batter_search,
17+
wbc_statcast_pitcher_search,
18+
wbc_statcast_search,
19+
)
1520

1621
__all__ = [
17-
'statcast_search',
18-
'statcast_pitcher_search',
19-
'statcast_batter_search',
20-
'minor_statcast_search',
21-
'minor_statcast_pitcher_search',
22-
'minor_statcast_batter_search',
23-
'mlbam_id_search',
24-
'catcher_throwing',
25-
'runner_basestealing',
26-
'runner_extra_bases_taken',
22+
"statcast_search",
23+
"statcast_pitcher_search",
24+
"statcast_batter_search",
25+
"minor_statcast_search",
26+
"minor_statcast_pitcher_search",
27+
"minor_statcast_batter_search",
28+
"mlbam_id_search",
29+
"catcher_throwing",
30+
"runner_basestealing",
31+
"runner_extra_bases_taken",
32+
"wbc_statcast_search",
33+
"wbc_statcast_pitcher_search",
34+
"wbc_statcast_batter_search",
2735
]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
DEFAULT_SEASON = 2024
1+
DEFAULT_SEASON = 2026
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from .enum_base import EnumBase
2+
3+
4+
class WbcGameType(EnumBase):
5+
"""
6+
Enum for WBC Game Types.
7+
Currently WBC Statcast Search only supports Pool Play, Semi-Finals, Quarter-Finals, and Championship.
8+
9+
POOL_PLAY = "F",
10+
SEMI_FINALS = "CL",
11+
QUARTER_FINALS = "CD",
12+
CHAMPIONSHIP = "CW"
13+
"""
14+
15+
POOL_PLAY = "F"
16+
SEMI_FINALS = "CL"
17+
QUARTER_FINALS = "CD"
18+
CHAMPIONSHIP = "CW"

src/baseball_stats_python/statcast/statcast_search.py

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,24 @@
1414
from ..utils.utils import validate_date_range
1515

1616
logging.basicConfig()
17-
logger = logging.getLogger('Statcast')
17+
logger = logging.getLogger("Statcast")
1818

1919
session = requests.Session()
2020

21-
STATCAST_SEARCH_URL = 'https://baseballsavant.mlb.com/statcast_search/csv'
21+
STATCAST_SEARCH_URL = "https://baseballsavant.mlb.com/statcast_search/csv"
2222

2323

2424
def statcast_search(
25-
season: str | list[str] = '2024',
26-
player_type: str = 'pitcher',
25+
season: str | list[str] = "2024",
26+
player_type: str = "pitcher",
2727
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
28-
start_dt: str = '',
29-
end_dt: str = '',
30-
month: str | Month | list[str | Month] = '',
31-
pitchers_lookup: str | list[str] = '',
32-
batters_lookup: str | list[str] = '',
33-
team: str | MlbTeam | list[str | MlbTeam] = '',
34-
opponent: str | MlbTeam | list[str | MlbTeam] = '',
28+
start_dt: str = "",
29+
end_dt: str = "",
30+
month: str | Month | list[str | Month] = "",
31+
pitchers_lookup: str | list[str] = "",
32+
batters_lookup: str | list[str] = "",
33+
team: str | MlbTeam | list[str | MlbTeam] = "",
34+
opponent: str | MlbTeam | list[str | MlbTeam] = "",
3535
debug: bool = False,
3636
) -> pd.DataFrame:
3737
"""
@@ -60,49 +60,50 @@ def statcast_search(
6060
validate_date_range(start_dt, end_dt)
6161

6262
params = {
63-
'all': 'true',
64-
'player_type': player_type,
65-
'hfSea': get_season_param_str(season),
66-
'hfGT': get_game_type_param_str(game_type),
67-
'game_date_gt': start_dt,
68-
'game_date_lt': end_dt,
69-
'hfMo': get_month_param_str(month),
70-
'hfTeam': get_team_param_str(team),
71-
'hfOpponent': get_team_param_str(opponent),
72-
'type': 'details',
63+
"all": "true",
64+
"player_type": player_type,
65+
"hfSea": get_season_param_str(season),
66+
"hfGT": get_game_type_param_str(game_type),
67+
"game_date_gt": start_dt,
68+
"game_date_lt": end_dt,
69+
"hfMo": get_month_param_str(month),
70+
"hfTeam": get_team_param_str(team),
71+
"hfOpponent": get_team_param_str(opponent),
72+
"type": "details",
7373
}
7474

7575
if pitchers_lookup:
76-
params['pitchers_lookup[]'] = pitchers_lookup
76+
params["pitchers_lookup[]"] = pitchers_lookup
7777

7878
if batters_lookup:
79-
params['batters_lookup[]'] = batters_lookup
79+
params["batters_lookup[]"] = batters_lookup
8080

81-
print('Starting Statcast Search')
82-
logger.debug(f'Params: {params}')
81+
print("Starting Statcast Search")
82+
logger.debug(f"Params: {params}")
8383
response = session.get(STATCAST_SEARCH_URL, params=params)
8484

8585
logger.debug(response.url)
86+
print(response.url)
8687

8788
if response.status_code == 200:
88-
print('Statcast Search Completed')
89+
print("Statcast Search Completed")
8990
csv_content = io.StringIO(response.text)
9091

9192
return pd.read_csv(csv_content)
9293
else:
9394
raise Exception(
94-
f'Failed to fetch data: {response.status_code} - {response.text}'
95+
f"Failed to fetch data: {response.status_code} - {response.text}"
9596
)
9697

9798

9899
def statcast_pitcher_search(
99100
pitchers_lookup: str | list[str],
100-
season: str | list[str] = '2024',
101+
season: str | list[str] = "2024",
101102
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
102-
start_dt: str = '',
103-
end_dt: str = '',
104-
month: str | Month | list[str | Month] = '',
105-
opponent: str | MlbTeam | list[str | MlbTeam] = '',
103+
start_dt: str = "",
104+
end_dt: str = "",
105+
month: str | Month | list[str | Month] = "",
106+
opponent: str | MlbTeam | list[str | MlbTeam] = "",
106107
debug: bool = False,
107108
) -> pd.DataFrame:
108109
"""
@@ -122,31 +123,31 @@ def statcast_pitcher_search(
122123
"""
123124

124125
if not pitchers_lookup:
125-
raise ValueError('pitchers_lookup is required')
126+
raise ValueError("pitchers_lookup is required")
126127

127128
params = {
128-
'pitchers_lookup': pitchers_lookup,
129-
'season': season,
130-
'player_type': 'pitcher',
131-
'game_type': game_type,
132-
'start_dt': start_dt,
133-
'end_dt': end_dt,
134-
'month': month,
135-
'opponent': opponent,
136-
'debug': debug,
129+
"pitchers_lookup": pitchers_lookup,
130+
"season": season,
131+
"player_type": "pitcher",
132+
"game_type": game_type,
133+
"start_dt": start_dt,
134+
"end_dt": end_dt,
135+
"month": month,
136+
"opponent": opponent,
137+
"debug": debug,
137138
}
138139

139140
return statcast_search(**params)
140141

141142

142143
def statcast_batter_search(
143144
batters_lookup: str | list[str],
144-
season: str | list[str] = '2024',
145+
season: str | list[str] = "2024",
145146
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
146-
start_dt: str = '',
147-
end_dt: str = '',
148-
month: str | Month | list[str | Month] = '',
149-
opponent: str | MlbTeam | list[str | MlbTeam] = '',
147+
start_dt: str = "",
148+
end_dt: str = "",
149+
month: str | Month | list[str | Month] = "",
150+
opponent: str | MlbTeam | list[str | MlbTeam] = "",
150151
debug: bool = False,
151152
) -> pd.DataFrame:
152153
"""
@@ -166,18 +167,18 @@ def statcast_batter_search(
166167
"""
167168

168169
if not batters_lookup:
169-
raise ValueError('batters_lookup is required')
170+
raise ValueError("batters_lookup is required")
170171

171172
params = {
172-
'batters_lookup': batters_lookup,
173-
'season': season,
174-
'player_type': 'batter',
175-
'game_type': game_type,
176-
'start_dt': start_dt,
177-
'end_dt': end_dt,
178-
'month': month,
179-
'opponent': opponent,
180-
'debug': debug,
173+
"batters_lookup": batters_lookup,
174+
"season": season,
175+
"player_type": "batter",
176+
"game_type": game_type,
177+
"start_dt": start_dt,
178+
"end_dt": end_dt,
179+
"month": month,
180+
"opponent": opponent,
181+
"debug": debug,
181182
}
182183

183184
return statcast_search(**params)

0 commit comments

Comments
 (0)