1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,15 @@ |
1 |
+# NHL stats |
|
2 |
+ |
|
3 |
+This repo will contain scripts for dumping full NHL game feeds. |
|
4 |
+ |
|
5 |
+## API URLs |
|
6 |
+ |
|
7 |
+This page contains docs on the API: https://gitlab.com/dword4/nhlapi/-/blob/master/stats-api.md |
|
8 |
+ |
|
9 |
+The main API endpoint is https://statsapi.web.nhl.com/api/v1 |
|
10 |
+ |
|
11 |
+The important endpoints are /game/ID/feed/live which contains the full game info, including all plays, |
|
12 |
+and /game/ID/linescore which contains aggregate statistics for each player during the match. |
|
13 |
+ |
|
14 |
+Game discovery is via /schedule; query strings are used to get schedule data between certain dates |
|
15 |
+or for a whole season with: ?season=20172018 |
0 | 16 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,77 @@ |
1 |
+#!/usr/bin/env python |
|
2 |
+"""Download full game feeds.""" |
|
3 |
+ |
|
4 |
+import argparse |
|
5 |
+import asyncio |
|
6 |
+from functools import partial |
|
7 |
+from pathlib import Path |
|
8 |
+import sys |
|
9 |
+ |
|
10 |
+import backoff |
|
11 |
+import httpx |
|
12 |
+import msgpack |
|
13 |
+import tqdm |
|
14 |
+ |
|
15 |
# Callable that appends an endpoint path to the NHL stats API base URL,
# e.g. api_url("/schedule") -> "https://statsapi.web.nhl.com/api/v1/schedule".
api_url = "https://statsapi.web.nhl.com/api/v1{}".format
|
16 |
+ |
|
17 |
+ |
|
18 |
def get_game_guids(season: int) -> list[int]:
    """Return the IDs of all finished games in a locally stored season schedule.

    The schedule is read from ``seasons/<season><season + 1>.msgpack``, for
    example ``seasons/20172018.msgpack`` when ``season`` is 2017.

    Args:
        season: Year in which the season starts.

    Returns:
        The ``gamePk`` of every game whose abstract game state is ``"Final"``,
        in the order the games appear in the schedule file.

    Raises:
        ValueError: If no schedule file exists for the season.
    """
    schedule_file = Path(f"seasons/{season}{season+1}.msgpack")
    if not schedule_file.exists():
        raise ValueError(f"No game data for {season} season")

    with schedule_file.open("rb") as stream:
        schedule = msgpack.load(stream)

    # TODO: replace with JSONPath query
    finished = []
    for day in schedule["dates"]:
        for game in day["games"]:
            if game["status"]["abstractGameState"] == "Final":
                finished.append(game["gamePk"])
    return finished
|
33 |
+ |
|
34 |
+ |
|
35 |
@backoff.on_exception(
    backoff.expo,
    (httpx.ConnectTimeout, httpx.ConnectError),
    max_time=120,
)
async def get_game_feed(client: httpx.AsyncClient, guid: int) -> dict:
    """Fetch the full live feed of a single game from the NHL stats API.

    Connection errors and connect timeouts are retried with exponential
    backoff, giving up after 120 seconds.

    Args:
        client: Asynchronous HTTP client used to issue the request.
        guid: The game's ``gamePk`` identifier.

    Returns:
        The decoded JSON body of ``/game/<guid>/feed/live``.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx or 5xx status.
    """
    resp = await client.get(api_url(f"/game/{guid}/feed/live"))
    # Fail loudly on error responses; otherwise an error payload would be
    # returned to the caller as if it were a game feed.
    resp.raise_for_status()
    return resp.json()
|
43 |
+ |
|
44 |
+ |
|
45 |
def feed_downloaded(guid: int) -> bool:
    """Tell whether a feed file for the game already exists under ``games/``.

    Args:
        guid: The game's ``gamePk`` identifier.

    Returns:
        ``True`` if ``games/<guid>.msgpack`` exists, ``False`` otherwise.
    """
    feed_file = Path(f"games/{guid}.msgpack")
    return feed_file.exists()
|
47 |
+ |
|
48 |
+ |
|
49 |
def save_game_feed(feed: dict) -> None:
    """Write a game feed to ``games/<gamePk>.msgpack``.

    The ``games/`` directory is created when missing. The feed is first
    written to a sibling ``.part`` file and then renamed into place, so an
    interrupted write never leaves a truncated ``.msgpack`` file that a
    later run would mistake for a finished download.

    Args:
        feed: Decoded game feed; must contain the ``gamePk`` key.

    Raises:
        KeyError: If ``feed`` has no ``gamePk`` entry.
    """
    guid = feed["gamePk"]
    target = Path(f"games/{guid}.msgpack")
    target.parent.mkdir(parents=True, exist_ok=True)

    tmp_file = target.with_name(target.name + ".part")
    try:
        with tmp_file.open("wb") as f:
            msgpack.dump(feed, f)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        tmp_file.unlink(missing_ok=True)
        raise
    tmp_file.replace(target)
|
53 |
+ |
|
54 |
+ |
|
55 |
async def main(season: int, force: bool = False):
    """Download and store the feeds of all finished games of a season.

    Args:
        season: Year in which the season starts.
        force: When true, download every finished game again, including
            those whose feed file already exists.

    Raises:
        ValueError: If no schedule data is available for the season
            (propagated from ``get_game_guids``).
    """
    guids = [
        guid
        for guid in get_game_guids(season=season)
        # ``force`` bypasses the "already on disk" filter entirely.
        if force or not feed_downloaded(guid)
    ]
    if not guids:
        print(f"Games for {season} season already downloaded", file=sys.stderr)
        return

    client = httpx.AsyncClient(
        limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
        # Every request is started up front and queues for one of the ten
        # pooled connections, so waiting for a free connection must not time
        # out; the other timeouts keep httpx's 5 second default.
        timeout=httpx.Timeout(5.0, pool=None),
    )
    async with client:
        game_feeds = [get_game_feed(client, guid) for guid in guids]
        progress = tqdm.tqdm(
            asyncio.as_completed(game_feeds),
            desc=f"{season} season games",
            total=len(guids),
        )
        # Persist each feed as soon as its download finishes.
        for feed_coro in progress:
            save_game_feed(await feed_coro)
|
71 |
+ |
|
72 |
+ |
|
73 |
if __name__ == "__main__":
    # Command-line entry point: takes the season's starting year as its only
    # positional argument and runs the downloader for that season.
    cli = argparse.ArgumentParser(description="Download NHL game feeds")
    cli.add_argument("season", type=int, help="Which season to download")
    options = cli.parse_args()
    asyncio.run(main(options.season))
0 | 78 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,19 @@ |
1 |
#!/bin/bash
#
# Download the NHL schedule for one season and store it as msgpack.
#
# Usage: <script> SEASON_START_YEAR   (e.g. 2008 for the 2008/2009 season)
#
# The JSON returned by the schedule endpoint is piped through ./json2msgpack
# and saved as seasons/<start><start+1>.msgpack.

# Abort on errors, unset variables, and failures anywhere in a pipeline, so a
# failed download is never mistaken for a successful one.
set -euo pipefail

season_start="${1:-}"

if [[ -z $season_start ]]; then
    echo "Expected Season (e.g. 2008), but none provided" >&2
    exit 1
fi

if ! [[ $season_start =~ ^[0-9]{4}$ ]]; then
    echo "Season must be a four-digit year (e.g. 2008), got '$season_start'" >&2
    exit 1
fi

# 2008 -> 20082009 as required by NHL API
# (10# forces base 10 so a leading zero is not parsed as octal)
season="${season_start}$((10#$season_start + 1))"
url="https://statsapi.web.nhl.com/api/v1/schedule?season=$season"
outfile="seasons/$season.msgpack"

echo
echo "Hitting $url and saving to $outfile"
echo

mkdir -p seasons

# Write to a temporary file first and move it into place only on success, so
# a failed or interrupted download never leaves a broken schedule file.
tmpfile="$(mktemp "$outfile.XXXXXX")"
trap 'rm -f "$tmpfile"' EXIT

curl --fail "$url" | ./json2msgpack > "$tmpfile"
mv "$tmpfile" "$outfile"