-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathupdate_data.py
More file actions
65 lines (53 loc) · 1.98 KB
/
update_data.py
File metadata and controls
65 lines (53 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import json
import os

import pandas as pd
import requests
from tqdm import tqdm
# Build per-problem language histograms from the submissions dump.
#
# For every problem listed in merged-problems.json this script writes:
#   ./json_data/<problem_id>_all.json      - count of 'AC' submissions per
#                                            language over the whole dump
#   ./json_data/<problem_id>_contest.json  - the same count restricted to
#                                            submissions whose epoch_second is
#                                            at or before the contest's end
# Problems with no 'AC' submissions produce no files.

# Seconds a request may stall (connect, or between received bytes) before
# requests gives up; without it a dead connection would hang forever.
REQUEST_TIMEOUT = 60


def _fetch_json(url):
    """Download *url* and return its decoded JSON payload.

    Raises ``requests.HTTPError`` on a 4xx/5xx response so that an error
    page is never mistaken for data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.text)


def _language_histogram(subs):
    """Return ``{language: number of rows}`` for the submissions in *subs*.

    Rows whose ``language`` is missing are not counted (``value_counts``
    drops NaN by default).
    """
    counts = subs['language'].value_counts()
    # Cast to built-in int: numpy integers are not JSON serializable.
    return {lang: int(count) for lang, count in counts.items()}


# --- 1. Download the submissions dump and load it -------------------------
dataset_url = 'https://s3-ap-northeast-1.amazonaws.com/kenkoooo/submissions.csv.gz'
dataset_fname = './submissions.csv.gz'
with requests.get(dataset_url, stream=True, timeout=REQUEST_TIMEOUT) as r:
    # Check the status before opening the file so a failed download does not
    # overwrite a previously fetched archive with an error body.
    r.raise_for_status()
    with open(dataset_fname, 'wb') as f:
        # Stream in 1 MiB chunks instead of buffering the whole archive.
        for chunk in r.iter_content(chunk_size=1 << 20):
            f.write(chunk)
submissions_data = pd.read_csv(dataset_fname)

# --- 2. Contest id -> end time (epoch seconds) ----------------------------
contest_url = 'https://kenkoooo.com/atcoder/resources/contests.json'
contest_limit = {
    contest['id']: int(contest['start_epoch_second']) + int(contest['duration_second'])
    for contest in _fetch_json(contest_url)
}

# --- 3. Problem list -------------------------------------------------------
problem_url = 'https://kenkoooo.com/atcoder/resources/merged-problems.json'
data = _fetch_json(problem_url)

# --- 4. Per-problem histograms --------------------------------------------
os.makedirs('./json_data', exist_ok=True)

# Filter to 'AC' rows (presumably "accepted" - confirm against the dataset's
# documentation) and group by problem once, rather than rescanning the whole
# frame for every problem.
ac_subs = submissions_data[submissions_data['result'] == 'AC']
subs_by_problem = ac_subs.groupby('problem_id')
problems_with_ac = subs_by_problem.groups

prob2contest = dict()
for prob in tqdm(data):
    name = prob['id']
    contest_name = prob['contest_id']
    prob2contest[name] = contest_name

    if name not in problems_with_ac:
        continue
    all_subs = subs_by_problem.get_group(name)

    all_path = f'./json_data/{name}_all.json'
    with open(all_path, 'w') as f:
        json.dump(_language_histogram(all_subs), f)

    # A problem may reference a contest that contests.json does not list;
    # skip the in-contest histogram for it instead of aborting the whole run.
    end_time = contest_limit.get(contest_name)
    if end_time is None:
        continue

    # Apply the int cast in the comparison itself (the original computed it
    # and threw the result away).
    contest_subs = all_subs[all_subs['epoch_second'].astype(int) <= end_time]
    if contest_subs.empty:
        continue
    # NOTE(review): looks like leftover debug output (it interleaves with the
    # tqdm progress bar); kept to preserve the script's stdout.
    print(contest_subs)

    contest_path = f'./json_data/{name}_contest.json'
    with open(contest_path, 'w') as f:
        json.dump(_language_histogram(contest_subs), f)