# atcoder_language_visualizer/update_data.py (from github.com/Inazuma110/atcoder_language_visualizer, branch main)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import requests
import json
from tqdm import tqdm

# Gzipped CSV dump of submissions; it is cached locally and then loaded
# into a DataFrame that the rest of the script filters.
dataset_url = 'https://s3-ap-northeast-1.amazonaws.com/kenkoooo/submissions.csv.gz'
dataset_fname = './submissions.csv.gz'

# Stream the archive to disk in chunks instead of buffering the whole
# response body in memory, and fail fast on an HTTP error so that an
# error page is never saved (and later parsed) as if it were the dataset.
with requests.get(dataset_url, stream=True, timeout=60) as r:
    r.raise_for_status()
    with open(dataset_fname, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1 << 20):
            f.write(chunk)

# pandas infers gzip compression from the ``.gz`` file suffix.
submissions_data = pd.read_csv(dataset_fname)

# Fetch the contest list once (the original script downloaded and parsed
# it a second time and then discarded the result).
contest_url = 'https://kenkoooo.com/atcoder/resources/contests.json'
contests_response = requests.get(contest_url, timeout=60)
# Surface HTTP failures here rather than as a confusing JSON decode error.
contests_response.raise_for_status()
data = contests_response.json()

# Map each contest id to the epoch second at which the contest ends
# (start time plus duration).
contest_limit = {
    contest['id']: int(contest['start_epoch_second']) + int(contest['duration_second'])
    for contest in data
}

problem_url = 'https://kenkoooo.com/atcoder/resources/merged-problems.json'
problems_response = requests.get(problem_url, timeout=60)
# Surface HTTP failures here rather than as a confusing JSON decode error.
problems_response.raise_for_status()
data = problems_response.json()
prob2contest = dict()


def _language_histogram(subs):
    """Return ``{language: number of rows}`` for the submissions in *subs*."""
    counts = subs['language'].value_counts()
    # Cast to plain ``int`` so the values are always JSON serializable.
    return {lang: int(count) for lang, count in counts.items()}


def _dump_json(obj, path):
    """Serialize *obj* as JSON into the file at *path*."""
    with open(path, 'w') as f:
        json.dump(obj, f)


# Keep only accepted submissions and normalise the timestamp column once,
# up front.  The original cast (`.astype(int)` inside the loop) discarded
# its result and therefore had no effect.
ac_subs = submissions_data[submissions_data['result'] == 'AC'].copy()
ac_subs['epoch_second'] = ac_subs['epoch_second'].astype(int)

# Group once by problem so each problem is looked up directly instead of
# re-scanning the full submissions table on every iteration.
subs_by_problem = ac_subs.groupby('problem_id')

for prob in tqdm(data):
    name = prob['id']
    contest_name = prob['contest_id']
    prob2contest[name] = contest_name

    try:
        all_subs = subs_by_problem.get_group(name)
    except KeyError:
        # No accepted submission for this problem: nothing to write.
        continue

    # Histogram over every accepted submission, regardless of time.
    _dump_json(_language_histogram(all_subs), f'./json_data/{name}_all.json')

    # A problem may reference a contest that is absent from the contest
    # list; skip the in-contest histogram instead of aborting the run.
    end_time = contest_limit.get(contest_name)
    if end_time is None:
        continue

    # Histogram restricted to submissions made before the contest ended.
    contest_subs = all_subs[all_subs['epoch_second'] <= int(end_time)]
    if contest_subs.empty:
        continue

    _dump_json(
        _language_histogram(contest_subs),
        f'./json_data/{name}_contest.json',
    )