-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompress-dict.py
More file actions
executable file
·43 lines (28 loc) · 901 Bytes
/
compress-dict.py
File metadata and controls
executable file
·43 lines (28 loc) · 901 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python3
# Combines the per-word JSON dictionary entries into gzipped group files.
# Words of four or more letters are grouped by their first four letters;
# shorter words are grouped by their first letter only.
import os
import json
import glob
import gzip
def get_group_name(word):
    """Return the name of the group that `word` is filed under.

    Words with four or more characters are grouped by their first four
    characters; anything shorter is grouped by its first character alone.
    An empty string raises IndexError.
    """
    if len(word) >= 4:
        return word[:4]
    return word[0]
def save(name, entries):
    """Write `entries` as gzip-compressed JSON to comp/<name>.json.gz.

    The text is encoded as UTF-8 and non-ASCII characters are written
    verbatim rather than as \\uXXXX escapes. The comp/ directory must
    already exist.
    """
    target = 'comp/%s.json.gz' % name
    with gzip.open(target, mode='wt', encoding='utf-8') as out:
        json.dump(entries, out, ensure_ascii=False)
# Build one gzipped JSON file per word group from the per-word files in dict/.
words = []
last = None
entries = {}

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs('comp', exist_ok=True)

for path in glob.glob('dict/*.json'):
    # Drop the 5-character 'dict/' prefix and '.json' suffix to get the word.
    words.append(path[5:-5])

# Sorting by group name makes each group's words contiguous, so a group can be
# written out as soon as the group name changes. The word itself is the
# tie-breaker so the output does not depend on the arbitrary order of glob().
for word in sorted(words, key=lambda w: (get_group_name(w), w)):
    # Decode explicitly as UTF-8 (matching what save() writes) instead of
    # relying on the locale's default encoding.
    with open('dict/%s.json' % word, 'r', encoding='utf-8') as f:
        data = json.load(f)
    group = get_group_name(word)
    if last is not None and group != last:
        # Entered a new group: flush the finished one and start afresh.
        save(last, entries)
        entries = {}
    # File names use '_' where the dictionary key has a space.
    entries[word.replace('_', ' ')] = data
    last = group

# Flush the final group. `entries` is empty only when no input files were
# found, in which case `last` is still None and nothing is written.
if entries:
    save(last, entries)