-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhanja.py
More file actions
33 lines (25 loc) · 880 Bytes
/
hanja.py
File metadata and controls
33 lines (25 loc) · 880 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
Find the hanja characters that appear most often in the Sino-Korean words
in my Korean Anki deck.
"""
from collections import defaultdict
from dictionary import load_hanja, load_vocab
def format_example(note, hanja):
    """Build the one-line example shown for a word.

    Args:
        note: Object exposing ``value`` and ``meaning`` attributes, both of
            which are interpolated into the result.
        hanja: The hanja spelling placed at the start of the line.

    Returns:
        A string of the form ``"<hanja>: <value> (<meaning>)"``.
    """
    example = f'{hanja}: {note.value} ({note.meaning})'
    return example
# Lookup object for hanja; a truthy ``lookup(character)`` result makes the
# character be skipped below (presumably because it is already studied --
# confirm against ``dictionary.load_hanja``).
studied_hanja = load_hanja()
notes = load_vocab()

# Maps each remaining hanja character to the formatted example lines of the
# words that contain it.
words_by_hanja_character = defaultdict(list)
for note in notes:
    # NOTE(review): assumes every note has a 'hanja' entry in ``extra`` that
    # is an iterable of hanja spellings (strings) -- confirm in load_vocab.
    for hanja in note.extra['hanja']:
        # Visit each distinct character once, in order of first appearance.
        # A spelling that repeats a character (e.g. 漸漸) must contribute a
        # single example for it; otherwise the same line is stored twice,
        # which inflates the character's rank and duplicates the output.
        for hanja_character in dict.fromkeys(hanja):
            if studied_hanja.lookup(hanja_character):
                continue
            words_by_hanja_character[hanja_character].append(
                format_example(note, hanja)
            )
# Order the (character, examples) pairs by number of collected examples,
# largest first. The sort is stable, so characters with equal counts keep the
# order in which they were first inserted into the mapping.
sorted_characters = list(words_by_hanja_character.items())
sorted_characters.sort(key=lambda item: len(item[1]), reverse=True)

# Report the ten highest-ranked characters: the character itself, one example
# per line, then an empty line separating it from the next entry.
for character, examples in sorted_characters[:10]:
    print(character, '\n'.join(examples), '', sep='\n')