Skip to content

Commit 31bee96

Browse files
committed
Decouple conf files from index entries.
1 parent 1dab71c commit 31bee96

2 files changed

Lines changed: 33 additions & 40 deletions

File tree

scriptshifter/tables/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666

6767
logger = logging.getLogger(__name__)
6868

69+
tbl_index = None # Module-level index of all scripts.
70+
6971

7072
class Token(str):
7173
"""
@@ -165,11 +167,12 @@ def init_db():
165167
conn.executescript(fh.read())
166168

167169
# Populate tables.
170+
global tbl_index
168171
with open(path.join(path.dirname(TABLE_DIR), "index.yml")) as fh:
169-
tlist = load(fh, Loader=Loader)
172+
tbl_index = load(fh, Loader=Loader)
170173
try:
171174
with conn:
172-
for tname, tdata in tlist.items():
175+
for tname, tdata in tbl_index.items():
173176
populate_table(conn, tname, tdata)
174177

175178
# If the DB already exists, it will be overwritten ONLY on success at
@@ -340,9 +343,14 @@ def load_table(tname):
340343
the language & script metadata and parsing rules.
341344
"""
342345

343-
fname = path.join(TABLE_DIR, tname + ".yml")
346+
try:
347+
fname = path.join(TABLE_DIR, tbl_index[tname]["conf"])
348+
except KeyError:
349+
# If no `conf` key is provided, use the conventional table name + .yml.
350+
fname = path.join(TABLE_DIR, tname + ".yml")
344351
if not access(fname, R_OK):
345-
raise ValueError(f"No transliteration table for {tname}!")
352+
raise ValueError(
353+
f"No transliteration table `{fname}` found for {tname}!")
346354

347355
with open(fname) as fh:
348356
tdata = load(fh, Loader=Loader)

scriptshifter/tables/index.yml

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,26 +22,12 @@ arabic:
2222
name: Arabic
2323
amharic:
2424
marc_code: amh
25-
name: Amharic
25+
name: Amharic
2626
armenian:
2727
marc_code: arm
2828
name: Armenian
29-
# asian_cyrillic:
30-
# description: >
31-
# Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
32-
# Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
33-
# Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
34-
# Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
35-
# Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
36-
# Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai,
37-
# Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
38-
# Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
39-
# marc_code: >
40-
# abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
41-
# krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
42-
# name: Asian Cyrillic
4329
assamese:
44-
marc_code: asm
30+
marc_code: asm
4531
name: Assamese
4632
azerbaijani_cyrillic:
4733
marc_code: aze
@@ -60,7 +46,7 @@ bihari_devanagari:
6046
name: Bihari (Devanagari)
6147
braj_devanagari:
6248
marc_code: bra
63-
name: Braj (Devanagari)
49+
name: Braj (Devanagari)
6450
bulgarian:
6551
marc_code: bul
6652
name: Bulgarian
@@ -82,27 +68,28 @@ chuvash_cyrillic:
8268
marc_code: chv
8369
name: Chuvash (Cyrillic)
8470
cyrillic_generic:
85-
description: 'Multi-purpose transliteration for most languages that use the Cyrillic script:
86-
Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Belarusian, Bulgarian,
87-
Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush,
88-
Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty,
89-
Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Macedonian, Mansi, Mari,
90-
Moldovan, Molodstov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany,
91-
Russian, Selkup, Serbian, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Ukrainian, Yakut.'
92-
marc_code: abk, ady, alt, ava, bak, bel, bul, che, chm, chv, dar, ale, esk, kbd, xal, krc, kaa,
93-
krl, kom, kum, lez, lit, mac, nog, oss, rum, rom, sah, sel, srp, udm, ukr
71+
description: >
72+
Multi-purpose transliteration for most languages that use the
73+
Cyrillic script: Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar,
74+
Bashkir, Belarusian, Bulgarian, Buryat, Chechen, Chukchi, Chuvash, Dargwa,
75+
Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, Inuit, Kabardian, Kalmyk,
76+
Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty, Komi,
77+
Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Macedonian,
78+
Mansi, Mari, Moldovan, Molodtsov, Mordvin, Nanai, Nenets, Nivkh, Nogai,
79+
Ossetic, Permyak, Romanian, Romany, Russian, Selkup, Serbian, Shor,
80+
Tabasaran, Tat, Tuva, Udekhe, Udmurt, Ukrainian, Yakut.
81+
marc_code: abk, ady, alt, ava, bak, bel, bul, che, chm, chv, dar, ale, esk,
82+
kbd, xal, krc, kaa, krl, kom, kum, lez, lit, mac, nog, oss, rum, rom, sah,
83+
sel, srp, udm, ukr
9484
name: Cyrillic (Generic)
95-
devanagari:
96-
marc_code: hin, san
97-
name: Devanagari
9885
divehi_thaana:
9986
marc_code: div
10087
name: Divehi (Thaana)
10188
dogri_devanagari:
10289
marc_code: doi
10390
name: Dogri (Devanagari)
10491
dungan_cyrillic:
105-
marc_code: sit
92+
marc_code: sit
10693
name: Dungan (Cyrillic)
10794
ethiopic:
10895
marc_code: amh, eth
@@ -159,7 +146,7 @@ khakass_cyrillic:
159146
marc_code: tut
160147
name: Khakass (Cyrillic)
161148
khanty_cyrillic:
162-
marc_code: fiu
149+
marc_code: fiu
163150
name: Khanty (Cyrillic)
164151
khmer:
165152
marc_code: khm
@@ -194,7 +181,7 @@ macedonian:
194181
name: Macedonian
195182
maithili_devanagari:
196183
marc_code: mai
197-
name: Maithili (Devanagari)
184+
name: Maithili (Devanagari)
198185
malayalam:
199186
marc_code: mal
200187
name: Malayalam
@@ -233,7 +220,7 @@ ossetic_cyrillic:
233220
name: Ossetic (Cyrillic)
234221
pahari_devanagari:
235222
marc_code: him
236-
name: Pahari (Devanagari)
223+
name: Pahari (Devanagari)
237224
pali:
238225
marc_code: pli
239226
name: Pali
@@ -306,7 +293,7 @@ tibetan:
306293
name: Tibetan
307294
tigrinya:
308295
marc_code: tir
309-
name: Tigrinya
296+
name: Tigrinya
310297
tod_mongolian:
311298
marc_code: xal
312299
name: Tod Mongolian
@@ -343,5 +330,3 @@ yiddish:
343330
yuit_cyrillic:
344331
marc_code: ypk
345332
name: Yuit (Cyrillic)
346-
347-

0 commit comments

Comments
 (0)