Skip to content

Commit 0543e7c

Browse files
committed
Merge branch 'decompose' into test
2 parents dddca65 + 47df258 commit 0543e7c

11 files changed

Lines changed: 425 additions & 537 deletions

File tree

scriptshifter/hooks/korean/data.yml

Lines changed: 0 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -467,57 +467,6 @@ fkr047:
467467
"f8": "l"
468468
"f0": ""
469469

470-
fkr050:
471-
"!": "SB01KQ"
472-
"\"": "SB02KQ"
473-
"#": "SB03KQ"
474-
"$": "SB04KQ"
475-
"%": "SB05KQ"
476-
"&": "SB06KQ"
477-
"'": "SB07KQ"
478-
"(": "SB08KQ"
479-
")": "SB09KQ"
480-
"*": "SB10KQ"
481-
"+": "SB11KQ"
482-
",": "SB12KQ"
483-
"-": "SB13KQ"
484-
".": "SB14KQ"
485-
"/": "SB15KQ"
486-
":": "SB16KQ"
487-
";": "SB17KQ"
488-
"<": "SB18KQ"
489-
"=": "SB19KQ"
490-
">": "SB20KQ"
491-
"?": "SB21KQ"
492-
"": "SB22KQ"
493-
"ǂ": "SB23KQ"
494-
"": "SB24KQ"
495-
"": "SB25KQ"
496-
"": "SB26KQ"
497-
"": "SB27KQ"
498-
"@": "SB28KQ"
499-
"[": "SB29KQ"
500-
"\\": "SB30KQ"
501-
"]": "SB31KQ"
502-
"^": "SB32KQ"
503-
"_": "SB33KQ"
504-
"`": "SB34KQ"
505-
"{": "SB35KQ"
506-
"|": "SB36KQ"
507-
"}": "SB37KQ"
508-
"~": "SB38KQ"
509-
"": "SB39KQ"
510-
"": "SB40KQ"
511-
"": "SB41KQ"
512-
"": "SB42KQ"
513-
"": "SB43KQ"
514-
"": "SB44KQ"
515-
"": "SB45KQ"
516-
"": "SB46KQ"
517-
"˜": "SB47KQ"
518-
"©": "SB48KQ"
519-
"·": "SB49KQ"
520-
521470
fkr052:
522471
rule_nu:
523472
" 무의식역 ": " 무의식🜹역 "
@@ -2764,57 +2713,6 @@ fkr065:
27642713
" hanchaŏ ": " Hanchaŏ "
27652714
" ch'ŏlchong ": " Ch'ŏlchong "
27662715

2767-
fkr066:
2768-
"SB01KQ": "!"
2769-
"SB02KQ": "\""
2770-
"SB03KQ": "#"
2771-
"SB04KQ": "$"
2772-
"SB05KQ": "%"
2773-
"SB06KQ": "&"
2774-
"SB07KQ": "'"
2775-
"SB08KQ": "("
2776-
"SB09KQ": ")"
2777-
"SB10KQ": "*"
2778-
"SB11KQ": "+"
2779-
"SB12KQ": ","
2780-
"SB13KQ": "-"
2781-
"SB14KQ": "."
2782-
"SB15KQ": "/"
2783-
"SB16KQ": ":"
2784-
"SB17KQ": ";"
2785-
"SB18KQ": "<"
2786-
"SB19KQ": "="
2787-
"SB20KQ": ">"
2788-
"SB21KQ": "?"
2789-
"SB22KQ": ","
2790-
"SB23KQ": "ǂ"
2791-
"SB24KQ": ""
2792-
"SB25KQ": ""
2793-
"SB26KQ": ""
2794-
"SB27KQ": ""
2795-
"SB28KQ": "@"
2796-
"SB29KQ": "["
2797-
"SB30KQ": "\\"
2798-
"SB31KQ": "]"
2799-
"SB32KQ": "^"
2800-
"SB33KQ": "_"
2801-
"SB34KQ": "`"
2802-
"SB35KQ": "{"
2803-
"SB36KQ": "|"
2804-
"SB37KQ": "}"
2805-
"SB38KQ": "~"
2806-
"SB39KQ": ""
2807-
"SB40KQ": ""
2808-
"SB41KQ": ""
2809-
"SB42KQ": ""
2810-
"SB43KQ": ""
2811-
"SB44KQ": ""
2812-
"SB45KQ": ""
2813-
"SB46KQ": ""
2814-
"SB47KQ": "˜"
2815-
"SB48KQ": "©"
2816-
"SB49KQ": ","
2817-
28182716
fkr069:
28192717
"학여울역": "학여울력"
28202718
"값어치": "가버치"

scriptshifter/tables/data/arabic.yml

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
---
55
general:
66
name: Arabic
7-
description: Arabic R2S using a conversion table and S2R using a 3rd party library.
7+
description: >
8+
Arabic R2S using a conversion table and S2R using a 3rd party library.
89
case_sensitive: false
910

1011
parents:
@@ -17,7 +18,8 @@ roman_to_script:
1718
# Original table by David Bucknum
1819
# Last updated 25 January 2019
1920
# Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
20-
# Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually"
21+
# Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
22+
# Scripts Conceptually"
2123

2224
# Punctuation marks:
2325
"*": "\u066D"
@@ -65,19 +67,19 @@ roman_to_script:
6567

6668
# "sh[dot below]" as in "Ishaq"
6769

68-
"%sh\u0323%": "\u0633\u062D"
70+
"sh\u0323": "\u0633\u062D"
6971

7072
# "s[prime]h" combos
7173

72-
"%s\u02B9h%": "\u0633\u0647"
74+
"s\u02B9h": "\u0633\u0647"
7375

7476
# "th[dot below]"
7577

76-
"%th\u0323%": "\u062A\u062D"
78+
"th\u0323": "\u062A\u062D"
7779

78-
# dh[dot under]
80+
# dh[dot under]
7981

80-
"%dh\u0323%": "\u062F\u062D"
82+
"dh\u0323": "\u062F\u062D"
8183

8284
# La-hu
8385

@@ -95,7 +97,9 @@ roman_to_script:
9597
"mi\u02BEat": "\u0645\u0627\u0626\u0629"
9698
"mi\u02BCat": "\u0645\u0627\u0626\u0629"
9799

98-
# Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use \u06F0-06F9. This needs further discussion with PSD as RLIN21 used Hindi numbers, Connexion and Voyager does not.)
100+
# Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
101+
# will technically use \u06F0-06F9. This needs further discussion with PSD
102+
# as RLIN21 used Hindi numbers, Connexion and Voyager do not.)
99103

100104
# Edition statements with Latin number
101105
"al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
@@ -131,15 +135,15 @@ roman_to_script:
131135
"ka-": "\u0643"
132136

133137
# Vowels and vowel/consonant combinations
134-
"%ah": "\u0629"
135-
"%at": "\u0629"
138+
"ah%": "\u0629"
139+
"at%": "\u0629"
136140

137-
#tanwin
138-
"%an": "\u0627"
141+
# tanwin
142+
"an%": "\u0627"
139143

140144
# ayn-alif combo
141-
"%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
142-
"%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
145+
"\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
146+
"\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
143147

144148
"\u02BBA\u0304": "\u0639\u0627"
145149
"\u02BBa\u0304": "\u0639\u0627"
@@ -152,24 +156,24 @@ roman_to_script:
152156
"\u02BBU": "\u0639"
153157
"\u02BBu": "\u0639"
154158

155-
"\u02BBA%": "\u0639"
156-
#"\u02BBa%": "\u0639"
159+
"%\u02BBA": "\u0639"
160+
# "%\u02BBa": "\u0639"
157161

158162
# alif and hamzas for all occasions
159163

160-
# truncation necessary? It seems to work fine with.
164+
# truncation necessary? It seems to work fine with.
161165

162-
"%i\u0304\u02BEah": "\u064A\u0626\u0629"
163-
"%i\u0304\u02BCah": "\u064A\u0626\u0629"
166+
"i\u0304\u02BEah%": "\u064A\u0626\u0629"
167+
"i\u0304\u02BCah%": "\u064A\u0626\u0629"
164168

165-
"%i\u0304\u02BEat": "\u064A\u0626\u0629"
166-
"%i\u0304\u02BCat": "\u064A\u0626\u0629"
169+
"i\u0304\u02BEat%": "\u064A\u0626\u0629"
170+
"i\u0304\u02BCat%": "\u064A\u0626\u0629"
167171

168-
"%i\u02BEa\u0304": "\u0626\u0627"
169-
"%i\u02BCa\u0304": "\u0626\u0627"
172+
"i\u02BEa\u0304%": "\u0626\u0627"
173+
"i\u02BCa\u0304%": "\u0626\u0627"
170174

171-
"%i\u02BE": "\u0626"
172-
"%i\u02BC": "\u0626"
175+
"i\u02BE%": "\u0626"
176+
"i\u02BC%": "\u0626"
173177
"a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
174178
"a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
175179

@@ -197,32 +201,34 @@ roman_to_script:
197201
"a\u0304\u02BEi": "\u0627\u0626"
198202
"a\u0304\u02BC": "\u0627\u0621"
199203
"a\u0304\u02BE": "\u0627\u0621"
200-
"A\u0304%": "\u0622"
201-
"a\u0304%": "\u0622"
204+
"%A\u0304": "\u0622"
205+
"%a\u0304": "\u0622"
202206
"A\u0304": "\u0627"
203207
"a\u0304": "\u0627"
204208

205-
# These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinine (i.e. al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l"
206-
"A\u02BB%": "\u0623\u0639"
207-
"a\u02BB%": "\u0623\u0639"
209+
# These next two lines were intended to convert to alif-ayn when it is at
210+
# the beginning of a word, definite or indefinite (i.e.
211+
# al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l)
212+
"%A\u02BB": "\u0623\u0639"
213+
"%a\u02BB": "\u0623\u0639"
208214
"a\u02BB": "\u0639"
209215
"A\u0301": "\u0649"
210216
"a\u0301": "\u0649"
211217

212218
"ayy": "\u064A"
213-
"A%": "\u0623"
214-
"a%": "\u0627"
219+
"%A": "\u0623"
220+
"%a": "\u0627"
215221
"A": "\u0623"
216222
"a": ""
217223

218224
# I - Capital I at beginning of word is usually alif hamzah-below.
219225

220-
"%i\u0304": "\u064A"
226+
"i\u0304%": "\u064A"
221227
"i\u0304y": "\u064A"
222228
"iy": "\u064A"
223-
"I\u0304%": "\u0625\u064A"
229+
"%I\u0304": "\u0625\u064A"
224230
"i\u0304": "\u064A"
225-
"\u02BBI%": "\u0639"
231+
"%\u02BBI": "\u0639"
226232

227233
# "i\u02BB": "\u0625\u0639"
228234

@@ -231,30 +237,30 @@ roman_to_script:
231237
"i\u02BE": "\u0626"
232238
"i\u02BC": "\u0627\u0626"
233239

234-
"I%": "\u0625"
235-
"i%": "\u0625"
240+
"%I": "\u0625"
241+
"%i": "\u0625"
236242
"I": "\u0625"
237243
"i": ""
238244

239245
# U
240246

241247
"u\u0304\u02BE": "\u0624"
242248
"u\u0304\u02BC": "\u0624"
243-
"U\u0304w%": "\u0623\u0648"
244-
"u\u0304w%": "\u0623\u0648"
245-
"U\u0304%": "\u0623\u0648"
246-
"u\u0304%": "\u0623\u0648"
249+
"%U\u0304w": "\u0623\u0648"
250+
"%u\u0304w": "\u0623\u0648"
251+
"%U\u0304": "\u0623\u0648"
252+
"%u\u0304": "\u0623\u0648"
247253
"u\u0304w": "\u0648"
248254
"u\u0304": "\u0648"
249255
"u\u02BE": "\u0624"
250256
"u\u02BC": "\u0624"
251257

252-
"U%": "\u0623"
253-
"u%": "\u0623"
258+
"%U": "\u0623"
259+
"%u": "\u0623"
254260
"U": "\u0623"
255261
"u": ""
256262

257-
# Consonants, with tashdid added
263+
# Consonants, with tashdid added
258264

259265
"B": "\u0628"
260266
"bb": "\u0628"
@@ -351,8 +357,8 @@ roman_to_script:
351357
# hamza - not romanized
352358
# "\u0621"
353359
# hamza (alone in final position)
354-
"%\u02BE": "\u0621"
355-
"%\u02BC": "\u0621"
360+
"\u02BE%": "\u0621"
361+
"\u02BC%": "\u0621"
356362

357363
# Do not know what, if anything, is needed here:
358364
# tatweel:
@@ -378,6 +384,7 @@ roman_to_script:
378384
# alef wasla
379385
# "\u0671"
380386

387+
381388
script_to_roman:
382389
hooks:
383390
post_config:

scriptshifter/tables/data/bulgarian.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ script_to_roman:
3838
"\u044C": ""
3939
"\u042C": ""
4040
"\u044A": ""
41-
"\u042A%": "u\u0306"
42-
"\u042A": ""
41+
"\u042A%": "" # Final
42+
"\u042A": "u\u0306"
4343
"\u0413": "G"
4444
"\u0433": "g"
4545
"\u0416": "Zh"

0 commit comments

Comments
 (0)