Skip to content

Commit 9e10c36

Browse files
authored
Merge pull request #262 from lcnetdev/test
Test
2 parents f92ab8d + a904297 commit 9e10c36

11 files changed

Lines changed: 2669 additions & 484 deletions

File tree

.github/workflows/push-app-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ jobs:
2424
uses: actions/checkout@v4
2525
with:
2626
submodules: recursive
27+
fetch-tags: true
2728

2829
- name: update version info
2930
run: |
30-
git fetch --tags
3131
git describe --tags --always >| VERSION
3232
git rev-parse HEAD >> VERSION
3333

.github/workflows/push-test-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ jobs:
1717
uses: actions/checkout@v4
1818
with:
1919
submodules: recursive
20+
fetch-tags: true
2021

2122
- name: update version info
2223
run: |
23-
git fetch --tags
2424
git describe --tags --always >| VERSION
2525
git rev-parse HEAD >> VERSION
2626

.gitmodules

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
[submodule "ext/arabic_rom"]
22
path = ext/arabic_rom
33
url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
4+
branch = main

ext/arabic_rom

scriptshifter/hooks/korean/romanizer.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
from csv import reader
2727
from os import path
28+
from unicodedata import normalize
2829

2930
from scriptshifter.exceptions import BREAK
3031
from scriptshifter.hooks.korean import KCONF
@@ -92,6 +93,8 @@ def s2r_names_post_config(ctx):
9293
def _romanize_nonames(src, options):
9394
""" Main Romanization function for non-name strings. """
9495

96+
# Normalize to precomposed characters.
97+
src = normalize("NFC", src)
9598
# FKR038: Convert Chinese characters to Hangul
9699
if options.get("hancha", True):
97100
kor = _hancha2hangul(_marc8_hancha(src))
@@ -142,6 +145,8 @@ def _romanize_names(src, options):
142145
"""
143146
rom_ls = []
144147
warnings = []
148+
# Normalize to precomposed characters.
149+
src = normalize("NFC", src)
145150

146151
if "," in src and "·" in src:
147152
warnings.append(
@@ -386,9 +391,10 @@ def _romanize_oclc_auto(kor):
386391

387392
# FKR068: Exceptions, Exceptions to initial sound law, Proper names
388393
def _kor_rom(kor):
389-
# Only convert string if it contains CJK (i.e. do not change if already romanized)
390-
# \u3000 is the ideographic space, the lowest codepoint in the Unicode CJK range
391-
if max(kor) < '\u3000':
394+
# Only convert string if it contains CJK (i.e. do not change if already
395+
# romanized). \u3000 is the ideographic space, the lowest codepoint in the
396+
# Unicode CJK range.
397+
if max(kor) < '\u3000':
392398
return kor
393399

394400
kor = re.sub(r"\s{2,}", " ", kor.strip())
Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
# Chinese numerals map.
2-
#
3-
# All other Chinese mappings are kept in _chinese_base.yml. This mapping only
4-
# adds an overlay for parsing numerals and Scriptshifter-specific features.
5-
61
---
72
general:
83
name: Chinese
4+
description: >
5+
Chinese transliteration table that does not convert Chinese numerals to
6+
Indo-Arabic numerals.
97
parents:
108
- _chinese_base
119
case_sensitive: false
@@ -29,39 +27,7 @@ script_to_roman:
2927

3028
hooks:
3129
pre_assembly:
32-
-
33-
- chinese.parse_numerals_pre_assembly
3430
-
3531
- chinese.person_name_pre_assembly
3632

37-
map:
38-
"": "ling#0 "
39-
"": "ling#0 "
40-
"": "yi#1 "
41-
"": "er#2 "
42-
"": "liang#2 "
43-
"": "liang#2 "
44-
"": "san#3 "
45-
"": "si#4 "
46-
"": "wu#5 "
47-
"": "liu#6 "
48-
"": "qi#7 "
49-
"": "ba#8 "
50-
"": "jiu#9 "
51-
"": "shi#10 "
52-
"": "gong#20 "
53-
"廿": "nian#20 "
54-
"": "sa#30 "
55-
"": "xi#40 "
56-
"": "bai#100 "
57-
"": "qian#1000 "
58-
"": "wan#10000 "
59-
"": "wan#10000 "
60-
"亿": "yi#100000000 "
61-
"": "yi#100000000 "
62-
"": "ji# "
63-
"": "zhi# "
64-
"": "nian# "
65-
"": "yue# "
66-
"": "ri# "
67-
"": "di# "
33+
map: {}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Chinese numerals map.
2+
#
3+
# All other Chinese mappings are kept in _chinese_base.yml. This mapping only
4+
# adds an overlay for parsing numerals and Scriptshifter-specific features.
5+
6+
---
7+
general:
8+
name: Chinese (numerals transliteration)
9+
description: >
10+
Chinese transliteration table that includes romanization of Chinese
11+
numerals.
12+
parents:
13+
- _chinese_base
14+
case_sensitive: false
15+
16+
options:
17+
- id: marc_field
18+
label: MARC field
19+
description: >
20+
Romanize according to a specific MARC field format. If indicating a
21+
subfield, append it to the numeric field value, e.g. \'245n\'.
22+
Leave blank if not applicable.
23+
type: string
24+
default:
25+
26+
script_to_roman:
27+
directives:
28+
# Capitalize the first letter of the string only.
29+
# TODO: Implement a list of all punctuation marks after which
30+
# the following letter should be capitalized.
31+
capitalize: true
32+
33+
hooks:
34+
pre_assembly:
35+
-
36+
- chinese.parse_numerals_pre_assembly
37+
-
38+
- chinese.person_name_pre_assembly
39+
40+
map:
41+
"": "ling#0 "
42+
"": "ling#0 "
43+
"": "yi#1 "
44+
"": "er#2 "
45+
"": "liang#2 "
46+
"": "liang#2 "
47+
"": "san#3 "
48+
"": "si#4 "
49+
"": "wu#5 "
50+
"": "liu#6 "
51+
"": "qi#7 "
52+
"": "ba#8 "
53+
"": "jiu#9 "
54+
"": "shi#10 "
55+
"": "gong#20 "
56+
"廿": "nian#20 "
57+
"": "sa#30 "
58+
"": "xi#40 "
59+
"": "bai#100 "
60+
"": "qian#1000 "
61+
"": "wan#10000 "
62+
"": "wan#10000 "
63+
"亿": "yi#100000000 "
64+
"": "yi#100000000 "
65+
"": "ji# "
66+
"": "zhi# "
67+
"": "nian# "
68+
"": "yue# "
69+
"": "ri# "
70+
"": "di# "

0 commit comments

Comments
 (0)