Skip to content

Commit 1d20276

Browse files
committed
Merge branch 'main' into test
2 parents 1531278 + 44b2e05 commit 1d20276

53 files changed

Lines changed: 871 additions & 965 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

scriptshifter/tables/data/_cyrillic_base.yml

Lines changed: 0 additions & 133 deletions
This file was deleted.

scriptshifter/tables/data/_ignore_base.yml

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,37 @@ roman_to_script:
99
- "date of publication not identified"
1010
- "place of publication not identified"
1111
- "publisher not identified"
12+
- "and one other"
13+
- "et al."
14+
ignore_ptn:
15+
- "and ([a-z0-9]+ )?others"
16+
17+
# Incorrectly entered (but frequently found) Roman numerals.
1218
# NOTE There is ambiguity about ignoring these
1319
# words. Note that the single-character Roman
1420
# numerals are not included on purpose.
1521
# Ideally the source editors should use the
1622
# dedicated U+2160–U+216F (uppercase Roman
1723
# numerals) and/or U+2170–U+217F (lower case Roman
1824
# numerals) ranges to avoid this ambiguity.
19-
- "and one other"
20-
- "et al."
21-
ignore_ptn:
22-
- "and ([a-z0-9]+ )?others"
23-
- "I{2,3}"
24-
- "I(V|X)"
25-
- "LI{,3}"
26-
- "LI?(V|X)"
27-
- "L(V|X{1,3})I{,3}"
28-
- "LX{1,3}I?V"
29-
- "LX{1,3}VI{,3}"
30-
- "(V|X{1,3})I{,3}"
31-
- "X{1,3}I{,3}"
32-
- "X{1,3}I(V|X)"
33-
- "X{1,3}VI{,3}"
34-
- "[\u2021$][0-9a-z] *"
25+
- "\\bI{2,3}\\b"
26+
- "\\bI(V|X)\\b"
27+
- "\\bLI{,3}\\b"
28+
- "\\bLI?(V|X)\\b"
29+
- "\\bL(V|X{1,3})I{,3}\\b"
30+
- "\\bLX{1,3}I?V\\b"
31+
- "\\bLX{1,3}VI{,3}\\b"
32+
- "\\b(V|X{1,3})I{,3}\\b"
33+
- "\\bX{1,3}I{,3}\\b"
34+
- "\\bX{1,3}I(V|X)\\b"
35+
- "\\bX{1,3}VI{,3}\\b"
36+
37+
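# MARC sub-field markers. NOTE(review): the leading "\b" sits before a non-word character (U+2021 or "$"), so it only matches when a word character immediately precedes the marker; a marker after a space or at the start of the string is NOT matched — confirm this is intended.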
38+
- "\\b[\u2021$][0-9a-z]\\b"
3539

3640
script_to_roman:
3741
ignore:
3842
- " "
3943
ignore_ptn:
40-
- "[\u2021$][0-9a-z] *"
44+
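# MARC sub-field markers. NOTE(review): the leading "\b" sits before a non-word character (U+2021 or "$"), so it only matches when a word character immediately precedes the marker; a marker after a space or at the start of the string is NOT matched — confirm this is intended.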
45+
- "\\b[\u2021$][0-9a-z]\\b"

scriptshifter/tables/data/abkhaz_cyrillic.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Abkhaz (Cyrillic)
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

scriptshifter/tables/data/altai_cyrillic.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Altai (Cyrillic)
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

scriptshifter/tables/data/azerbaijani_cyrillic.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Azerbaijani (Cyrillic)
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

scriptshifter/tables/data/bashkir_cyrillic.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Bashkir (Cyrillic)
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

scriptshifter/tables/data/belarusian.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Belarusian
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

scriptshifter/tables/data/bulgarian.yml

Lines changed: 18 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,38 @@
11
general:
22
name: Bulgarian
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:
8-
"G": "\u0413"
9-
"g": "\u0433"
10-
# this conversion shouldn't be needed, but does no harm
11-
"ZH": "\u0416"
12-
"Zh": "\u0416"
13-
"zh": "\u0436"
14-
"I\uFE20E\uFE21": "\u0462"
15-
# this conversion shouldn't be needed, but does no harm
16-
"I\uFE20e\uFE21": "\u0462"
17-
# this conversion shouldn't be needed, but does no harm
18-
# this conversion shouldn't be needed, but does no harm
19-
"I": "\u0418"
20-
"i\uFE20e\uFE21": "\u0463"
21-
"i": "\u0438"
22-
# this conversion shouldn't be needed, but does no harm
238
"SHT": "\u0429"
249
"Sht": "\u0429"
2510
"sht": "\u0449"
26-
"T\uFE20S\uFE21": "\u0426"
27-
# this conversion shouldn't be needed, but does no harm
28-
"T\uFE20s\uFE21": "\u0426"
29-
"t\uFE20s\uFE21": "\u0446"
30-
"U\u0310": "\u046A"
11+
"U\u0306": "\u042A"
12+
# Mapping from precomposed non-MARC-8 Latin equivalent
13+
"\u016C": "\u042A"
3114
"u\u0306": "\u044A"
15+
# Mapping from precomposed non-MARC-8 Latin equivalent
16+
"\u016D": "\u044A"
17+
"U\u0310": "\u046A"
3218
"u\u0310": "\u046B"
3319
# this conversion is ambiguous - \u042A is also theoretically possible
3420
"\u02BA": "\u044A"
21+
# upper case hard sign is unlikely to occur
22+
"\u02BA\u0332": "\u042A"
3523

3624
script_to_roman:
3725
map:
38-
"\u044C": ""
39-
"\u042C": ""
40-
"\u044A": ""
41-
"\u042A%": "" # Final
42-
"\u042A": "u\u0306"
43-
"\u0413": "G"
44-
"\u0433": "g"
45-
"\u0416": "Zh"
46-
"\u0436": "zh"
47-
"\u0462": "I\uFE20E\uFE21"
48-
"\u0418": "I"
49-
"\u0463": "i\uFE20e\uFE21"
50-
"\u0438": "i"
5126
"\u0429": "Sht"
27+
"\u042A": "U\u0306"
28+
# Capital letter hard sign at the end of a word (rare)
29+
"\u042A%": "\u02BA\u0332"
30+
"\u042C": "\u02B9\u0332"
5231
"\u0449": "sht"
53-
"\u0426": "T\uFE20S\uFE21"
54-
"\u0446": "t\uFE20s\uFE21"
32+
"\u044A": "u\u0306"
33+
# Small letter hard sign at the end of a word (rare)
34+
"\u044A%": "\u02BA"
35+
"\u044C": "\u02B9"
5536
"\u046A": "U\u0310"
5637
"\u046B": "u\u0310"
57-
"\u042A": "u\u016C"
58-
"\u044A": "u\u016D"
38+

scriptshifter/tables/data/buriat_cyrillic.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
general:
22
name: Buriat (Cyrillic)
33
parents:
4-
- _cyrillic_base
5-
- _ignore_base
4+
- cyrillic_generic
65

76
roman_to_script:
87
map:

scriptshifter/tables/data/chukchi_cyrillic.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
general:
22
name: Chukchi (Cyrillic)
33
parents:
4-
- _cyrillic_base
4+
- cyrillic_generic
55

66
roman_to_script:
77
map:

0 commit comments

Comments
 (0)