Skip to content

Commit 3135e24

Browse files
authored
Merge pull request #188 from lcnetdev/capitalization
Capitalization
2 parents 463a096 + a33822b commit 3135e24

6 files changed

Lines changed: 53 additions & 33 deletions

File tree

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ the drop-down automatically. The value must be one of the keys found in
114114
## Command-line interface
115115

116116
Various Scriptshifter commands can be accessed via the shell command `sscli`.
117-
At the moment only a few essential admin and testing tools are available. More
118-
commands can be made avaliable on an as-needed basis.
117+
At the moment a few essential admin and testing tools are available, as well as
118+
a transliteration function. More commands can be made available on an as-needed
119+
basis.
119120

120121
Help menu:
121122

@@ -129,6 +130,12 @@ Section help:
129130
/path/to/sscli admin --help
130131
```
131132

133+
Transliteration:
134+
135+
```
136+
echo "王正强" | /path/to/sscli trans chinese -c first -o "marc_field=100"
137+
```
138+
132139

133140
## Contributing
134141

scriptshifter/hooks/general/__init__.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,25 @@
2727
logger = getLogger(__name__)
2828

2929

30+
def capitalize_pre_assembly(ctx):
    """
    Apply the user's capitalization option to the unassembled token list.

    Reads the `capitalize` entry of `ctx.options` (`None` when it is not
    set) and replaces `ctx.dest_ls` with the value `_capitalize` returns
    for the current token list.
    """
    mode = ctx.options.get("capitalize")
    ctx.dest_ls = _capitalize(ctx.dest_ls, mode)
35+
36+
37+
def capitalize_post_assembly(ctx):
    """
    Apply the user's capitalization option to the assembled output string.

    `ctx.dest` is split on single spaces, the tokens are handed to
    `_capitalize` together with the `capitalize` entry of `ctx.options`
    (`None` when unset), and the result is joined back with single spaces.

    Return:
        str: The re-joined string. `ctx.dest` is not reassigned here.
    """
    tokens = ctx.dest.split(" ")

    return " ".join(_capitalize(tokens, ctx.options.get("capitalize")))
46+
47+
48+
3049
def normalize_spacing_post_assembly(ctx):
3150
"""
3251
Remove duplicate and unwanted whitespace around punctuation.
@@ -53,3 +72,18 @@ def normalize_spacing_post_assembly(ctx):
5372
# norm = NORM8_RE.sub(r"\1\2", norm)
5473

5574
return norm
75+
76+
77+
def _capitalize(src, which):
    """
    Only capitalize first word and words preceded by space.

    NOTE: this function is only used for capitalizing hook-generated
    transliterations, which are not normally processed. Double cap rules are
    not applicable here.

    Args:
        src (list[str]): Tokens to capitalize. The list is not modified in
            place.
        which (str | None): `"first"` upper-cases the initial character of
            the first token only; `"all"` upper-cases the initial character
            of every token; any other value leaves the tokens unchanged.

    Return:
        list[str]: A new list holding the capitalized tokens.
    """
    # Work on a copy so that the caller's list is never mutated; callers
    # use the return value.
    tokens = list(src)

    if which == "first":
        # Guard against an empty token list.
        if tokens:
            # Slicing (rather than indexing) tolerates an empty first token.
            tokens[0] = tokens[0][:1].upper() + tokens[0][1:]

    elif which == "all":
        # Splitting on single spaces yields empty tokens for consecutive
        # spaces; slicing keeps those from raising IndexError.
        tokens = [tk[:1].upper() + tk[1:] for tk in tokens]

    return tokens

scriptshifter/hooks/yiddish_/__init__.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,16 @@
1616
from yiddish import detransliterate, transliterate
1717

1818
from scriptshifter.exceptions import BREAK
19-
from scriptshifter.tools import capitalize
2019

2120

2221
def s2r_post_config(ctx):
2322
"""
2423
Script to Roman.
2524
"""
26-
rom = transliterate(
25+
ctx.dest = transliterate(
2726
ctx.src, loc=True,
2827
loshn_koydesh=ctx.options.get("loshn_koydesh"))
2928

30-
if ctx.options["capitalize"] == "all":
31-
rom = capitalize(rom)
32-
elif ctx.options["capitalize"] == "first":
33-
rom = rom[0].upper() + rom[1:]
34-
35-
ctx.dest = rom
36-
3729
return BREAK
3830

3931

scriptshifter/tools.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

scriptshifter/trans.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
112112
)
113113

114114
# Normalize case before post_config and rule-based normalization.
115-
if not ctx.general["case_sensitive"]:
115+
if t_dir == FEAT_R2S and not ctx.general["case_sensitive"]:
116116
ctx._src = ctx.src.lower()
117117

118118
# This hook may take over the whole transliteration process or delegate
@@ -270,7 +270,10 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
270270
# A match is found. Stop scanning tokens, append result,
271271
# and proceed scanning the source.
272272

273-
# Capitalization.
273+
# Capitalization. This applies double capitalization
274+
# rules. The `_capitalize` helper in
275+
# scriptshifter.hooks.general, used for hook-generated
276+
# transliterations, does not.
274277
if (
275278
(ctx.options["capitalize"] == "first" and ctx.cur == 0)
276279
or

sscli

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,9 @@ def samples(lang):
6565
return test_sample(lang)
6666

6767

68-
@cli.group(name="trans")
69-
def trans_grp():
70-
""" Transliteration and transcription operations. """
71-
pass
72-
73-
74-
@trans_grp.command()
75-
@click.argument("src", type=click.File("r"))
68+
@cli.command(name="trans")
7669
@click.argument("lang")
70+
@click.argument("src", type=click.File("r"), default="-")
7771
@click.option(
7872
"-c", "--capitalize", default=None,
7973
help="Capitalize output: `first`, `all`, ot none (the default).")
@@ -85,12 +79,11 @@ def trans_grp():
8579
help=(
8680
"Language=specific option. Format: key=value. Multiple -o entries "
8781
"are possible."))
88-
def transliterate(src, lang, t_dir, capitalize, option):
82+
def trans_(src, lang, t_dir, capitalize, option):
8983
"""
9084
Transliterate text from standard input.
9185
92-
e.g.: `echo "王正强" | /path/to/sscli trans transliterate chinese
93-
-o "marc_field=100"'
86+
e.g.: `echo "王正强" | /path/to/sscli trans chinese -o "marc_field=100"`
9487
"""
9588
options = {}
9689
for opt in option:

0 commit comments

Comments
 (0)