Skip to content

Commit dc74924

Browse files
committed
Fix initial, medial, final handling; add tests.
1 parent 1e480d7 commit dc74924

4 files changed

Lines changed: 14 additions & 10 deletions

File tree

scriptshifter/tables/__init__.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,12 @@ def __init__(self, content):
8484
# Standalone has precedence, then initial, then final, then medial.
8585
# This is somewhat arbitrary and may change if special cases arise.
8686
# WB markers are moved to flags to allow default comparison.
87-
if self.content.endswith(TOKEN_WB_MARKER):
88-
self.flags |= BOW
89-
self.content = self.content.rstrip(TOKEN_WB_MARKER)
9087
if self.content.startswith(TOKEN_WB_MARKER):
91-
self.flags |= EOW
88+
self.flags |= BOW
9289
self.content = self.content.lstrip(TOKEN_WB_MARKER)
90+
if self.content.endswith(TOKEN_WB_MARKER):
91+
self.flags |= EOW
92+
self.content = self.content.rstrip(TOKEN_WB_MARKER)
9393

9494
def __lt__(self, other):
9595
"""
@@ -115,9 +115,9 @@ def __lt__(self, other):
115115
if (
116116
(self.flags > 0 or other.flags > 0)
117117
and self.content == other.content):
118-
logger.debug(f"{self.content} flags: {self.flags}")
119-
logger.debug(f"{other.content} flags: {other.flags}")
120-
logger.debug("Performing flags comparison.")
118+
# logger.debug(f"{self.content} flags: {self.flags}")
119+
# logger.debug(f"{other.content} flags: {other.flags}")
120+
# logger.debug("Performing flags comparison.")
121121

122122
return self.flags > other.flags
123123

@@ -202,6 +202,8 @@ def populate_table(conn, tname, tdata):
202202
203203
@param tdata(dict): Table data.
204204
"""
205+
logger.info(f"Populating table: {tname}")
206+
205207
res = conn.execute(
206208
"""INSERT INTO tbl_language (
207209
name, label, marc_code, description

scriptshifter/tables/data/_ignore_base.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1+
---
12
general:
23
name: Common ignore list.
34

45
roman_to_script:
56
ignore:
6-
- " "
77
- "at head of title"
88
- "colophon"
99
- "date of publication not identified"
@@ -38,8 +38,6 @@ roman_to_script:
3838
- "\\b[\u2021$][0-9a-z]\\b"
3939

4040
script_to_roman:
41-
ignore:
42-
- " "
4341
ignore_ptn:
4442
# MARC sub-field markers.
4543
- "\\b[\u2021$][0-9a-z]\\b"

test/data/script_samples/unittest.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
"rot3","st uv","Vw Xy","r2s","{""capitalize"": ""all""}"
77
"regex","Hello abc","Hello 678","r2s",
88
"regex","Hullo abc","5u22o 678","r2s",
9+
"word_boundaries","bab aa b.abc c, dae abada:ddd vb","<212> <11> 020.<123> 030, <41e <12141>:<444> v2>","r2s"
10+
"word_boundaries","43 23432 455 4:3 51, 11","<dc> <bcdcb> <d55 0d0:0c0 5a>, <aa>","s2r"

test/unittest/tables/index.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ regex:
1515
name: inherited config + regex ignore.
1616
rot3:
1717
name: Test ROT3 hooks
18+
word_boundaries:
19+
name: Word boundaries

0 commit comments

Comments
 (0)