Skip to content

Commit f25702e

Browse files
committed
Update row data type naming
1 parent 0898f1d commit f25702e

19 files changed

Lines changed: 70 additions & 63 deletions

machine/corpora/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from .alignment_row import AlignmentRow
55
from .corpora_utils import batch
66
from .corpus import Corpus
7-
from .data_type import DataType
87
from .dbl_bundle_text_corpus import DblBundleTextCorpus
98
from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
109
from .dictionary_text_corpus import DictionaryTextCorpus
@@ -49,6 +48,7 @@
4948
from .text_file_text import TextFileText
5049
from .text_file_text_corpus import TextFileTextCorpus
5150
from .text_row import TextRow, TextRowFlags
51+
from .text_row_content_type import TextRowContentType
5252
from .token_processors import (
5353
escape_spaces,
5454
lowercase,
@@ -103,7 +103,7 @@
103103
"batch",
104104
"Corpus",
105105
"create_versification_ref_corpus",
106-
"DataType",
106+
"TextRowContentType",
107107
"DblBundleTextCorpus",
108108
"DictionaryAlignmentCorpus",
109109
"DictionaryTextCorpus",

machine/corpora/data_type.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

machine/corpora/memory_text.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
11
from typing import Generator, Iterable
22

33
from .corpora_utils import gen
4-
from .data_type import DataType
54
from .text import Text
65
from .text_row import TextRow
6+
from .text_row_content_type import TextRowContentType
77

88

99
class MemoryText(Text):
10-
def __init__(self, id: str, rows: Iterable[TextRow] = [], data_type: DataType = DataType.SENTENCE) -> None:
10+
def __init__(
11+
self, id: str, rows: Iterable[TextRow] = [], data_type: TextRowContentType = TextRowContentType.SEGMENT
12+
) -> None:
1113
self._id = id
1214
self._rows = list(rows)
1315
if any([r.data_type != data_type for r in self._rows]):
@@ -23,7 +25,7 @@ def sort_key(self) -> str:
2325
return self._id
2426

2527
@property
26-
def data_type(self) -> DataType:
28+
def data_type(self) -> TextRowContentType:
2729
return self._data_type
2830

2931
def _get_rows(self) -> Generator[TextRow, None, None]:

machine/corpora/n_parallel_text_corpus.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,20 +2,20 @@
22
from typing import Any, Callable, Iterable, List, Optional, Sequence, Set, cast
33

44
from ..scripture.verse_ref import Versification
5-
from .data_type import DataType
65
from .n_parallel_text_corpus_base import NParallelTextCorpusBase
76
from .n_parallel_text_row import NParallelTextRow
87
from .scripture_ref import ScriptureRef
98
from .text_corpus import TextCorpus
109
from .text_corpus_enumerator import TextCorpusEnumerator
1110
from .text_row import TextRow, TextRowFlags
11+
from .text_row_content_type import TextRowContentType
1212

1313

1414
class _RangeRow:
1515
refs: List[Any]
1616
segment: List[str]
1717
is_sentence_start: bool = False
18-
data_type: DataType = DataType.SENTENCE
18+
data_type: TextRowContentType = TextRowContentType.SEGMENT
1919

2020
@property
2121
def is_in_range(self):
@@ -38,7 +38,7 @@ def __init__(self, n: int):
3838
self.text_id = ""
3939
self.versifications: Optional[List[Versification]] = None
4040
self.row_ref_comparer = None
41-
self.data_type = DataType.SENTENCE
41+
self.data_type = TextRowContentType.SEGMENT
4242

4343
@property
4444
def is_in_range(self) -> bool:
@@ -293,7 +293,7 @@ def _create_rows(
293293
yield range_info.create_row()
294294

295295
default_refs = [[r.ref for r in rows if r is not None][0]]
296-
data_type = DataType.SENTENCE
296+
data_type = TextRowContentType.SEGMENT
297297

298298
text_id: Optional[str] = None
299299
refs: List[List[Any]] = []

machine/corpora/n_parallel_text_row.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
from typing import Any, Sequence
22

3-
from .data_type import DataType
43
from .text_row import TextRowFlags
4+
from .text_row_content_type import TextRowContentType
55

66

77
class NParallelTextRow:
8-
def __init__(self, text_id: str, n_refs: Sequence[Sequence[Any]], data_type: DataType = DataType.SENTENCE):
8+
def __init__(
9+
self, text_id: str, n_refs: Sequence[Sequence[Any]], data_type: TextRowContentType = TextRowContentType.SEGMENT
10+
):
911
if len([n_ref for n_ref in n_refs if n_ref is not None and len(n_ref) > 0]) == 0:
1012
raise ValueError(f"Refs must be provided but n_refs={n_refs}")
1113
self._text_id = text_id
@@ -24,7 +26,7 @@ def ref(self) -> Any:
2426
return self._n_refs[0][0]
2527

2628
@property
27-
def data_type(self) -> DataType:
29+
def data_type(self) -> TextRowContentType:
2830
return self._data_type
2931

3032
@property

machine/corpora/parallel_text_corpus.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@
2626
from .aligned_word_pair import AlignedWordPair
2727
from .corpora_utils import get_split_indices
2828
from .corpus import Corpus
29-
from .data_type import DataType
3029
from .parallel_text_row import ParallelTextRow
30+
from .text_row_content_type import TextRowContentType
3131
from .token_processors import escape_spaces, lowercase, normalize, unescape_spaces
3232

3333
if TYPE_CHECKING:
@@ -419,7 +419,7 @@ def to_hf_dataset(
419419
if alignment_column is not None:
420420
features_dict[alignment_column] = Sequence({source_lang: Value("int32"), target_lang: Value("int32")})
421421
if data_type_column is not None:
422-
features_dict[data_type_column] = ClassLabel(names=[e.name for e in DataType])
422+
features_dict[data_type_column] = ClassLabel(names=[e.name for e in TextRowContentType])
423423
features = Features(features_dict)
424424

425425
def iterable() -> Iterable[dict]:

machine/corpora/parallel_text_row.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
from typing import Any, Collection, Optional, Sequence
44

55
from .aligned_word_pair import AlignedWordPair
6-
from .data_type import DataType
76
from .text_row import TextRowFlags
7+
from .text_row_content_type import TextRowContentType
88

99

1010
class ParallelTextRow(Sequence[Sequence[str]]):
@@ -18,7 +18,7 @@ def __init__(
1818
aligned_word_pairs: Optional[Collection[AlignedWordPair]] = None,
1919
source_flags: TextRowFlags = TextRowFlags.SENTENCE_START,
2020
target_flags: TextRowFlags = TextRowFlags.SENTENCE_START,
21-
data_type: DataType = DataType.SENTENCE,
21+
data_type: TextRowContentType = TextRowContentType.SEGMENT,
2222
) -> None:
2323
if not text_id:
2424
raise ValueError("A text_id must be set.")
@@ -55,7 +55,7 @@ def refs(self) -> Sequence[Any]:
5555
return self.target_refs if len(self.source_refs) == 0 else self.source_refs
5656

5757
@property
58-
def data_type(self) -> DataType:
58+
def data_type(self) -> TextRowContentType:
5959
return self._data_type
6060

6161
@property

machine/corpora/paratext_backup_terms_corpus.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,11 @@
22
from zipfile import ZipFile
33

44
from ..utils.typeshed import StrPath
5-
from .data_type import DataType
65
from .dictionary_text_corpus import DictionaryTextCorpus
76
from .key_term import KeyTerm
87
from .memory_text import MemoryText
98
from .text_row import TextRow
9+
from .text_row_content_type import TextRowContentType
1010
from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
1111
from .zip_paratext_project_terms_parser import ZipParatextProjectTermsParser
1212

@@ -29,9 +29,9 @@ def __init__(self, filename: StrPath, term_categories: Sequence[str], use_term_g
2929
text = MemoryText(
3030
text_id,
3131
[
32-
TextRow(text_id, key_term.id, key_term.renderings, data_type=DataType.GLOSS)
32+
TextRow(text_id, key_term.id, key_term.renderings, data_type=TextRowContentType.WORD)
3333
for key_term in key_terms
3434
],
35-
data_type=DataType.GLOSS,
35+
data_type=TextRowContentType.WORD,
3636
)
3737
self._add_text(text)

machine/corpora/scripture_text.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,15 @@
44
from ..scripture.verse_ref import VerseRef, Versification
55
from ..utils.context_managed_generator import ContextManagedGenerator
66
from .corpora_utils import gen, get_scripture_text_sort_key
7-
from .data_type import DataType
87
from .scripture_ref import ScriptureElement, ScriptureRef
98
from .text_base import TextBase
109
from .text_row import TextRow, TextRowFlags
10+
from .text_row_content_type import TextRowContentType
1111

1212

1313
class ScriptureText(TextBase):
1414
def __init__(self, id: str, versification: Optional[Versification] = None) -> None:
15-
super().__init__(id, get_scripture_text_sort_key(id), data_type=DataType.SENTENCE)
15+
super().__init__(id, get_scripture_text_sort_key(id), data_type=TextRowContentType.SEGMENT)
1616
self._versification = ENGLISH_VERSIFICATION if versification is None else versification
1717

1818
@property

machine/corpora/text.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
from abc import abstractmethod
22

33
from .corpus import Corpus
4-
from .data_type import DataType
54
from .text_row import TextRow
5+
from .text_row_content_type import TextRowContentType
66

77

88
class Text(Corpus[TextRow]):
@@ -16,4 +16,4 @@ def sort_key(self) -> str: ...
1616

1717
@property
1818
@abstractmethod
19-
def data_type(self) -> DataType: ...
19+
def data_type(self) -> TextRowContentType: ...

0 commit comments

Comments
 (0)