Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion machine/corpora/paratext_project_text_updater_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
def update_usfm(
self,
book_id: str,
chapters: Optional[Sequence[int]] = None,
rows: Optional[Sequence[UpdateUsfmRow]] = None,
full_name: Optional[str] = None,
text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_EXISTING,
Expand Down Expand Up @@ -61,7 +62,7 @@ def update_usfm(
)
try:
parse_usfm(usfm, handler, self._settings.stylesheet, self._settings.versification)
return handler.get_usfm(self._settings.stylesheet)
return handler.get_usfm(self._settings.stylesheet, chapters)
except Exception as e:
error_message = (
f"An error occurred while parsing the usfm for '{book_id}'"
Expand Down
34 changes: 25 additions & 9 deletions machine/corpora/update_usfm_parser_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,27 +334,43 @@ def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -
if embed_outside_of_block:
self._end_update_block(state, [scripture_ref])

def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str:
def get_usfm(
    self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty", chapters: Optional[Sequence[int]] = None
) -> str:
    """Detokenize the handler's USFM tokens back into a USFM string.

    :param stylesheet: A stylesheet instance, or a stylesheet file name to
        load (defaults to the standard "usfm.sty").
    :param chapters: When given, only tokens belonging to these chapter
        numbers (plus the leading \\id token) are emitted — used for
        incremental drafting.
    :return: The detokenized USFM text, with any accumulated remarks
        inserted as \\rem paragraphs.
    """
    if isinstance(stylesheet, str):
        # Accept a stylesheet name/path and load it on demand.
        stylesheet = UsfmStylesheet(stylesheet)
    tokenizer = UsfmTokenizer(stylesheet)
    tokens = list(self._tokens)
    if chapters is not None:
        # Restrict the output to the requested chapters.
        tokens = self._get_incremental_draft_tokens(tokens, chapters)
    if len(self._remarks) > 0:
        # Each remark becomes a \rem paragraph marker followed by its text.
        remark_tokens: List[UsfmToken] = []
        for remark in self._remarks:
            remark_tokens.append(UsfmToken(UsfmTokenType.PARAGRAPH, "rem"))
            remark_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=remark))
        # NOTE(review): this span appears to contain BOTH variants of the
        # remark-insertion logic from a diff paste — the older
        # insert-after-book-header block immediately below, and the newer
        # insert-after-each-chapter-marker loop after it. As written, remarks
        # would be inserted twice; confirm which variant is intended.
        if len(tokens) > 0:
            index = 0
            markers_to_skip = {"id", "ide", "rem"}
            # Skip past the book-header markers so remarks land after them.
            while tokens[index].marker in markers_to_skip:
                index += 1
            # Also skip the text token that follows the last header marker.
            if len(tokens) > index and tokens[index].type == UsfmTokenType.TEXT:
                index += 1
            for remark_token in reversed(remark_tokens):
                tokens.insert(index, remark_token)
        for index, token in enumerate(tokens):
            if token.type == UsfmTokenType.CHAPTER:
                # Splice the remark tokens in directly after the chapter marker.
                tokens[index + 1 : index + 1] = remark_tokens
    return tokenizer.detokenize(tokens)

def _get_incremental_draft_tokens(self, tokens: List[UsfmToken], chapters: Sequence[int]) -> List[UsfmToken]:
    """Filter *tokens* down to the chapters selected for an incremental draft.

    The leading \\id token (book identification, when it is the very first
    token) is always kept so the result remains an identifiable book
    fragment; every other token is kept only while the most recently seen
    chapter marker names a chapter in *chapters*.

    :param tokens: The full token stream for the book.
    :param chapters: Chapter numbers to keep.
    :return: A new list containing only the selected tokens; *tokens* is
        not modified.
    :raises ValueError: If a chapter marker's data is not parseable as an
        integer (behavior inherited from ``int()``).
    """
    # Hoist membership testing into a set: `chapters` is a Sequence, so
    # `in` would otherwise scan it linearly at every chapter marker.
    selected_chapters = set(chapters)
    incremental_draft_tokens: List[UsfmToken] = []
    in_chapter = False
    for index, token in enumerate(tokens):
        if index == 0 and token.marker == "id":
            # Always keep the book id line.
            incremental_draft_tokens.append(token)
        elif token.type == UsfmTokenType.CHAPTER:
            # token.data carries the chapter number as a string (e.g. "3").
            if token.data and int(token.data) in selected_chapters:
                in_chapter = True
                incremental_draft_tokens.append(token)
            else:
                in_chapter = False
        elif in_chapter:
            incremental_draft_tokens.append(token)
    return incremental_draft_tokens

def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> Tuple[List[str], Optional[dict[str, object]]]:
row_texts: List[str] = []
row_metadata = None
Expand Down
Loading