Skip to content

Commit 9263ed9

Browse files
authored
Fix LT-22303: Add ParagraphParser.EndsWithEOS (#358)
* Fix LT-22303: Add ParagraphParser.EndsWithEOS
* Adds comment as requested by Jason
1 parent 66263ab commit 9263ed9

2 files changed

Lines changed: 31 additions & 1 deletion

File tree

src/SIL.LCModel/DomainServices/ITextUtils.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,18 @@ public static void ParseText(IStText sttext)
275275
}
276276
}
277277

278+
/// <summary>
279+
/// Determine whether text ends with an EOS (presumably "end of sentence/segment") character.
280+
/// This is used by the FieldWorks interlinear importer
281+
/// to make sure that segments are well-formed.
/// The check is done by running a SegmentMaker over the text (with no paragraph
/// parser attached) and inspecting its ExtraSegment flag, which Run() sets when
/// leftover text at the end of the string was not in the found-EOS state.
/// Characters that follow the EOS character without starting a new segment
/// (e.g. a closing quote, as in "abc.\"") still count as ending with EOS.
/// NOTE(review): text that leaves nothing over at the end (presumably including
/// empty text) also yields true -- confirm this is what callers expect.
282+
/// </summary>
/// <param name="text">The text to examine.</param>
/// <param name="cache">Cache whose WritingSystemFactory is handed to the segment maker; it is dereferenced, so it must not be null.</param>
/// <returns>False if segmenting flagged an extra trailing segment not terminated by an EOS character; otherwise true.</returns>
283+
public static bool EndsWithEOS(ITsString text, LcmCache cache)
284+
{
285+
// Pass null as the third argument: only the segment-breaking state is needed here,
// not real segment objects.
var collector = new SegmentMaker(text, cache.WritingSystemFactory, null);
286+
collector.Run();
287+
// ExtraSegment is true when trailing text was left over without a closing EOS character.
return !collector.ExtraSegment;
288+
}
289+
278290
/// <summary>
279291
/// tokenize the paragraph with segments and analyses (wordforms generally, though we try to preserve other existing ones).
280292
/// </summary>
@@ -1632,6 +1644,7 @@ internal abstract class SegmentBreaker
16321644
private int m_csegs;
16331645
private int m_prevCh;
16341646
private readonly ILgWritingSystemFactory m_wsf;
1647+
internal bool ExtraSegment = false;
16351648

16361649
// The idea here is that certain characters more-or-less mark the end of a segment:
16371650
// basically, sentence-terminating characters like period, question-mark, and so forth.
@@ -1824,7 +1837,13 @@ public void Run()
18241837
}
18251838
// We reached the end of the loop. Make a segment out of anything left over.
18261839
if (ichStartSeg < m_tssText.Length)
1840+
{
1841+
if (state != SegParseState.FoundEosChar)
1842+
{
1843+
ExtraSegment = true;
1844+
}
18271845
CreateSegment(ichStartSeg, m_tssText.Length);
1846+
}
18281847

18291848
}
18301849

@@ -1985,7 +2004,10 @@ internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphPars
19852004
protected override void CreateSegment(int ichMin, int ichLim)
19862005
{
19872006
// Let the base class do its own handling for the range [ichMin, ichLim) first.
base.CreateSegment(ichMin, ichLim);
1988-
m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim));
2007+
// m_paraParser may be null (ParagraphParser.EndsWithEOS constructs a SegmentMaker
// with a null parser); in that case no segment object is created or recorded.
if (m_paraParser != null)
2008+
{
2009+
m_segments.Add(m_paraParser.CreateSegment(ichMin, ichLim));
2010+
}
19892011
}
19902012

19912013
/// <summary>

tests/SIL.LCModel.Tests/DomainServices/ParagraphParserTests.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,14 @@ public void CheckValidGuessesAfterInsertNewWord_LT8467()
946946
ValidateGuesses(expectedGuessesAfterEdit, paraGuessed);
947947
}
948948

949+
// Verifies ParagraphParser.EndsWithEOS for text without a terminator, text ending
// in a period, and text ending in a period followed by a closing quote.
[Test]
950+
public void EndsWithEOS()
951+
{
952+
// No sentence-final punctuation: the trailing text is not closed by an EOS character.
Assert.IsFalse(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc", Cache.DefaultVernWs), Cache));
953+
// A final period terminates the text.
Assert.IsTrue(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc.", Cache.DefaultVernWs), Cache));
954+
// A closing quote after the period still counts as ending with EOS.
Assert.IsTrue(ParagraphParser.EndsWithEOS(TsStringUtils.MakeString("abc.\"", Cache.DefaultVernWs), Cache));
955+
}
956+
949957
private void ValidateGuesses(IList<IWfiGloss> expectedGuesses, IStTxtPara paraWithGuesses)
950958
{
951959
var segsParaGuesses = paraWithGuesses.SegmentsOS;

0 commit comments

Comments
 (0)