@@ -275,6 +275,18 @@ public static void ParseText(IStText sttext)
275275 }
276276 }
277277
278+ /// <summary>
279+ /// Determine whether <paramref name="text"/> ends with an end-of-sentence (EOS) character.
280+ /// This is used by the FieldWorks interlinear importer
281+ /// to make sure that segments are well-formed.
282+ /// </summary>
283+ public static bool EndsWithEOS ( ITsString text , LcmCache cache ) // returns true unless Run() flagged trailing leftover text that was not in the FoundEosChar state
284+ {
285+ var collector = new SegmentMaker ( text , cache . WritingSystemFactory , null ) ; // null paragraph parser: SegmentMaker.CreateSegment skips the parser call when it is null
286+ collector . Run ( ) ; // Run() sets ExtraSegment when it has to make a final segment from leftover text and the state is not FoundEosChar
287+ return ! collector . ExtraSegment ; // NOTE(review): ExtraSegment stays false when no text is left over at the end of Run(), so such input (presumably including empty text) reports true - confirm this is intended
288+ }
289+
278290 /// <summary>
279291 /// tokenize the paragraph with segments and analyses (wordforms generally, though we try to preserve other existing ones).
280292 /// </summary>
@@ -1632,6 +1644,7 @@ internal abstract class SegmentBreaker
16321644 private int m_csegs ;
16331645 private int m_prevCh ;
16341646 private readonly ILgWritingSystemFactory m_wsf ;
1647+ internal bool ExtraSegment = false ; // Set to true by Run() when leftover text at the end is made into a segment while the parse state is not FoundEosChar. NOTE(review): not reset in the visible part of Run(), so a second Run() on the same instance may keep a stale true - confirm.
16351648
16361649 // The idea here is that certain characters more-or-less mark the end of a segment:
16371650 // basically, sentence-terminating characters like period, question-mark, and so forth.
@@ -1824,7 +1837,13 @@ public void Run()
18241837 }
18251838 // We reached the end of the loop. Make a segment out of anything left over.
18261839 if ( ichStartSeg < m_tssText . Length )
1840+ {
1841+ if ( state != SegParseState . FoundEosChar )
1842+ {
1843+ ExtraSegment = true ;
1844+ }
18271845 CreateSegment ( ichStartSeg , m_tssText . Length ) ;
1846+ }
18281847
18291848 }
18301849
@@ -1985,7 +2004,10 @@ internal SegmentMaker(ITsString text, ILgWritingSystemFactory wsf, ParagraphPars
19852004 protected override void CreateSegment ( int ichMin , int ichLim ) // Handles the segment for the range ichMin..ichLim: always notifies the base class, and records a parser-created segment only when a parser is present.
19862005 {
19872006 base . CreateSegment ( ichMin , ichLim ) ; // always let the base class handle the segment, even when there is no parser
1988- m_segments . Add ( m_paraParser . CreateSegment ( ichMin , ichLim ) ) ;
2007+ if ( m_paraParser != null ) // the parser is null when the maker is built only to scan boundaries (EndsWithEOS passes null); nothing is added to m_segments in that case
2008+ {
2009+ m_segments . Add ( m_paraParser . CreateSegment ( ichMin , ichLim ) ) ;
2010+ }
19892011 }
19902012
19912013 /// <summary>
0 commit comments