1111from .verse import Verse
1212
1313
class QuotationMarkCounter:
    """Tally occurrences of each quotation mark and flag rarely seen ones.

    A mark is considered negligible when its share of all counted marks is
    strictly below ``_NEGLIGIBLE_PROPORTION_THRESHOLD``.
    """

    # Strict upper bound on a mark's share of the total for it to be negligible.
    _NEGLIGIBLE_PROPORTION_THRESHOLD = 0.01

    def __init__(self):
        self.reset()

    def reset(self) -> None:
        """Discard every count collected so far."""
        self._quotation_mark_counts: Dict[str, int] = defaultdict(int)
        self._total_quotation_mark_count: int = 0

    def count_quotation_marks(self, quotation_marks: "List[QuotationMarkStringMatch]") -> None:
        """Add each match's ``quotation_mark`` string to the running tallies.

        Args:
            quotation_marks: Matches to count; only the ``quotation_mark``
                attribute of each match is read.
        """
        for match in quotation_marks:
            self._quotation_mark_counts[match.quotation_mark] += 1
            self._total_quotation_mark_count += 1

    def is_quotation_mark_proportion_negligible(self, quotation_mark: str) -> bool:
        """Return whether ``quotation_mark`` is a negligible share of all marks.

        Args:
            quotation_mark: The mark string to look up.

        Returns:
            ``True`` when nothing has been counted yet, or when the mark's
            count divided by the total count is strictly below the threshold.
        """
        total = self._total_quotation_mark_count
        if total == 0:
            return True
        # Use .get() instead of indexing: indexing a defaultdict would insert
        # a zero entry for a mark that was never counted, so a read-only query
        # would silently grow the tally.
        occurrences = self._quotation_mark_counts.get(quotation_mark, 0)
        return occurrences / total < self._NEGLIGIBLE_PROPORTION_THRESHOLD
37+
38+
1439class ApostropheProportionStatistics :
1540 def __init__ (self ):
1641 self .reset ()
@@ -260,11 +285,13 @@ def __init__(self, quote_conventions: QuoteConventionSet):
260285 self ._quote_conventions = quote_conventions
261286 self ._apostrophe_analyzer = PreliminaryApostropheAnalyzer ()
262287 self ._quotation_mark_sequences = QuotationMarkSequences ()
288+ self ._quotation_mark_counts = QuotationMarkCounter ()
263289 self .reset ()
264290
def reset(self) -> None:
    """Reset the apostrophe analyzer, the sequence tracker and the mark counter, in that order."""
    components = (
        self._apostrophe_analyzer,
        self._quotation_mark_sequences,
        self._quotation_mark_counts,
    )
    for component in components:
        component.reset()
268295
269296 def narrow_down_possible_quote_conventions (self , chapters : List [Chapter ]) -> QuoteConventionSet :
270297 for chapter in chapters :
@@ -281,6 +308,7 @@ def _analyze_quotation_marks_for_verse(self, verse: Verse) -> None:
281308 ).find_all_potential_quotation_marks_in_verse (verse )
282309 self ._analyze_quotation_mark_sequence (quotation_marks )
283310 self ._apostrophe_analyzer .process_quotation_marks (verse .text_segments , quotation_marks )
311+ self ._quotation_mark_counts .count_quotation_marks (quotation_marks )
284312
285313 def _analyze_quotation_mark_sequence (self , quotation_marks : List [QuotationMarkStringMatch ]) -> None :
286314 quotation_mark_grouper : QuotationMarkGrouper = QuotationMarkGrouper (quotation_marks , self ._quote_conventions )
@@ -304,6 +332,8 @@ def _find_opening_quotation_marks(self) -> List[str]:
304332 ]
305333
306334 def _is_opening_quotation_mark (self , quotation_mark : str ) -> bool :
335+ if self ._quotation_mark_counts .is_quotation_mark_proportion_negligible (quotation_mark ):
336+ return False
307337 if self ._apostrophe_analyzer .is_apostrophe_only (quotation_mark ):
308338 return False
309339
@@ -323,6 +353,8 @@ def _find_closing_quotation_marks(self) -> List[str]:
323353 ]
324354
325355 def _is_closing_quotation_mark (self , quotation_mark : str ) -> bool :
356+ if self ._quotation_mark_counts .is_quotation_mark_proportion_negligible (quotation_mark ):
357+ return False
326358 if self ._apostrophe_analyzer .is_apostrophe_only (quotation_mark ):
327359 return False
328360
0 commit comments