22using System . Collections . Generic ;
33using System . Linq ;
44using SIL . Machine . QualityEstimation . Scores ;
5- using SIL . Machine . QualityEstimation . Thresholds ;
65using SIL . Machine . QualityEstimation . Usability ;
76using SIL . Scripture ;
87
98namespace SIL . Machine . QualityEstimation
109{
10+ /// <summary>
11+ /// Provides chrF3 quality estimation support for pre-translations.
12+ /// </summary>
1113 public class QualityEstimation
1214 {
13- public BookThresholds BookThresholds { get ; set ; } = new BookThresholds ( ) ;
15+ private readonly BookScores _bookScores = new BookScores ( ) ;
16+ private readonly ChapterScores _chapterScores = new ChapterScores ( ) ;
17+ private readonly double _intercept ;
18+ private readonly List < SequenceScore > _sequenceScores = new List < SequenceScore > ( ) ;
19+ private readonly double _slope ;
20+ private readonly TxtFileScores _txtFileScores = new TxtFileScores ( ) ;
21+ private readonly List < VerseScore > _verseScores = new List < VerseScore > ( ) ;
1422
15- public ChapterThresholds ChapterThresholds { get ; set ; } = new ChapterThresholds ( ) ;
/// <summary>
/// Creates a quality estimator that uses the given slope and intercept for every score it builds.
/// </summary>
/// <param name="slope">The slope handed to each score created while projecting chrF3 values.</param>
/// <param name="intercept">The intercept handed to each score created while projecting chrF3 values.</param>
public QualityEstimation(double slope, double intercept)
{
    // The two assignments are independent of each other, so their order is irrelevant.
    this._intercept = intercept;
    this._slope = slope;
}
28+
29+ /// <summary>
30+ /// The threshold values used to calculate the usability label for every book.
31+ /// </summary>
32+ public Thresholds BookThresholds { get ; set ; } = new Thresholds ( greenThreshold : 0.745 , yellowThreshold : 0.62 ) ;
33+
34+ /// <summary>
35+ /// The threshold values used to calculate the usability label for every chapter.
36+ /// </summary>
37+ public Thresholds ChapterThresholds { get ; set ; } =
38+ new Thresholds ( greenThreshold : 0.745 , yellowThreshold : 0.62 ) ;
1639
17- public VerseThresholds VerseThresholds { get ; set ; } = new VerseThresholds ( ) ;
40+ /// <summary>
41+ /// The threshold values used to calculate the usability label for every verse.
42+ /// </summary>
43+ public Thresholds VerseThresholds { get ; set ; } = new Thresholds ( greenThreshold : 0.745 , yellowThreshold : 0.62 ) ;
1844
45+ /// <summary>
46+ /// The usable parameters to calculate the usable probabilities.
47+ /// </summary>
1948 public UsabilityParameters Usable { get ; set ; } = UsabilityParameters . Usable ;
2049
50+ /// <summary>
51+ /// The unusable parameters to calculate the usable probabilities.
52+ /// </summary>
2153 public UsabilityParameters Unusable { get ; set ; } = UsabilityParameters . Unusable ;
2254
55+ /// <summary>
56+ /// The usability scores for every book.
57+ /// </summary>
2358 public List < BookUsability > UsabilityBooks { get ; } = new List < BookUsability > ( ) ;
2459
60+ /// <summary>
61+ /// The usability scores for every chapter.
62+ /// </summary>
2563 public List < ChapterUsability > UsabilityChapters { get ; } = new List < ChapterUsability > ( ) ;
2664
65+ /// <summary>
66+ /// The usability scores for every line in a text file.
67+ /// </summary>
2768 public List < SequenceUsability > UsabilitySequences { get ; } = new List < SequenceUsability > ( ) ;
2869
70+ /// <summary>
71+ /// The usability scores for every text file.
72+ /// </summary>
2973 public List < TxtFileUsability > UsabilityTxtFiles { get ; } = new List < TxtFileUsability > ( ) ;
3074
75+ /// <summary>
76+ /// The usability scores for every verse.
77+ /// </summary>
3178 public List < VerseUsability > UsabilityVerses { get ; } = new List < VerseUsability > ( ) ;
3279
33- public double CalculateUsableProbability ( double chrF3 )
/// <summary>
/// Estimate the quality of the pre-translations from text files.
/// </summary>
/// <param name="confidences">
/// The confidence values, keyed by a string that identifies the sequence. Must not be <c>null</c>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="confidences"/> is <c>null</c>.</exception>
/// <remarks>
/// Results are appended to <see cref="UsabilitySequences"/> and <see cref="UsabilityTxtFiles"/>.
/// NOTE(review): the internal score collections are only ever added to in the code visible here, so
/// calling this method more than once on the same instance looks like it would process earlier
/// input again - confirm before reusing an instance.
/// </remarks>
public void EstimateQuality(Dictionary<string, double> confidences)
{
    // Fail fast at the public boundary instead of surfacing a NullReferenceException from the
    // enumeration inside ProjectChrF3.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Step 1: turn every confidence into a projected chrF3 score.
    ProjectChrF3(confidences);

    // Step 2: derive usability values for each sequence and each text file.
    ComputeUsableProportionsForTxtFiles();
}
89+
/// <summary>
/// Estimate the quality of the pre-translations from USFM files.
/// </summary>
/// <param name="confidences">The confidence values, keyed by verse reference. Must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="confidences"/> is <c>null</c>.</exception>
/// <remarks>
/// Results are appended to <see cref="UsabilityVerses"/>, <see cref="UsabilityChapters"/> and
/// <see cref="UsabilityBooks"/>. Verse-level usability is only computed for entries whose verse
/// number is greater than zero.
/// NOTE(review): the internal score collections are only ever added to in the code visible here, so
/// calling this method more than once on the same instance looks like it would process earlier
/// input again - confirm before reusing an instance.
/// </remarks>
public void EstimateQuality(Dictionary<VerseRef, double> confidences)
{
    // Fail fast at the public boundary instead of surfacing a NullReferenceException from the
    // enumeration inside ProjectChrF3.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Step 1: turn every confidence into a projected chrF3 score.
    ProjectChrF3(confidences);

    // Step 2: derive usability values for each verse, chapter and book.
    ComputeUsableProportionsForVerses();
}
99+
/// <summary>
/// Calculates the probability that a segment with the given chrF3 value is usable.
/// </summary>
/// <param name="chrF3">The chrF3 value to evaluate.</param>
/// <returns>
/// The usable weight divided by the sum of the usable and unusable weights, where each weight is
/// <c>exp(-(chrF3 - mean)^2 / (2 * variance)) * count</c> taken from <see cref="Usable"/> and
/// <see cref="Unusable"/> respectively.
/// </returns>
/// <remarks>
/// NOTE(review): if both weights evaluate to zero the division is 0 / 0; assuming the parameters
/// are doubles, that yields NaN - confirm whether callers need to handle it.
/// </remarks>
private double CalculateUsableProbability(double chrF3)
{
    // Weight contributed by the usable distribution.
    double usableSquaredDistance = Math.Pow(chrF3 - Usable.Mean, 2);
    double usableWeight = Math.Exp(-usableSquaredDistance / (2 * Usable.Variance)) * Usable.Count;

    // Weight contributed by the unusable distribution.
    double unusableSquaredDistance = Math.Pow(chrF3 - Unusable.Mean, 2);
    double unusableWeight = Math.Exp(-unusableSquaredDistance / (2 * Unusable.Variance)) * Unusable.Count;

    // Share of the total weight that belongs to the usable distribution.
    double totalWeight = usableWeight + unusableWeight;
    return usableWeight / totalWeight;
}
40107
41- public void ComputeBookUsability ( BookScores bookScores )
108+ private void ComputeBookUsability ( )
42109 {
43- foreach ( string book in bookScores . Scores . Keys )
110+ foreach ( string book in _bookScores . Scores . Keys )
44111 {
45- Score score = bookScores . GetScore ( book ) ;
112+ Score score = _bookScores . GetScore ( book ) ;
46113 if ( score is null )
47114 {
48115 continue ;
49116 }
50117
51- List < double > bookUsabilities = bookScores . GetVerseUsabilities ( book ) ;
118+ List < double > bookUsabilities = _bookScores . GetVerseUsabilities ( book ) ;
52119 double averageProbability = bookUsabilities . Average ( ) ;
53120 UsabilityBooks . Add (
54121 new BookUsability
@@ -62,20 +129,20 @@ public void ComputeBookUsability(BookScores bookScores)
62129 }
63130 }
64131
65- public void ComputeChapterUsability ( ChapterScores chapterScores )
132+ public void ComputeChapterUsability ( )
66133 {
67- foreach ( KeyValuePair < string , Dictionary < int , Score > > chapterScoresByBook in chapterScores . Scores )
134+ foreach ( KeyValuePair < string , Dictionary < int , Score > > chapterScoresByBook in _chapterScores . Scores )
68135 {
69136 string book = chapterScoresByBook . Key ;
70137 foreach ( int chapter in chapterScoresByBook . Value . Keys )
71138 {
72- Score score = chapterScores . GetScore ( book , chapter ) ;
139+ Score score = _chapterScores . GetScore ( book , chapter ) ;
73140 if ( score is null )
74141 {
75142 continue ;
76143 }
77144
78- List < double > chapterUsabilities = chapterScores . GetVerseUsabilities ( book , chapter ) ;
145+ List < double > chapterUsabilities = _chapterScores . GetVerseUsabilities ( book , chapter ) ;
79146 double averageProbability = chapterUsabilities . Average ( ) ;
80147 UsabilityChapters . Add (
81148 new ChapterUsability
@@ -91,17 +158,17 @@ public void ComputeChapterUsability(ChapterScores chapterScores)
91158 }
92159 }
93160
94- public void ComputeTxtFileUsability ( TxtFileScores txtFileScores )
161+ private void ComputeTxtFileUsability ( )
95162 {
96- foreach ( string targetDraftFileStem in txtFileScores . Scores . Keys )
163+ foreach ( string targetDraftFileStem in _txtFileScores . Scores . Keys )
97164 {
98- Score score = txtFileScores . GetScore ( targetDraftFileStem ) ;
165+ Score score = _txtFileScores . GetScore ( targetDraftFileStem ) ;
99166 if ( score is null )
100167 {
101168 continue ;
102169 }
103170
104- List < double > txtFileUsabilities = txtFileScores . GetSequenceUsabilities ( targetDraftFileStem ) ;
171+ List < double > txtFileUsabilities = _txtFileScores . GetSequenceUsabilities ( targetDraftFileStem ) ;
105172 double averageProbability = txtFileUsabilities . Average ( ) ;
106173 UsabilityTxtFiles . Add (
107174 new TxtFileUsability
@@ -115,21 +182,17 @@ public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
115182 }
116183 }
117184
118- public void ComputeUsableProportions (
119- List < VerseScore > verseScores ,
120- ref ChapterScores chapterScores ,
121- ref BookScores bookScores
122- )
185+ private void ComputeUsableProportionsForVerses ( )
123186 {
124- foreach ( VerseScore verseScore in verseScores . Where ( v => v . VerseRef . VerseNum > 0 ) )
187+ foreach ( VerseScore verseScore in _verseScores . Where ( v => v . VerseRef . VerseNum > 0 ) )
125188 {
126189 double probability = CalculateUsableProbability ( verseScore . ProjectedChrF3 ) ;
127- chapterScores . AppendVerseUsability (
190+ _chapterScores . AppendVerseUsability (
128191 verseScore . VerseRef . Book ,
129192 verseScore . VerseRef . ChapterNum ,
130193 probability
131194 ) ;
132- bookScores . AppendVerseUsability ( verseScore . VerseRef . Book , probability ) ;
195+ _bookScores . AppendVerseUsability ( verseScore . VerseRef . Book , probability ) ;
133196 UsabilityVerses . Add (
134197 new VerseUsability
135198 {
@@ -143,16 +206,16 @@ ref BookScores bookScores
143206 ) ;
144207 }
145208
146- ComputeChapterUsability ( chapterScores ) ;
147- ComputeBookUsability ( bookScores ) ;
209+ ComputeChapterUsability ( ) ;
210+ ComputeBookUsability ( ) ;
148211 }
149212
150- public void ComputeUsableProportions ( List < SequenceScore > sequenceScores , ref TxtFileScores txtFileScores )
213+ private void ComputeUsableProportionsForTxtFiles ( )
151214 {
152- foreach ( SequenceScore sequenceScore in sequenceScores )
215+ foreach ( SequenceScore sequenceScore in _sequenceScores )
153216 {
154217 double probability = CalculateUsableProbability ( sequenceScore . ProjectedChrF3 ) ;
155- txtFileScores . AppendSequenceUsability ( sequenceScore . TargetDraftFileStem , probability ) ;
218+ _txtFileScores . AppendSequenceUsability ( sequenceScore . TargetDraftFileStem , probability ) ;
156219 UsabilitySequences . Add (
157220 new SequenceUsability
158221 {
@@ -165,33 +228,10 @@ public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref Txt
165228 ) ;
166229 }
167230
168- ComputeTxtFileUsability ( txtFileScores ) ;
169- }
170-
171- public void EstimateQuality ( double slope , double intercept , Dictionary < string , double > confidences )
172- {
173- var sequenceScores = new List < SequenceScore > ( ) ;
174- var txtFileScores = new TxtFileScores ( ) ;
175- ProjectChrF3 ( slope , intercept , confidences , ref sequenceScores , ref txtFileScores ) ;
176- ComputeUsableProportions ( sequenceScores , ref txtFileScores ) ;
177- }
178-
179- public void EstimateQuality ( double slope , double intercept , Dictionary < VerseRef , double > confidences )
180- {
181- var verseScores = new List < VerseScore > ( ) ;
182- var chapterScores = new ChapterScores ( ) ;
183- var bookScores = new BookScores ( ) ;
184- ProjectChrF3 ( slope , intercept , confidences , ref verseScores , ref chapterScores , ref bookScores ) ;
185- ComputeUsableProportions ( verseScores , ref chapterScores , ref bookScores ) ;
231+ ComputeTxtFileUsability ( ) ;
186232 }
187233
188- public void ProjectChrF3 (
189- double slope ,
190- double intercept ,
191- Dictionary < string , double > confidences ,
192- ref List < SequenceScore > sequenceScores ,
193- ref TxtFileScores txtFileScores
194- )
234+ private void ProjectChrF3 ( Dictionary < string , double > confidences )
195235 {
196236 foreach ( KeyValuePair < string , double > confidence in confidences )
197237 {
@@ -200,35 +240,28 @@ ref TxtFileScores txtFileScores
200240 {
201241 string targetDraftFileStem = keyParts [ 0 ] ;
202242 var score = new SequenceScore (
203- slope ,
243+ _slope ,
204244 confidence . Value ,
205- intercept ,
245+ _intercept ,
206246 sequenceNumber ,
207247 targetDraftFileStem
208248 ) ;
209- sequenceScores . Add ( score ) ;
210- txtFileScores . AddScore ( targetDraftFileStem , score ) ;
249+ _sequenceScores . Add ( score ) ;
250+ _txtFileScores . AddScore ( targetDraftFileStem , score ) ;
211251 }
212252 }
213253 }
214254
215- public void ProjectChrF3 (
216- double slope ,
217- double intercept ,
218- Dictionary < VerseRef , double > confidences ,
219- ref List < VerseScore > verseScores ,
220- ref ChapterScores chapterScores ,
221- ref BookScores bookScores
222- )
/// <summary>
/// Builds a verse score for every confidence value and registers it at verse, chapter and book level.
/// </summary>
/// <param name="confidences">The confidence values, keyed by verse reference.</param>
private void ProjectChrF3(Dictionary<VerseRef, double> confidences)
{
    foreach (KeyValuePair<VerseRef, double> entry in confidences)
    {
        VerseRef verseRef = entry.Key;

        // The score is created from the stored slope and intercept plus this entry's confidence.
        var verseScore = new VerseScore(_slope, entry.Value, _intercept, verseRef);
        _verseScores.Add(verseScore);

        // Register the same score under its chapter and under its book.
        string bookId = verseRef.Book;
        int chapterNumber = verseRef.ChapterNum;
        _chapterScores.AddScore(bookId, chapterNumber, verseScore);
        _bookScores.AddScore(bookId, verseScore);
    }
}
234267 }
0 commit comments