@@ -84,7 +84,7 @@ public QualityEstimation(double slope, double intercept)
public void EstimateQuality(Dictionary<string, double> confidences)
{
    // Fail fast with a descriptive exception; without this check a null
    // dictionary only surfaces as a NullReferenceException when
    // ProjectChrF3 starts enumerating it.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Projection must run first: it records the sequence scores that
    // ComputeSequenceUsability then iterates over.
    ProjectChrF3(confidences);
    ComputeSequenceUsability();
}
8989
9090 /// <summary>
@@ -94,7 +94,23 @@ public void EstimateQuality(Dictionary<string, double> confidences)
public void EstimateQuality(Dictionary<VerseRef, double> confidences)
{
    // Fail fast with a descriptive exception; without this check a null
    // dictionary only surfaces as a NullReferenceException when
    // ProjectChrF3 starts enumerating it.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Projection must run first: it records the verse scores that
    // ComputeVerseUsability then iterates over.
    ProjectChrF3(confidences);
    ComputeVerseUsability();
}
99+
/// <summary>
/// Computes the geometric mean of a list of values by averaging their natural logarithms.
/// </summary>
/// <param name="values">The values to average. Every value must be greater than zero.</param>
/// <returns>
/// The geometric mean, or 0 when <paramref name="values"/> is null, is empty,
/// or contains a value that is zero or negative.
/// </returns>
private static double GeometricMean(IList<double> values)
{
    if (values == null || values.Count == 0)
    {
        return 0;
    }

    // The logarithm is only usable for positive numbers, so a single
    // non-positive value makes the whole result collapse to 0.
    double logTotal = 0;
    foreach (double value in values)
    {
        if (value <= 0)
        {
            return 0;
        }

        logTotal += Math.Log(value);
    }

    // exp(mean of ln(x)) is the n-th root of the product of the values
    return Math.Exp(logTotal / values.Count);
}
99115
100116 private double CalculateUsableProbability ( double chrF3 )
@@ -129,7 +145,7 @@ private void ComputeBookUsability()
129145 }
130146 }
131147
132- public void ComputeChapterUsability ( )
148+ private void ComputeChapterUsability ( )
133149 {
134150 foreach ( KeyValuePair < string , Dictionary < int , Score > > chapterScoresByBook in _chapterScores . Scores )
135151 {
@@ -182,7 +198,7 @@ private void ComputeTxtFileUsability()
182198 }
183199 }
184200
185- private void ComputeUsableProportionsForVerses ( )
201+ private void ComputeVerseUsability ( )
186202 {
187203 foreach ( VerseScore verseScore in _verseScores . Where ( v => v . VerseRef . VerseNum > 0 ) )
188204 {
@@ -210,7 +226,7 @@ private void ComputeUsableProportionsForVerses()
210226 ComputeBookUsability ( ) ;
211227 }
212228
213- private void ComputeUsableProportionsForTxtFiles ( )
229+ private void ComputeSequenceUsability ( )
214230 {
215231 foreach ( SequenceScore sequenceScore in _sequenceScores )
216232 {
@@ -233,6 +249,7 @@ private void ComputeUsableProportionsForTxtFiles()
233249
234250 private void ProjectChrF3 ( Dictionary < string , double > confidences )
235251 {
252+ var confidencesByTxtFile = new Dictionary < string , List < double > > ( ) ;
236253 foreach ( KeyValuePair < string , double > confidence in confidences )
237254 {
238255 string [ ] keyParts = confidence . Key . Split ( ':' ) ;
@@ -247,21 +264,82 @@ private void ProjectChrF3(Dictionary<string, double> confidences)
247264 targetDraftFileStem
248265 ) ;
249266 _sequenceScores . Add ( score ) ;
250- _txtFileScores . AddScore ( targetDraftFileStem , score ) ;
267+
268+ // Record the confidence by text file
269+ if ( ! confidencesByTxtFile . TryGetValue ( targetDraftFileStem , out List < double > txtFileConfidences ) )
270+ {
271+ txtFileConfidences = new List < double > ( ) ;
272+ confidencesByTxtFile [ targetDraftFileStem ] = txtFileConfidences ;
273+ }
274+
275+ txtFileConfidences . Add ( confidence . Value ) ;
251276 }
252277 }
278+
279+ foreach ( KeyValuePair < string , List < double > > txtFileConfidences in confidencesByTxtFile )
280+ {
281+ _txtFileScores . AddScore (
282+ txtFileConfidences . Key ,
283+ new Score ( _slope , confidence : GeometricMean ( txtFileConfidences . Value ) , _intercept )
284+ ) ;
285+ }
253286 }
254287
private void ProjectChrF3(Dictionary<VerseRef, double> confidences)
{
    // Verse confidences grouped by book, and by book + chapter, so that one
    // aggregate score per group can be added after all verses are processed.
    var perBook = new Dictionary<string, List<double>>();
    var perChapter = new Dictionary<(string Book, int Chapter), List<double>>();

    // Appends a value to the list stored under the key, creating the list on first use.
    void Append<TKey>(Dictionary<TKey, List<double>> groups, TKey key, double value)
    {
        if (!groups.TryGetValue(key, out List<double> bucket))
        {
            bucket = new List<double>();
            groups[key] = bucket;
        }

        bucket.Add(value);
    }

    foreach (KeyValuePair<VerseRef, double> entry in confidences)
    {
        double verseConfidence = entry.Value;

        // Every verse gets its own score
        _verseScores.Add(new VerseScore(_slope, verseConfidence, _intercept, entry.Key));

        string bookId = entry.Key.Book;
        int chapterNumber = entry.Key.ChapterNum;

        // Record the confidence by book and chapter
        Append(perChapter, (bookId, chapterNumber), verseConfidence);

        // Record the confidence by book
        Append(perBook, bookId, verseConfidence);
    }

    // One score per book, built from the geometric mean of its verse confidences
    foreach (KeyValuePair<string, List<double>> bookGroup in perBook)
    {
        var bookScore = new Score(_slope, confidence: GeometricMean(bookGroup.Value), _intercept);
        _bookScores.AddScore(bookGroup.Key, bookScore);
    }

    // One score per chapter, built from the geometric mean of its verse confidences
    foreach (KeyValuePair<(string Book, int Chapter), List<double>> chapterGroup in perChapter)
    {
        (string chapterBook, int chapterNum) = chapterGroup.Key;
        var chapterScore = new Score(_slope, confidence: GeometricMean(chapterGroup.Value), _intercept);
        _chapterScores.AddScore(chapterBook, chapterNum, chapterScore);
    }
}
267345 }
0 commit comments