Skip to content

Commit c477a48

Browse files
committed
Add calculation of book and chapter confidences by geometric mean
1 parent 5e5db10 commit c477a48

2 files changed

Lines changed: 142 additions & 8 deletions

File tree

src/SIL.Machine/QualityEstimation/QualityEstimation.cs

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ public QualityEstimation(double slope, double intercept)
8484
public void EstimateQuality(Dictionary<string, double> confidences)
8585
{
8686
ProjectChrF3(confidences);
87-
ComputeUsableProportionsForTxtFiles();
87+
ComputeSequenceUsability();
8888
}
8989

9090
/// <summary>
@@ -94,7 +94,23 @@ public void EstimateQuality(Dictionary<string, double> confidences)
9494
public void EstimateQuality(Dictionary<VerseRef, double> confidences)
9595
{
9696
ProjectChrF3(confidences);
97-
ComputeUsableProportionsForVerses();
97+
ComputeVerseUsability();
98+
}
99+
100+
/// <summary>
101+
/// Calculates the geometric mean for a collection of values.
102+
/// </summary>
103+
/// <param name="values">The values to compute the geometric mean of.</param>
104+
/// <returns>The geometric mean, or 0 if <paramref name="values"/> is null, is empty, or contains a value that is not positive.</returns>
105+
private static double GeometricMean(IList<double> values)
106+
{
107+
// The geometric mean requires positive values; any other input yields 0
108+
if (values == null || !values.Any() || values.Any(x => x <= 0))
109+
return 0;
110+
111+
// Sum the natural logarithms of all values, divide by the count,
112+
// and take the exponential of the result
113+
return Math.Exp(values.Sum(Math.Log) / values.Count);
98114
}
99115

100116
private double CalculateUsableProbability(double chrF3)
@@ -129,7 +145,7 @@ private void ComputeBookUsability()
129145
}
130146
}
131147

132-
public void ComputeChapterUsability()
148+
private void ComputeChapterUsability()
133149
{
134150
foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in _chapterScores.Scores)
135151
{
@@ -182,7 +198,7 @@ private void ComputeTxtFileUsability()
182198
}
183199
}
184200

185-
private void ComputeUsableProportionsForVerses()
201+
private void ComputeVerseUsability()
186202
{
187203
foreach (VerseScore verseScore in _verseScores.Where(v => v.VerseRef.VerseNum > 0))
188204
{
@@ -210,7 +226,7 @@ private void ComputeUsableProportionsForVerses()
210226
ComputeBookUsability();
211227
}
212228

213-
private void ComputeUsableProportionsForTxtFiles()
229+
private void ComputeSequenceUsability()
214230
{
215231
foreach (SequenceScore sequenceScore in _sequenceScores)
216232
{
@@ -233,6 +249,7 @@ private void ComputeUsableProportionsForTxtFiles()
233249

234250
private void ProjectChrF3(Dictionary<string, double> confidences)
235251
{
252+
var confidencesByTxtFile = new Dictionary<string, List<double>>();
236253
foreach (KeyValuePair<string, double> confidence in confidences)
237254
{
238255
string[] keyParts = confidence.Key.Split(':');
@@ -247,21 +264,82 @@ private void ProjectChrF3(Dictionary<string, double> confidences)
247264
targetDraftFileStem
248265
);
249266
_sequenceScores.Add(score);
250-
_txtFileScores.AddScore(targetDraftFileStem, score);
267+
268+
// Record the confidence by text file
269+
if (!confidencesByTxtFile.TryGetValue(targetDraftFileStem, out List<double> txtFileConfidences))
270+
{
271+
txtFileConfidences = new List<double>();
272+
confidencesByTxtFile[targetDraftFileStem] = txtFileConfidences;
273+
}
274+
275+
txtFileConfidences.Add(confidence.Value);
251276
}
252277
}
278+
279+
foreach (KeyValuePair<string, List<double>> txtFileConfidences in confidencesByTxtFile)
280+
{
281+
_txtFileScores.AddScore(
282+
txtFileConfidences.Key,
283+
new Score(_slope, confidence: GeometricMean(txtFileConfidences.Value), _intercept)
284+
);
285+
}
253286
}
254287

255288
private void ProjectChrF3(Dictionary<VerseRef, double> confidences)
256289
{
290+
var confidencesByBook = new Dictionary<string, List<double>>();
291+
var confidencesByBookAndChapter = new Dictionary<(string, int), List<double>>();
257292
foreach (KeyValuePair<VerseRef, double> confidence in confidences)
258293
{
259294
var score = new VerseScore(_slope, confidence.Value, _intercept, confidence.Key);
260295
_verseScores.Add(score);
261296
string book = confidence.Key.Book;
262297
int chapter = confidence.Key.ChapterNum;
263-
_chapterScores.AddScore(book, chapter, score);
264-
_bookScores.AddScore(book, score);
298+
299+
// Record the confidence by book and chapter
300+
if (
301+
!confidencesByBookAndChapter.TryGetValue(
302+
(book, chapter),
303+
out List<double> bookAndChapterConfidences
304+
)
305+
)
306+
{
307+
bookAndChapterConfidences = new List<double>();
308+
confidencesByBookAndChapter[(book, chapter)] = bookAndChapterConfidences;
309+
}
310+
311+
bookAndChapterConfidences.Add(confidence.Value);
312+
313+
// Record the confidence by book
314+
if (!confidencesByBook.TryGetValue(book, out List<double> bookConfidences))
315+
{
316+
bookConfidences = new List<double>();
317+
confidencesByBook[book] = bookConfidences;
318+
}
319+
320+
bookConfidences.Add(confidence.Value);
321+
}
322+
323+
foreach (KeyValuePair<string, List<double>> bookConfidences in confidencesByBook)
324+
{
325+
_bookScores.AddScore(
326+
bookConfidences.Key,
327+
new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept)
328+
);
329+
}
330+
331+
foreach (
332+
KeyValuePair<
333+
(string Book, int Chapter),
334+
List<double>
335+
> bookAndChapterConfidences in confidencesByBookAndChapter
336+
)
337+
{
338+
_chapterScores.AddScore(
339+
bookAndChapterConfidences.Key.Book,
340+
bookAndChapterConfidences.Key.Chapter,
341+
new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept)
342+
);
265343
}
266344
}
267345
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
using NUnit.Framework;
2+
using SIL.Scripture;
3+
4+
namespace SIL.Machine.QualityEstimation;
5+
6+
[TestFixture]
7+
public class QualityEstimationTests
8+
{
9+
[Test]
10+
public void QualityEstimation_TxtFiles()
11+
{
12+
var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0);
13+
var confidences = new Dictionary<string, double>
14+
{
15+
["MAT.txt:1"] = 85.0,
16+
["MAT.txt:2"] = 80.0,
17+
["MRK.txt:1"] = 60.0,
18+
};
19+
qualityEstimation.EstimateQuality(confidences);
20+
using (Assert.EnterMultipleScope())
21+
{
22+
Assert.That(qualityEstimation.UsabilitySequences, Has.Count.EqualTo(3));
23+
Assert.That(qualityEstimation.UsabilitySequences[0].Label, Is.EqualTo(UsabilityLabel.Green));
24+
Assert.That(qualityEstimation.UsabilitySequences[1].Label, Is.EqualTo(UsabilityLabel.Yellow));
25+
Assert.That(qualityEstimation.UsabilitySequences[2].Label, Is.EqualTo(UsabilityLabel.Red));
26+
Assert.That(qualityEstimation.UsabilityTxtFiles, Has.Count.EqualTo(2));
27+
Assert.That(qualityEstimation.UsabilityTxtFiles[0].Label, Is.EqualTo(UsabilityLabel.Green));
28+
Assert.That(qualityEstimation.UsabilityTxtFiles[1].Label, Is.EqualTo(UsabilityLabel.Red));
29+
}
30+
}
31+
32+
[Test]
33+
public void QualityEstimation_Verses()
34+
{
35+
var qualityEstimation = new QualityEstimation(slope: 0.6, intercept: 1.0);
36+
var confidences = new Dictionary<VerseRef, double>
37+
{
38+
[new VerseRef(1, 1, 1)] = 85.0,
39+
[new VerseRef(1, 1, 2)] = 80.0,
40+
[new VerseRef(1, 2, 1)] = 60.0,
41+
};
42+
qualityEstimation.EstimateQuality(confidences);
43+
using (Assert.EnterMultipleScope())
44+
{
45+
Assert.That(qualityEstimation.UsabilityVerses, Has.Count.EqualTo(3));
46+
Assert.That(qualityEstimation.UsabilityVerses[0].Label, Is.EqualTo(UsabilityLabel.Green));
47+
Assert.That(qualityEstimation.UsabilityVerses[1].Label, Is.EqualTo(UsabilityLabel.Yellow));
48+
Assert.That(qualityEstimation.UsabilityVerses[2].Label, Is.EqualTo(UsabilityLabel.Red));
49+
Assert.That(qualityEstimation.UsabilityChapters, Has.Count.EqualTo(2));
50+
Assert.That(qualityEstimation.UsabilityChapters[0].Label, Is.EqualTo(UsabilityLabel.Green));
51+
Assert.That(qualityEstimation.UsabilityChapters[1].Label, Is.EqualTo(UsabilityLabel.Red));
52+
Assert.That(qualityEstimation.UsabilityBooks, Has.Count.EqualTo(1));
53+
Assert.That(qualityEstimation.UsabilityBooks[0].Label, Is.EqualTo(UsabilityLabel.Yellow));
54+
}
55+
}
56+
}

0 commit comments

Comments
 (0)