Skip to content

Commit 5e5db10

Browse files
committed
Refactored quality estimation
1 parent 3dec5ac commit 5e5db10

6 files changed

Lines changed: 124 additions & 112 deletions

File tree

src/SIL.Machine/QualityEstimation/QualityEstimation.cs

Lines changed: 104 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2,53 +2,120 @@
22
using System.Collections.Generic;
33
using System.Linq;
44
using SIL.Machine.QualityEstimation.Scores;
5-
using SIL.Machine.QualityEstimation.Thresholds;
65
using SIL.Machine.QualityEstimation.Usability;
76
using SIL.Scripture;
87

98
namespace SIL.Machine.QualityEstimation
109
{
10+
/// <summary>
11+
/// Provides chrF3 quality estimation support for pre-translations.
12+
/// </summary>
1113
public class QualityEstimation
1214
{
13-
public BookThresholds BookThresholds { get; set; } = new BookThresholds();
15+
private readonly BookScores _bookScores = new BookScores();
16+
private readonly ChapterScores _chapterScores = new ChapterScores();
17+
private readonly double _intercept;
18+
private readonly List<SequenceScore> _sequenceScores = new List<SequenceScore>();
19+
private readonly double _slope;
20+
private readonly TxtFileScores _txtFileScores = new TxtFileScores();
21+
private readonly List<VerseScore> _verseScores = new List<VerseScore>();
1422

15-
public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds();
23+
/// <summary>
/// Creates a quality estimator that uses the given slope and intercept for every score it creates.
/// </summary>
/// <param name="slope">The slope passed to each sequence and verse score when projecting chrF3.</param>
/// <param name="intercept">The intercept passed to each sequence and verse score when projecting chrF3.</param>
public QualityEstimation(double slope, double intercept)
{
    _intercept = intercept;
    _slope = slope;
}
28+
29+
/// <summary>
30+
/// The threshold values used to calculate the usability label for every book.
31+
/// </summary>
32+
public Thresholds BookThresholds { get; set; } = new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62);
33+
34+
/// <summary>
35+
/// The threshold values used to calculate the usability label for every chapter.
36+
/// </summary>
37+
public Thresholds ChapterThresholds { get; set; } =
38+
new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62);
1639

17-
public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds();
40+
/// <summary>
41+
/// The threshold values used to calculate the usability label for every verse.
42+
/// </summary>
43+
public Thresholds VerseThresholds { get; set; } = new Thresholds(greenThreshold: 0.745, yellowThreshold: 0.62);
1844

45+
/// <summary>
46+
/// The parameters (mean, variance, and count) of the usable class, used to calculate usable probabilities.
47+
/// </summary>
1948
public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable;
2049

50+
/// <summary>
51+
/// The parameters (mean, variance, and count) of the unusable class, used to calculate usable probabilities.
52+
/// </summary>
2153
public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable;
2254

55+
/// <summary>
56+
/// The usability scores for every book.
57+
/// </summary>
2358
public List<BookUsability> UsabilityBooks { get; } = new List<BookUsability>();
2459

60+
/// <summary>
61+
/// The usability scores for every chapter.
62+
/// </summary>
2563
public List<ChapterUsability> UsabilityChapters { get; } = new List<ChapterUsability>();
2664

65+
/// <summary>
66+
/// The usability scores for every line in a text file.
67+
/// </summary>
2768
public List<SequenceUsability> UsabilitySequences { get; } = new List<SequenceUsability>();
2869

70+
/// <summary>
71+
/// The usability scores for every text file.
72+
/// </summary>
2973
public List<TxtFileUsability> UsabilityTxtFiles { get; } = new List<TxtFileUsability>();
3074

75+
/// <summary>
76+
/// The usability scores for every verse.
77+
/// </summary>
3178
public List<VerseUsability> UsabilityVerses { get; } = new List<VerseUsability>();
3279

33-
public double CalculateUsableProbability(double chrF3)
80+
/// <summary>
/// Estimate the quality of the pre-translations from text files.
/// </summary>
/// <param name="confidences">
/// The confidence values, keyed by a string from which the target draft file stem and the sequence number
/// are parsed.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="confidences"/> is <c>null</c>.</exception>
public void EstimateQuality(Dictionary<string, double> confidences)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException from the projection loop.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Project each confidence to a chrF3 score, then derive sequence and text file usability from the scores.
    ProjectChrF3(confidences);
    ComputeUsableProportionsForTxtFiles();
}
89+
90+
/// <summary>
/// Estimate the quality of the pre-translations from USFM files.
/// </summary>
/// <param name="confidences">The confidence values, keyed by verse reference.</param>
/// <exception cref="ArgumentNullException"><paramref name="confidences"/> is <c>null</c>.</exception>
public void EstimateQuality(Dictionary<VerseRef, double> confidences)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException from the projection loop.
    if (confidences is null)
    {
        throw new ArgumentNullException(nameof(confidences));
    }

    // Project each confidence to a chrF3 score, then derive verse, chapter, and book usability from the scores.
    ProjectChrF3(confidences);
    ComputeUsableProportionsForVerses();
}
99+
100+
/// <summary>
/// Calculates the probability that a projected chrF3 score belongs to the usable class rather than the
/// unusable class.
/// </summary>
/// <param name="chrF3">The projected chrF3 score.</param>
/// <returns>
/// The usable weight divided by the sum of the usable and unusable weights, where each weight is
/// <c>exp(-(chrF3 - mean)^2 / (2 * variance)) * count</c> for the respective parameters.
/// </returns>
private double CalculateUsableProbability(double chrF3)
{
    double usableExponent = -Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance);
    double unusableExponent = -Math.Pow(chrF3 - Unusable.Mean, 2) / (2 * Unusable.Variance);

    double usableWeight = Math.Exp(usableExponent) * Usable.Count;
    double unusableWeight = Math.Exp(unusableExponent) * Unusable.Count;
    double totalWeight = usableWeight + unusableWeight;

    // The exact comparison with zero is intentional: only a true 0/0 needs special handling, and every
    // other input (including NaN) keeps the original arithmetic and result.
    if (totalWeight != 0)
    {
        return usableWeight / totalWeight;
    }

    // Both weights are zero, typically because both exponentials underflowed for a score far from both
    // means. Dividing would give NaN, so evaluate the same ratio in log space instead:
    // usable / (usable + unusable) == 1 / (1 + exp(-(ln(usable) - ln(unusable)))).
    double logOdds =
        usableExponent + Math.Log(Usable.Count) - (unusableExponent + Math.Log(Unusable.Count));
    return 1 / (1 + Math.Exp(-logOdds));
}
40107

41-
public void ComputeBookUsability(BookScores bookScores)
108+
private void ComputeBookUsability()
42109
{
43-
foreach (string book in bookScores.Scores.Keys)
110+
foreach (string book in _bookScores.Scores.Keys)
44111
{
45-
Score score = bookScores.GetScore(book);
112+
Score score = _bookScores.GetScore(book);
46113
if (score is null)
47114
{
48115
continue;
49116
}
50117

51-
List<double> bookUsabilities = bookScores.GetVerseUsabilities(book);
118+
List<double> bookUsabilities = _bookScores.GetVerseUsabilities(book);
52119
double averageProbability = bookUsabilities.Average();
53120
UsabilityBooks.Add(
54121
new BookUsability
@@ -62,20 +129,20 @@ public void ComputeBookUsability(BookScores bookScores)
62129
}
63130
}
64131

65-
public void ComputeChapterUsability(ChapterScores chapterScores)
132+
public void ComputeChapterUsability()
66133
{
67-
foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in chapterScores.Scores)
134+
foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in _chapterScores.Scores)
68135
{
69136
string book = chapterScoresByBook.Key;
70137
foreach (int chapter in chapterScoresByBook.Value.Keys)
71138
{
72-
Score score = chapterScores.GetScore(book, chapter);
139+
Score score = _chapterScores.GetScore(book, chapter);
73140
if (score is null)
74141
{
75142
continue;
76143
}
77144

78-
List<double> chapterUsabilities = chapterScores.GetVerseUsabilities(book, chapter);
145+
List<double> chapterUsabilities = _chapterScores.GetVerseUsabilities(book, chapter);
79146
double averageProbability = chapterUsabilities.Average();
80147
UsabilityChapters.Add(
81148
new ChapterUsability
@@ -91,17 +158,17 @@ public void ComputeChapterUsability(ChapterScores chapterScores)
91158
}
92159
}
93160

94-
public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
161+
private void ComputeTxtFileUsability()
95162
{
96-
foreach (string targetDraftFileStem in txtFileScores.Scores.Keys)
163+
foreach (string targetDraftFileStem in _txtFileScores.Scores.Keys)
97164
{
98-
Score score = txtFileScores.GetScore(targetDraftFileStem);
165+
Score score = _txtFileScores.GetScore(targetDraftFileStem);
99166
if (score is null)
100167
{
101168
continue;
102169
}
103170

104-
List<double> txtFileUsabilities = txtFileScores.GetSequenceUsabilities(targetDraftFileStem);
171+
List<double> txtFileUsabilities = _txtFileScores.GetSequenceUsabilities(targetDraftFileStem);
105172
double averageProbability = txtFileUsabilities.Average();
106173
UsabilityTxtFiles.Add(
107174
new TxtFileUsability
@@ -115,21 +182,17 @@ public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
115182
}
116183
}
117184

118-
public void ComputeUsableProportions(
119-
List<VerseScore> verseScores,
120-
ref ChapterScores chapterScores,
121-
ref BookScores bookScores
122-
)
185+
private void ComputeUsableProportionsForVerses()
123186
{
124-
foreach (VerseScore verseScore in verseScores.Where(v => v.VerseRef.VerseNum > 0))
187+
foreach (VerseScore verseScore in _verseScores.Where(v => v.VerseRef.VerseNum > 0))
125188
{
126189
double probability = CalculateUsableProbability(verseScore.ProjectedChrF3);
127-
chapterScores.AppendVerseUsability(
190+
_chapterScores.AppendVerseUsability(
128191
verseScore.VerseRef.Book,
129192
verseScore.VerseRef.ChapterNum,
130193
probability
131194
);
132-
bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability);
195+
_bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability);
133196
UsabilityVerses.Add(
134197
new VerseUsability
135198
{
@@ -143,16 +206,16 @@ ref BookScores bookScores
143206
);
144207
}
145208

146-
ComputeChapterUsability(chapterScores);
147-
ComputeBookUsability(bookScores);
209+
ComputeChapterUsability();
210+
ComputeBookUsability();
148211
}
149212

150-
public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref TxtFileScores txtFileScores)
213+
private void ComputeUsableProportionsForTxtFiles()
151214
{
152-
foreach (SequenceScore sequenceScore in sequenceScores)
215+
foreach (SequenceScore sequenceScore in _sequenceScores)
153216
{
154217
double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3);
155-
txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability);
218+
_txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability);
156219
UsabilitySequences.Add(
157220
new SequenceUsability
158221
{
@@ -165,33 +228,10 @@ public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref Txt
165228
);
166229
}
167230

168-
ComputeTxtFileUsability(txtFileScores);
169-
}
170-
171-
public void EstimateQuality(double slope, double intercept, Dictionary<string, double> confidences)
172-
{
173-
var sequenceScores = new List<SequenceScore>();
174-
var txtFileScores = new TxtFileScores();
175-
ProjectChrF3(slope, intercept, confidences, ref sequenceScores, ref txtFileScores);
176-
ComputeUsableProportions(sequenceScores, ref txtFileScores);
177-
}
178-
179-
public void EstimateQuality(double slope, double intercept, Dictionary<VerseRef, double> confidences)
180-
{
181-
var verseScores = new List<VerseScore>();
182-
var chapterScores = new ChapterScores();
183-
var bookScores = new BookScores();
184-
ProjectChrF3(slope, intercept, confidences, ref verseScores, ref chapterScores, ref bookScores);
185-
ComputeUsableProportions(verseScores, ref chapterScores, ref bookScores);
231+
ComputeTxtFileUsability();
186232
}
187233

188-
public void ProjectChrF3(
189-
double slope,
190-
double intercept,
191-
Dictionary<string, double> confidences,
192-
ref List<SequenceScore> sequenceScores,
193-
ref TxtFileScores txtFileScores
194-
)
234+
private void ProjectChrF3(Dictionary<string, double> confidences)
195235
{
196236
foreach (KeyValuePair<string, double> confidence in confidences)
197237
{
@@ -200,35 +240,28 @@ ref TxtFileScores txtFileScores
200240
{
201241
string targetDraftFileStem = keyParts[0];
202242
var score = new SequenceScore(
203-
slope,
243+
_slope,
204244
confidence.Value,
205-
intercept,
245+
_intercept,
206246
sequenceNumber,
207247
targetDraftFileStem
208248
);
209-
sequenceScores.Add(score);
210-
txtFileScores.AddScore(targetDraftFileStem, score);
249+
_sequenceScores.Add(score);
250+
_txtFileScores.AddScore(targetDraftFileStem, score);
211251
}
212252
}
213253
}
214254

215-
public void ProjectChrF3(
216-
double slope,
217-
double intercept,
218-
Dictionary<VerseRef, double> confidences,
219-
ref List<VerseScore> verseScores,
220-
ref ChapterScores chapterScores,
221-
ref BookScores bookScores
222-
)
255+
private void ProjectChrF3(Dictionary<VerseRef, double> confidences)
223256
{
224257
foreach (KeyValuePair<VerseRef, double> confidence in confidences)
225258
{
226-
var score = new VerseScore(slope, confidence.Value, intercept, confidence.Key);
227-
verseScores.Add(score);
259+
var score = new VerseScore(_slope, confidence.Value, _intercept, confidence.Key);
260+
_verseScores.Add(score);
228261
string book = confidence.Key.Book;
229262
int chapter = confidence.Key.ChapterNum;
230-
chapterScores.AddScore(book, chapter, score);
231-
bookScores.AddScore(book, score);
263+
_chapterScores.AddScore(book, chapter, score);
264+
_bookScores.AddScore(book, score);
232265
}
233266
}
234267
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
namespace SIL.Machine.QualityEstimation
{
    /// <summary>
    /// A pair of probability cut-offs used to assign a <see cref="UsabilityLabel"/>.
    /// </summary>
    public class Thresholds
    {
        /// <summary>
        /// Creates a set of thresholds.
        /// </summary>
        /// <param name="greenThreshold">The minimum probability that is labelled green.</param>
        /// <param name="yellowThreshold">
        /// The minimum probability that is labelled yellow when it is below the green threshold.
        /// </param>
        public Thresholds(double greenThreshold, double yellowThreshold)
        {
            GreenThreshold = greenThreshold;
            YellowThreshold = yellowThreshold;
        }

        /// <summary>
        /// The minimum probability that is labelled green.
        /// </summary>
        public double GreenThreshold { get; }

        /// <summary>
        /// The minimum probability that is labelled yellow when it is below the green threshold.
        /// </summary>
        public double YellowThreshold { get; }

        /// <summary>
        /// Maps a probability to a usability label.
        /// </summary>
        /// <param name="probability">The probability to classify.</param>
        /// <returns>
        /// Green when the probability is at least <see cref="GreenThreshold"/>; otherwise yellow when it is
        /// at least <see cref="YellowThreshold"/>; otherwise red.
        /// </returns>
        public UsabilityLabel ReturnLabel(double probability)
        {
            // The green check comes first, so it wins whenever both comparisons would succeed.
            if (probability >= GreenThreshold)
            {
                return UsabilityLabel.Green;
            }

            if (probability >= YellowThreshold)
            {
                return UsabilityLabel.Yellow;
            }

            return UsabilityLabel.Red;
        }
    }
}

src/SIL.Machine/QualityEstimation/Thresholds/BookThresholds.cs

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/SIL.Machine/QualityEstimation/Thresholds/ChapterThresholds.cs

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/SIL.Machine/QualityEstimation/Thresholds/Thresholds.cs

Lines changed: 0 additions & 14 deletions
This file was deleted.

src/SIL.Machine/QualityEstimation/Thresholds/VerseThresholds.cs

Lines changed: 0 additions & 9 deletions
This file was deleted.

0 commit comments

Comments
 (0)