Skip to content

Commit 3dec5ac

Browse files
committed
Porting usability score computation from confidence scores
1 parent d1390f7 commit 3dec5ac

19 files changed

Lines changed: 526 additions & 0 deletions
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using SIL.Machine.QualityEstimation.Scores;
5+
using SIL.Machine.QualityEstimation.Thresholds;
6+
using SIL.Machine.QualityEstimation.Usability;
7+
using SIL.Scripture;
8+
9+
namespace SIL.Machine.QualityEstimation
10+
{
11+
public class QualityEstimation
12+
{
13+
/// <summary>Thresholds whose <c>ReturnLabel</c> labels a book's average usability in <see cref="ComputeBookUsability"/>.</summary>
public BookThresholds BookThresholds { get; set; } = new BookThresholds();
14+
15+
/// <summary>Thresholds whose <c>ReturnLabel</c> labels a chapter's average usability in <see cref="ComputeChapterUsability"/>.</summary>
public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds();
16+
17+
/// <summary>
/// Thresholds whose <c>ReturnLabel</c> labels verse and sequence probabilities; this class also uses them for
/// text-file averages in <see cref="ComputeTxtFileUsability"/>.
/// </summary>
public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds();
18+
19+
/// <summary>Mean, variance and count of the "usable" curve read by <see cref="CalculateUsableProbability"/>.</summary>
public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable;
20+
21+
/// <summary>Mean, variance and count of the "unusable" curve read by <see cref="CalculateUsableProbability"/>.</summary>
public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable;
22+
23+
/// <summary>Book-level results; <see cref="ComputeBookUsability"/> appends to this list and never clears it.</summary>
public List<BookUsability> UsabilityBooks { get; } = new List<BookUsability>();
24+
25+
/// <summary>Chapter-level results; <see cref="ComputeChapterUsability"/> appends to this list and never clears it.</summary>
public List<ChapterUsability> UsabilityChapters { get; } = new List<ChapterUsability>();
26+
27+
/// <summary>Sequence-level results appended by the sequence overload of <c>ComputeUsableProportions</c>.</summary>
public List<SequenceUsability> UsabilitySequences { get; } = new List<SequenceUsability>();
28+
29+
/// <summary>Text-file-level results; <see cref="ComputeTxtFileUsability"/> appends to this list and never clears it.</summary>
public List<TxtFileUsability> UsabilityTxtFiles { get; } = new List<TxtFileUsability>();
30+
31+
/// <summary>Verse-level results appended by the verse overload of <c>ComputeUsableProportions</c>.</summary>
public List<VerseUsability> UsabilityVerses { get; } = new List<VerseUsability>();
32+
33+
/// <summary>
/// Computes the share of "usable" weight for a projected chrF3 value.
/// </summary>
/// <remarks>
/// Each class contributes an unnormalized Gaussian kernel,
/// <c>exp(-(chrF3 - Mean)^2 / (2 * Variance)) * Count</c>, built from <see cref="Usable"/> and
/// <see cref="Unusable"/>. The result is <c>usable / (usable + unusable)</c>.
/// When both kernels underflow to zero (chrF3 very far from both means) the plain ratio would be 0/0,
/// so the same ratio is evaluated from the exponents in the log domain instead of returning NaN.
/// </remarks>
/// <param name="chrF3">The projected chrF3 score to evaluate.</param>
/// <returns>The usable weight divided by the total weight.</returns>
public double CalculateUsableProbability(double chrF3)
{
    double usableExponent = -Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance);
    double unusableExponent = -Math.Pow(chrF3 - Unusable.Mean, 2) / (2 * Unusable.Variance);

    double usableWeight = Math.Exp(usableExponent) * Usable.Count;
    double unusableWeight = Math.Exp(unusableExponent) * Unusable.Count;
    double totalWeight = usableWeight + unusableWeight;

    // Normal path: identical arithmetic to the direct formula, so results are unchanged
    // whenever the denominator is non-zero (NaN also takes this path and propagates as before).
    if (totalWeight != 0)
    {
        return usableWeight / totalWeight;
    }

    // Degenerate path: both weights are zero. Rewrite usable / (usable + unusable) as
    // 1 / (1 + unusable / usable) and form the quotient from logarithms so that
    // underflowed exponentials still compare correctly.
    double logUnusableOverUsable =
        (unusableExponent + Math.Log(Unusable.Count)) - (usableExponent + Math.Log(Usable.Count));
    return 1.0 / (1.0 + Math.Exp(logUnusableOverUsable));
}
40+
41+
/// <summary>
/// Appends one <see cref="BookUsability"/> to <see cref="UsabilityBooks"/> for every book that has a
/// score and at least one recorded verse usability.
/// </summary>
/// <remarks>
/// The book's usability is the arithmetic mean of its verse usabilities, labelled with
/// <see cref="BookThresholds"/>. Books with a null score or with no verse usabilities are skipped;
/// the latter would otherwise make <c>Average()</c> throw <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="bookScores">Book scores and per-book verse usabilities to summarize.</param>
public void ComputeBookUsability(BookScores bookScores)
{
    foreach (KeyValuePair<string, Score> bookScore in bookScores.Scores)
    {
        Score score = bookScore.Value;
        if (score is null)
        {
            continue;
        }

        string book = bookScore.Key;
        List<double> bookUsabilities = bookScores.GetVerseUsabilities(book);
        if (bookUsabilities.Count == 0)
        {
            // A book can have a score but no usabilities (e.g. only entries with VerseNum <= 0 were seen);
            // there is nothing to average, so no entry is produced.
            continue;
        }

        double averageProbability = bookUsabilities.Average();
        UsabilityBooks.Add(
            new BookUsability
            {
                Book = book,
                Usability = averageProbability,
                ProjectedChrF3 = score.ProjectedChrF3,
                Label = BookThresholds.ReturnLabel(averageProbability),
            }
        );
    }
}
64+
65+
/// <summary>
/// Appends one <see cref="ChapterUsability"/> to <see cref="UsabilityChapters"/> for every chapter that
/// has a score and at least one recorded verse usability.
/// </summary>
/// <remarks>
/// The chapter's usability is the arithmetic mean of its verse usabilities, labelled with
/// <see cref="ChapterThresholds"/>. Chapters with a null score or with no verse usabilities are skipped;
/// the latter would otherwise make <c>Average()</c> throw <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="chapterScores">Chapter scores and per-chapter verse usabilities to summarize.</param>
public void ComputeChapterUsability(ChapterScores chapterScores)
{
    foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in chapterScores.Scores)
    {
        string book = chapterScoresByBook.Key;
        foreach (KeyValuePair<int, Score> chapterScore in chapterScoresByBook.Value)
        {
            Score score = chapterScore.Value;
            if (score is null)
            {
                continue;
            }

            int chapter = chapterScore.Key;
            List<double> chapterUsabilities = chapterScores.GetVerseUsabilities(book, chapter);
            if (chapterUsabilities.Count == 0)
            {
                // A chapter can have a score but no usabilities (e.g. only entries with VerseNum <= 0
                // were seen); there is nothing to average, so no entry is produced.
                continue;
            }

            double averageProbability = chapterUsabilities.Average();
            UsabilityChapters.Add(
                new ChapterUsability
                {
                    Book = book,
                    Chapter = chapter,
                    Usability = averageProbability,
                    ProjectedChrF3 = score.ProjectedChrF3,
                    Label = ChapterThresholds.ReturnLabel(averageProbability),
                }
            );
        }
    }
}
93+
94+
/// <summary>
/// Appends one <see cref="TxtFileUsability"/> to <see cref="UsabilityTxtFiles"/> for every text file that
/// has a score and at least one recorded sequence usability.
/// </summary>
/// <remarks>
/// The file's usability is the arithmetic mean of its sequence usabilities. Files with a null score or
/// with no sequence usabilities are skipped; the latter would otherwise make <c>Average()</c> throw
/// <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="txtFileScores">Text-file scores and per-file sequence usabilities to summarize.</param>
public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
{
    foreach (KeyValuePair<string, Score> txtFileScore in txtFileScores.Scores)
    {
        Score score = txtFileScore.Value;
        if (score is null)
        {
            continue;
        }

        string targetDraftFileStem = txtFileScore.Key;
        List<double> txtFileUsabilities = txtFileScores.GetSequenceUsabilities(targetDraftFileStem);
        if (txtFileUsabilities.Count == 0)
        {
            // Nothing to average for this file, so no entry is produced.
            continue;
        }

        double averageProbability = txtFileUsabilities.Average();
        UsabilityTxtFiles.Add(
            new TxtFileUsability
            {
                TargetDraftFile = targetDraftFileStem,
                Usability = averageProbability,
                ProjectedChrF3 = score.ProjectedChrF3,
                // NOTE(review): file-level averages are labelled with the verse-level thresholds;
                // confirm that this is intended rather than a dedicated file-level threshold set.
                Label = VerseThresholds.ReturnLabel(averageProbability),
            }
        );
    }
}
117+
118+
/// <summary>
/// Converts verse scores into usable probabilities, records them at verse, chapter and book level,
/// and then summarizes the chapters and books.
/// </summary>
/// <remarks>
/// Only verse scores whose <c>VerseRef.VerseNum</c> is greater than zero are processed. Each processed
/// verse adds a <see cref="VerseUsability"/> to <see cref="UsabilityVerses"/> and contributes its
/// probability to both <paramref name="chapterScores"/> and <paramref name="bookScores"/>.
/// </remarks>
/// <param name="verseScores">Verse scores carrying the projected chrF3 values.</param>
/// <param name="chapterScores">Receives the per-chapter verse usabilities, then is summarized.</param>
/// <param name="bookScores">Receives the per-book verse usabilities, then is summarized.</param>
public void ComputeUsableProportions(
    List<VerseScore> verseScores,
    ref ChapterScores chapterScores,
    ref BookScores bookScores
)
{
    foreach (VerseScore current in verseScores)
    {
        var verseRef = current.VerseRef;
        if (verseRef.VerseNum <= 0)
        {
            continue;
        }

        double usableProbability = CalculateUsableProbability(current.ProjectedChrF3);

        chapterScores.AppendVerseUsability(verseRef.Book, verseRef.ChapterNum, usableProbability);
        bookScores.AppendVerseUsability(verseRef.Book, usableProbability);

        var verseUsability = new VerseUsability
        {
            Book = verseRef.Book,
            Chapter = verseRef.ChapterNum,
            Verse = verseRef.Verse,
            Usability = usableProbability,
            ProjectedChrF3 = current.ProjectedChrF3,
            Label = VerseThresholds.ReturnLabel(usableProbability),
        };
        UsabilityVerses.Add(verseUsability);
    }

    // Roll the per-verse probabilities up into chapter and book summaries.
    ComputeChapterUsability(chapterScores);
    ComputeBookUsability(bookScores);
}
149+
150+
/// <summary>
/// Converts sequence scores into usable probabilities, records them at sequence and text-file level,
/// and then summarizes the text files.
/// </summary>
/// <remarks>
/// Each sequence adds a <see cref="SequenceUsability"/> to <see cref="UsabilitySequences"/> and
/// contributes its probability to <paramref name="txtFileScores"/> under its target draft file stem.
/// </remarks>
/// <param name="sequenceScores">Sequence scores carrying the projected chrF3 values.</param>
/// <param name="txtFileScores">Receives the per-file sequence usabilities, then is summarized.</param>
public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref TxtFileScores txtFileScores)
{
    foreach (SequenceScore current in sequenceScores)
    {
        string fileStem = current.TargetDraftFileStem;
        double usableProbability = CalculateUsableProbability(current.ProjectedChrF3);

        txtFileScores.AppendSequenceUsability(fileStem, usableProbability);

        var sequenceUsability = new SequenceUsability
        {
            TargetDraftFile = fileStem,
            SequenceNumber = current.SequenceNumber,
            Usability = usableProbability,
            ProjectedChrF3 = current.ProjectedChrF3,
            Label = VerseThresholds.ReturnLabel(usableProbability),
        };
        UsabilitySequences.Add(sequenceUsability);
    }

    // Roll the per-sequence probabilities up into text-file summaries.
    ComputeTxtFileUsability(txtFileScores);
}
170+
171+
/// <summary>
/// Runs the full estimation for text-file sequences: projects chrF3 from the confidences, then computes
/// sequence and text-file usability.
/// </summary>
/// <remarks>
/// Results are appended to <see cref="UsabilitySequences"/> and <see cref="UsabilityTxtFiles"/>; the
/// intermediate score collections are local to this call.
/// </remarks>
/// <param name="slope">Slope of the linear confidence-to-chrF3 projection.</param>
/// <param name="intercept">Intercept of the linear confidence-to-chrF3 projection.</param>
/// <param name="confidences">Confidences keyed by <c>"stem:sequenceNumber"</c> strings.</param>
public void EstimateQuality(double slope, double intercept, Dictionary<string, double> confidences)
{
    // Fresh accumulators for this run.
    List<SequenceScore> projectedSequences = new List<SequenceScore>();
    TxtFileScores projectedFiles = new TxtFileScores();

    ProjectChrF3(slope, intercept, confidences, ref projectedSequences, ref projectedFiles);
    ComputeUsableProportions(projectedSequences, ref projectedFiles);
}
178+
179+
/// <summary>
/// Runs the full estimation for scripture verses: projects chrF3 from the confidences, then computes
/// verse, chapter and book usability.
/// </summary>
/// <remarks>
/// Results are appended to <see cref="UsabilityVerses"/>, <see cref="UsabilityChapters"/> and
/// <see cref="UsabilityBooks"/>; the intermediate score collections are local to this call.
/// </remarks>
/// <param name="slope">Slope of the linear confidence-to-chrF3 projection.</param>
/// <param name="intercept">Intercept of the linear confidence-to-chrF3 projection.</param>
/// <param name="confidences">Confidences keyed by verse reference.</param>
public void EstimateQuality(double slope, double intercept, Dictionary<VerseRef, double> confidences)
{
    // Fresh accumulators for this run.
    List<VerseScore> projectedVerses = new List<VerseScore>();
    ChapterScores projectedChapters = new ChapterScores();
    BookScores projectedBooks = new BookScores();

    ProjectChrF3(slope, intercept, confidences, ref projectedVerses, ref projectedChapters, ref projectedBooks);
    ComputeUsableProportions(projectedVerses, ref projectedChapters, ref projectedBooks);
}
187+
188+
/// <summary>
/// Builds a <see cref="SequenceScore"/> for every confidence whose key has the form
/// <c>"stem:sequenceNumber"</c> and registers it with the supplied collections.
/// </summary>
/// <remarks>
/// Keys that do not split into exactly two parts on ':' or whose second part is not an integer are
/// ignored without error. Each score is appended to <paramref name="sequenceScores"/> and is also
/// stored as the score of its file in <paramref name="txtFileScores"/>, replacing any score stored
/// earlier for that file.
/// NOTE(review): because of that replacement, the file-level score is whichever sequence was enumerated
/// last for the file - confirm this is the intended file-level score.
/// </remarks>
/// <param name="slope">Slope of the linear confidence-to-chrF3 projection.</param>
/// <param name="intercept">Intercept of the linear confidence-to-chrF3 projection.</param>
/// <param name="confidences">Confidences keyed by <c>"stem:sequenceNumber"</c> strings.</param>
/// <param name="sequenceScores">List that receives every created sequence score.</param>
/// <param name="txtFileScores">Collection that receives the score under its file stem.</param>
public void ProjectChrF3(
    double slope,
    double intercept,
    Dictionary<string, double> confidences,
    ref List<SequenceScore> sequenceScores,
    ref TxtFileScores txtFileScores
)
{
    foreach (KeyValuePair<string, double> entry in confidences)
    {
        string[] parts = entry.Key.Split(':');
        if (parts.Length != 2 || !int.TryParse(parts[1], out int sequenceNumber))
        {
            // Not a "stem:number" key; skip it.
            continue;
        }

        string fileStem = parts[0];
        var sequenceScore = new SequenceScore(slope, entry.Value, intercept, sequenceNumber, fileStem);
        sequenceScores.Add(sequenceScore);
        txtFileScores.AddScore(fileStem, sequenceScore);
    }
}
214+
215+
/// <summary>
/// Builds a <see cref="VerseScore"/> for every verse confidence and registers it with the supplied
/// verse, chapter and book collections.
/// </summary>
/// <remarks>
/// Each score is appended to <paramref name="verseScores"/> and is also stored as the score of its
/// chapter and of its book, replacing any score stored earlier for that chapter or book.
/// NOTE(review): because of that replacement, the chapter-level and book-level scores are whichever
/// verse was enumerated last - confirm this is the intended aggregate score.
/// </remarks>
/// <param name="slope">Slope of the linear confidence-to-chrF3 projection.</param>
/// <param name="intercept">Intercept of the linear confidence-to-chrF3 projection.</param>
/// <param name="confidences">Confidences keyed by verse reference.</param>
/// <param name="verseScores">List that receives every created verse score.</param>
/// <param name="chapterScores">Collection that receives the score under its book and chapter.</param>
/// <param name="bookScores">Collection that receives the score under its book.</param>
public void ProjectChrF3(
    double slope,
    double intercept,
    Dictionary<VerseRef, double> confidences,
    ref List<VerseScore> verseScores,
    ref ChapterScores chapterScores,
    ref BookScores bookScores
)
{
    foreach (KeyValuePair<VerseRef, double> entry in confidences)
    {
        VerseRef verseRef = entry.Key;
        var verseScore = new VerseScore(slope, entry.Value, intercept, verseRef);
        verseScores.Add(verseScore);

        string bookId = verseRef.Book;
        chapterScores.AddScore(bookId, verseRef.ChapterNum, verseScore);
        bookScores.AddScore(bookId, verseScore);
    }
}
234+
}
235+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
/// <summary>
/// Holds one <see cref="Score"/> per book together with the verse usabilities recorded for each book.
/// </summary>
public class BookScores
{
    // Verse usabilities recorded so far, keyed by book.
    private readonly Dictionary<string, List<double>> _verseUsabilities = new Dictionary<string, List<double>>();

    /// <summary>Score stored for each book, keyed by book.</summary>
    public readonly Dictionary<string, Score> Scores = new Dictionary<string, Score>();

    /// <summary>Stores <paramref name="score"/> for <paramref name="book"/>, replacing any previous score.</summary>
    public void AddScore(string book, Score score)
    {
        Scores[book] = score;
    }

    /// <summary>Returns the score stored for <paramref name="book"/>, or null when none is stored.</summary>
    public Score GetScore(string book)
    {
        if (Scores.TryGetValue(book, out Score found))
        {
            return found;
        }

        return null;
    }

    /// <summary>Records one more verse usability for <paramref name="book"/>.</summary>
    public void AppendVerseUsability(string book, double usability)
    {
        if (_verseUsabilities.TryGetValue(book, out List<double> recorded))
        {
            recorded.Add(usability);
            return;
        }

        // First usability for this book: start its list.
        _verseUsabilities[book] = new List<double> { usability };
    }

    /// <summary>
    /// Returns a new list holding the verse usabilities recorded for <paramref name="book"/>;
    /// the list is empty when nothing was recorded.
    /// </summary>
    public List<double> GetVerseUsabilities(string book)
    {
        if (!_verseUsabilities.TryGetValue(book, out List<double> recorded))
        {
            return new List<double>();
        }

        // Hand out a copy so callers cannot alter the stored list.
        return new List<double>(recorded);
    }
}
29+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
/// <summary>
/// Holds one <see cref="Score"/> per book and chapter together with the verse usabilities recorded
/// for each chapter.
/// </summary>
public class ChapterScores
{
    // Verse usabilities recorded so far, keyed by book and then by chapter number.
    private readonly Dictionary<string, Dictionary<int, List<double>>> _verseUsabilities =
        new Dictionary<string, Dictionary<int, List<double>>>();

    /// <summary>Score stored for each chapter, keyed by book and then by chapter number.</summary>
    public readonly Dictionary<string, Dictionary<int, Score>> Scores =
        new Dictionary<string, Dictionary<int, Score>>();

    /// <summary>
    /// Stores <paramref name="score"/> for the given book and chapter, replacing any previous score.
    /// </summary>
    public void AddScore(string book, int chapter, Score score)
    {
        Dictionary<int, Score> byChapter;
        if (!Scores.TryGetValue(book, out byChapter))
        {
            byChapter = new Dictionary<int, Score>();
            Scores[book] = byChapter;
        }

        byChapter[chapter] = score;
    }

    /// <summary>Returns the score stored for the given book and chapter, or null when none is stored.</summary>
    public Score GetScore(string book, int chapter)
    {
        if (!Scores.TryGetValue(book, out Dictionary<int, Score> byChapter))
        {
            return null;
        }

        if (!byChapter.TryGetValue(chapter, out Score found))
        {
            return null;
        }

        return found;
    }

    /// <summary>Records one more verse usability for the given book and chapter.</summary>
    public void AppendVerseUsability(string book, int chapter, double usability)
    {
        Dictionary<int, List<double>> byChapter;
        if (!_verseUsabilities.TryGetValue(book, out byChapter))
        {
            byChapter = new Dictionary<int, List<double>>();
            _verseUsabilities[book] = byChapter;
        }

        if (byChapter.TryGetValue(chapter, out List<double> recorded))
        {
            recorded.Add(usability);
            return;
        }

        // First usability for this chapter: start its list.
        byChapter[chapter] = new List<double> { usability };
    }

    /// <summary>
    /// Returns a new list holding the verse usabilities recorded for the given book and chapter;
    /// the list is empty when nothing was recorded.
    /// </summary>
    public List<double> GetVerseUsabilities(string book, int chapter)
    {
        if (
            _verseUsabilities.TryGetValue(book, out Dictionary<int, List<double>> byChapter)
            && byChapter.TryGetValue(chapter, out List<double> recorded)
        )
        {
            // Hand out a copy so callers cannot alter the stored list.
            return new List<double>(recorded);
        }

        return new List<double>();
    }
}
53+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
namespace SIL.Machine.QualityEstimation.Scores
2+
{
3+
/// <summary>
/// A confidence value together with the chrF3 score projected from it by a linear model.
/// </summary>
public class Score
{
    /// <summary>The confidence passed to the constructor.</summary>
    public double Confidence { get; }

    /// <summary>The projection <c>slope * confidence + intercept</c> computed at construction.</summary>
    public double ProjectedChrF3 { get; }

    /// <summary>Creates a score and computes its projected chrF3.</summary>
    /// <param name="slope">Slope of the linear projection.</param>
    /// <param name="confidence">Confidence value to project.</param>
    /// <param name="intercept">Intercept of the linear projection.</param>
    public Score(double slope, double confidence, double intercept)
    {
        double projected = slope * confidence + intercept;

        Confidence = confidence;
        ProjectedChrF3 = projected;
    }
}
15+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
namespace SIL.Machine.QualityEstimation.Scores
2+
{
3+
/// <summary>
/// A <see cref="Score"/> for one numbered sequence of a target draft text file.
/// </summary>
public class SequenceScore : Score
{
    /// <summary>The sequence number passed to the constructor.</summary>
    public int SequenceNumber { get; }

    /// <summary>The target draft file stem passed to the constructor.</summary>
    public string TargetDraftFileStem { get; }

    /// <summary>Creates a sequence score; the projection itself is computed by the base class.</summary>
    /// <param name="slope">Slope of the linear projection.</param>
    /// <param name="confidence">Confidence value to project.</param>
    /// <param name="intercept">Intercept of the linear projection.</param>
    /// <param name="sequenceNumber">Number of the sequence within its file.</param>
    /// <param name="targetDraftFileStem">Stem of the target draft file the sequence belongs to.</param>
    public SequenceScore(
        double slope,
        double confidence,
        double intercept,
        int sequenceNumber,
        string targetDraftFileStem
    )
        : base(slope, confidence, intercept)
    {
        TargetDraftFileStem = targetDraftFileStem;
        SequenceNumber = sequenceNumber;
    }
}
21+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
/// <summary>
/// Holds one <see cref="Score"/> per target draft file together with the sequence usabilities recorded
/// for each file.
/// </summary>
public class TxtFileScores
{
    // Sequence usabilities recorded so far, keyed by target draft file stem.
    private readonly Dictionary<string, List<double>> _sequenceUsabilities = new Dictionary<string, List<double>>();

    /// <summary>Score stored for each file, keyed by target draft file stem.</summary>
    public readonly Dictionary<string, Score> Scores = new Dictionary<string, Score>();

    /// <summary>
    /// Stores <paramref name="score"/> for <paramref name="targetDraftFileStem"/>, replacing any previous score.
    /// </summary>
    public void AddScore(string targetDraftFileStem, Score score)
    {
        Scores[targetDraftFileStem] = score;
    }

    /// <summary>
    /// Returns the score stored for <paramref name="targetDraftFileStem"/>, or null when none is stored.
    /// </summary>
    public Score GetScore(string targetDraftFileStem)
    {
        if (Scores.TryGetValue(targetDraftFileStem, out Score found))
        {
            return found;
        }

        return null;
    }

    /// <summary>Records one more sequence usability for <paramref name="targetDraftFileStem"/>.</summary>
    public void AppendSequenceUsability(string targetDraftFileStem, double usability)
    {
        if (_sequenceUsabilities.TryGetValue(targetDraftFileStem, out List<double> recorded))
        {
            recorded.Add(usability);
            return;
        }

        // First usability for this file: start its list.
        _sequenceUsabilities[targetDraftFileStem] = new List<double> { usability };
    }

    /// <summary>
    /// Returns a new list holding the sequence usabilities recorded for
    /// <paramref name="targetDraftFileStem"/>; the list is empty when nothing was recorded.
    /// </summary>
    public List<double> GetSequenceUsabilities(string targetDraftFileStem)
    {
        if (!_sequenceUsabilities.TryGetValue(targetDraftFileStem, out List<double> recorded))
        {
            return new List<double>();
        }

        // Hand out a copy so callers cannot alter the stored list.
        return new List<double>(recorded);
    }
}
32+
}

0 commit comments

Comments
 (0)