|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using SIL.Machine.QualityEstimation.Scores; |
| 5 | +using SIL.Machine.QualityEstimation.Thresholds; |
| 6 | +using SIL.Machine.QualityEstimation.Usability; |
| 7 | +using SIL.Scripture; |
| 8 | + |
namespace SIL.Machine.QualityEstimation
{
    /// <summary>
    /// Projects chrF3 scores from confidences with a linear model and converts them into usability
    /// probabilities and labels at the verse, chapter and book level (Scripture input) or at the
    /// sequence and text-file level (plain-text input).
    /// </summary>
    /// <remarks>
    /// Results are accumulated in the <c>Usability*</c> lists. Nothing in this class clears those lists,
    /// so calling <c>EstimateQuality</c> more than once on the same instance appends to earlier results.
    /// </remarks>
    public class QualityEstimation
    {
        /// <summary>Thresholds used to label book-level usability.</summary>
        public BookThresholds BookThresholds { get; set; } = new BookThresholds();

        /// <summary>Thresholds used to label chapter-level usability.</summary>
        public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds();

        /// <summary>Thresholds used to label verse-, sequence- and text-file-level usability.</summary>
        public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds();

        /// <summary>
        /// Mean, variance and count describing the "usable" distribution read by
        /// <see cref="CalculateUsableProbability"/>.
        /// </summary>
        public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable;

        /// <summary>
        /// Mean, variance and count describing the "unusable" distribution read by
        /// <see cref="CalculateUsableProbability"/>.
        /// </summary>
        public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable;

        /// <summary>Book-level results, appended by <see cref="ComputeBookUsability"/>.</summary>
        public List<BookUsability> UsabilityBooks { get; } = new List<BookUsability>();

        /// <summary>Chapter-level results, appended by <see cref="ComputeChapterUsability"/>.</summary>
        public List<ChapterUsability> UsabilityChapters { get; } = new List<ChapterUsability>();

        /// <summary>Sequence-level results, appended by the text-file overload of ComputeUsableProportions.</summary>
        public List<SequenceUsability> UsabilitySequences { get; } = new List<SequenceUsability>();

        /// <summary>Text-file-level results, appended by <see cref="ComputeTxtFileUsability"/>.</summary>
        public List<TxtFileUsability> UsabilityTxtFiles { get; } = new List<TxtFileUsability>();

        /// <summary>Verse-level results, appended by the Scripture overload of ComputeUsableProportions.</summary>
        public List<VerseUsability> UsabilityVerses { get; } = new List<VerseUsability>();

        /// <summary>
        /// Calculates the probability that a segment with the given projected chrF3 score is usable.
        /// </summary>
        /// <param name="chrF3">The projected chrF3 score.</param>
        /// <returns>
        /// The usable weight divided by the sum of the usable and unusable weights, where each weight is
        /// <c>exp(-(chrF3 - Mean)^2 / (2 * Variance)) * Count</c> for the corresponding parameters.
        /// </returns>
        /// <remarks>
        /// NOTE(review): if both weights evaluate to zero (for example a score extremely far from both
        /// means, or zero counts) the division is 0/0; presumably that yields NaN - confirm whether
        /// callers need a defined fallback.
        /// </remarks>
        public double CalculateUsableProbability(double chrF3)
        {
            double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count;
            double unusableWeight =
                Math.Exp(-Math.Pow(chrF3 - Unusable.Mean, 2) / (2 * Unusable.Variance)) * Unusable.Count;
            return usableWeight / (usableWeight + unusableWeight);
        }

        /// <summary>
        /// Adds one <see cref="BookUsability"/> to <see cref="UsabilityBooks"/> for every book that has
        /// both a score and at least one verse usability.
        /// </summary>
        /// <param name="bookScores">The per-book scores and verse usabilities.</param>
        public void ComputeBookUsability(BookScores bookScores)
        {
            foreach (string book in bookScores.Scores.Keys)
            {
                Score score = bookScores.GetScore(book);
                if (score is null)
                {
                    continue;
                }

                // Scores are registered for every verse, but usabilities only for verses numbered above
                // zero, so a book can have a score and no usabilities. Skip it rather than let
                // Average() throw on an empty list.
                if (!TryAverage(bookScores.GetVerseUsabilities(book), out double averageProbability))
                {
                    continue;
                }

                UsabilityBooks.Add(
                    new BookUsability
                    {
                        Book = book,
                        Usability = averageProbability,
                        ProjectedChrF3 = score.ProjectedChrF3,
                        Label = BookThresholds.ReturnLabel(averageProbability),
                    }
                );
            }
        }

        /// <summary>
        /// Adds one <see cref="ChapterUsability"/> to <see cref="UsabilityChapters"/> for every chapter
        /// that has both a score and at least one verse usability.
        /// </summary>
        /// <param name="chapterScores">The per-chapter scores and verse usabilities.</param>
        public void ComputeChapterUsability(ChapterScores chapterScores)
        {
            foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in chapterScores.Scores)
            {
                string book = chapterScoresByBook.Key;
                foreach (int chapter in chapterScoresByBook.Value.Keys)
                {
                    Score score = chapterScores.GetScore(book, chapter);
                    if (score is null)
                    {
                        continue;
                    }

                    // A chapter holding only verse-0 entries has a score but no usabilities; skip it
                    // rather than let Average() throw on an empty list.
                    if (
                        !TryAverage(
                            chapterScores.GetVerseUsabilities(book, chapter),
                            out double averageProbability
                        )
                    )
                    {
                        continue;
                    }

                    UsabilityChapters.Add(
                        new ChapterUsability
                        {
                            Book = book,
                            Chapter = chapter,
                            Usability = averageProbability,
                            ProjectedChrF3 = score.ProjectedChrF3,
                            Label = ChapterThresholds.ReturnLabel(averageProbability),
                        }
                    );
                }
            }
        }

        /// <summary>
        /// Adds one <see cref="TxtFileUsability"/> to <see cref="UsabilityTxtFiles"/> for every text
        /// file that has both a score and at least one sequence usability.
        /// </summary>
        /// <param name="txtFileScores">The per-file scores and sequence usabilities.</param>
        public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
        {
            foreach (string targetDraftFileStem in txtFileScores.Scores.Keys)
            {
                Score score = txtFileScores.GetScore(targetDraftFileStem);
                if (score is null)
                {
                    continue;
                }

                // Guard against a file with a score but no recorded usabilities; Average() throws on
                // an empty list.
                if (
                    !TryAverage(
                        txtFileScores.GetSequenceUsabilities(targetDraftFileStem),
                        out double averageProbability
                    )
                )
                {
                    continue;
                }

                UsabilityTxtFiles.Add(
                    new TxtFileUsability
                    {
                        TargetDraftFile = targetDraftFileStem,
                        Usability = averageProbability,
                        ProjectedChrF3 = score.ProjectedChrF3,
                        // Text files are labelled with the verse thresholds.
                        Label = VerseThresholds.ReturnLabel(averageProbability),
                    }
                );
            }
        }

        /// <summary>
        /// Computes the usable probability of every verse numbered above zero, records it at the verse,
        /// chapter and book level, and then computes the chapter and book usabilities.
        /// </summary>
        /// <param name="verseScores">The verse scores; entries with a verse number of zero or less are skipped.</param>
        /// <param name="chapterScores">Receives the verse usabilities grouped by book and chapter. Never reassigned.</param>
        /// <param name="bookScores">Receives the verse usabilities grouped by book. Never reassigned.</param>
        public void ComputeUsableProportions(
            List<VerseScore> verseScores,
            ref ChapterScores chapterScores,
            ref BookScores bookScores
        )
        {
            foreach (VerseScore verseScore in verseScores.Where(v => v.VerseRef.VerseNum > 0))
            {
                double probability = CalculateUsableProbability(verseScore.ProjectedChrF3);
                chapterScores.AppendVerseUsability(
                    verseScore.VerseRef.Book,
                    verseScore.VerseRef.ChapterNum,
                    probability
                );
                bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability);
                UsabilityVerses.Add(
                    new VerseUsability
                    {
                        Book = verseScore.VerseRef.Book,
                        Chapter = verseScore.VerseRef.ChapterNum,
                        Verse = verseScore.VerseRef.Verse,
                        Usability = probability,
                        ProjectedChrF3 = verseScore.ProjectedChrF3,
                        Label = VerseThresholds.ReturnLabel(probability),
                    }
                );
            }

            ComputeChapterUsability(chapterScores);
            ComputeBookUsability(bookScores);
        }

        /// <summary>
        /// Computes the usable probability of every sequence, records it at the sequence and text-file
        /// level, and then computes the text-file usabilities.
        /// </summary>
        /// <param name="sequenceScores">The sequence scores.</param>
        /// <param name="txtFileScores">Receives the sequence usabilities grouped by file. Never reassigned.</param>
        public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref TxtFileScores txtFileScores)
        {
            foreach (SequenceScore sequenceScore in sequenceScores)
            {
                double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3);
                txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability);
                UsabilitySequences.Add(
                    new SequenceUsability
                    {
                        TargetDraftFile = sequenceScore.TargetDraftFileStem,
                        SequenceNumber = sequenceScore.SequenceNumber,
                        Usability = probability,
                        ProjectedChrF3 = sequenceScore.ProjectedChrF3,
                        Label = VerseThresholds.ReturnLabel(probability),
                    }
                );
            }

            ComputeTxtFileUsability(txtFileScores);
        }

        /// <summary>
        /// Estimates quality for plain-text input, filling <see cref="UsabilitySequences"/> and
        /// <see cref="UsabilityTxtFiles"/>.
        /// </summary>
        /// <param name="slope">The slope passed to each <see cref="SequenceScore"/>.</param>
        /// <param name="intercept">The intercept passed to each <see cref="SequenceScore"/>.</param>
        /// <param name="confidences">Confidences keyed by <c>fileStem:sequenceNumber</c>.</param>
        public void EstimateQuality(double slope, double intercept, Dictionary<string, double> confidences)
        {
            var sequenceScores = new List<SequenceScore>();
            var txtFileScores = new TxtFileScores();
            ProjectChrF3(slope, intercept, confidences, ref sequenceScores, ref txtFileScores);
            ComputeUsableProportions(sequenceScores, ref txtFileScores);
        }

        /// <summary>
        /// Estimates quality for Scripture input, filling <see cref="UsabilityVerses"/>,
        /// <see cref="UsabilityChapters"/> and <see cref="UsabilityBooks"/>.
        /// </summary>
        /// <param name="slope">The slope passed to each <see cref="VerseScore"/>.</param>
        /// <param name="intercept">The intercept passed to each <see cref="VerseScore"/>.</param>
        /// <param name="confidences">Confidences keyed by verse reference.</param>
        public void EstimateQuality(double slope, double intercept, Dictionary<VerseRef, double> confidences)
        {
            var verseScores = new List<VerseScore>();
            var chapterScores = new ChapterScores();
            var bookScores = new BookScores();
            ProjectChrF3(slope, intercept, confidences, ref verseScores, ref chapterScores, ref bookScores);
            ComputeUsableProportions(verseScores, ref chapterScores, ref bookScores);
        }

        /// <summary>
        /// Creates a <see cref="SequenceScore"/> for every confidence whose key has the form
        /// <c>fileStem:sequenceNumber</c>.
        /// </summary>
        /// <param name="slope">The slope passed to each <see cref="SequenceScore"/>.</param>
        /// <param name="intercept">The intercept passed to each <see cref="SequenceScore"/>.</param>
        /// <param name="confidences">Confidences keyed by <c>fileStem:sequenceNumber</c>.</param>
        /// <param name="sequenceScores">The list each new score is added to. Never reassigned.</param>
        /// <param name="txtFileScores">Receives each new score under its file stem. Never reassigned.</param>
        /// <remarks>
        /// Keys that do not split into exactly two parts on <c>':'</c>, or whose second part is not an
        /// integer, are silently skipped.
        /// </remarks>
        public void ProjectChrF3(
            double slope,
            double intercept,
            Dictionary<string, double> confidences,
            ref List<SequenceScore> sequenceScores,
            ref TxtFileScores txtFileScores
        )
        {
            foreach (KeyValuePair<string, double> confidence in confidences)
            {
                string[] keyParts = confidence.Key.Split(':');
                if (keyParts.Length == 2 && int.TryParse(keyParts[1], out int sequenceNumber))
                {
                    string targetDraftFileStem = keyParts[0];
                    var score = new SequenceScore(
                        slope,
                        confidence.Value,
                        intercept,
                        sequenceNumber,
                        targetDraftFileStem
                    );
                    sequenceScores.Add(score);
                    txtFileScores.AddScore(targetDraftFileStem, score);
                }
            }
        }

        /// <summary>
        /// Creates a <see cref="VerseScore"/> for every confidence and registers it with the chapter
        /// and book scores.
        /// </summary>
        /// <param name="slope">The slope passed to each <see cref="VerseScore"/>.</param>
        /// <param name="intercept">The intercept passed to each <see cref="VerseScore"/>.</param>
        /// <param name="confidences">Confidences keyed by verse reference.</param>
        /// <param name="verseScores">The list each new score is added to. Never reassigned.</param>
        /// <param name="chapterScores">Receives each new score under its book and chapter. Never reassigned.</param>
        /// <param name="bookScores">Receives each new score under its book. Never reassigned.</param>
        public void ProjectChrF3(
            double slope,
            double intercept,
            Dictionary<VerseRef, double> confidences,
            ref List<VerseScore> verseScores,
            ref ChapterScores chapterScores,
            ref BookScores bookScores
        )
        {
            foreach (KeyValuePair<VerseRef, double> confidence in confidences)
            {
                var score = new VerseScore(slope, confidence.Value, intercept, confidence.Key);
                verseScores.Add(score);
                string book = confidence.Key.Book;
                int chapter = confidence.Key.ChapterNum;
                chapterScores.AddScore(book, chapter, score);
                bookScores.AddScore(book, score);
            }
        }

        /// <summary>
        /// Averages <paramref name="values"/> when it contains at least one element.
        /// </summary>
        /// <param name="values">The values to average; may be null or empty.</param>
        /// <param name="average">The mean of the values, or zero when there are none.</param>
        /// <returns><c>true</c> if an average was computed; otherwise <c>false</c>.</returns>
        private static bool TryAverage(List<double> values, out double average)
        {
            if (values is null || values.Count == 0)
            {
                average = 0;
                return false;
            }

            average = values.Average();
            return true;
        }
    }
}
0 commit comments