Skip to content

Commit 49d4355

Browse files
committed
refactor(lib): use static data table for plural rules and remove unreachable variants
- Replace match-based rules with a lazy_static BTreeMap keyed by base language
- Remove region/script variants that are unreachable with base-language selection
- Keep the conservative default of {Other}
- Validate plural completeness per locale with an exit status
- Add tests for missing and complete English plural forms
- Add PluralValidationReport with missing/have sets
- Add collect_resource_plural_issues and Codec::collect_plural_issues
- Refactor validate_plurals to fold reports into an Error
1 parent 73035de commit 49d4355

1 file changed

Lines changed: 61 additions & 96 deletions

File tree

langcodec/src/plural_rules.rs

Lines changed: 61 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::collections::BTreeSet;
1+
use std::collections::{BTreeMap, BTreeSet};
22

33
use unic_langid::LanguageIdentifier;
44

@@ -7,124 +7,89 @@ use crate::{
77
types::{Plural, PluralCategory, Resource, Translation},
88
};
99

10-
/// Returns the required CLDR plural categories for a given language identifier.
11-
///
12-
/// This is a curated subset of CLDR rules covering common locales. For unknown
13-
/// or unsupported locales, falls back to {Other} to avoid false positives.
14-
pub fn required_categories_for(lang: &LanguageIdentifier) -> BTreeSet<PluralCategory> {
15-
let mut set: BTreeSet<PluralCategory> = BTreeSet::new();
10+
use lazy_static::lazy_static;
1611

17-
// Base language subtag only for rule selection
18-
let lang_str = lang.language.as_str();
12+
lazy_static! {
13+
/// Static mapping from base language subtag → required plural categories (CLDR‑style, cardinals).
14+
static ref CATEGORY_TABLE: BTreeMap<&'static str, BTreeSet<PluralCategory>> = {
15+
use PluralCategory::*;
16+
let mut m: BTreeMap<&'static str, BTreeSet<PluralCategory>> = BTreeMap::new();
1917

20-
match lang_str {
21-
// One/Other languages (most European languages)
22-
"en" | "de" | "nl" | "sv" | "da" | "nb" | "nn" | "no" | "is" | "fi" | "et"
23-
| "fa" | "hi" | "bn" | "gu" | "ta" | "te" | "kn" | "ml" | "mr" | "it"
24-
| "es" | "pt" | "pt_br" | "pt_pt" | "mk" | "el" | "eu" | "gl" | "af" | "sw"
25-
| "ur" | "fil" | "tl" | "tr" | "id" | "ms" => {
26-
set.insert(PluralCategory::One);
27-
set.insert(PluralCategory::Other);
18+
// Helper to build a set from a slice
19+
fn s(items: &[PluralCategory]) -> BTreeSet<PluralCategory> {
20+
items.iter().cloned().collect()
2821
}
2922

30-
// Only Other (East Asian languages and some SE Asian)
31-
"ja" | "zh" | "ko" | "th" | "vi" | "km" | "lo" | "my" | "yue" | "zh_hant"
32-
| "zh_hans" => {
33-
set.insert(PluralCategory::Other);
23+
// One/Other (languages with a simple singular/plural distinction; not limited to Indo-European, e.g. fi, tr, sw, ta)
24+
for code in [
25+
"en","de","nl","sv","da","nb","nn","no","is","fi","et","fa","hi","bn","gu",
26+
"ta","te","kn","ml","mr","it","es","pt","mk","el","eu","gl","af","sw","ur",
27+
"fil","tl","tr","id","ms","fr","hy","kab"
28+
] {
29+
m.insert(code, s(&[One, Other]));
3430
}
3531

36-
// French-like (CLDR: one/other)
37-
"fr" | "hy" | "kab" => {
38-
set.insert(PluralCategory::One);
39-
set.insert(PluralCategory::Other);
32+
// Only Other (East/Southeast Asian common cases)
33+
for code in ["ja","zh","ko","th","vi","km","lo","my","yue"] {
34+
m.insert(code, s(&[Other]));
4035
}
4136

4237
// Slavic (Russian group): one, few, many, other
43-
"ru" | "uk" | "be" | "sr" | "hr" | "bs" | "sh" => {
44-
set.insert(PluralCategory::One);
45-
set.insert(PluralCategory::Few);
46-
set.insert(PluralCategory::Many);
47-
set.insert(PluralCategory::Other);
38+
for code in ["ru","uk","be","sr","hr","bs","sh"] {
39+
m.insert(code, s(&[One, Few, Many, Other]));
4840
}
4941

50-
// Polish: one, few, many, other
51-
"pl" => {
52-
set.insert(PluralCategory::One);
53-
set.insert(PluralCategory::Few);
54-
set.insert(PluralCategory::Many);
55-
set.insert(PluralCategory::Other);
56-
}
42+
// Polish
43+
m.insert("pl", s(&[One, Few, Many, Other]));
5744

58-
// Czech/Slovak: one, few, other
59-
"cs" | "sk" => {
60-
set.insert(PluralCategory::One);
61-
set.insert(PluralCategory::Few);
62-
set.insert(PluralCategory::Other);
45+
// Czech/Slovak
46+
for code in ["cs","sk"] {
47+
m.insert(code, s(&[One, Few, Other]));
6348
}
6449

65-
// Slovenian: one, two, few, other
66-
"sl" => {
67-
set.insert(PluralCategory::One);
68-
set.insert(PluralCategory::Two);
69-
set.insert(PluralCategory::Few);
70-
set.insert(PluralCategory::Other);
71-
}
50+
// Slovenian
51+
m.insert("sl", s(&[One, Two, Few, Other]));
7252

73-
// Lithuanian: one, few, other
74-
"lt" => {
75-
set.insert(PluralCategory::One);
76-
set.insert(PluralCategory::Few);
77-
set.insert(PluralCategory::Other);
78-
}
53+
// Lithuanian
54+
m.insert("lt", s(&[One, Few, Other]));
7955

80-
// Latvian: zero, one, other
81-
"lv" => {
82-
set.insert(PluralCategory::Zero);
83-
set.insert(PluralCategory::One);
84-
set.insert(PluralCategory::Other);
85-
}
56+
// Latvian
57+
m.insert("lv", s(&[Zero, One, Other]));
8658

87-
// Irish Gaelic: one, two, few, many, other
88-
"ga" => {
89-
set.insert(PluralCategory::One);
90-
set.insert(PluralCategory::Two);
91-
set.insert(PluralCategory::Few);
92-
set.insert(PluralCategory::Many);
93-
set.insert(PluralCategory::Other);
94-
}
59+
// Irish Gaelic
60+
m.insert("ga", s(&[One, Two, Few, Many, Other]));
9561

96-
// Romanian: one, few, other
97-
"ro" => {
98-
set.insert(PluralCategory::One);
99-
set.insert(PluralCategory::Few);
100-
set.insert(PluralCategory::Other);
101-
}
62+
// Romanian
63+
m.insert("ro", s(&[One, Few, Other]));
10264

103-
// Arabic: zero, one, two, few, many, other
104-
"ar" => {
105-
set.insert(PluralCategory::Zero);
106-
set.insert(PluralCategory::One);
107-
set.insert(PluralCategory::Two);
108-
set.insert(PluralCategory::Few);
109-
set.insert(PluralCategory::Many);
110-
set.insert(PluralCategory::Other);
111-
}
65+
// Arabic
66+
m.insert("ar", s(&[Zero, One, Two, Few, Many, Other]));
11267

113-
// Hebrew (cardinals) commonly use one, two, many, other in CLDR
114-
"iw" /* legacy */ | "he" => {
115-
set.insert(PluralCategory::One);
116-
set.insert(PluralCategory::Two);
117-
set.insert(PluralCategory::Many);
118-
set.insert(PluralCategory::Other);
68+
// Hebrew (legacy code iw also maps here)
69+
for code in ["he","iw"] {
70+
m.insert(code, s(&[One, Two, Many, Other]));
11971
}
12072

121-
_ => {
122-
// Conservative default to avoid noisy validation for unknown locales
123-
set.insert(PluralCategory::Other);
124-
}
125-
}
73+
m
74+
};
75+
}
12676

127-
set
77+
/// Returns the required CLDR plural categories for a given language identifier.
78+
///
79+
/// This is a curated subset of CLDR rules covering common locales. For unknown
80+
/// or unsupported locales, falls back to {Other} to avoid false positives.
81+
pub fn required_categories_for(lang: &LanguageIdentifier) -> BTreeSet<PluralCategory> {
82+
// Base language subtag only for rule selection
83+
let lang_str = lang.language.as_str();
84+
CATEGORY_TABLE
85+
.get(lang_str)
86+
.cloned()
87+
.unwrap_or_else(|| {
88+
// Conservative default to avoid noisy validation for unknown locales
89+
let mut s = BTreeSet::new();
90+
s.insert(PluralCategory::Other);
91+
s
92+
})
12893
}
12994

13095
/// Helper for string language codes (accepts underscores, normalizes to hyphen).

0 commit comments

Comments
 (0)