Skip to content

Commit c77af4d

Browse files
committed
feat(lib): add plural rules engine and validation
- Add `plural_rules` module with CLDR-like required category sets per locale - Add `required_categories_for_str` and `validate_resource_plurals` exports - Add `Codec::validate_plurals` to aggregate per-resource errors
1 parent bf6ee32 commit c77af4d

3 files changed

Lines changed: 281 additions & 1 deletion

File tree

langcodec/src/codec.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,28 @@ impl Codec {
613613
Ok(())
614614
}
615615

616+
/// Validates plural completeness per CLDR category sets for each locale.
617+
///
618+
/// For each plural entry in each resource, checks that all required plural
619+
/// categories for the language are present. Returns a Validation error with
620+
/// aggregated details if any are missing.
621+
pub fn validate_plurals(&self) -> Result<(), Error> {
622+
use crate::plural_rules::validate_resource_plurals;
623+
624+
let mut problems: Vec<String> = Vec::new();
625+
for res in &self.resources {
626+
if let Err(e) = validate_resource_plurals(res) {
627+
problems.push(e.to_string());
628+
}
629+
}
630+
631+
if problems.is_empty() {
632+
Ok(())
633+
} else {
634+
Err(Error::validation_error(problems.join("\n")))
635+
}
636+
}
637+
616638
/// Cleans up resources by removing empty resources and entries.
617639
pub fn clean_up_resources(&mut self) {
618640
self.resources
@@ -962,7 +984,18 @@ impl Codec {
962984
path: P,
963985
format_type: FormatType,
964986
) -> Result<(), Error> {
965-
let language = crate::converter::infer_language_from_path(&path, &format_type)?;
987+
let mut language = crate::converter::infer_language_from_path(&path, &format_type)?;
988+
// Fallback to explicitly provided language if inference failed
989+
if language.is_none() {
990+
match &format_type {
991+
FormatType::Strings(lang_opt) | FormatType::AndroidStrings(lang_opt) => {
992+
if let Some(l) = lang_opt {
993+
language = Some(l.clone());
994+
}
995+
}
996+
_ => {}
997+
}
998+
}
966999

9671000
let domain = path
9681001
.as_ref()

langcodec/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ pub mod formats;
145145
pub mod placeholder;
146146
pub mod traits;
147147
pub mod types;
148+
pub mod plural_rules;
148149

149150
// Re-export most used types for easy consumption
150151
pub use crate::{
@@ -158,6 +159,7 @@ pub use crate::{
158159
error::Error,
159160
formats::FormatType,
160161
placeholder::{extract_placeholders, normalize_placeholders, signature},
162+
plural_rules::{required_categories_for_str, validate_resource_plurals},
161163
types::{
162164
ConflictStrategy, Entry, EntryStatus, Metadata, Plural, PluralCategory, Resource,
163165
Translation,

langcodec/src/plural_rules.rs

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
use std::collections::BTreeSet;
2+
3+
use unic_langid::LanguageIdentifier;
4+
5+
use crate::{
6+
error::Error,
7+
types::{Plural, PluralCategory, Resource, Translation},
8+
};
9+
10+
/// Returns the required CLDR plural categories for a given language identifier.
11+
///
12+
/// This is a curated subset of CLDR rules covering common locales. For unknown
13+
/// or unsupported locales, falls back to {Other} to avoid false positives.
14+
pub fn required_categories_for(lang: &LanguageIdentifier) -> BTreeSet<PluralCategory> {
15+
let mut set: BTreeSet<PluralCategory> = BTreeSet::new();
16+
17+
// Base language subtag only for rule selection
18+
let lang_str = lang.language.as_str();
19+
20+
match lang_str {
21+
// One/Other languages (most European languages)
22+
"en" | "de" | "nl" | "sv" | "da" | "nb" | "nn" | "no" | "is" | "fi" | "et"
23+
| "fa" | "hi" | "bn" | "gu" | "ta" | "te" | "kn" | "ml" | "mr" | "it"
24+
| "es" | "pt" | "pt_br" | "pt_pt" | "mk" | "el" | "eu" | "gl" | "af" | "sw"
25+
| "ur" | "fil" | "tl" | "tr" | "id" | "ms" => {
26+
set.insert(PluralCategory::One);
27+
set.insert(PluralCategory::Other);
28+
}
29+
30+
// Only Other (East Asian languages and some SE Asian)
31+
"ja" | "zh" | "ko" | "th" | "vi" | "km" | "lo" | "my" | "yue" | "zh_hant"
32+
| "zh_hans" => {
33+
set.insert(PluralCategory::Other);
34+
}
35+
36+
// French-like (CLDR: one/other)
37+
"fr" | "hy" | "kab" => {
38+
set.insert(PluralCategory::One);
39+
set.insert(PluralCategory::Other);
40+
}
41+
42+
// Slavic (Russian group): one, few, many, other
43+
"ru" | "uk" | "be" | "sr" | "hr" | "bs" | "sh" => {
44+
set.insert(PluralCategory::One);
45+
set.insert(PluralCategory::Few);
46+
set.insert(PluralCategory::Many);
47+
set.insert(PluralCategory::Other);
48+
}
49+
50+
// Polish: one, few, many, other
51+
"pl" => {
52+
set.insert(PluralCategory::One);
53+
set.insert(PluralCategory::Few);
54+
set.insert(PluralCategory::Many);
55+
set.insert(PluralCategory::Other);
56+
}
57+
58+
// Czech/Slovak: one, few, other
59+
"cs" | "sk" => {
60+
set.insert(PluralCategory::One);
61+
set.insert(PluralCategory::Few);
62+
set.insert(PluralCategory::Other);
63+
}
64+
65+
// Slovenian: one, two, few, other
66+
"sl" => {
67+
set.insert(PluralCategory::One);
68+
set.insert(PluralCategory::Two);
69+
set.insert(PluralCategory::Few);
70+
set.insert(PluralCategory::Other);
71+
}
72+
73+
// Lithuanian: one, few, other
74+
"lt" => {
75+
set.insert(PluralCategory::One);
76+
set.insert(PluralCategory::Few);
77+
set.insert(PluralCategory::Other);
78+
}
79+
80+
// Latvian: zero, one, other
81+
"lv" => {
82+
set.insert(PluralCategory::Zero);
83+
set.insert(PluralCategory::One);
84+
set.insert(PluralCategory::Other);
85+
}
86+
87+
// Irish Gaelic: one, two, few, many, other
88+
"ga" => {
89+
set.insert(PluralCategory::One);
90+
set.insert(PluralCategory::Two);
91+
set.insert(PluralCategory::Few);
92+
set.insert(PluralCategory::Many);
93+
set.insert(PluralCategory::Other);
94+
}
95+
96+
// Romanian: one, few, other
97+
"ro" => {
98+
set.insert(PluralCategory::One);
99+
set.insert(PluralCategory::Few);
100+
set.insert(PluralCategory::Other);
101+
}
102+
103+
// Arabic: zero, one, two, few, many, other
104+
"ar" => {
105+
set.insert(PluralCategory::Zero);
106+
set.insert(PluralCategory::One);
107+
set.insert(PluralCategory::Two);
108+
set.insert(PluralCategory::Few);
109+
set.insert(PluralCategory::Many);
110+
set.insert(PluralCategory::Other);
111+
}
112+
113+
// Hebrew (cardinals) commonly use one, two, many, other in CLDR
114+
"iw" /* legacy */ | "he" => {
115+
set.insert(PluralCategory::One);
116+
set.insert(PluralCategory::Two);
117+
set.insert(PluralCategory::Many);
118+
set.insert(PluralCategory::Other);
119+
}
120+
121+
_ => {
122+
// Conservative default to avoid noisy validation for unknown locales
123+
set.insert(PluralCategory::Other);
124+
}
125+
}
126+
127+
set
128+
}
129+
130+
/// Helper for string language codes (accepts underscores, normalizes to hyphen).
131+
pub fn required_categories_for_str(lang: &str) -> BTreeSet<PluralCategory> {
132+
let normalized = lang.replace('_', "-");
133+
let parsed: LanguageIdentifier = normalized.parse().unwrap_or_else(|_| "und".parse().unwrap());
134+
required_categories_for(&parsed)
135+
}
136+
137+
/// Compute which required categories are missing for a given plural entry and language.
138+
pub fn missing_categories_for_plural(
139+
lang: &LanguageIdentifier,
140+
plural: &Plural,
141+
) -> BTreeSet<PluralCategory> {
142+
let required = required_categories_for(lang);
143+
let have: BTreeSet<PluralCategory> = plural.forms.keys().cloned().collect();
144+
&required - &have
145+
}
146+
147+
/// Validate a single resource for missing plural categories.
148+
pub fn validate_resource_plurals(resource: &Resource) -> Result<(), Error> {
149+
let lang_id = match resource.parse_language_identifier() {
150+
Some(id) => id,
151+
None => {
152+
return Err(Error::validation_error(format!(
153+
"Invalid or missing language for resource: {}",
154+
resource.metadata.language
155+
)));
156+
}
157+
};
158+
159+
let mut problems: Vec<String> = Vec::new();
160+
161+
for entry in &resource.entries {
162+
if let Translation::Plural(plural) = &entry.value {
163+
let missing = missing_categories_for_plural(&lang_id, plural);
164+
if !missing.is_empty() {
165+
let have: Vec<String> = plural
166+
.forms
167+
.keys()
168+
.map(|k| format!("{:?}", k))
169+
.collect();
170+
let miss: Vec<String> = missing.into_iter().map(|k| format!("{:?}", k)).collect();
171+
problems.push(format!(
172+
"lang='{}' key='{}': missing plural categories: [{}] (have: [{}])",
173+
resource.metadata.language,
174+
entry.id,
175+
miss.join(", "),
176+
have.join(", ")
177+
));
178+
}
179+
}
180+
}
181+
182+
if problems.is_empty() {
183+
Ok(())
184+
} else {
185+
Err(Error::validation_error(format!(
186+
"Plural validation failed:\n{}",
187+
problems.join("\n")
188+
)))
189+
}
190+
}
191+
192+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{Entry, EntryStatus, Metadata};

    /// Parses a language tag that the test knows to be well-formed.
    fn lang(tag: &str) -> LanguageIdentifier {
        tag.parse().unwrap()
    }

    /// Builds the expected category set from a list of categories.
    fn set_of(categories: Vec<PluralCategory>) -> BTreeSet<PluralCategory> {
        categories.into_iter().collect()
    }

    #[test]
    fn test_required_categories_basic() {
        // English: one/other
        assert_eq!(
            required_categories_for(&lang("en")),
            set_of(vec![PluralCategory::One, PluralCategory::Other])
        );

        // Russian: one/few/many/other
        assert_eq!(
            required_categories_for(&lang("ru")),
            set_of(vec![
                PluralCategory::One,
                PluralCategory::Few,
                PluralCategory::Many,
                PluralCategory::Other,
            ])
        );

        // Japanese: other only
        assert_eq!(
            required_categories_for(&lang("ja")),
            set_of(vec![PluralCategory::Other])
        );
    }

    #[test]
    fn test_validate_resource_plurals_missing() {
        // English requires one/other; an entry providing only 'other' must fail.
        let only_other = Plural::new(
            "apples",
            vec![(PluralCategory::Other, "%d apples".to_string())].into_iter(),
        )
        .unwrap();

        let entry = Entry {
            id: "apples".into(),
            value: Translation::Plural(only_other),
            comment: None,
            status: EntryStatus::Translated,
            custom: Default::default(),
        };

        let resource = Resource {
            metadata: Metadata {
                language: "en".into(),
                domain: String::new(),
                custom: Default::default(),
            },
            entries: vec![entry],
        };

        let message = validate_resource_plurals(&resource)
            .unwrap_err()
            .to_string();
        assert!(message.contains("missing plural categories"));
    }
}

0 commit comments

Comments
 (0)