1- use std:: collections:: BTreeSet ;
1+ use std:: collections:: { BTreeMap , BTreeSet } ;
22
33use unic_langid:: LanguageIdentifier ;
44
@@ -7,124 +7,89 @@ use crate::{
77 types:: { Plural , PluralCategory , Resource , Translation } ,
88} ;
99
10- /// Returns the required CLDR plural categories for a given language identifier.
11- ///
12- /// This is a curated subset of CLDR rules covering common locales. For unknown
13- /// or unsupported locales, falls back to {Other} to avoid false positives.
14- pub fn required_categories_for ( lang : & LanguageIdentifier ) -> BTreeSet < PluralCategory > {
15- let mut set: BTreeSet < PluralCategory > = BTreeSet :: new ( ) ;
10+ use lazy_static:: lazy_static;
1611
17- // Base language subtag only for rule selection
18- let lang_str = lang. language . as_str ( ) ;
12+ lazy_static ! {
13+ /// Static mapping from base language subtag → required plural categories (CLDR‑style, cardinals).
14+ static ref CATEGORY_TABLE : BTreeMap <& ' static str , BTreeSet <PluralCategory >> = {
15+ use PluralCategory :: * ;
16+ let mut m: BTreeMap <& ' static str , BTreeSet <PluralCategory >> = BTreeMap :: new( ) ;
1917
20- match lang_str {
21- // One/Other languages (most European languages)
22- "en" | "de" | "nl" | "sv" | "da" | "nb" | "nn" | "no" | "is" | "fi" | "et"
23- | "fa" | "hi" | "bn" | "gu" | "ta" | "te" | "kn" | "ml" | "mr" | "it"
24- | "es" | "pt" | "pt_br" | "pt_pt" | "mk" | "el" | "eu" | "gl" | "af" | "sw"
25- | "ur" | "fil" | "tl" | "tr" | "id" | "ms" => {
26- set. insert ( PluralCategory :: One ) ;
27- set. insert ( PluralCategory :: Other ) ;
18+ // Helper to build a set from a slice
19+ fn s( items: & [ PluralCategory ] ) -> BTreeSet <PluralCategory > {
20+ items. iter( ) . cloned( ) . collect( )
2821 }
2922
30- // Only Other (East Asian languages and some SE Asian)
31- "ja" | "zh" | "ko" | "th" | "vi" | "km" | "lo" | "my" | "yue" | "zh_hant"
32- | "zh_hans" => {
33- set. insert ( PluralCategory :: Other ) ;
23+ // One/Other (most Indo‑European languages without complex plural rules)
24+ for code in [
25+ "en" , "de" , "nl" , "sv" , "da" , "nb" , "nn" , "no" , "is" , "fi" , "et" , "fa" , "hi" , "bn" , "gu" ,
26+ "ta" , "te" , "kn" , "ml" , "mr" , "it" , "es" , "pt" , "mk" , "el" , "eu" , "gl" , "af" , "sw" , "ur" ,
27+ "fil" , "tl" , "tr" , "id" , "ms" , "fr" , "hy" , "kab"
28+ ] {
29+ m. insert( code, s( & [ One , Other ] ) ) ;
3430 }
3531
36- // French-like (CLDR: one/other)
37- "fr" | "hy" | "kab" => {
38- set. insert ( PluralCategory :: One ) ;
39- set. insert ( PluralCategory :: Other ) ;
32+ // Only Other (East/Southeast Asian common cases)
33+ for code in [ "ja" , "zh" , "ko" , "th" , "vi" , "km" , "lo" , "my" , "yue" ] {
34+ m. insert( code, s( & [ Other ] ) ) ;
4035 }
4136
4237 // Slavic (Russian group): one, few, many, other
43- "ru" | "uk" | "be" | "sr" | "hr" | "bs" | "sh" => {
44- set. insert ( PluralCategory :: One ) ;
45- set. insert ( PluralCategory :: Few ) ;
46- set. insert ( PluralCategory :: Many ) ;
47- set. insert ( PluralCategory :: Other ) ;
38+ for code in [ "ru" , "uk" , "be" , "sr" , "hr" , "bs" , "sh" ] {
39+ m. insert( code, s( & [ One , Few , Many , Other ] ) ) ;
4840 }
4941
50- // Polish: one, few, many, other
51- "pl" => {
52- set. insert ( PluralCategory :: One ) ;
53- set. insert ( PluralCategory :: Few ) ;
54- set. insert ( PluralCategory :: Many ) ;
55- set. insert ( PluralCategory :: Other ) ;
56- }
42+ // Polish
43+ m. insert( "pl" , s( & [ One , Few , Many , Other ] ) ) ;
5744
58- // Czech/Slovak: one, few, other
59- "cs" | "sk" => {
60- set. insert ( PluralCategory :: One ) ;
61- set. insert ( PluralCategory :: Few ) ;
62- set. insert ( PluralCategory :: Other ) ;
45+ // Czech/Slovak
46+ for code in [ "cs" , "sk" ] {
47+ m. insert( code, s( & [ One , Few , Other ] ) ) ;
6348 }
6449
65- // Slovenian: one, two, few, other
66- "sl" => {
67- set. insert ( PluralCategory :: One ) ;
68- set. insert ( PluralCategory :: Two ) ;
69- set. insert ( PluralCategory :: Few ) ;
70- set. insert ( PluralCategory :: Other ) ;
71- }
50+ // Slovenian
51+ m. insert( "sl" , s( & [ One , Two , Few , Other ] ) ) ;
7252
73- // Lithuanian: one, few, other
74- "lt" => {
75- set. insert ( PluralCategory :: One ) ;
76- set. insert ( PluralCategory :: Few ) ;
77- set. insert ( PluralCategory :: Other ) ;
78- }
53+ // Lithuanian
54+ m. insert( "lt" , s( & [ One , Few , Other ] ) ) ;
7955
80- // Latvian: zero, one, other
81- "lv" => {
82- set. insert ( PluralCategory :: Zero ) ;
83- set. insert ( PluralCategory :: One ) ;
84- set. insert ( PluralCategory :: Other ) ;
85- }
56+ // Latvian
57+ m. insert( "lv" , s( & [ Zero , One , Other ] ) ) ;
8658
87- // Irish Gaelic: one, two, few, many, other
88- "ga" => {
89- set. insert ( PluralCategory :: One ) ;
90- set. insert ( PluralCategory :: Two ) ;
91- set. insert ( PluralCategory :: Few ) ;
92- set. insert ( PluralCategory :: Many ) ;
93- set. insert ( PluralCategory :: Other ) ;
94- }
59+ // Irish Gaelic
60+ m. insert( "ga" , s( & [ One , Two , Few , Many , Other ] ) ) ;
9561
96- // Romanian: one, few, other
97- "ro" => {
98- set. insert ( PluralCategory :: One ) ;
99- set. insert ( PluralCategory :: Few ) ;
100- set. insert ( PluralCategory :: Other ) ;
101- }
62+ // Romanian
63+ m. insert( "ro" , s( & [ One , Few , Other ] ) ) ;
10264
103- // Arabic: zero, one, two, few, many, other
104- "ar" => {
105- set. insert ( PluralCategory :: Zero ) ;
106- set. insert ( PluralCategory :: One ) ;
107- set. insert ( PluralCategory :: Two ) ;
108- set. insert ( PluralCategory :: Few ) ;
109- set. insert ( PluralCategory :: Many ) ;
110- set. insert ( PluralCategory :: Other ) ;
111- }
65+ // Arabic
66+ m. insert( "ar" , s( & [ Zero , One , Two , Few , Many , Other ] ) ) ;
11267
113- // Hebrew (cardinals) commonly use one, two, many, other in CLDR
114- "iw" /* legacy */ | "he" => {
115- set. insert ( PluralCategory :: One ) ;
116- set. insert ( PluralCategory :: Two ) ;
117- set. insert ( PluralCategory :: Many ) ;
118- set. insert ( PluralCategory :: Other ) ;
68+ // Hebrew (legacy code iw also maps here)
69+ for code in [ "he" , "iw" ] {
70+ m. insert( code, s( & [ One , Two , Many , Other ] ) ) ;
11971 }
12072
121- _ => {
122- // Conservative default to avoid noisy validation for unknown locales
123- set. insert ( PluralCategory :: Other ) ;
124- }
125- }
73+ m
74+ } ;
75+ }
12676
127- set
77+ /// Returns the required CLDR plural categories for a given language identifier.
78+ ///
79+ /// This is a curated subset of CLDR rules covering common locales. For unknown
80+ /// or unsupported locales, falls back to {Other} to avoid false positives.
81+ pub fn required_categories_for ( lang : & LanguageIdentifier ) -> BTreeSet < PluralCategory > {
82+ // Base language subtag only for rule selection
83+ let lang_str = lang. language . as_str ( ) ;
84+ CATEGORY_TABLE
85+ . get ( lang_str)
86+ . cloned ( )
87+ . unwrap_or_else ( || {
88+ // Conservative default to avoid noisy validation for unknown locales
89+ let mut s = BTreeSet :: new ( ) ;
90+ s. insert ( PluralCategory :: Other ) ;
91+ s
92+ } )
12893}
12994
13095/// Helper for string language codes (accepts underscores, normalizes to hyphen).
0 commit comments