Skip to content

Commit 0626de7

Browse files
authored
Merge pull request #3 from WendellXY/feat/placeholders
feat: placeholder normalization and validation in library
2 parents d6fbdde + adffc05 commit 0626de7

8 files changed

Lines changed: 715 additions & 24 deletions

File tree

ROADMAP.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ Legend: [ ] todo, [x] done, [~] in progress
1717

1818
## M1. Quality & Safety
1919

20-
- [ ] Placeholder normalization and validation
21-
- [ ] Mapping between iOS (`%1$@`, `%d`) and Android (`%1$s`, `%d`)
22-
- [ ] Detect placeholder mismatches across languages; fail in strict mode, warn otherwise
23-
- [ ] Auto‑fix option for common cases (`%@``%s`, `%1$@``%1$s`)
24-
- [ ] Tests across `.strings`, Android, `.xcstrings`
20+
- [x] Placeholder normalization and validation
21+
- [x] Mapping between iOS (`%1$@`, `%@`, `%ld`) and Android (`%1$s`, `%s`, `%d/%u`)
22+
- [x] Detect placeholder mismatches across languages; strict vs non‑strict modes
23+
- [x] Auto‑fix option for common cases (`normalize_placeholders_in_place`)
24+
- [x] Tests across singular and plural entries; cross‑language normalization
2525
- [ ] Plural rules engine
2626
- [ ] CLDR‑driven required category sets per locale (few/many/etc.)
2727
- [ ] Validation pass: flag missing categories per key+locale

langcodec-cli/src/main.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ mod debug;
33
mod formats;
44
mod merge;
55
mod path_glob;
6+
mod stats;
67
mod transformers;
78
mod validation;
89
mod view;
9-
mod stats;
1010

1111
use crate::convert::{ConvertOptions, run_unified_convert_command, try_custom_format_view};
1212
use crate::debug::run_debug_command;
@@ -304,7 +304,9 @@ fn main() {
304304
Commands::Stats { input, lang, json } => {
305305
// Validate
306306
let mut context = ValidationContext::new().with_input_file(input.clone());
307-
if let Some(l) = &lang { context = context.with_language_code(l.clone()); }
307+
if let Some(l) = &lang {
308+
context = context.with_language_code(l.clone());
309+
}
308310
if let Err(e) = validate_context(&context) {
309311
eprintln!("❌ Validation failed: {}", e);
310312
std::process::exit(1);

langcodec-cli/src/stats.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ fn accumulate(lang_stats: &mut LangStats, status: &EntryStatus) {
3030

3131
pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: bool) {
3232
let resources: Vec<_> = match lang_filter {
33-
Some(lang) => codec.resources.iter().filter(|r| r.metadata.language == *lang).collect(),
33+
Some(lang) => codec
34+
.resources
35+
.iter()
36+
.filter(|r| r.metadata.language == *lang)
37+
.collect(),
3438
None => codec.resources.iter().collect(),
3539
};
3640

@@ -84,7 +88,10 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
8488
println!(" Total: {}", stats.total);
8589
println!(" By status:");
8690
for (k, v) in [
87-
("translated", stats.by_status.get("translated").copied().unwrap_or(0)),
91+
(
92+
"translated",
93+
stats.by_status.get("translated").copied().unwrap_or(0),
94+
),
8895
(
8996
"needs_review",
9097
stats.by_status.get("needs_review").copied().unwrap_or(0),
@@ -105,4 +112,3 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
105112
println!(" Completion: {:.2}%", percent);
106113
}
107114
}
108-

langcodec-cli/tests/stats_cli_tests.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,3 @@ fn test_stats_json_on_android_strings() {
4848
assert_eq!(by_status["do_not_translate"], 1);
4949
assert_eq!(by_status["new"], 1);
5050
}
51-

langcodec/src/codec.rs

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,182 @@ impl Codec {
619619
.retain(|resource| !resource.entries.is_empty());
620620
}
621621

622+
/// Validate placeholder consistency across languages for each key.
623+
///
624+
/// Rules (initial version):
625+
/// - For each key, each language must have the same placeholder signature.
626+
/// - For plural entries, all forms within a language must share the same signature.
627+
/// - iOS vs Android differences like `%@`/`%1$@` vs `%s`/`%1$s` are normalized.
628+
///
629+
/// Example
630+
/// ```rust
631+
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
632+
/// let mut codec = Codec::new();
633+
/// let en = Resource{
634+
/// metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
635+
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Hello %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
636+
/// };
637+
/// let fr = Resource{
638+
/// metadata: Metadata{ language: "fr".into(), domain: String::new(), custom: Default::default() },
639+
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Bonjour %1$s".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
640+
/// };
641+
/// codec.add_resource(en);
642+
/// codec.add_resource(fr);
643+
/// assert!(codec.validate_placeholders(true).is_ok());
644+
/// ```
645+
pub fn validate_placeholders(&self, strict: bool) -> Result<(), Error> {
646+
use crate::placeholder::signature;
647+
use crate::types::Translation;
648+
use std::collections::HashMap;
649+
650+
// key -> lang -> Vec<signatures per form or single>
651+
let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();
652+
653+
for res in &self.resources {
654+
for entry in &res.entries {
655+
let sigs: Vec<Vec<String>> = match &entry.value {
656+
Translation::Singular(v) => vec![signature(v)],
657+
Translation::Plural(p) => p.forms.values().map(|v| signature(v)).collect(),
658+
};
659+
map.entry(entry.id.clone())
660+
.or_default()
661+
.entry(res.metadata.language.clone())
662+
.or_default()
663+
.push(sigs.into_iter().flatten().collect());
664+
}
665+
}
666+
667+
let mut problems = Vec::new();
668+
669+
for (key, langs) in map {
670+
// Per-language: ensure all collected signatures for this entry are identical
671+
let mut per_lang_sig: HashMap<String, Vec<String>> = HashMap::new();
672+
for (lang, sig_lists) in langs {
673+
if let Some(first) = sig_lists.first() {
674+
if sig_lists.iter().any(|s| s != first) {
675+
problems.push(format!(
676+
"Key '{}' in '{}': inconsistent placeholders across forms: {:?}",
677+
key, lang, sig_lists
678+
));
679+
}
680+
per_lang_sig.insert(lang, first.clone());
681+
}
682+
}
683+
684+
// Across languages, pick one baseline and compare
685+
if let Some((base_lang, base_sig)) = per_lang_sig.iter().next() {
686+
for (lang, sig) in &per_lang_sig {
687+
if sig != base_sig {
688+
problems.push(format!(
689+
"Key '{}' mismatch: {} {:?} vs {} {:?}",
690+
key, base_lang, base_sig, lang, sig
691+
));
692+
}
693+
}
694+
}
695+
}
696+
697+
if problems.is_empty() {
698+
return Ok(());
699+
}
700+
if strict {
701+
return Err(Error::validation_error(format!(
702+
"Placeholder issues: {}",
703+
problems.join(" | ")
704+
)));
705+
}
706+
// Non-strict mode: treat as success
707+
Ok(())
708+
}
709+
710+
/// Collect placeholder issues without failing.
711+
/// Returns a list of human-readable messages; empty if none.
712+
///
713+
/// Useful to warn in non-strict mode.
714+
pub fn collect_placeholder_issues(&self) -> Vec<String> {
715+
use crate::placeholder::signature;
716+
use crate::types::Translation;
717+
use std::collections::HashMap;
718+
719+
let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();
720+
for res in &self.resources {
721+
for entry in &res.entries {
722+
let sigs: Vec<Vec<String>> = match &entry.value {
723+
Translation::Singular(v) => vec![signature(v)],
724+
Translation::Plural(p) => p.forms.values().map(|v| signature(v)).collect(),
725+
};
726+
map.entry(entry.id.clone())
727+
.or_default()
728+
.entry(res.metadata.language.clone())
729+
.or_default()
730+
.push(sigs.into_iter().flatten().collect());
731+
}
732+
}
733+
734+
let mut problems = Vec::new();
735+
for (key, langs) in map {
736+
let mut per_lang_sig: HashMap<String, Vec<String>> = HashMap::new();
737+
for (lang, sig_lists) in langs {
738+
if let Some(first) = sig_lists.first() {
739+
if sig_lists.iter().any(|s| s != first) {
740+
problems.push(format!(
741+
"Key '{}' in '{}': inconsistent placeholders across forms: {:?}",
742+
key, lang, sig_lists
743+
));
744+
}
745+
per_lang_sig.insert(lang, first.clone());
746+
}
747+
}
748+
if let Some((base_lang, base_sig)) = per_lang_sig.iter().next() {
749+
for (lang, sig) in &per_lang_sig {
750+
if sig != base_sig {
751+
problems.push(format!(
752+
"Key '{}' mismatch: {} {:?} vs {} {:?}",
753+
key, base_lang, base_sig, lang, sig
754+
));
755+
}
756+
}
757+
}
758+
}
759+
problems
760+
}
761+
762+
/// Normalize placeholders in all entries (mutates in place).
763+
/// Converts iOS patterns like `%@`, `%1$@`, `%ld` to canonical forms (%s, %1$s, %d/%u).
764+
///
765+
/// Example
766+
/// ```rust
767+
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
768+
/// let mut codec = Codec::new();
769+
/// codec.add_resource(Resource{
770+
/// metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
771+
/// entries: vec![Entry{ id: "id".into(), value: Translation::Singular("Hello %@ and %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
772+
/// });
773+
/// codec.normalize_placeholders_in_place();
774+
/// let v = match &codec.resources[0].entries[0].value { Translation::Singular(v) => v.clone(), _ => unreachable!() };
775+
/// assert!(v.contains("%s") && v.contains("%1$s"));
776+
/// ```
777+
pub fn normalize_placeholders_in_place(&mut self) {
778+
use crate::placeholder::normalize_placeholders;
779+
use crate::types::Translation;
780+
for res in &mut self.resources {
781+
for entry in &mut res.entries {
782+
match &mut entry.value {
783+
Translation::Singular(v) => {
784+
let nv = normalize_placeholders(v);
785+
*v = nv;
786+
}
787+
Translation::Plural(p) => {
788+
for v in p.forms.values_mut() {
789+
let nv = normalize_placeholders(v);
790+
*v = nv;
791+
}
792+
}
793+
}
794+
}
795+
}
796+
}
797+
622798
/// Merge resources with the same language by the given strategy.
623799
///
624800
/// This method groups resources by language and merges multiple resources
@@ -1584,4 +1760,136 @@ mod tests {
15841760
assert_eq!(merged.resources[0].metadata.language, "en");
15851761
assert_eq!(merged.resources[0].entries.len(), 2);
15861762
}
1763+
1764+
#[test]
1765+
fn test_validate_placeholders_across_languages() {
1766+
let mut codec = Codec::new();
1767+
// English with %1$@, French with %1$s should match after normalization
1768+
codec.add_resource(Resource {
1769+
metadata: Metadata {
1770+
language: "en".into(),
1771+
domain: "d".into(),
1772+
custom: HashMap::new(),
1773+
},
1774+
entries: vec![Entry {
1775+
id: "greet".into(),
1776+
value: Translation::Singular("Hello %1$@".into()),
1777+
comment: None,
1778+
status: EntryStatus::Translated,
1779+
custom: HashMap::new(),
1780+
}],
1781+
});
1782+
codec.add_resource(Resource {
1783+
metadata: Metadata {
1784+
language: "fr".into(),
1785+
domain: "d".into(),
1786+
custom: HashMap::new(),
1787+
},
1788+
entries: vec![Entry {
1789+
id: "greet".into(),
1790+
value: Translation::Singular("Bonjour %1$s".into()),
1791+
comment: None,
1792+
status: EntryStatus::Translated,
1793+
custom: HashMap::new(),
1794+
}],
1795+
});
1796+
assert!(codec.validate_placeholders(true).is_ok());
1797+
}
1798+
1799+
#[test]
1800+
fn test_validate_placeholders_mismatch() {
1801+
let mut codec = Codec::new();
1802+
codec.add_resource(Resource {
1803+
metadata: Metadata {
1804+
language: "en".into(),
1805+
domain: "d".into(),
1806+
custom: HashMap::new(),
1807+
},
1808+
entries: vec![Entry {
1809+
id: "count".into(),
1810+
value: Translation::Singular("%d files".into()),
1811+
comment: None,
1812+
status: EntryStatus::Translated,
1813+
custom: HashMap::new(),
1814+
}],
1815+
});
1816+
codec.add_resource(Resource {
1817+
metadata: Metadata {
1818+
language: "fr".into(),
1819+
domain: "d".into(),
1820+
custom: HashMap::new(),
1821+
},
1822+
entries: vec![Entry {
1823+
id: "count".into(),
1824+
value: Translation::Singular("%s fichiers".into()),
1825+
comment: None,
1826+
status: EntryStatus::Translated,
1827+
custom: HashMap::new(),
1828+
}],
1829+
});
1830+
assert!(codec.validate_placeholders(true).is_err());
1831+
}
1832+
1833+
#[test]
1834+
fn test_collect_placeholder_issues_non_strict_ok() {
1835+
let mut codec = Codec::new();
1836+
codec.add_resource(Resource {
1837+
metadata: Metadata {
1838+
language: "en".into(),
1839+
domain: "d".into(),
1840+
custom: HashMap::new(),
1841+
},
1842+
entries: vec![Entry {
1843+
id: "count".into(),
1844+
value: Translation::Singular("%d files".into()),
1845+
comment: None,
1846+
status: EntryStatus::Translated,
1847+
custom: HashMap::new(),
1848+
}],
1849+
});
1850+
codec.add_resource(Resource {
1851+
metadata: Metadata {
1852+
language: "fr".into(),
1853+
domain: "d".into(),
1854+
custom: HashMap::new(),
1855+
},
1856+
entries: vec![Entry {
1857+
id: "count".into(),
1858+
value: Translation::Singular("%s fichiers".into()),
1859+
comment: None,
1860+
status: EntryStatus::Translated,
1861+
custom: HashMap::new(),
1862+
}],
1863+
});
1864+
// Non-strict should be Ok but issues present
1865+
assert!(codec.validate_placeholders(false).is_ok());
1866+
let issues = codec.collect_placeholder_issues();
1867+
assert!(!issues.is_empty());
1868+
}
1869+
1870+
#[test]
1871+
fn test_normalize_placeholders_in_place() {
1872+
let mut codec = Codec::new();
1873+
codec.add_resource(Resource {
1874+
metadata: Metadata {
1875+
language: "en".into(),
1876+
domain: "d".into(),
1877+
custom: HashMap::new(),
1878+
},
1879+
entries: vec![Entry {
1880+
id: "g".into(),
1881+
value: Translation::Singular("Hello %@ and %1$@".into()),
1882+
comment: None,
1883+
status: EntryStatus::Translated,
1884+
custom: HashMap::new(),
1885+
}],
1886+
});
1887+
codec.normalize_placeholders_in_place();
1888+
let v = match &codec.resources[0].entries[0].value {
1889+
Translation::Singular(v) => v.clone(),
1890+
_ => String::new(),
1891+
};
1892+
assert!(v.contains("%s"));
1893+
assert!(v.contains("%1$s"));
1894+
}
15871895
}

0 commit comments

Comments
 (0)