Skip to content

Commit adffc05

Browse files
committed
Improve placeholder normalization and formatting
Refactors placeholder normalization logic to handle positional iOS object placeholders more robustly, ensuring %<n>$@ is converted to %<n>$s. Also improves code formatting and consistency across modules, and reorders some imports for clarity. No functional changes outside of placeholder normalization and code style.
1 parent 240f7fd commit adffc05

7 files changed

Lines changed: 99 additions & 40 deletions

File tree

langcodec-cli/src/main.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ mod debug;
33
mod formats;
44
mod merge;
55
mod path_glob;
6+
mod stats;
67
mod transformers;
78
mod validation;
89
mod view;
9-
mod stats;
1010

1111
use crate::convert::{ConvertOptions, run_unified_convert_command, try_custom_format_view};
1212
use crate::debug::run_debug_command;
@@ -304,7 +304,9 @@ fn main() {
304304
Commands::Stats { input, lang, json } => {
305305
// Validate
306306
let mut context = ValidationContext::new().with_input_file(input.clone());
307-
if let Some(l) = &lang { context = context.with_language_code(l.clone()); }
307+
if let Some(l) = &lang {
308+
context = context.with_language_code(l.clone());
309+
}
308310
if let Err(e) = validate_context(&context) {
309311
eprintln!("❌ Validation failed: {}", e);
310312
std::process::exit(1);

langcodec-cli/src/stats.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ fn accumulate(lang_stats: &mut LangStats, status: &EntryStatus) {
3030

3131
pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: bool) {
3232
let resources: Vec<_> = match lang_filter {
33-
Some(lang) => codec.resources.iter().filter(|r| r.metadata.language == *lang).collect(),
33+
Some(lang) => codec
34+
.resources
35+
.iter()
36+
.filter(|r| r.metadata.language == *lang)
37+
.collect(),
3438
None => codec.resources.iter().collect(),
3539
};
3640

@@ -84,7 +88,10 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
8488
println!(" Total: {}", stats.total);
8589
println!(" By status:");
8690
for (k, v) in [
87-
("translated", stats.by_status.get("translated").copied().unwrap_or(0)),
91+
(
92+
"translated",
93+
stats.by_status.get("translated").copied().unwrap_or(0),
94+
),
8895
(
8996
"needs_review",
9097
stats.by_status.get("needs_review").copied().unwrap_or(0),
@@ -105,4 +112,3 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
105112
println!(" Completion: {:.2}%", percent);
106113
}
107114
}
108-

langcodec-cli/tests/stats_cli_tests.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,3 @@ fn test_stats_json_on_android_strings() {
4848
assert_eq!(by_status["do_not_translate"], 1);
4949
assert_eq!(by_status["new"], 1);
5050
}
51-

langcodec/src/codec.rs

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -643,9 +643,9 @@ impl Codec {
643643
/// assert!(codec.validate_placeholders(true).is_ok());
644644
/// ```
645645
pub fn validate_placeholders(&self, strict: bool) -> Result<(), Error> {
646-
use std::collections::HashMap;
647646
use crate::placeholder::signature;
648647
use crate::types::Translation;
648+
use std::collections::HashMap;
649649

650650
// key -> lang -> Vec<signatures per form or single>
651651
let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();
@@ -654,9 +654,7 @@ impl Codec {
654654
for entry in &res.entries {
655655
let sigs: Vec<Vec<String>> = match &entry.value {
656656
Translation::Singular(v) => vec![signature(v)],
657-
Translation::Plural(p) => {
658-
p.forms.values().map(|v| signature(v)).collect()
659-
}
657+
Translation::Plural(p) => p.forms.values().map(|v| signature(v)).collect(),
660658
};
661659
map.entry(entry.id.clone())
662660
.or_default()
@@ -714,9 +712,9 @@ impl Codec {
714712
///
715713
/// Useful to warn in non-strict mode.
716714
pub fn collect_placeholder_issues(&self) -> Vec<String> {
717-
use std::collections::HashMap;
718715
use crate::placeholder::signature;
719716
use crate::types::Translation;
717+
use std::collections::HashMap;
720718

721719
let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();
722720
for res in &self.resources {
@@ -1768,7 +1766,11 @@ mod tests {
17681766
let mut codec = Codec::new();
17691767
// English with %1$@, French with %1$s should match after normalization
17701768
codec.add_resource(Resource {
1771-
metadata: Metadata { language: "en".into(), domain: "d".into(), custom: HashMap::new() },
1769+
metadata: Metadata {
1770+
language: "en".into(),
1771+
domain: "d".into(),
1772+
custom: HashMap::new(),
1773+
},
17721774
entries: vec![Entry {
17731775
id: "greet".into(),
17741776
value: Translation::Singular("Hello %1$@".into()),
@@ -1778,7 +1780,11 @@ mod tests {
17781780
}],
17791781
});
17801782
codec.add_resource(Resource {
1781-
metadata: Metadata { language: "fr".into(), domain: "d".into(), custom: HashMap::new() },
1783+
metadata: Metadata {
1784+
language: "fr".into(),
1785+
domain: "d".into(),
1786+
custom: HashMap::new(),
1787+
},
17821788
entries: vec![Entry {
17831789
id: "greet".into(),
17841790
value: Translation::Singular("Bonjour %1$s".into()),
@@ -1794,7 +1800,11 @@ mod tests {
17941800
fn test_validate_placeholders_mismatch() {
17951801
let mut codec = Codec::new();
17961802
codec.add_resource(Resource {
1797-
metadata: Metadata { language: "en".into(), domain: "d".into(), custom: HashMap::new() },
1803+
metadata: Metadata {
1804+
language: "en".into(),
1805+
domain: "d".into(),
1806+
custom: HashMap::new(),
1807+
},
17981808
entries: vec![Entry {
17991809
id: "count".into(),
18001810
value: Translation::Singular("%d files".into()),
@@ -1804,7 +1814,11 @@ mod tests {
18041814
}],
18051815
});
18061816
codec.add_resource(Resource {
1807-
metadata: Metadata { language: "fr".into(), domain: "d".into(), custom: HashMap::new() },
1817+
metadata: Metadata {
1818+
language: "fr".into(),
1819+
domain: "d".into(),
1820+
custom: HashMap::new(),
1821+
},
18081822
entries: vec![Entry {
18091823
id: "count".into(),
18101824
value: Translation::Singular("%s fichiers".into()),
@@ -1820,7 +1834,11 @@ mod tests {
18201834
fn test_collect_placeholder_issues_non_strict_ok() {
18211835
let mut codec = Codec::new();
18221836
codec.add_resource(Resource {
1823-
metadata: Metadata { language: "en".into(), domain: "d".into(), custom: HashMap::new() },
1837+
metadata: Metadata {
1838+
language: "en".into(),
1839+
domain: "d".into(),
1840+
custom: HashMap::new(),
1841+
},
18241842
entries: vec![Entry {
18251843
id: "count".into(),
18261844
value: Translation::Singular("%d files".into()),
@@ -1830,7 +1848,11 @@ mod tests {
18301848
}],
18311849
});
18321850
codec.add_resource(Resource {
1833-
metadata: Metadata { language: "fr".into(), domain: "d".into(), custom: HashMap::new() },
1851+
metadata: Metadata {
1852+
language: "fr".into(),
1853+
domain: "d".into(),
1854+
custom: HashMap::new(),
1855+
},
18341856
entries: vec![Entry {
18351857
id: "count".into(),
18361858
value: Translation::Singular("%s fichiers".into()),
@@ -1849,7 +1871,11 @@ mod tests {
18491871
fn test_normalize_placeholders_in_place() {
18501872
let mut codec = Codec::new();
18511873
codec.add_resource(Resource {
1852-
metadata: Metadata { language: "en".into(), domain: "d".into(), custom: HashMap::new() },
1874+
metadata: Metadata {
1875+
language: "en".into(),
1876+
domain: "d".into(),
1877+
custom: HashMap::new(),
1878+
},
18531879
entries: vec![Entry {
18541880
id: "g".into(),
18551881
value: Translation::Singular("Hello %@ and %1$@".into()),
@@ -1859,7 +1885,10 @@ mod tests {
18591885
}],
18601886
});
18611887
codec.normalize_placeholders_in_place();
1862-
let v = match &codec.resources[0].entries[0].value { Translation::Singular(v) => v.clone(), _ => String::new() };
1888+
let v = match &codec.resources[0].entries[0].value {
1889+
Translation::Singular(v) => v.clone(),
1890+
_ => String::new(),
1891+
};
18631892
assert!(v.contains("%s"));
18641893
assert!(v.contains("%1$s"));
18651894
}

langcodec/src/converter.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,11 +356,7 @@ mod normalize_tests {
356356
let strings = tmp.path().join("en.strings");
357357
let xml = tmp.path().join("strings.xml");
358358

359-
fs::write(
360-
&strings,
361-
"\n\"g\" = \"Hello %@ and %1$@ and %ld\";\n",
362-
)
363-
.unwrap();
359+
fs::write(&strings, "\n\"g\" = \"Hello %@ and %1$@ and %ld\";\n").unwrap();
364360

365361
// Without normalization: convert should succeed
366362
convert(

langcodec/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,17 +142,17 @@ pub mod codec;
142142
pub mod converter;
143143
pub mod error;
144144
pub mod formats;
145+
pub mod placeholder;
145146
pub mod traits;
146147
pub mod types;
147-
pub mod placeholder;
148148

149149
// Re-export most used types for easy consumption
150150
pub use crate::{
151151
builder::CodecBuilder,
152152
codec::Codec,
153153
converter::{
154-
convert, convert_auto, convert_resources_to_format, convert_with_normalization,
155-
convert_auto_with_normalization, infer_format_from_extension, infer_format_from_path,
154+
convert, convert_auto, convert_auto_with_normalization, convert_resources_to_format,
155+
convert_with_normalization, infer_format_from_extension, infer_format_from_path,
156156
infer_language_from_path, merge_resources,
157157
},
158158
error::Error,

langcodec/src/placeholder.rs

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
//! - Extract a placeholder "signature" for comparison across languages.
66
//! - Validate placeholder consistency per entry (across all languages and plural forms).
77
8-
98
#[derive(Debug, Clone, PartialEq, Eq)]
109
pub struct PlaceholderToken {
1110
pub index: Option<usize>,
@@ -73,7 +72,10 @@ pub fn extract_placeholders(input: &str) -> Vec<PlaceholderToken> {
7372
if j < bytes.len() {
7473
let ch = bytes[j] as char;
7574
if ch.is_ascii_alphabetic() || ch == '@' {
76-
out.push(PlaceholderToken { index, kind: canonical_kind_char(ch) });
75+
out.push(PlaceholderToken {
76+
index,
77+
kind: canonical_kind_char(ch),
78+
});
7779
i = j + 1;
7880
continue;
7981
}
@@ -91,19 +93,37 @@ pub fn extract_placeholders(input: &str) -> Vec<PlaceholderToken> {
9193
/// - %1$@ -> %1$s
9294
/// - %ld, %lu -> %d / %u
9395
///
/// Text outside of placeholders — including multi-byte UTF-8 characters — is
/// copied through unchanged.
pub fn normalize_placeholders(input: &str) -> String {
    // Pass 1: rewrite positional iOS object placeholders `%<n>$@` -> `%<n>$s`
    // for any number of digits in `<n>`.
    //
    // The scan runs over bytes, but untouched text is always copied as `&str`
    // slices. Every slice boundary sits next to an ASCII byte (`%`, a digit, or
    // `@`), so it is guaranteed to be a valid char boundary. Pushing individual
    // bytes as `char` would corrupt any non-ASCII text in the translation.
    let bytes = input.as_bytes();
    let mut positional = String::with_capacity(input.len());
    let mut copied_to = 0; // start of the span not yet copied into `positional`
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            let digits_start = i + 1;
            let digits_len = bytes[digits_start..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();
            let digits_end = digits_start + digits_len;
            if digits_len > 0 && bytes[digits_end..].starts_with(b"$@") {
                // Copy everything up to and including `%<n>`, then emit `$s`
                // in place of `$@`.
                positional.push_str(&input[copied_to..digits_end]);
                positional.push_str("$s");
                i = digits_end + 2;
                copied_to = i;
                continue;
            }
        }
        i += 1;
    }
    positional.push_str(&input[copied_to..]);

    // Pass 2: plain textual substitutions, applied in this order:
    // simple iOS object -> string, then long ints -> canonical ints.
    // NOTE: these are literal replacements; an escaped `%%` is not treated
    // specially here.
    positional
        .replace("%@", "%s")
        .replace("%ld", "%d")
        .replace("%lu", "%u")
}
108128

109129
/// Build a normalized signature (sequence of tokens) for comparison.
@@ -142,6 +162,13 @@ mod tests {
142162
assert_eq!(signature(s), vec!["s", "d"]);
143163
}
144164

165+
#[test]
fn test_normalize_positional_object() {
    // Compare the whole output, not just a substring, so that damage to the
    // surrounding text is caught as well.
    assert_eq!(normalize_placeholders("Hello %1$@"), "Hello %1$s");
    // Multi-digit positional indices take the same `$@` -> `$s` rewrite.
    assert_eq!(
        normalize_placeholders("%10$@ then %2$@"),
        "%10$s then %2$s"
    );
}
171+
145172
#[test]
146173
fn test_ignore_escaped_percent() {
147174
let s = "Discount: 50%% and value %d";

0 commit comments

Comments
 (0)