Skip to content

Commit bf4ae2c

Browse files
committed
Add placeholder normalization to converter utilities
Introduces convert_with_normalization and convert_auto_with_normalization functions to optionally normalize iOS-style placeholders (e.g., %@, %1$@, %ld) to canonical forms before serialization. Updates documentation and tests to demonstrate usage. Also improves Codec API docs with usage examples for placeholder validation and normalization.
1 parent 1a55215 commit bf4ae2c

3 files changed

Lines changed: 235 additions & 3 deletions

File tree

langcodec/src/codec.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,23 @@ impl Codec {
625625
/// - For each key, each language must have the same placeholder signature.
626626
/// - For plural entries, all forms within a language must share the same signature.
627627
/// - iOS vs Android differences like `%@`/`%1$@` vs `%s`/`%1$s` are normalized.
628+
///
629+
/// Example
630+
/// ```rust
631+
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
632+
/// let mut codec = Codec::new();
633+
/// let en = Resource{
634+
/// metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
635+
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Hello %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
636+
/// };
637+
/// let fr = Resource{
638+
/// metadata: Metadata{ language: "fr".into(), domain: String::new(), custom: Default::default() },
639+
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Bonjour %1$s".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
640+
/// };
641+
/// codec.add_resource(en);
642+
/// codec.add_resource(fr);
643+
/// assert!(codec.validate_placeholders(true).is_ok());
644+
/// ```
628645
pub fn validate_placeholders(&self, strict: bool) -> Result<(), Error> {
629646
use std::collections::HashMap;
630647
use crate::placeholder::signature;
@@ -698,6 +715,8 @@ impl Codec {
698715

699716
/// Collect placeholder issues without failing.
700717
/// Returns a list of human-readable messages; empty if none.
718+
///
719+
/// Useful to warn in non-strict mode.
701720
pub fn collect_placeholder_issues(&self) -> Vec<String> {
702721
use std::collections::HashMap;
703722
use crate::placeholder::signature;
@@ -752,6 +771,19 @@ impl Codec {
752771

753772
/// Normalize placeholders in all entries (mutates in place).
754773
/// Converts iOS patterns like `%@`, `%1$@`, `%ld` to canonical forms (%s, %1$s, %d/%u).
774+
///
775+
/// Example
776+
/// ```rust
777+
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
778+
/// let mut codec = Codec::new();
779+
/// codec.add_resource(Resource{
780+
/// metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
781+
/// entries: vec![Entry{ id: "id".into(), value: Translation::Singular("Hello %@ and %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
782+
/// });
783+
/// codec.normalize_placeholders_in_place();
784+
/// let v = match &codec.resources[0].entries[0].value { Translation::Singular(v) => v.clone(), _ => unreachable!() };
785+
/// assert!(v.contains("%s") && v.contains("%1$s"));
786+
/// ```
755787
pub fn normalize_placeholders_in_place(&mut self) {
756788
use crate::placeholder::normalize_placeholders;
757789
use crate::types::Translation;

langcodec/src/converter.rs

Lines changed: 200 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::{
99
formats::{
1010
AndroidStringsFormat, CSVFormat, FormatType, StringsFormat, TSVFormat, XcstringsFormat,
1111
},
12+
placeholder::normalize_placeholders,
1213
traits::Parser,
1314
types::Resource,
1415
};
@@ -143,14 +144,135 @@ pub fn convert<P: AsRef<Path>>(
143144
}
144145

145146
// Read input as resources
146-
let resources = match input_format {
147+
let mut resources = match input_format {
147148
FormatType::AndroidStrings(_) => vec![AndroidStringsFormat::read_from(&input)?.into()],
148149
FormatType::Strings(_) => vec![StringsFormat::read_from(&input)?.into()],
149150
FormatType::Xcstrings => Vec::<Resource>::try_from(XcstringsFormat::read_from(&input)?)?,
150151
FormatType::CSV => Vec::<Resource>::try_from(CSVFormat::read_from(&input)?)?,
151152
FormatType::TSV => Vec::<Resource>::try_from(TSVFormat::read_from(&input)?)?,
152153
};
153154

155+
// Ensure language is set for single-language inputs if provided on input_format
156+
if let Some(l) = input_format.language().cloned() {
157+
for res in &mut resources {
158+
if res.metadata.language.is_empty() {
159+
res.metadata.language = l.clone();
160+
}
161+
}
162+
}
163+
164+
// Helper to extract resource by language if present, or first one
165+
let pick_resource = |lang: Option<String>| -> Option<Resource> {
166+
match lang {
167+
Some(l) => resources.iter().find(|r| r.metadata.language == l).cloned(),
168+
None => resources.first().cloned(),
169+
}
170+
};
171+
172+
match output_format {
173+
FormatType::AndroidStrings(lang) => {
174+
let resource = pick_resource(lang);
175+
if let Some(res) = resource {
176+
AndroidStringsFormat::from(res).write_to(&output)
177+
} else {
178+
Err(Error::InvalidResource(
179+
"No matching resource for output language.".to_string(),
180+
))
181+
}
182+
}
183+
FormatType::Strings(lang) => {
184+
let resource = pick_resource(lang);
185+
if let Some(res) = resource {
186+
StringsFormat::try_from(res)?.write_to(&output)
187+
} else {
188+
Err(Error::InvalidResource(
189+
"No matching resource for output language.".to_string(),
190+
))
191+
}
192+
}
193+
FormatType::Xcstrings => XcstringsFormat::try_from(resources)?.write_to(&output),
194+
FormatType::CSV => CSVFormat::try_from(resources)?.write_to(&output),
195+
FormatType::TSV => TSVFormat::try_from(resources)?.write_to(&output),
196+
}
197+
}
198+
199+
/// Convert like [`convert`], with an option to normalize placeholders before writing.
200+
///
201+
/// When `normalize` is true, common iOS placeholder tokens like `%@`, `%1$@`, `%ld` are
202+
/// converted to canonical forms (`%s`, `%1$s`, `%d`) prior to serialization.
203+
/// When `normalize` is false, resources are written without placeholder normalization.
204+
///
205+
/// Example
206+
/// ```rust,no_run
207+
/// use langcodec::formats::FormatType;
208+
/// use langcodec::converter::convert_with_normalization;
209+
/// convert_with_normalization(
210+
/// "en.lproj/Localizable.strings",
211+
/// FormatType::Strings(Some("en".to_string())),
212+
/// "values/strings.xml",
213+
/// FormatType::AndroidStrings(Some("en".to_string())),
214+
/// true, // normalize placeholders (e.g., %@ -> %s)
215+
/// )?;
216+
/// # Ok::<(), langcodec::Error>(())
217+
/// ```
218+
pub fn convert_with_normalization<P: AsRef<Path>>(
219+
input: P,
220+
input_format: FormatType,
221+
output: P,
222+
output_format: FormatType,
223+
normalize: bool,
224+
) -> Result<(), Error> {
225+
let mut input = input.as_ref().to_path_buf();
226+
let mut output = output.as_ref().to_path_buf();
227+
228+
// Carry language between single-language formats
229+
let output_format = if let Some(lang) = input_format.language() {
230+
output_format.with_language(Some(lang.clone()))
231+
} else {
232+
output_format
233+
};
234+
235+
if !input_format.matches_language_of(&output_format) {
236+
return Err(Error::InvalidResource(
237+
"Input and output formats must match in language.".to_string(),
238+
));
239+
}
240+
241+
// Read input as resources
242+
let mut resources = match input_format {
243+
FormatType::AndroidStrings(_) => vec![AndroidStringsFormat::read_from(&input)?.into()],
244+
FormatType::Strings(_) => vec![StringsFormat::read_from(&input)?.into()],
245+
FormatType::Xcstrings => Vec::<Resource>::try_from(XcstringsFormat::read_from(&input)?)?,
246+
FormatType::CSV => Vec::<Resource>::try_from(CSVFormat::read_from(&input)?)?,
247+
FormatType::TSV => Vec::<Resource>::try_from(TSVFormat::read_from(&input)?)?,
248+
};
249+
250+
// Ensure language is set for single-language inputs if provided on input_format
251+
if let Some(l) = input_format.language().cloned() {
252+
for res in &mut resources {
253+
if res.metadata.language.is_empty() {
254+
res.metadata.language = l.clone();
255+
}
256+
}
257+
}
258+
259+
if normalize {
260+
for res in &mut resources {
261+
for entry in &mut res.entries {
262+
match &mut entry.value {
263+
crate::types::Translation::Singular(v) => {
264+
*v = normalize_placeholders(v).into();
265+
}
266+
crate::types::Translation::Plural(p) => {
267+
for (_c, v) in p.forms.iter_mut() {
268+
*v = normalize_placeholders(v);
269+
}
270+
}
271+
}
272+
}
273+
}
274+
}
275+
154276
// Helper to extract resource by language if present, or first one
155277
let pick_resource = |lang: Option<String>| -> Option<Resource> {
156278
match lang {
@@ -223,6 +345,83 @@ pub fn convert_auto<P: AsRef<Path>>(input: P, output: P) -> Result<(), Error> {
223345
convert(input, input_format, output, output_format)
224346
}
225347

348+
#[cfg(test)]
349+
mod normalize_tests {
350+
use super::*;
351+
use std::fs;
352+
353+
#[test]
354+
fn test_convert_strings_to_android_with_normalization() {
355+
let tmp = tempfile::tempdir().unwrap();
356+
let strings = tmp.path().join("en.strings");
357+
let xml = tmp.path().join("strings.xml");
358+
359+
fs::write(
360+
&strings,
361+
"\n\"g\" = \"Hello %@ and %1$@ and %ld\";\n",
362+
)
363+
.unwrap();
364+
365+
// Without normalization: convert should succeed
366+
convert(
367+
&strings,
368+
FormatType::Strings(Some("en".into())),
369+
&xml,
370+
FormatType::AndroidStrings(Some("en".into())),
371+
)
372+
.unwrap();
373+
let content = fs::read_to_string(&xml).unwrap();
374+
assert!(content.contains("Hello %"));
375+
376+
// With normalization
377+
convert_with_normalization(
378+
&strings,
379+
FormatType::Strings(Some("en".into())),
380+
&xml,
381+
FormatType::AndroidStrings(Some("en".into())),
382+
true,
383+
)
384+
.unwrap();
385+
let content = fs::read_to_string(&xml).unwrap();
386+
assert!(content.contains("%s"));
387+
assert!(content.contains("%1$s"));
388+
assert!(content.contains("%d"));
389+
}
390+
}
391+
392+
/// Auto-infer formats from paths and convert, with optional placeholder normalization.
393+
/// Returns [`Error::UnknownFormat`] when the input or output format cannot be inferred from its file extension.
394+
///
395+
/// Example
396+
/// ```rust,no_run
397+
/// use langcodec::converter::convert_auto_with_normalization;
398+
/// convert_auto_with_normalization(
399+
/// "Localizable.strings",
400+
/// "strings.xml",
401+
/// true, // normalize placeholders
402+
/// )?;
403+
/// # Ok::<(), langcodec::Error>(())
404+
/// ```
405+
pub fn convert_auto_with_normalization<P: AsRef<Path>>(
406+
input: P,
407+
output: P,
408+
normalize: bool,
409+
) -> Result<(), Error> {
410+
let input_format = infer_format_from_path(&input).ok_or_else(|| {
411+
Error::UnknownFormat(format!(
412+
"Cannot infer input format from extension: {:?}",
413+
input.as_ref().extension()
414+
))
415+
})?;
416+
let output_format = infer_format_from_path(&output).ok_or_else(|| {
417+
Error::UnknownFormat(format!(
418+
"Cannot infer output format from extension: {:?}",
419+
output.as_ref().extension()
420+
))
421+
})?;
422+
convert_with_normalization(input, input_format, output, output_format, normalize)
423+
}
424+
226425
/// Infers a [`FormatType`] from a file path's extension.
227426
///
228427
/// Returns `Some(FormatType)` if the extension matches a known format, otherwise `None`.

langcodec/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,9 @@ pub use crate::{
151151
builder::CodecBuilder,
152152
codec::Codec,
153153
converter::{
154-
convert, convert_auto, convert_resources_to_format, infer_format_from_extension,
155-
infer_format_from_path, infer_language_from_path, merge_resources,
154+
convert, convert_auto, convert_resources_to_format, convert_with_normalization,
155+
convert_auto_with_normalization, infer_format_from_extension, infer_format_from_path,
156+
infer_language_from_path, merge_resources,
156157
},
157158
error::Error,
158159
formats::FormatType,

0 commit comments

Comments
 (0)