Skip to content

Commit 62ae5eb

Browse files
committed
test(lib): add edge-case corpus regression fixtures
1 parent 806d45b commit 62ae5eb

5 files changed

Lines changed: 323 additions & 0 deletions

File tree

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
use langcodec::types::Translation;
2+
use langcodec::{Codec, convert_auto};
3+
use std::path::{Path, PathBuf};
4+
5+
/// One expected translation: the `value` that `key` must have for `language`.
///
/// Every field is a `'static` string slice so expectation tables can be written
/// directly as literals. `Debug`, `PartialEq` and `Eq` are derived so expectations
/// can be printed and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExpectedValue {
    /// Language identifier used when looking the entry up (for example `"en"`).
    language: &'static str,
    /// Translation key used when looking the entry up.
    key: &'static str,
    /// Exact text the singular translation is expected to contain.
    value: &'static str,
}
11+
12+
struct ParseCase {
13+
name: &'static str,
14+
input_relative_path: &'static str,
15+
lang_hint: Option<&'static str>,
16+
expected_values: Vec<ExpectedValue>,
17+
}
18+
19+
struct ConvertCase {
20+
name: &'static str,
21+
input_relative_path: &'static str,
22+
output_file_name: &'static str,
23+
output_lang_hint: Option<&'static str>,
24+
expected_values: Vec<ExpectedValue>,
25+
}
26+
27+
fn corpus_root() -> PathBuf {
28+
Path::new(env!("CARGO_MANIFEST_DIR"))
29+
.join("..")
30+
.join("tests")
31+
.join("data")
32+
.join("lib")
33+
.join("corpus")
34+
}
35+
36+
fn expected_en_stable_values() -> Vec<ExpectedValue> {
37+
vec![
38+
ExpectedValue {
39+
language: "en",
40+
key: "welcome_message",
41+
value: "Hello, World!",
42+
},
43+
ExpectedValue {
44+
language: "en",
45+
key: "xml_entities",
46+
value: "Use <tag> & value",
47+
},
48+
ExpectedValue {
49+
language: "en",
50+
key: "comma_text",
51+
value: "alpha, beta, gamma",
52+
},
53+
ExpectedValue {
54+
language: "en",
55+
key: "accent_text",
56+
value: "Café crème brûlée",
57+
},
58+
]
59+
}
60+
61+
fn expected_fr_stable_values() -> Vec<ExpectedValue> {
62+
vec![
63+
ExpectedValue {
64+
language: "fr",
65+
key: "welcome_message",
66+
value: "Bonjour, le monde !",
67+
},
68+
ExpectedValue {
69+
language: "fr",
70+
key: "xml_entities",
71+
value: "Utiliser <tag> & valeur",
72+
},
73+
ExpectedValue {
74+
language: "fr",
75+
key: "comma_text",
76+
value: "alpha, bêta, gamma",
77+
},
78+
ExpectedValue {
79+
language: "fr",
80+
key: "accent_text",
81+
value: "Café crème brûlée",
82+
},
83+
]
84+
}
85+
86+
/// Returns `expected_values` with the `language` of every entry replaced by `language`.
///
/// Keys, values and the order of the entries are left untouched.
fn with_language(
    mut expected_values: Vec<ExpectedValue>,
    language: &'static str,
) -> Vec<ExpectedValue> {
    for entry in &mut expected_values {
        entry.language = language;
    }
    expected_values
}
95+
96+
/// Reads the file at `path` into a freshly created `Codec` and returns it.
///
/// `lang_hint`, when present, is passed to `read_file_by_extension` as an owned `String`.
///
/// # Panics
///
/// Panics with the path and the reported error when the read fails.
fn read_codec(path: &Path, lang_hint: Option<&str>) -> Codec {
    let owned_hint = lang_hint.map(String::from);
    let mut loaded = Codec::new();
    if let Err(e) = loaded.read_file_by_extension(path, owned_hint) {
        panic!("failed to read {}: {}", path.display(), e);
    }
    loaded
}
103+
104+
fn assert_expected_values(codec: &Codec, expected_values: &[ExpectedValue], case_name: &str) {
105+
for expected in expected_values {
106+
let entry = codec
107+
.find_entry(expected.key, expected.language)
108+
.unwrap_or_else(|| {
109+
panic!(
110+
"{case_name}: missing key '{}' for language '{}'",
111+
expected.key, expected.language
112+
)
113+
});
114+
115+
match &entry.value {
116+
Translation::Singular(actual) => {
117+
assert_eq!(
118+
actual, expected.value,
119+
"{case_name}: value mismatch for {}:{}",
120+
expected.language, expected.key
121+
);
122+
}
123+
other => panic!(
124+
"{case_name}: expected singular value for {}:{}, got {:?}",
125+
expected.language, expected.key, other
126+
),
127+
}
128+
}
129+
}
130+
131+
/// Reads each corpus fixture directly and checks the values found in the resulting codec.
///
/// The `.strings` and Android cases check `quoted_text` and `apostrophe_text` in addition
/// to the baseline English values; the expected text for those two keys differs per
/// format, and the literals below record exactly what each case asserts. The CSV and TSV
/// cases check the baseline English and French values.
#[test]
fn parse_edge_case_corpora_table_driven() {
    // Baseline English values followed by the two escape-sensitive keys.
    let english_with_escapes = |quoted_text: &'static str, apostrophe_text: &'static str| {
        let mut values = expected_en_stable_values();
        values.push(ExpectedValue {
            language: "en",
            key: "quoted_text",
            value: quoted_text,
        });
        values.push(ExpectedValue {
            language: "en",
            key: "apostrophe_text",
            value: apostrophe_text,
        });
        values
    };

    let bilingual: Vec<ExpectedValue> = expected_en_stable_values()
        .into_iter()
        .chain(expected_fr_stable_values())
        .collect();

    let table = [
        ParseCase {
            name: "strings corpus parse",
            input_relative_path: "en.lproj/Localizable.strings",
            lang_hint: None,
            expected_values: english_with_escapes("He said \\\"Hello\\\"", "Don't stop"),
        },
        ParseCase {
            name: "android corpus parse",
            input_relative_path: "values-en/strings.xml",
            lang_hint: None,
            expected_values: english_with_escapes("He said \"Hello\"", "Don\\'t stop"),
        },
        ParseCase {
            name: "csv corpus parse",
            input_relative_path: "corpus.csv",
            lang_hint: None,
            expected_values: bilingual.clone(),
        },
        ParseCase {
            name: "tsv corpus parse",
            input_relative_path: "corpus.tsv",
            lang_hint: None,
            expected_values: bilingual,
        },
    ];

    let corpus_dir = corpus_root();
    for row in &table {
        let fixture = corpus_dir.join(row.input_relative_path);
        let parsed = read_codec(&fixture, row.lang_hint);
        assert_expected_values(&parsed, &row.expected_values, row.name);
    }
}
196+
197+
/// Converts each corpus fixture with `convert_auto` into a temporary directory, reads the
/// produced file back, and checks the baseline values in it.
///
/// CSV and TSV outputs produced from the `.strings` fixture are checked under the
/// language `"default"`; every other case is checked under `"en"` (plus `"fr"` for the
/// CSV/TSV inputs).
#[test]
fn convert_edge_case_corpora_table_driven() {
    const STRINGS_INPUT: &str = "en.lproj/Localizable.strings";
    const ANDROID_INPUT: &str = "values-en/strings.xml";
    const CSV_INPUT: &str = "corpus.csv";
    const TSV_INPUT: &str = "corpus.tsv";

    // Baseline English values followed by the baseline French values.
    let bilingual = || {
        let mut values = expected_en_stable_values();
        values.extend(expected_fr_stable_values());
        values
    };
    let default_language = with_language(expected_en_stable_values(), "default");

    let table = [
        ConvertCase {
            name: "strings -> android",
            input_relative_path: STRINGS_INPUT,
            output_file_name: "from_strings.xml",
            output_lang_hint: Some("en"),
            expected_values: expected_en_stable_values(),
        },
        ConvertCase {
            name: "strings -> xcstrings",
            input_relative_path: STRINGS_INPUT,
            output_file_name: "from_strings.xcstrings",
            output_lang_hint: None,
            expected_values: expected_en_stable_values(),
        },
        ConvertCase {
            name: "strings -> csv",
            input_relative_path: STRINGS_INPUT,
            output_file_name: "from_strings.csv",
            output_lang_hint: None,
            expected_values: default_language.clone(),
        },
        ConvertCase {
            name: "strings -> tsv",
            input_relative_path: STRINGS_INPUT,
            output_file_name: "from_strings.tsv",
            output_lang_hint: None,
            expected_values: default_language,
        },
        ConvertCase {
            name: "android -> strings",
            input_relative_path: ANDROID_INPUT,
            output_file_name: "from_android.strings",
            output_lang_hint: Some("en"),
            expected_values: expected_en_stable_values(),
        },
        ConvertCase {
            name: "csv -> xcstrings",
            input_relative_path: CSV_INPUT,
            output_file_name: "from_csv.xcstrings",
            output_lang_hint: None,
            expected_values: bilingual(),
        },
        ConvertCase {
            name: "tsv -> xcstrings",
            input_relative_path: TSV_INPUT,
            output_file_name: "from_tsv.xcstrings",
            output_lang_hint: None,
            expected_values: bilingual(),
        },
    ];

    let corpus_dir = corpus_root();
    // Kept alive until the end of the test so the converted files stay on disk.
    let scratch = tempfile::tempdir().expect("create temp output dir");

    for row in &table {
        let source = corpus_dir.join(row.input_relative_path);
        let target = scratch.path().join(row.output_file_name);

        if let Err(e) = convert_auto(&source, &target) {
            panic!(
                "{}: conversion failed from {} to {}: {}",
                row.name,
                source.display(),
                target.display(),
                e
            );
        }

        let round_tripped = read_codec(&target, row.output_lang_hint);
        assert_expected_values(&round_tripped, &row.expected_values, row.name);
    }
}

tests/data/lib/corpus/corpus.csv

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
key,en,fr
2+
welcome_message,"Hello, World!","Bonjour, le monde !"
3+
quoted_text,"He said ""Hello""","Il a dit ""Bonjour"""
4+
path_windows,"C:\\Program Files\\LangCodec","C:\\Programmes\\LangCodec"
5+
line_break_literal,"Line1\\nLine2","Ligne1\\nLigne2"
6+
apostrophe_text,"Don't stop","N'arrête pas"
7+
xml_entities,"Use <tag> & value","Utiliser <tag> & valeur"
8+
comma_text,"alpha, beta, gamma","alpha, bêta, gamma"
9+
accent_text,"Café crème brûlée","Café crème brûlée"
10+
placeholder_android,"Welcome, %1$s!","Bienvenue, %1$s !"

tests/data/lib/corpus/corpus.tsv

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
key	en	fr
2+
welcome_message	Hello, World!	Bonjour, le monde !
3+
quoted_text	He said "Hello"	Il a dit "Bonjour"
4+
path_windows	C:\\Program Files\\LangCodec	C:\\Programmes\\LangCodec
5+
line_break_literal	Line1\\nLine2	Ligne1\\nLigne2
6+
apostrophe_text	Don't stop	N'arrête pas
7+
xml_entities	Use <tag> & value	Utiliser <tag> & valeur
8+
comma_text	alpha, beta, gamma	alpha, bêta, gamma
9+
accent_text	Café crème brûlée	Café crème brûlée
10+
placeholder_android	Welcome, %1$s!	Bienvenue, %1$s !

tests/data/lib/corpus/en.lproj/Localizable.strings

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Edge-case corpus for regression tests */
2+
"welcome_message" = "Hello, World!";
3+
"quoted_text" = "He said \"Hello\"";
4+
"path_windows" = "C:\\Program Files\\LangCodec";
5+
"line_break_literal" = "Line1\\nLine2";
6+
"apostrophe_text" = "Don't stop";
7+
"xml_entities" = "Use <tag> & value";
8+
"comma_text" = "alpha, beta, gamma";
9+
"accent_text" = "Café crème brûlée";
10+
"placeholder_ios" = "Welcome, %@!";

tests/data/lib/corpus/values-en/strings.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<resources>
3+
<string name="welcome_message">Hello, World!</string>
4+
<string name="quoted_text">He said "Hello"</string>
5+
<string name="path_windows">C:\\Program Files\\LangCodec</string>
6+
<string name="line_break_literal">Line1\nLine2</string>
7+
<string name="apostrophe_text">Don\'t stop</string>
8+
<string name="xml_entities">Use &lt;tag&gt; &amp; value</string>
9+
<string name="comma_text">alpha, beta, gamma</string>
10+
<string name="accent_text">Café crème brûlée</string>
11+
<string name="placeholder_android">Welcome, %1$s!</string>
12+
</resources>

0 commit comments

Comments
 (0)