Skip to content

Commit eb4f7f0

Browse files
committed
Address translate PR review comments
1 parent a5affd7 commit eb4f7f0

1 file changed

Lines changed: 151 additions & 60 deletions

File tree

langcodec-cli/src/translate.rs

Lines changed: 151 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ use mentra::provider::{
1111
use serde::Deserialize;
1212
use std::{
1313
borrow::Cow,
14-
collections::{BTreeMap, HashMap},
14+
collections::{BTreeMap, HashMap, VecDeque},
1515
path::{Path, PathBuf},
1616
sync::Arc,
1717
};
18-
use tokio::{runtime::Builder, sync::Semaphore, task::JoinSet};
18+
use tokio::{
19+
runtime::Builder,
20+
sync::{Mutex as AsyncMutex, mpsc},
21+
task::JoinSet,
22+
};
1923

2024
const DEFAULT_STATUSES: [&str; 2] = ["new", "stale"];
2125
const DEFAULT_CONCURRENCY: usize = 4;
@@ -246,49 +250,59 @@ async fn async_run_translation(
246250
});
247251
}
248252

249-
let semaphore = Arc::new(Semaphore::new(prepared.opts.concurrency));
253+
let worker_count = prepared.opts.concurrency.min(prepared.jobs.len()).max(1);
254+
let queue = Arc::new(AsyncMutex::new(VecDeque::from(prepared.jobs.clone())));
255+
let (tx, mut rx) = mpsc::unbounded_channel::<Result<TranslationResult, String>>();
250256
let mut join_set = JoinSet::new();
251-
for job in prepared.jobs.clone() {
257+
for _ in 0..worker_count {
252258
let backend = Arc::clone(&backend);
253-
let permit_pool = Arc::clone(&semaphore);
259+
let queue = Arc::clone(&queue);
260+
let tx = tx.clone();
254261
join_set.spawn(async move {
255-
let _permit = permit_pool
256-
.acquire_owned()
257-
.await
258-
.map_err(|_| format!("Failed to acquire translation worker for '{}'", job.key))?;
259-
let value = backend
260-
.translate(BackendRequest {
261-
key: job.key.clone(),
262-
source_lang: job.source_lang.clone(),
263-
target_lang: job.target_lang.clone(),
264-
source_value: job.source_value.clone(),
265-
source_comment: job.source_comment.clone(),
266-
})
267-
.await?;
268-
Ok::<TranslationResult, String>(TranslationResult {
269-
key: job.key.clone(),
270-
translated_value: value,
271-
})
262+
loop {
263+
let job = {
264+
let mut queue = queue.lock().await;
265+
queue.pop_front()
266+
};
267+
268+
let Some(job) = job else {
269+
break;
270+
};
271+
272+
let result = backend
273+
.translate(BackendRequest {
274+
key: job.key.clone(),
275+
source_lang: job.source_lang.clone(),
276+
target_lang: job.target_lang.clone(),
277+
source_value: job.source_value.clone(),
278+
source_comment: job.source_comment.clone(),
279+
})
280+
.await
281+
.map(|translated_value| TranslationResult {
282+
key: job.key.clone(),
283+
translated_value,
284+
});
285+
let _ = tx.send(result);
286+
}
287+
288+
Ok::<(), String>(())
272289
});
273290
}
291+
drop(tx);
274292

275293
let mut results: HashMap<String, String> = HashMap::new();
276294
let mut completed = 0usize;
277295

278-
while let Some(result) = join_set.join_next().await {
296+
while let Some(result) = rx.recv().await {
279297
completed += 1;
280298
match result {
281-
Ok(Ok(item)) => {
299+
Ok(item) => {
282300
prepared.summary.translated += 1;
283301
results.insert(item.key, item.translated_value);
284302
}
285-
Ok(Err(err)) => {
286-
prepared.summary.failed += 1;
287-
eprintln!("✖ {}", err);
288-
}
289303
Err(err) => {
290304
prepared.summary.failed += 1;
291-
eprintln!("✖ Translation task failed to join: {}", err);
305+
eprintln!("✖ {}", err);
292306
}
293307
}
294308
eprintln!(
@@ -301,6 +315,20 @@ async fn async_run_translation(
301315
);
302316
}
303317

318+
while let Some(result) = join_set.join_next().await {
319+
match result {
320+
Ok(Ok(())) => {}
321+
Ok(Err(err)) => {
322+
prepared.summary.failed += 1;
323+
eprintln!("✖ Translation worker failed: {}", err);
324+
}
325+
Err(err) => {
326+
prepared.summary.failed += 1;
327+
eprintln!("✖ Translation task failed to join: {}", err);
328+
}
329+
}
330+
}
331+
304332
print_summary(&prepared.summary);
305333

306334
if prepared.summary.failed > 0 {
@@ -332,7 +360,7 @@ async fn async_run_translation(
332360

333361
fn prepare_translation(opts: &TranslateOptions) -> Result<PreparedTranslation, String> {
334362
let config = load_config(opts.config.as_deref())?;
335-
let resolved = resolve_options(opts, config.as_ref())?;
363+
let mut resolved = resolve_options(opts, config.as_ref())?;
336364

337365
validate_path_inputs(&resolved)?;
338366

@@ -352,13 +380,19 @@ fn prepare_translation(opts: &TranslateOptions) -> Result<PreparedTranslation, S
352380
.ok()
353381
.flatten();
354382

383+
if opts.target.is_none()
384+
&& output_path == source_path
385+
&& !is_multi_language_format(&output_format)
386+
{
387+
return Err(
388+
"Omitting --target is only supported for in-place multi-language files; use --target or --output for single-language formats"
389+
.to_string(),
390+
);
391+
}
392+
355393
let source_codec = read_codec(&source_path, resolved.source_lang.clone(), resolved.strict)?;
356394
let source_resource = select_source_resource(&source_codec, &resolved.source_lang)?;
357395

358-
if source_resource.language == resolved.target_lang {
359-
return Err("Source language and target language must differ".to_string());
360-
}
361-
362396
let mut target_codec = if Path::new(&target_path).exists() {
363397
read_codec(&target_path, output_lang_hint.clone(), resolved.strict)?
364398
} else {
@@ -379,12 +413,10 @@ fn prepare_translation(opts: &TranslateOptions) -> Result<PreparedTranslation, S
379413
&resolved.target_lang,
380414
output_lang_hint.as_deref(),
381415
)?;
382-
if target_language != resolved.target_lang {
383-
return Err(format!(
384-
"Target language '{}' does not match resolved output language '{}'",
385-
resolved.target_lang, target_language
386-
));
416+
if lang_matches(&source_resource.language, &target_language) {
417+
return Err("Source language and target language must differ".to_string());
387418
}
419+
resolved.target_lang = target_language;
388420

389421
ensure_target_resource(&mut target_codec, &resolved.target_lang)?;
390422
propagate_xcstrings_metadata(&mut target_codec, &source_resource.language);
@@ -394,7 +426,7 @@ fn prepare_translation(opts: &TranslateOptions) -> Result<PreparedTranslation, S
394426
&target_codec,
395427
&resolved.target_lang,
396428
&resolved.statuses,
397-
target_supports_explicit_status(&target_path, &output_format),
429+
target_supports_explicit_status(&target_path),
398430
)?;
399431

400432
Ok(PreparedTranslation {
@@ -829,14 +861,11 @@ fn is_multi_language_format(format: &FormatType) -> bool {
829861
matches!(format, FormatType::Xcstrings | FormatType::CSV | FormatType::TSV)
830862
}
831863

832-
fn target_supports_explicit_status(path: &str, format: &FormatType) -> bool {
833-
match format {
834-
FormatType::Xcstrings => true,
835-
_ => Path::new(path)
836-
.extension()
837-
.and_then(|ext| ext.to_str())
838-
.is_some_and(|ext| ext.eq_ignore_ascii_case("xcstrings")),
839-
}
864+
fn target_supports_explicit_status(path: &str) -> bool {
865+
Path::new(path)
866+
.extension()
867+
.and_then(|ext| ext.to_str())
868+
.is_some_and(|ext| ext.eq_ignore_ascii_case("xcstrings"))
840869
}
841870

842871
fn write_back(
@@ -960,29 +989,34 @@ fn format_provider_error(err: ProviderError) -> String {
960989
#[cfg(test)]
961990
mod tests {
962991
use super::*;
963-
use std::{collections::VecDeque, fs, sync::Mutex};
992+
use std::{fs, sync::Mutex};
964993
use tempfile::TempDir;
965994

966995
#[derive(Clone)]
967996
struct MockBackend {
968-
responses: Arc<Mutex<VecDeque<Result<String, String>>>>,
997+
responses: Arc<Mutex<HashMap<String, Result<String, String>>>>,
969998
}
970999

9711000
impl MockBackend {
972-
fn new(responses: Vec<Result<String, String>>) -> Self {
1001+
fn new(responses: Vec<(&str, Result<String, String>)>) -> Self {
9731002
Self {
974-
responses: Arc::new(Mutex::new(VecDeque::from(responses))),
1003+
responses: Arc::new(Mutex::new(
1004+
responses
1005+
.into_iter()
1006+
.map(|(key, value)| (key.to_string(), value))
1007+
.collect(),
1008+
)),
9751009
}
9761010
}
9771011
}
9781012

9791013
#[async_trait]
9801014
impl TranslationBackend for MockBackend {
981-
async fn translate(&self, _request: BackendRequest) -> Result<String, String> {
1015+
async fn translate(&self, request: BackendRequest) -> Result<String, String> {
9821016
self.responses
9831017
.lock()
9841018
.unwrap()
985-
.pop_front()
1019+
.remove(&request.key)
9861020
.unwrap_or_else(|| Err("missing mock response".to_string()))
9871021
}
9881022
}
@@ -1013,8 +1047,14 @@ mod tests {
10131047
fs::write(&source, "\"welcome\" = \"Welcome\";\n\"bye\" = \"Goodbye\";\n").unwrap();
10141048

10151049
let prepared = prepare_translation(&base_options(&source, Some(&target))).unwrap();
1016-
let outcome =
1017-
run_prepared_translation(prepared, Arc::new(MockBackend::new(vec![Ok("Bienvenue".to_string()), Ok("Au revoir".to_string())]))).unwrap();
1050+
let outcome = run_prepared_translation(
1051+
prepared,
1052+
Arc::new(MockBackend::new(vec![
1053+
("welcome", Ok("Bienvenue".to_string())),
1054+
("bye", Ok("Au revoir".to_string())),
1055+
])),
1056+
)
1057+
.unwrap();
10181058

10191059
assert_eq!(outcome.translated, 2);
10201060
let written = fs::read_to_string(&target).unwrap();
@@ -1036,8 +1076,14 @@ mod tests {
10361076

10371077
let before = fs::read_to_string(&target).unwrap();
10381078
let prepared = prepare_translation(&options).unwrap();
1039-
let outcome =
1040-
run_prepared_translation(prepared, Arc::new(MockBackend::new(vec![Ok("Bienvenue".to_string())]))).unwrap();
1079+
let outcome = run_prepared_translation(
1080+
prepared,
1081+
Arc::new(MockBackend::new(vec![(
1082+
"welcome",
1083+
Ok("Bienvenue".to_string()),
1084+
)])),
1085+
)
1086+
.unwrap();
10411087
let after = fs::read_to_string(&target).unwrap();
10421088

10431089
assert_eq!(outcome.translated, 1);
@@ -1058,8 +1104,8 @@ mod tests {
10581104
let err = run_prepared_translation(
10591105
prepared,
10601106
Arc::new(MockBackend::new(vec![
1061-
Ok("Bienvenue".to_string()),
1062-
Err("boom".to_string()),
1107+
("welcome", Ok("Bienvenue".to_string())),
1108+
("bye", Err("boom".to_string())),
10631109
])),
10641110
)
10651111
.unwrap_err();
@@ -1162,6 +1208,51 @@ status = ["new", "stale"]
11621208
assert_eq!(prepared.summary.queued, 1);
11631209
}
11641210

1211+
#[test]
1212+
fn rejects_in_place_single_language_translation_without_target() {
1213+
let temp_dir = TempDir::new().unwrap();
1214+
let source = temp_dir.path().join("en.strings");
1215+
fs::write(&source, "\"welcome\" = \"Welcome\";\n").unwrap();
1216+
1217+
let options = base_options(&source, None);
1218+
let err = prepare_translation(&options).unwrap_err();
1219+
assert!(err.contains("Omitting --target is only supported"));
1220+
}
1221+
1222+
#[test]
1223+
fn canonicalizes_target_language_from_existing_target_resource() {
1224+
let temp_dir = TempDir::new().unwrap();
1225+
let source = temp_dir.path().join("translations.csv");
1226+
let target = temp_dir.path().join("target.csv");
1227+
fs::write(&source, "key,en\nwelcome,Welcome\n").unwrap();
1228+
fs::write(&target, "key,fr-CA\nwelcome,\n").unwrap();
1229+
1230+
let mut options = base_options(&source, Some(&target));
1231+
options.target_lang = Some("fr".to_string());
1232+
options.source_lang = Some("en".to_string());
1233+
1234+
let prepared = prepare_translation(&options).unwrap();
1235+
assert_eq!(prepared.opts.target_lang, "fr-CA");
1236+
assert_eq!(prepared.summary.queued, 1);
1237+
}
1238+
1239+
#[test]
1240+
fn infers_status_from_target_input_format_not_output_format() {
1241+
let temp_dir = TempDir::new().unwrap();
1242+
let source = temp_dir.path().join("en.strings");
1243+
let target = temp_dir.path().join("fr.strings");
1244+
let output = temp_dir.path().join("translated.xcstrings");
1245+
1246+
fs::write(&source, "\"welcome\" = \"Welcome\";\n").unwrap();
1247+
fs::write(&target, "\"welcome\" = \"\";\n").unwrap();
1248+
1249+
let mut options = base_options(&source, Some(&target));
1250+
options.output = Some(output.to_string_lossy().to_string());
1251+
1252+
let prepared = prepare_translation(&options).unwrap();
1253+
assert_eq!(prepared.summary.queued, 1);
1254+
}
1255+
11651256
#[test]
11661257
fn parses_fenced_json_translation() {
11671258
let text = "```json\n{\"translation\":\"Bonjour\"}\n```";

0 commit comments

Comments
 (0)