Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ name: CI

on:
push:
branches: ['**']
branches: [main]
pull_request:
branches: ['**']

jobs:
test-and-lint:
Expand Down
32 changes: 5 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Main features:

- `📝` Works directly with gettext `.po` files
- `🤖` Translates only untranslated entries using AI
- `🧠` Uses OpenAI `gpt-4o` by default for translation
- `🧠` Uses OpenAI `gpt-5.4` by default for translation
- `🏷️` Respects gettext context (`msgctxt`) when translating entries
- `🔁` Supports singular and plural translations
- `⚠️` Skips fuzzy entries by default
Expand All @@ -21,34 +21,12 @@ Main features:

1. Read the `.po` file and parse its entries.
2. Find entries with empty or missing translations.
3. Send those strings to OpenAI `gpt-4o` for translation while preserving gettext context such as `msgctxt`.
3. Send those strings to OpenAI `gpt-5.4` for translation while preserving gettext context such as `msgctxt`.
4. Write the translated values back into the same `.po` file.

The translation API uses OpenAI `json_schema` structured outputs. Only models that support `json_schema` structured outputs are valid for `msgai`.

<details>
<summary>Supported model families</summary>

- `gpt-4o`
- `gpt-4o-mini`
- `gpt-4.1`
- `gpt-4.1-mini`
- `gpt-4.1-nano`
- `gpt-5`
- `gpt-5-mini`
- `gpt-5-nano`
- `gpt-5-pro`
- `gpt-5.1`
- `gpt-5.2`
- `gpt-5-codex`
- `gpt-5.1-codex`
- `gpt-5.1-codex-mini`
- `gpt-5.1-codex-max`
- `gpt-5.2-codex`

Dated snapshots are accepted where the model family supports them.

</details>
Any OpenAI model that supports `json_schema` structured outputs can be used via the `--model` flag.

By default, entries marked as `fuzzy` are skipped. If you use `--include-fuzzy`, `msgai` will translate those entries too and remove the fuzzy flag after applying the result.

Expand Down Expand Up @@ -87,7 +65,7 @@ Options:
- `--dry-run`: list untranslated `msgid` values only, with no API calls and no file changes
- `--include-fuzzy`: include fuzzy entries for translation and clear their fuzzy flag after translation
- `--source-lang LANG`: set the source language of `msgid` strings as an ISO 639-1 code such as `en` or `uk`
- `--model MODEL`: set the OpenAI model used for translation; default is `gpt-4o`. Only models with `json_schema` structured outputs are supported.
- `--model MODEL`: set the OpenAI model used for translation; default is `gpt-5.4`. Only models with `json_schema` structured outputs are supported.
- `--api-key KEY`: pass the OpenAI API key directly instead of using `OPENAI_API_KEY`
- `--fold-length N`: set PO line fold length when writing files. Use `0` to disable folding and minimize formatting-only diffs. Default: `0`
- `--context TEXT`: additional instructions for the translation model in English, appended to the system prompt (e.g. "use formal tone", "don't translate currency names")
Expand All @@ -111,7 +89,7 @@ Example `msgai.config.yml`:

```yaml
source-lang: en
model: gpt-4o
model: gpt-5.4
include-fuzzy: false
fold-length: 80
context: "use formal tone"
Expand Down
2 changes: 1 addition & 1 deletion src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ function parseArgs(argv: string[]): CliArgs {
})
.option('model', {
type: 'string',
description: 'OpenAI model to use for translation. Default: gpt-4o',
description: 'OpenAI model to use for translation. Default: gpt-5.4',
})
.option('fold-length', {
type: 'number',
Expand Down
33 changes: 7 additions & 26 deletions src/cli/runTranslate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@ import {
getLanguage,
getPluralForms,
} from '../po';
import {
resolveApiKey,
translateStrings,
validateModel,
SUPPORTED_STRUCTURED_OUTPUT_MODELS,
} from '../translate';
import { resolveApiKey, translateStrings } from '../translate';
import { validateSourceLang } from '../validate-source-lang';

const TRANSLATE_BATCH_SIZE = 15;
Expand Down Expand Up @@ -47,6 +42,11 @@ function getApiErrorMessage(err: unknown): string | null {
return `Quota exceeded (out of credits or usage limit). Check plan and billing: https://platform.openai.com/settings/organization/billing`;
}
return `Rate limit reached. Request was retried; if this persists, slow down or check https://developers.openai.com/api/docs/guides/rate-limits`;
case 400:
if (/response_format|json_schema|structured/i.test(message)) {
return `The specified model may not support json_schema structured outputs required by msgai. Try a compatible model like gpt-5.4. API error: ${message}`;
}
return `Invalid request: ${message}`;
case 500:
return `OpenAI server error. Retry later; see https://status.openai.com/`;
case 503:
Expand All @@ -56,13 +56,6 @@ function getApiErrorMessage(err: unknown): string | null {
}
}

/**
 * Builds the one-line message reported when a `--model` value is rejected.
 *
 * @param model - The model name exactly as the user supplied it.
 * @returns The rejection sentence followed by the supported model families,
 *   separated by a single space.
 */
function getInvalidModelMessage(model: string): string {
  const supportedFamilies = SUPPORTED_STRUCTURED_OUTPUT_MODELS.join(', ');
  const rejection = `Invalid --model "${model}". msgai only supports OpenAI models with json_schema structured outputs.`;
  return `${rejection} Supported model families: ${supportedFamilies}.`;
}

export type TranslateCommandArgs = {
poFilePath?: string;
dryRun: boolean;
Expand Down Expand Up @@ -91,7 +84,7 @@ export async function runTranslate(
debugLogger.log('cli.runTranslate', 'Starting translation run', {
poFilePath,
sourceLang,
model: model ?? 'gpt-4o',
model: model ?? 'gpt-5.4',
includeFuzzy: includeFuzzy === true,
});
const poContent = fs.readFileSync(poFilePath, 'utf8');
Expand Down Expand Up @@ -214,18 +207,6 @@ export function runTranslateCommand(args: TranslateCommandArgs): number | Promis
}
}

if (args.model != null) {
try {
validateModel(args.model);
} catch {
debugLogger.log('cli.runTranslateCommand', 'Model validation failed', {
model: args.model,
});
console.warn(getInvalidModelMessage(args.model));
return 1;
}
}

if (!args.dryRun) {
let resultApiKey: string;
try {
Expand Down
55 changes: 1 addition & 54 deletions src/translate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,44 +20,7 @@ export type TranslateOptions = {
context?: string;
};

const DEFAULT_MODEL = 'gpt-4o';
/**
 * Allow-list of model identifiers, one fully anchored pattern per model family.
 *
 * Most families accept the bare name or the name followed by a
 * `-YYYY-MM-DD` snapshot suffix. `gpt-5`, `gpt-5.1` and `gpt-5.2` additionally
 * accept a `-chat-latest` suffix. The `*-codex*` entries match the bare name
 * only. A model is accepted when any one of these patterns matches.
 */
const SUPPORTED_STRUCTURED_OUTPUT_MODEL_PATTERNS = [
/^gpt-4o(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-4o-mini(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-4\.1(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-4\.1-mini(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-4\.1-nano(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-5(?:-\d{4}-\d{2}-\d{2}|-chat-latest)?$/,
/^gpt-5-mini(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-5-nano(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-5-pro(?:-\d{4}-\d{2}-\d{2})?$/,
/^gpt-5\.1(?:-\d{4}-\d{2}-\d{2}|-chat-latest)?$/,
/^gpt-5\.2(?:-\d{4}-\d{2}-\d{2}|-chat-latest)?$/,
/^gpt-5-codex$/,
/^gpt-5\.1-codex$/,
/^gpt-5\.1-codex-mini$/,
/^gpt-5\.1-codex-max$/,
/^gpt-5\.2-codex$/,
] as const;
/**
 * Human-readable names of the supported model families.
 *
 * This list is only used for display: it is joined with ", " into the
 * "Supported model families" part of error messages. Matching of actual
 * model identifiers is done by the separate pattern list.
 */
export const SUPPORTED_STRUCTURED_OUTPUT_MODELS = [
'gpt-4o',
'gpt-4o-mini',
'gpt-4.1',
'gpt-4.1-mini',
'gpt-4.1-nano',
'gpt-5',
'gpt-5-mini',
'gpt-5-nano',
'gpt-5-pro',
'gpt-5.1',
'gpt-5.2',
'gpt-5-codex',
'gpt-5.1-codex',
'gpt-5.1-codex-mini',
'gpt-5.1-codex-max',
'gpt-5.2-codex',
] as const;

const DEFAULT_MODEL = 'gpt-5.4';
/** Error codes: https://developers.openai.com/api/docs/guides/error-codes#api-errors */

const MAX_RETRIES = 3;
Expand All @@ -80,21 +43,6 @@ function isRetryableStatus(status: number): boolean {
return status === 429 || status === 500 || status === 503;
}

/**
 * Reports whether a model identifier matches any entry of the supported
 * model pattern list.
 *
 * @param model - Model identifier to check.
 * @returns `true` as soon as one pattern matches, otherwise `false`.
 */
function isSupportedStructuredOutputModel(model: string): boolean {
  for (const candidate of SUPPORTED_STRUCTURED_OUTPUT_MODEL_PATTERNS) {
    if (candidate.test(model)) {
      return true;
    }
  }
  return false;
}

/**
 * Throws when the given model is not on the supported-model allow-list.
 *
 * @param model - Model identifier to validate.
 * @throws Error naming the rejected model and listing the supported model
 *   families when the model does not match any supported pattern.
 */
function validateStructuredOutputModel(model: string): void {
  if (!isSupportedStructuredOutputModel(model)) {
    const families = SUPPORTED_STRUCTURED_OUTPUT_MODELS.join(', ');
    throw new Error(
      `Model "${model}" is not supported. This package requires an OpenAI Chat Completions model with json_schema structured outputs. Supported model families: ${families}.`,
    );
  }
}

/**
 * Exported entry point for model validation; delegates directly to
 * `validateStructuredOutputModel`.
 *
 * @param model - Model identifier to validate.
 * @throws Whatever `validateStructuredOutputModel` throws for a rejected model.
 */
export function validateModel(model: string): void {
validateStructuredOutputModel(model);
}

/** Request entry: either singular (msgid) or plural (msgid_plural). Optional msgctxt for gettext context. */
export type TranslateRequestEntry =
| { msgid: string; msgctxt?: string }
Expand Down Expand Up @@ -373,7 +321,6 @@ export async function translatePayload(
apiKey: options.apiKey,
});
const model = options?.model ?? DEFAULT_MODEL;
validateStructuredOutputModel(model);
debug.log('translate', 'Prepared translatePayload request summary', {
model,
target_language: payload.target_language,
Expand Down
40 changes: 40 additions & 0 deletions test/cli/api-error-messages.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,46 @@ msgstr ""
}
});

// A 400 whose message mentions json_schema must be reported with the
// structured-output compatibility hint, including the suggested model name.
test('runTranslate shows structured output hint when 400 mentions json_schema', async () => {
  const unsupportedFormatMessage =
    "response_format of type 'json_schema' is not supported with this model";
  mockState.errorToThrow = apiError(400, undefined, unsupportedFormatMessage);
  const poFixture = getTmpPo(`
msgid "Hello"
msgstr ""
`);

  try {
    const exitCode = await runTranslate(poFixture.poFilePath, 'fake-key');
    expect(exitCode).toBe(1);

    const hintMatcher = expect.stringContaining('may not support json_schema structured outputs');
    expect(consoleWarnSpy).toHaveBeenCalledWith(hintMatcher);

    const firstWarning = consoleWarnSpy.mock.calls[0][0];
    expect(firstWarning).toContain('gpt-5.4');
  } finally {
    // Always remove the temporary .po file, even when an expectation fails.
    poFixture.cleanup();
  }
});

// A 400 that does not mention structured outputs must fall back to the
// generic "Invalid request" message and still surface the API error text.
test('runTranslate shows generic 400 message for unrelated bad request', async () => {
  const apiMessage = 'Invalid value for temperature';
  mockState.errorToThrow = apiError(400, undefined, apiMessage);
  const poFixture = getTmpPo(`
msgid "Hello"
msgstr ""
`);

  try {
    const exitCode = await runTranslate(poFixture.poFilePath, 'fake-key');
    expect(exitCode).toBe(1);

    const genericMatcher = expect.stringContaining('Invalid request');
    expect(consoleWarnSpy).toHaveBeenCalledWith(genericMatcher);

    const firstWarning = consoleWarnSpy.mock.calls[0][0];
    expect(firstWarning).toContain('Invalid value for temperature');
  } finally {
    // Always remove the temporary .po file, even when an expectation fails.
    poFixture.cleanup();
  }
});

test('runTranslate shows generic message for non-API error', async () => {
mockState.errorToThrow = new Error('Network connection failed');
const tempPo = getTmpPo(`
Expand Down
22 changes: 0 additions & 22 deletions test/cli/model.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import { test, expect, jest, beforeEach } from '@jest/globals';
import path from 'node:path';
import { spawnSync } from 'node:child_process';
import { getTmpPo } from '../test-utils/getTmpPo';
import { runTranslate } from '../../src/cli/runTranslate';
import { translateStrings } from '../../src/translate';
Expand All @@ -19,26 +17,6 @@ beforeEach(() => {
translateStringsMock.mockReset();
});

// Runs the compiled CLI (dist/src/cli/index.js, resolved from the current
// working directory) in a child process with an unknown --model value and
// checks that it exits with status 1 and prints the invalid-model message.
test('CLI exits with error when --model is invalid', () => {
  const cliPath = path.resolve(process.cwd(), 'dist/src/cli/index.js');
  const tempPo = getTmpPo(`
msgid "Hello"
msgstr ""
`);

  try {
    const runResult = spawnSync(
      process.execPath,
      [cliPath, tempPo.poFilePath, '--dry-run', '--model=not-a-real-model'],
      { encoding: 'utf8' },
    );

    expect(runResult.status).toBe(1);
    expect(runResult.stderr).toContain('Invalid --model "not-a-real-model"');
    expect(runResult.stderr).toMatch(/Supported model families/i);
  } finally {
    // Clean up in `finally` so the temporary .po file is removed even if
    // spawning the child process throws; this matches the other CLI tests.
    tempPo.cleanup();
  }
});

test('runTranslate forwards model to translateStrings', async () => {
translateStringsMock.mockResolvedValue([{ msgid: 'Hello', msgstr: 'Привіт' }]);
const tempPo = getTmpPo(`
Expand Down
60 changes: 0 additions & 60 deletions test/translate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,66 +249,6 @@ test('translatePayload throws when response missing translations', async () => {
).rejects.toThrow(/translations/i);
});

// An unsupported model must be rejected up front: the promise rejects and the
// mocked chat-completions `create` is never invoked.
test('translatePayload rejects unsupported models before calling OpenAI', async () => {
  const createSpy = jest.fn<(params: unknown) => Promise<unknown>>();
  const stubClient = { chat: { completions: { create: createSpy } } } as unknown as OpenAI;

  const attempt = translatePayload(
    {
      formula: '',
      target_language: 'uk',
      source_language: 'en',
      translations: [{ msgid: 'Hi' }],
    },
    { apiKey: 'test-key', client: stubClient, model: 'gpt-4-turbo' },
  );

  await expect(attempt).rejects.toThrow(/json_schema structured outputs|supported models/i);
  expect(createSpy).not.toHaveBeenCalled();
});

// A supported model passes validation: the mocked `create` is called exactly
// once and its translation is returned to the caller.
test('translatePayload allows supported GPT-5 structured-output models', async () => {
  const responseBody = JSON.stringify({ translations: [{ msgid: 'Hi', msgstr: 'Привіт' }] });
  const createSpy = jest
    .fn<(params: unknown) => Promise<unknown>>()
    .mockResolvedValue(mockCompletion(responseBody));
  const stubClient = { chat: { completions: { create: createSpy } } } as unknown as OpenAI;

  const outcome = await translatePayload(
    {
      formula: '',
      target_language: 'uk',
      source_language: 'en',
      translations: [{ msgid: 'Hi' }],
    },
    { apiKey: 'test-key', client: stubClient, model: 'gpt-5.2' },
  );

  expect(outcome.translations[0]?.msgstr).toBe('Привіт');
  expect(createSpy).toHaveBeenCalledTimes(1);
});

// `gpt-5.2-pro` is not on the allow-list, so the call must reject with a
// "not supported" error without reaching the mocked `create`.
test('translatePayload rejects GPT-5.2 models without structured outputs support', async () => {
  const createSpy = jest.fn<(params: unknown) => Promise<unknown>>();
  const stubClient = { chat: { completions: { create: createSpy } } } as unknown as OpenAI;

  const attempt = translatePayload(
    {
      formula: '',
      target_language: 'uk',
      source_language: 'en',
      translations: [{ msgid: 'Hi' }],
    },
    { apiKey: 'test-key', client: stubClient, model: 'gpt-5.2-pro' },
  );

  await expect(attempt).rejects.toThrow(/not supported/i);
  expect(createSpy).not.toHaveBeenCalled();
});

test('translateItems sends items and returns translated strings in same order', async () => {
const responsePayload = {
translations: [
Expand Down
Loading