Skip to content

Commit 6188890

Browse files
committed
Add optional language parameter to synthesize methods.
- Defaults to `en`.
- Language is specified as a two-letter ISO 639-1 code.
- Server-side support for streaming synthesis is not yet in place.
- Note: multilingual support is still in beta.
1 parent 0e1056d commit 6188890

5 files changed

Lines changed: 41 additions & 5 deletions

File tree

demo/stream.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async def writer_task(conn, prompt):
3838

3939
async def main(args):
4040
speech = Speech(os.getenv('LMNT_API_KEY'))
41-
conn = await speech.synthesize_streaming(VOICE_ID, return_extras=False)
41+
conn = await speech.synthesize_streaming(VOICE_ID, return_extras=False, language=args.language)
4242

4343
t1 = asyncio.create_task(reader_task(conn))
4444
t2 = asyncio.create_task(writer_task(conn, args.prompt))
@@ -51,4 +51,5 @@ async def main(args):
5151
if __name__ == '__main__':
5252
parser = ArgumentParser()
5353
parser.add_argument('prompt', default=DEFAULT_PROMPT, nargs='?')
54+
parser.add_argument('-l', '--language', required=False, default='en', help='Language code')
5455
asyncio.run(main(parser.parse_args()))

demo/synthesize.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ async def main(args):
1818
print(account)
1919

2020
# Synthesize text to speech.
21-
synthesize = await s.synthesize(text=args.text, voice=args.voice)
21+
synthesize = await s.synthesize(text=args.text, voice=args.voice, language=args.language)
2222
with open('output.mp3', 'wb') as f:
2323
f.write(synthesize['audio'])
2424
print('Done.')
@@ -27,5 +27,6 @@ async def main(args):
2727
parser = argparse.ArgumentParser(description='Synthesize text to speech using LMNT API')
2828
parser.add_argument('-t', '--text', required=False, default='This is a test of the LMNT API.', help='Text to synthesize')
2929
parser.add_argument('-v', '--voice', required=False, default='lily', help='Voice to use')
30+
parser.add_argument('-l', '--language', required=False, default='en', help='Language code')
3031
args = parser.parse_args()
3132
asyncio.run(main(args))

src/lmnt/api.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ async def synthesize(self, text: str, voice: str, **kwargs):
319319
- `speed` (float): Floating point value between 0.25 (slow) and 2.0 (fast); Defaults to 1.0
320320
- `return_durations` (bool): If `True`, the response will include word durations detail. Defaults to `False`.
321321
- `return_seed` (bool): If `True`, the response will include the seed used for synthesis. Defaults to `False`.
322+
- `language` (str): The desired language of the synthesized speech. Two letter ISO 639-1 code. Defaults to `en`.
322323
- `length` (int): The desired target length of the output speech in seconds. Maximum 300.0 (5 minutes)
323324
324325
Deprecated parameters:
@@ -362,7 +363,8 @@ async def synthesize(self, text: str, voice: str, **kwargs):
362363
if return_durations is True:
363364
form_data.add_field('return_durations', 'true')
364365
return_seed = kwargs.get('return_seed', False)
365-
366+
if 'language' in kwargs:
367+
form_data.add_field('language', kwargs.get('language'))
366368
async with self._session.post(url, data=form_data, headers=self._build_headers()) as resp:
367369
await self._handle_response_errors(resp, 'Speech.synthesize')
368370
response_data = await resp.json()
@@ -384,6 +386,7 @@ async def synthesize_streaming(self, voice: str, return_extras: bool = False, **
384386
- `voice` (str): The voice id to use for this connection.
385387
- `speed` (float): The speed to use for synthesis. Defaults to 1.0.
386388
- `return_extras` (bool): If `True`, the response will include word durations detail. Defaults to `False`.
389+
- `language` (str): The desired language of the synthesized speech. Two letter ISO 639-1 code. Defaults to `en`.
387390
388391
Returns:
389392
- `StreamingSynthesisConnection`: The streaming connection object.
@@ -406,6 +409,8 @@ async def synthesize_streaming(self, voice: str, return_extras: bool = False, **
406409
if 'expressive' in kwargs:
407410
init_msg['expressive'] = kwargs['expressive']
408411
init_msg['send_extras'] = return_extras
412+
if 'language' in kwargs:
413+
init_msg['language'] = kwargs['language']
409414
ws = await self._session.ws_connect(f'{self._base_url}{_SYNTHESIZE_STREAMING_ENDPOINT}')
410415
await ws.send_str(json.dumps(init_msg))
411416
return StreamingSynthesisConnection(ws, return_extras)

test/integration/smoke_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,20 @@ async def test_synthesize(api: Speech):
107107
assert isinstance(result['audio'], bytes)
108108

109109

110+
@pytest.mark.asyncio
111+
async def test_synthesize__non_en_language(api: Speech):
112+
voice = 'lily'
113+
text = 'Example Text'
114+
language = 'pt'
115+
result = await api.synthesize(text=text, voice=voice, language=language)
116+
assert result is not None
117+
assert 'audio' in result
118+
assert 'durations' not in result
119+
assert 'seed' not in result
120+
assert len(result['audio']) > 0
121+
assert isinstance(result['audio'], bytes)
122+
123+
110124
@pytest.mark.asyncio
111125
async def test_synthesize_with_empty_voice(api: Speech):
112126
voice = ''

test/unit/test_speech.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,19 @@ async def test_synthesize_return_durations_and_seed(api):
7171
assert synthesis_result == {'audio': base64.b64decode(mock_response['audio']), 'durations': mock_response['durations'], 'seed': mock_response['seed']}
7272

7373

74+
@pytest.mark.asyncio
75+
async def test_synthesize__non_en_language(api):
76+
text = 'Hello, world!'
77+
voice = 'Voice1'
78+
language = 'pt'
79+
mock_response = {'audio': MOCK_AUDIO, 'durations': [], 'seed': 'random_seed'}
80+
api._session.post.return_value.__aenter__.return_value.json = AsyncMock(return_value=mock_response)
81+
api._session.post.return_value.__aenter__.return_value.status = 200
82+
83+
synthesis_result = await api.synthesize(text, voice, language=language)
84+
assert synthesis_result == {'audio': base64.b64decode(mock_response['audio'])}
85+
86+
7487
@pytest.mark.asyncio
7588
async def test_synthesize_no_text(api):
7689
with pytest.raises(AssertionError):
@@ -113,12 +126,13 @@ async def test_synthesize_streaming(api):
113126
speed = 1.5
114127
expressive = 0.8
115128
return_extras = True
129+
language = 'pt'
116130

117131
mock_ws = AsyncMock()
118132
api._session = AsyncMock()
119133
api._session.ws_connect.return_value = mock_ws
120134

121-
connection = await api.synthesize_streaming(voice, return_extras=return_extras, speed=speed, expressive=expressive)
135+
connection = await api.synthesize_streaming(voice, return_extras=return_extras, speed=speed, expressive=expressive, language=language)
122136

123137
assert isinstance(connection, StreamingSynthesisConnection)
124138
api._session.ws_connect.assert_called_once_with(f'{api._base_url}{_SYNTHESIZE_STREAMING_ENDPOINT}')
@@ -127,7 +141,8 @@ async def test_synthesize_streaming(api):
127141
'voice': voice,
128142
'speed': speed,
129143
'expressive': expressive,
130-
'send_extras': return_extras
144+
'send_extras': return_extras,
145+
'language': language
131146
}))
132147

133148

0 commit comments

Comments
 (0)