
Commit cf7cd42

Hackathon template
1 parent ee0bc08 commit cf7cd42

5 files changed

Lines changed: 348 additions & 0 deletions


demo/hackathon-template/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
*.egg-info
__pycache__

demo/hackathon-template/README.md

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
# Hackathon Template

We’re excited to see you using LMNT during the hackathon! To make your experience seamless, we have created this quick onboarding template.

This template includes a simple agent that uses LMNT to synthesize audio. If Whisper audio transcription and an LLM are configured, the agent does the following:

1. Accepts an audio file or a text prompt as input
2. Whisper transcribes the audio file to extract text from the spoken input
3. The transcribed text prompt is sent to the LLM and the text response is streamed back
4. The text is streamed to LMNT and the synthesized audio is streamed back

You can change the configuration or input to skip certain steps. For example, you can pass text straight to LMNT by disabling Whisper and the LLM, as in the sketch below.
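If you just want to go from text straight to audio (for example with `whisper.enabled` and `llm.enabled` both set to `false` in `config/config.yaml`), the underlying call is a single LMNT synthesis. Here is a minimal sketch that mirrors the `LMNTtts` helper in `scripts/lmnt_agent.py`; the voice `lily` and the output file name are just examples:

```python
# Minimal text-to-speech sketch using the LMNT SDK directly.
# Assumes LMNT_API_KEY is set in the environment.
import asyncio
import os

from lmnt.api import Speech


async def say(text: str, out_path: str = 'output.mp3') -> None:
    async with Speech(os.environ['LMNT_API_KEY']) as speech:
        synthesis = await speech.synthesize(text, 'lily', model='blizzard')
        with open(out_path, 'wb') as f:
            f.write(synthesis['audio'])


asyncio.run(say('Hello from the hackathon!'))
```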
## Installation

To get started, follow these steps:

1. Ensure you're using Python 3.6 or higher, but less than 3.12.

2. Set up your API keys as environment variables. You can get your API keys from the [LMNT website](https://app.lmnt.com/account).

```bash
export LMNT_API_KEY=<your_api_key>
```

If you are using Mistral, set up your Mistral API key as well.

```bash
export MISTRAL_API_KEY=<your_mistral_api_key>
```

3. **Install dependencies:**

```bash
pip install -r requirements.txt
```

4. **Basic Usage**

After installing the dependencies, you can run the script:

```bash
python scripts/lmnt_agent.py --config config/config.yaml
```
Check out our [documentation](https://docs.lmnt.com/introduction) to explore more of our model's capabilities.
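If you prefer to drive the agent from your own Python code rather than the command line, a minimal sketch could look like the following (assuming you run it from the `scripts` directory so that `lmnt_agent.py` is importable, and adjust the config path accordingly):

```python
# Hypothetical programmatic usage of the template's LMNTAgent.
# Assumes LMNT_API_KEY (and MISTRAL_API_KEY, if the LLM is enabled) are exported.
import asyncio

from lmnt_agent import LMNTAgent  # scripts/lmnt_agent.py from this template

agent = LMNTAgent('../config/config.yaml')
# Pass text directly; audio bytes are routed through Whisper when it is enabled.
asyncio.run(agent.run('Give me a list of the best restaurants in Berlin.'))
```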
## Contributing

We welcome contributions! If you have any ideas, bug fixes, or improvements, please submit a pull request.

Happy hacking!

demo/hackathon-template/config/config.yaml

Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
lmnt_api:
  api_key: ""  # API key for the LMNT API. Recommended: set the LMNT_API_KEY environment variable instead, e.g. export LMNT_API_KEY="your_api_key"
  default_voice: "lily"
  model: "aurora"  # 'aurora' for low latency, 'blizzard' for high quality. Note that streaming is only supported for the 'aurora' model.

whisper:
  enabled: true  # Enable Whisper to transcribe input audio
  model: "base"  # {'base', 'small', 'medium', 'large'}

llm:
  enabled: true  # Pass input text / transcribed text to the LLM
  api_key: ""  # API key for the LLM provider. Recommended: set the environment variable instead, e.g. export MISTRAL_API_KEY="your_api_key"
  model: "mistral-tiny"  # {'mistral': ['mistral-tiny', 'mistral-large-latest']}
  prompt: ""  # Optional prompt sent once to initialize the LLM stream
  provider: "mistral"  # Supported providers: "mistral"
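
For reference, `scripts/lmnt_agent.py` loads this file with `yaml.safe_load` and reads each section as a plain dictionary; a short sketch of the lookups it performs (paths assume you run from the `demo/hackathon-template` directory):

```python
# Sketch of how the agent consumes config/config.yaml.
import yaml

with open('config/config.yaml') as f:
    config = yaml.safe_load(f)

print(config['lmnt_api']['model'])    # 'aurora' (low latency) or 'blizzard' (high quality)
print(config['whisper']['enabled'])   # toggles audio transcription
print(config['llm']['provider'])      # currently only 'mistral' is wired up
```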

demo/hackathon-template/requirements.txt

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
lmnt
MistralAI
openai
openai-whisper

demo/hackathon-template/scripts/lmnt_agent.py

Lines changed: 275 additions & 0 deletions

@@ -0,0 +1,275 @@
import asyncio
import os
import yaml
import whisper

from lmnt.api import Speech
from typing import Optional
from openai import AsyncOpenAI
from mistralai import Mistral


class LMNTStream:
    """
    Handles LMNT API for text-to-speech streaming.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = 'blizzard', voice_id: str = 'lily'):
        """
        Initialize the LMNTStream handler.
        Args:
            api_key (str): The LMNT API key. Defaults to the LMNT_API_KEY environment variable.
            model (str): The LMNT model to use.
            voice_id (str): The ID of the voice to use for LMNT TTS.
        """
        self.api_key = api_key or os.environ.get('LMNT_API_KEY')
        self.voice_id = voice_id
        self.model = model
        self.output_file = 'output.mp3'  # File to save the audio output

    async def __call__(self, text_stream):
        """
        Streams text to the LMNT API and saves the audio output.
        Args:
            text_stream (async generator): Stream of text chunks to send to LMNT.
        """
        async with Speech(self.api_key) as speech:
            connection = await speech.synthesize_streaming(self.voice_id)
            # Read audio and write text concurrently over the same connection.
            reader_task = asyncio.create_task(self._reader_task(connection))
            writer_task = asyncio.create_task(self._writer_task(connection, text_stream))
            await asyncio.gather(reader_task, writer_task)

    async def _reader_task(self, connection):
        """Reads audio data from LMNT and writes it to a file."""
        with open(self.output_file, 'wb') as f:
            async for message in connection:
                f.write(message['audio'])

    async def _writer_task(self, connection, text_stream):
        """Streams text to LMNT."""
        async for text in text_stream:
            await connection.append_text(text)
        await connection.flush()
        # Signal that no more text is coming so the reader task can complete.
        await connection.finish()


class LMNTtts:
    def __init__(self, api_key: Optional[str] = None, model: str = 'blizzard', voice_id: str = 'lily'):
        """
        Initialize the LMNTtts handler.
        Args:
            api_key (str): The LMNT API key. Defaults to the LMNT_API_KEY environment variable.
            model (str): The LMNT model to use.
            voice_id (str): The ID of the voice to use for LMNT TTS.
        """
        self.api_key = api_key or os.environ.get('LMNT_API_KEY')
        self.voice_id = voice_id
        self.model = model
        self.output_file = 'output.mp3'  # File to save the audio output

    async def synthesize(self, text):
        """
        Synthesize text using the LMNT API (non-streaming).
        Args:
            text (str): The text to synthesize.
        Returns:
            bytes: The synthesized audio, also written to self.output_file.
        """
        async with Speech(self.api_key) as speech:
            synthesis = await speech.synthesize(text, self.voice_id, model=self.model)
            with open(self.output_file, 'wb') as f:
                f.write(synthesis['audio'])
            return synthesis['audio']


class MistralStream:
    """
    Handles text generation using Mistral.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = 'mistral-tiny', prompt: str = ''):
        """
        Initialize the MistralStream handler.
        Args:
            api_key (str): The Mistral API key. Defaults to the MISTRAL_API_KEY environment variable.
            model (str): The LLM model to use.
            prompt (str): An optional prompt sent once as a system message when the handler is created.
        """
        self.api_key = api_key or os.environ.get('MISTRAL_API_KEY')
        self.model = model
        self._set_prompt(prompt)

    def _set_prompt(self, prompt: str = ''):
        """Send the optional system prompt once and print the model's reply."""
        if prompt:
            with Mistral(api_key=self.api_key) as client:
                chat_response = client.chat.complete(
                    model=self.model,
                    messages=[{'role': 'system', 'content': prompt}],
                )
                print(chat_response.choices[0].message.content)

    async def __call__(self, query_text: str):
        """
        Generates text from the LLM and streams it as chunks.
        Args:
            query_text (str): The user query to send to the model.
        Returns:
            async generator: Stream of text chunks.
        """
        with Mistral(api_key=self.api_key) as client:
            response = await client.chat.stream_async(
                model=self.model,
                messages=[{'role': 'user', 'content': query_text}],
            )
            async for chunk in response:
                if chunk.data.choices[0].delta.content is not None:
                    yield chunk.data.choices[0].delta.content


class OpenAIStream:
    """
    Handles text generation using an LLM (e.g., OpenAI GPT).
    """

    def __init__(self, api_key=None, model='gpt-3.5-turbo', prompt=None):
        """
        Initialize the OpenAIStream handler.
        Args:
            api_key (str): The OpenAI API key. Defaults to the OPENAI_API_KEY environment variable.
            model (str): The LLM model to use.
            prompt (str): An optional system prompt prepended to each request.
        """
        self.api_key = api_key or os.environ.get('OPENAI_API_KEY')
        self.model = model
        self.client = AsyncOpenAI(api_key=self.api_key)
        self._set_prompt(prompt)

    def _set_prompt(self, prompt=None):
        """Store the optional system prompt for later requests."""
        self.prompt = prompt

    async def __call__(self, query_text):
        """
        Generates text from the LLM and streams it as chunks.
        Args:
            query_text (str): The user query to send to the model.
        Returns:
            async generator: Stream of text chunks.
        """
        messages = []
        if self.prompt:
            messages.append({'role': 'system', 'content': self.prompt})
        messages.append({'role': 'user', 'content': query_text})
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            stream=True,
        )

        async for chunk in response:
            if (
                chunk.choices
                and chunk.choices[0].delta
                and chunk.choices[0].delta.content
            ):
                yield chunk.choices[0].delta.content


class LMNTAgent:
    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize the LMNTAgent from a YAML configuration file.
        Args:
            config_path (str): Path to the configuration file. Defaults to 'config.yaml'.
        """
        if config_path is None:
            config_path = 'config.yaml'
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        self.config = config
        self._whisper = None
        self._llm = None
        self._init_lmnt()
        self._init_whisper()
        self._init_llm()

    def _init_lmnt(self):
        """Initialize the LMNT API handler."""
        lmnt_config = self.config['lmnt_api']
        api_key = lmnt_config.get('api_key', None)
        model = lmnt_config.get('model', 'blizzard')
        voice = lmnt_config.get('default_voice', 'lily')
        self.lmnt_stream = LMNTStream(api_key, model, voice)

    def _init_whisper(self):
        """Initialize the Whisper transcription handler."""
        whisper_config = self.config.get('whisper', {})
        if not whisper_config.get('enabled', False):
            self._whisper = None
            return
        model = whisper_config.get('model')
        self._whisper = whisper.load_model(model)

    def _init_llm(self):
        """Initialize the LLM handler."""
        llm_config = self.config.get('llm', {})
        if not llm_config.get('enabled', False):
            self._llm = None
            return
        model = llm_config['model']
        api_key = llm_config.get('api_key', None)
        prompt = llm_config.get('prompt', None)
        # Only the 'mistral' provider is currently wired up (see 'provider' in the config).
        self._llm = MistralStream(api_key, model, prompt)

    def transcribe_audio(self, audio_bytes: bytes) -> str:
        """
        Transcribe audio using the local Whisper model.
        Args:
            audio_bytes: Audio input to transcribe.
        Returns:
            str: Transcribed text.
        """
        assert self._whisper is not None, 'Whisper mode is not enabled.'
        # whisper's transcribe() returns a dict; the transcription is under the 'text' key.
        return self._whisper.transcribe(audio_bytes)['text']

    async def run(self, input_data: str | bytes):
        """
        Run the agent based on the detected mode.
        Args:
            input_data: Input data (text or audio bytes).
        """

        # Transcribe audio if input is bytes
        if isinstance(input_data, bytes):
            try:
                text = self.transcribe_audio(input_data)
                print(f'Transcribed text: {text}')
            except Exception as e:
                print(f'Error transcribing audio: {e}')
                return
        else:
            text = input_data

        # Process text with LLM if enabled; the LLM yields text chunks as an async generator
        if self._llm:
            try:
                text_stream = self._llm(text)
            except Exception as e:
                print(f'Error processing text: {e}')
                return
        else:
            # LMNTStream expects an async iterable, so wrap plain text in a
            # single-item async generator when the LLM is disabled
            async def _single_chunk():
                yield text

            text_stream = _single_chunk()

        # Synthesize audio
        try:
            await self.lmnt_stream(text_stream)
        except Exception as e:
            print(f'Error synthesizing audio: {e}')
            return


# Example Usage
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config.yaml', help='Path to configuration file.')
    args = parser.parse_args()

    agent = LMNTAgent(args.config)
    input_data = 'Give me a list of the best restaurants in Berlin?'
    asyncio.run(agent.run(input_data))
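
A side note on the LLM wiring: `_init_llm` always constructs `MistralStream`, while `OpenAIStream` is defined but unused. A hedged sketch of driving LMNT from `OpenAIStream` directly (assuming `OPENAI_API_KEY` and `LMNT_API_KEY` are exported and this runs next to `scripts/lmnt_agent.py`):

```python
# Hypothetical: stream OpenAI completions into LMNT instead of Mistral.
import asyncio

from lmnt_agent import LMNTStream, OpenAIStream


async def main():
    llm = OpenAIStream(model='gpt-3.5-turbo')
    tts = LMNTStream(voice_id='lily')
    # llm(...) yields text chunks; LMNTStream writes the synthesized audio to output.mp3.
    await tts(llm('Give me a list of the best restaurants in Berlin.'))


asyncio.run(main())
```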
