Fix SSE handling for speech models (#436)

yadavsahil197 · web-flow · commit cc9f25369987 · 2026-03-18T17:55:03.000-07:00
* Fix SSE handling for speech models

* Fix integration tests
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
@@ -10,7 +10,6 @@
     AudioLanguage,
     AudioResponseEncoding,
     AudioSpeechStreamChunk,
-    AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
     TogetherClient,
     TogetherRequest,
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
@@ -5,7 +5,6 @@
     AudioResponseFormat,
     AudioSpeechRequest,
     AudioSpeechStreamChunk,
-    AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
     AudioTimestampGranularities,
     AudioTranscriptionRequest,
@@ -134,7 +133,6 @@
     "AudioLanguage",
     "AudioResponseEncoding",
     "AudioSpeechStreamChunk",
-    "AudioSpeechStreamEvent",
     "AudioSpeechStreamResponse",
     "AudioTranscriptionRequest",
     "AudioTranslationRequest",
diff --git a/src/together/types/audio_speech.py b/src/together/types/audio_speech.py
@@ -41,14 +41,6 @@ class AudioResponseEncoding(str, Enum):
     PCM_ALAW = "pcm_alaw"
 
 
-class AudioObjectType(str, Enum):
-    AUDIO_TTS_CHUNK = "audio.tts.chunk"
-
-
-class StreamSentinelType(str, Enum):
-    DONE = "[DONE]"
-
-
 class AudioSpeechRequest(BaseModel):
     model: str
     input: str
@@ -61,21 +53,8 @@ class AudioSpeechRequest(BaseModel):
 
 
 class AudioSpeechStreamChunk(BaseModel):
-    object: AudioObjectType = AudioObjectType.AUDIO_TTS_CHUNK
-    model: str
-    b64: str
-
-
-class AudioSpeechStreamEvent(BaseModel):
-    data: AudioSpeechStreamChunk
-
-
-class StreamSentinel(BaseModel):
-    data: StreamSentinelType = StreamSentinelType.DONE
-
-
-class AudioSpeechStreamEventResponse(BaseModel):
-    response: AudioSpeechStreamEvent | StreamSentinel
+    type: str = "conversation.item.audio_output.delta"
+    delta: str
 
 
 class AudioSpeechStreamResponse(BaseModel):
@@ -127,18 +106,10 @@ def stream_to_file(
                 if isinstance(chunk.data, bytes):
                     audio_chunks.append(chunk.data)
                 elif isinstance(chunk.data, dict):
-                    # SSE format with JSON/base64
-                    try:
-                        stream_event = AudioSpeechStreamEventResponse(
-                            response={"data": chunk.data}
-                        )
-                        if isinstance(stream_event.response, StreamSentinel):
-                            break
-                        audio_chunks.append(
-                            base64.b64decode(stream_event.response.data.b64)
-                        )
-                    except Exception:
-                        continue  # Skip malformed chunks
+                    # SSE format: {"type": "conversation.item.audio_output.delta", "delta": "<base64>"}
+                    delta = chunk.data.get("delta")
+                    if delta:
+                        audio_chunks.append(base64.b64decode(delta))
 
             if not audio_chunks:
                 raise ValueError("No audio data received in streaming response")
diff --git a/tests/integration/constants.py b/tests/integration/constants.py
@@ -1,5 +1,5 @@
 completion_test_model_list = [
-    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 ]
 chat_test_model_list = []
 embedding_test_model_list = []
diff --git a/tests/integration/resources/test_completion_stream.py b/tests/integration/resources/test_completion_stream.py
@@ -35,7 +35,7 @@ def test_create(
         random_repetition_penalty,  # noqa
     ) -> None:
         prompt = "The space robots have"
-        model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+        model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
         stop = ["</s>"]
 
         # max_tokens should be a reasonable number for this test

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`completion_test_model_list = [`
`2`		`- "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",`
	`2`	`+ "meta-llama/Llama-3.3-70B-Instruct-Turbo",`
`3`	`3`	`]`
`4`	`4`	`chat_test_model_list = []`
`5`	`5`	`embedding_test_model_list = []`