chore: sync sdk code with DeepLearning repo (#181)

he-james · AssemblyAI · web-flow · commit dd178a0d7a40 · 2026-03-12T15:02:41.000-06:00
Co-authored-by: AssemblyAI <engineering.sdk@assemblyai.com>
diff --git a/README.md b/README.md
@@ -29,12 +29,14 @@ With a single API call, get access to AI models built on the latest AI breakthro
     - [**Core Examples**](#core-examples)
     - [**Speech Understanding Examples**](#speech-understanding-examples)
     - [**Streaming Examples**](#streaming-examples)
-  - [Playgrounds](#playgrounds)
+    - [**Change the default settings**](#change-the-default-settings)
+  - [Playground](#playground)
 - [Advanced](#advanced)
   - [How the SDK handles Default Configurations](#how-the-sdk-handles-default-configurations)
     - [Defining Defaults](#defining-defaults)
     - [Overriding Defaults](#overriding-defaults)
   - [Synchronous vs Asynchronous](#synchronous-vs-asynchronous)
+  - [Getting the HTTP status code](#getting-the-http-status-code)
   - [Polling Intervals](#polling-intervals)
   - [Retrieving Existing Transcripts](#retrieving-existing-transcripts)
     - [Retrieving a Single Transcript](#retrieving-a-single-transcript)
@@ -690,11 +692,11 @@ for result in transcript.auto_highlights.results:
 
 <details>
   <summary>Stream your microphone in real-time</summary>
-  
+
 ```bash
 pip install -U assemblyai
 ```
-  
+
 ```python
 import logging
 from typing import Type
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.54.1"
+__version__ = "0.56.0"
diff --git a/assemblyai/streaming/v3/models.py b/assemblyai/streaming/v3/models.py
@@ -119,7 +119,7 @@ def __str__(self):
 class StreamingParameters(StreamingSessionParameters):
     sample_rate: int
     encoding: Optional[Encoding] = None
-    speech_model: Optional[SpeechModel] = None
+    speech_model: SpeechModel
     language_detection: Optional[bool] = None
     inactivity_timeout: Optional[int] = None
     webhook_url: Optional[str] = None
diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py
@@ -50,7 +50,7 @@ def config(self) -> types.TranscriptionConfig:
         "Returns the configuration from the internal Transcript object"
         if self.transcript is None:
             raise ValueError(
-                "Canot access the configuration. The internal Transcript object is None."
+                "Cannot access the configuration. The internal Transcript object is None."
             )
 
         return types.TranscriptionConfig(
diff --git a/assemblyai/types.py b/assemblyai/types.py
@@ -155,7 +155,7 @@ def __getattribute__(self, item):
         ]
         if item in languages:
             warn(
-                "LanuageCode Enum is deprecated and will be removed in 1.0.0. Use a string instead.",
+                "LanguageCode Enum is deprecated and will be removed in 1.0.0. Use a string instead.",
                 DeprecationWarning,
                 stacklevel=2,
             )
@@ -165,7 +165,7 @@ def __getattribute__(self, item):
 
 class LanguageCode(str, Enum, metaclass=DeprecatedLanguageCodeMeta):
     """
-    DeprecationWarning: LanuageCode is deprecated and will be removed in 1.0.0. Use a string instead.
+    DeprecationWarning: LanguageCode is deprecated and will be removed in 1.0.0. Use a string instead.
 
     Supported languages for transcribing audio.
     """
@@ -937,6 +937,9 @@ class RawTranscriptionConfig(BaseModel):
     temperature: Optional[float] = None
     "Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic."
 
+    remove_audio_tags: Optional[str] = None
+    "When set to 'all', removes all bracketed audio/speaker tags (e.g. [MUSIC], [Speaker: A]) from the transcript. Only supported for Universal-3 Pro."
+
     keyterms_prompt: Optional[List[str]] = None
     "The list of key terms used to generate the transcript with the Slam-1 speech model. Can't be used together with `prompt`."
 
@@ -1007,6 +1010,7 @@ def __init__(
         speech_models: Optional[List[str]] = None,
         prompt: Optional[str] = None,
         temperature: Optional[float] = None,
+        remove_audio_tags: Optional[str] = None,
         keyterms_prompt: Optional[List[str]] = None,
         keyterms_prompt_options: Optional[KeytermsPromptOptions] = None,
         speech_understanding: Optional[SpeechUnderstandingRequest] = None,
@@ -1054,6 +1058,7 @@ def __init__(
             raw_transcription_config: Create the config from a `RawTranscriptionConfig`
             speech_understanding: Speech understanding configuration for LLM Gateway features (speaker identification, translation, custom formatting)
             temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
+            remove_audio_tags: When set to 'all', removes all bracketed audio/speaker tags from the transcript. Only supported for Universal-3 Pro.
             keyterms_prompt_options: Options for controlling keyterms boosting behavior when using `keyterms_prompt`.
         """
         self._raw_transcription_config = (
@@ -1109,6 +1114,7 @@ def __init__(
         self.speech_models = speech_models
         self.prompt = prompt
         self.temperature = temperature
+        self.remove_audio_tags = remove_audio_tags
         self.keyterms_prompt = keyterms_prompt
         self.keyterms_prompt_options = keyterms_prompt_options
         self.speech_understanding = speech_understanding
@@ -1170,6 +1176,16 @@ def temperature(self, temperature: Optional[float]) -> None:
         "Sets the temperature to use for the transcription."
         self._raw_transcription_config.temperature = temperature
 
+    @property
+    def remove_audio_tags(self) -> Optional[str]:
+        "When set to 'all', removes all bracketed audio/speaker tags from the transcript."
+        return self._raw_transcription_config.remove_audio_tags
+
+    @remove_audio_tags.setter
+    def remove_audio_tags(self, remove_audio_tags: Optional[str]) -> None:
+        "Sets remove_audio_tags for the transcription."
+        self._raw_transcription_config.remove_audio_tags = remove_audio_tags
+
     @property
     def keyterms_prompt(self) -> Optional[List[str]]:
         "The keyterms_prompt to use for the transcription."
@@ -1780,7 +1796,7 @@ def set_custom_spelling(
         Args:
             replacement: A dictionary that contains the replacement object (see below example).
                 For each key-value pair, the key is the 'to' field, and the value is the 'from' field.
-            override: If `True` `replacement` gets overriden with the given `replacement` argument, otherwise merged.
+            override: If `True` `replacement` gets overridden with the given `replacement` argument, otherwise merged.
 
         Example:
             ```
@@ -2379,6 +2395,9 @@ class TranscriptResponse(BaseTranscript):
     temperature: Optional[float] = None
     "Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic."
 
+    remove_audio_tags: Optional[str] = None
+    "When set to 'all', removes all bracketed audio/speaker tags (e.g. [MUSIC], [Speaker: A]) from the transcript. Only supported for Universal-3 Pro."
+
     keyterms_prompt: Optional[List[str]] = None
     "When Slam-1 is enabled, the list of key terms used to generate the transcript"
 
diff --git a/tests/unit/test_streaming.py b/tests/unit/test_streaming.py
@@ -1,5 +1,7 @@
 from urllib.parse import urlencode
 
+import pytest
+from pydantic import ValidationError
 from pytest_mock import MockFixture
 
 from assemblyai.streaming.v3 import (
@@ -43,11 +45,15 @@ def mocked_websocket_connect(
     options = StreamingClientOptions(api_key="test", api_host="api.example.com")
     client = StreamingClient(options)
 
-    params = StreamingParameters(sample_rate=16000)
+    params = StreamingParameters(
+        sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
+    )
     client.connect(params)
 
     expected_headers = {
         "sample_rate": params.sample_rate,
+        "speech_model": str(params.speech_model),
     }
 
     assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -81,11 +87,15 @@ def mocked_websocket_connect(
     options = StreamingClientOptions(token="test", api_host="api.example.com")
     client = StreamingClient(options)
 
-    params = StreamingParameters(sample_rate=16000)
+    params = StreamingParameters(
+        sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
+    )
     client.connect(params)
 
     expected_headers = {
         "sample_rate": params.sample_rate,
+        "speech_model": str(params.speech_model),
     }
 
     assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -121,6 +131,7 @@ def mocked_websocket_connect(
 
     params = StreamingParameters(
         sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
         end_of_turn_confidence_threshold=0.5,
         min_end_of_turn_silence_when_confident=2000,
         max_turn_silence=3000,
@@ -133,6 +144,7 @@ def mocked_websocket_connect(
         "min_end_of_turn_silence_when_confident": params.min_end_of_turn_silence_when_confident,
         "max_turn_silence": params.max_turn_silence,
         "sample_rate": params.sample_rate,
+        "speech_model": str(params.speech_model),
     }
 
     assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -167,7 +179,10 @@ def mocked_websocket_connect(
     options = StreamingClientOptions(api_key="test", api_host="api.example.com")
     client = StreamingClient(options)
 
-    params = StreamingParameters(sample_rate=16000)
+    params = StreamingParameters(
+        sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
+    )
     client.connect(params)
     client.stream(b"test audio data")
 
@@ -200,6 +215,7 @@ def mocked_websocket_connect(
 
     params = StreamingParameters(
         sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
         webhook_url="https://example.com/webhook",
         webhook_auth_header_name="X-Webhook-Secret",
         webhook_auth_header_value="my-secret",
@@ -209,6 +225,7 @@ def mocked_websocket_connect(
 
     expected_params = {
         "sample_rate": params.sample_rate,
+        "speech_model": str(params.speech_model),
         "webhook_url": params.webhook_url,
         "webhook_auth_header_name": params.webhook_auth_header_name,
         "webhook_auth_header_value": params.webhook_auth_header_value,
@@ -287,6 +304,7 @@ def mocked_websocket_connect(
 
     params = StreamingParameters(
         sample_rate=16000,
+        speech_model=SpeechModel.universal_streaming_english,
         speaker_labels=True,
         max_speakers=3,
     )
@@ -355,6 +373,12 @@ def test_turn_event_without_speaker_label():
     assert event.speaker_label is None
 
 
+def test_speech_model_required():
+    """Test that omitting speech_model raises a validation error."""
+    with pytest.raises(ValidationError):
+        StreamingParameters(sample_rate=16000)
+
+
 def test_speech_started_event():
     """Test SpeechStarted event parsing (u3-rt-pro only)"""
     data = {
diff --git a/tests/unit/test_transcriber.py b/tests/unit/test_transcriber.py
@@ -68,7 +68,7 @@ def test_upload_file_fails(httpx_mock: HTTPXMock):
     with pytest.raises(aai.TranscriptError) as excinfo:
         aai.Transcriber().upload_file(os.urandom(10))
 
-    # check wheter the TranscriptError contains the specified error message
+    # check whether the TranscriptError contains the specified error message
     assert returned_error_message in str(excinfo.value)
     assert httpx.codes.INTERNAL_SERVER_ERROR == excinfo.value.status_code
 
@@ -148,7 +148,7 @@ def test_submit_file_fails_due_api_error(httpx_mock: HTTPXMock):
         with pytest.raises(aai.TranscriptError) as excinfo:
             transcriber.transcribe("audio.wav")
 
-    # check wheter the Exception contains the specified error message
+    # check whether the Exception contains the specified error message
     assert "something went wrong" in str(excinfo.value)
     assert httpx.codes.INTERNAL_SERVER_ERROR == excinfo.value.status_code
 
diff --git a/tests/unit/test_transcript.py b/tests/unit/test_transcript.py
@@ -379,7 +379,7 @@ def test_get_by_id_fails(httpx_mock: HTTPXMock):
     with pytest.raises(aai.TranscriptError) as excinfo:
         aai.Transcript.get_by_id(test_id)
 
-    # check wheter the TranscriptError contains the specified error message
+    # check whether the TranscriptError contains the specified error message
     assert response_json["error"] in str(excinfo.value)
     assert len(httpx_mock.get_requests()) == 1
 

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.54.1"`
	`1`	`+__version__ = "0.56.0"`
Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@ def config(self) -> types.TranscriptionConfig:`
`50`	`50`	`"Returns the configuration from the internal Transcript object"`
`51`	`51`	`if self.transcript is None:`
`52`	`52`	`raise ValueError(`
`53`		`- "Canot access the configuration. The internal Transcript object is None."`
	`53`	`+ "Cannot access the configuration. The internal Transcript object is None."`
`54`	`54`	`)`
`55`	`55`
`56`	`56`	`return types.TranscriptionConfig(`