Skip to content

Commit dd178a0

Browse files
he-james and AssemblyAI authored
chore: sync sdk code with DeepLearning repo (#181)
Co-authored-by: AssemblyAI <engineering.sdk@assemblyai.com>
1 parent 2d1e479 commit dd178a0

8 files changed

Lines changed: 60 additions & 15 deletions

File tree

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@ With a single API call, get access to AI models built on the latest AI breakthro
2929
- [**Core Examples**](#core-examples)
3030
- [**Speech Understanding Examples**](#speech-understanding-examples)
3131
- [**Streaming Examples**](#streaming-examples)
32-
- [Playgrounds](#playgrounds)
32+
- [**Change the default settings**](#change-the-default-settings)
33+
- [Playground](#playground)
3334
- [Advanced](#advanced)
3435
- [How the SDK handles Default Configurations](#how-the-sdk-handles-default-configurations)
3536
- [Defining Defaults](#defining-defaults)
3637
- [Overriding Defaults](#overriding-defaults)
3738
- [Synchronous vs Asynchronous](#synchronous-vs-asynchronous)
39+
- [Getting the HTTP status code](#getting-the-http-status-code)
3840
- [Polling Intervals](#polling-intervals)
3941
- [Retrieving Existing Transcripts](#retrieving-existing-transcripts)
4042
- [Retrieving a Single Transcript](#retrieving-a-single-transcript)
@@ -690,11 +692,11 @@ for result in transcript.auto_highlights.results:
690692

691693
<details>
692694
<summary>Stream your microphone in real-time</summary>
693-
695+
694696
```bash
695697
pip install -U assemblyai
696698
```
697-
699+
698700
```python
699701
import logging
700702
from typing import Type

assemblyai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.54.1"
1+
__version__ = "0.56.0"

assemblyai/streaming/v3/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def __str__(self):
119119
class StreamingParameters(StreamingSessionParameters):
120120
sample_rate: int
121121
encoding: Optional[Encoding] = None
122-
speech_model: Optional[SpeechModel] = None
122+
speech_model: SpeechModel
123123
language_detection: Optional[bool] = None
124124
inactivity_timeout: Optional[int] = None
125125
webhook_url: Optional[str] = None

assemblyai/transcriber.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def config(self) -> types.TranscriptionConfig:
5050
"Returns the configuration from the internal Transcript object"
5151
if self.transcript is None:
5252
raise ValueError(
53-
"Canot access the configuration. The internal Transcript object is None."
53+
"Cannot access the configuration. The internal Transcript object is None."
5454
)
5555

5656
return types.TranscriptionConfig(

assemblyai/types.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def __getattribute__(self, item):
155155
]
156156
if item in languages:
157157
warn(
158-
"LanuageCode Enum is deprecated and will be removed in 1.0.0. Use a string instead.",
158+
"LanguageCode Enum is deprecated and will be removed in 1.0.0. Use a string instead.",
159159
DeprecationWarning,
160160
stacklevel=2,
161161
)
@@ -165,7 +165,7 @@ def __getattribute__(self, item):
165165

166166
class LanguageCode(str, Enum, metaclass=DeprecatedLanguageCodeMeta):
167167
"""
168-
DeprecationWarning: LanuageCode is deprecated and will be removed in 1.0.0. Use a string instead.
168+
DeprecationWarning: LanguageCode is deprecated and will be removed in 1.0.0. Use a string instead.
169169
170170
Supported languages for transcribing audio.
171171
"""
@@ -937,6 +937,9 @@ class RawTranscriptionConfig(BaseModel):
937937
temperature: Optional[float] = None
938938
"Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic."
939939

940+
remove_audio_tags: Optional[str] = None
941+
"When set to 'all', removes all bracketed audio/speaker tags (e.g. [MUSIC], [Speaker: A]) from the transcript. Only supported for Universal-3 Pro."
942+
940943
keyterms_prompt: Optional[List[str]] = None
941944
"The list of key terms used to generate the transcript with the Slam-1 speech model. Can't be used together with `prompt`."
942945

@@ -1007,6 +1010,7 @@ def __init__(
10071010
speech_models: Optional[List[str]] = None,
10081011
prompt: Optional[str] = None,
10091012
temperature: Optional[float] = None,
1013+
remove_audio_tags: Optional[str] = None,
10101014
keyterms_prompt: Optional[List[str]] = None,
10111015
keyterms_prompt_options: Optional[KeytermsPromptOptions] = None,
10121016
speech_understanding: Optional[SpeechUnderstandingRequest] = None,
@@ -1054,6 +1058,7 @@ def __init__(
10541058
raw_transcription_config: Create the config from a `RawTranscriptionConfig`
10551059
speech_understanding: Speech understanding configuration for LLM Gateway features (speaker identification, translation, custom formatting)
10561060
temperature: Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic.
1061+
remove_audio_tags: When set to 'all', removes all bracketed audio/speaker tags from the transcript. Only supported for Universal-3 Pro.
10571062
keyterms_prompt_options: Options for controlling keyterms boosting behavior when using `keyterms_prompt`.
10581063
"""
10591064
self._raw_transcription_config = (
@@ -1109,6 +1114,7 @@ def __init__(
11091114
self.speech_models = speech_models
11101115
self.prompt = prompt
11111116
self.temperature = temperature
1117+
self.remove_audio_tags = remove_audio_tags
11121118
self.keyterms_prompt = keyterms_prompt
11131119
self.keyterms_prompt_options = keyterms_prompt_options
11141120
self.speech_understanding = speech_understanding
@@ -1170,6 +1176,16 @@ def temperature(self, temperature: Optional[float]) -> None:
11701176
"Sets the temperature to use for the transcription."
11711177
self._raw_transcription_config.temperature = temperature
11721178

1179+
@property
1180+
def remove_audio_tags(self) -> Optional[str]:
1181+
"When set to 'all', removes all bracketed audio/speaker tags from the transcript."
1182+
return self._raw_transcription_config.remove_audio_tags
1183+
1184+
@remove_audio_tags.setter
1185+
def remove_audio_tags(self, remove_audio_tags: Optional[str]) -> None:
1186+
"Sets remove_audio_tags for the transcription."
1187+
self._raw_transcription_config.remove_audio_tags = remove_audio_tags
1188+
11731189
@property
11741190
def keyterms_prompt(self) -> Optional[List[str]]:
11751191
"The keyterms_prompt to use for the transcription."
@@ -1780,7 +1796,7 @@ def set_custom_spelling(
17801796
Args:
17811797
replacement: A dictionary that contains the replacement object (see below example).
17821798
For each key-value pair, the key is the 'to' field, and the value is the 'from' field.
1783-
override: If `True` `replacement` gets overriden with the given `replacement` argument, otherwise merged.
1799+
override: If `True` `replacement` gets overridden with the given `replacement` argument, otherwise merged.
17841800
17851801
Example:
17861802
```
@@ -2379,6 +2395,9 @@ class TranscriptResponse(BaseTranscript):
23792395
temperature: Optional[float] = None
23802396
"Change how deterministic the response is, with 0 being the most deterministic and 1 being the least deterministic."
23812397

2398+
remove_audio_tags: Optional[str] = None
2399+
"When set to 'all', removes all bracketed audio/speaker tags (e.g. [MUSIC], [Speaker: A]) from the transcript. Only supported for Universal-3 Pro."
2400+
23822401
keyterms_prompt: Optional[List[str]] = None
23832402
"When Slam-1 is enabled, the list of key terms used to generate the transcript"
23842403

tests/unit/test_streaming.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from urllib.parse import urlencode
22

3+
import pytest
4+
from pydantic import ValidationError
35
from pytest_mock import MockFixture
46

57
from assemblyai.streaming.v3 import (
@@ -43,11 +45,15 @@ def mocked_websocket_connect(
4345
options = StreamingClientOptions(api_key="test", api_host="api.example.com")
4446
client = StreamingClient(options)
4547

46-
params = StreamingParameters(sample_rate=16000)
48+
params = StreamingParameters(
49+
sample_rate=16000,
50+
speech_model=SpeechModel.universal_streaming_english,
51+
)
4752
client.connect(params)
4853

4954
expected_headers = {
5055
"sample_rate": params.sample_rate,
56+
"speech_model": str(params.speech_model),
5157
}
5258

5359
assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -81,11 +87,15 @@ def mocked_websocket_connect(
8187
options = StreamingClientOptions(token="test", api_host="api.example.com")
8288
client = StreamingClient(options)
8389

84-
params = StreamingParameters(sample_rate=16000)
90+
params = StreamingParameters(
91+
sample_rate=16000,
92+
speech_model=SpeechModel.universal_streaming_english,
93+
)
8594
client.connect(params)
8695

8796
expected_headers = {
8897
"sample_rate": params.sample_rate,
98+
"speech_model": str(params.speech_model),
8999
}
90100

91101
assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -121,6 +131,7 @@ def mocked_websocket_connect(
121131

122132
params = StreamingParameters(
123133
sample_rate=16000,
134+
speech_model=SpeechModel.universal_streaming_english,
124135
end_of_turn_confidence_threshold=0.5,
125136
min_end_of_turn_silence_when_confident=2000,
126137
max_turn_silence=3000,
@@ -133,6 +144,7 @@ def mocked_websocket_connect(
133144
"min_end_of_turn_silence_when_confident": params.min_end_of_turn_silence_when_confident,
134145
"max_turn_silence": params.max_turn_silence,
135146
"sample_rate": params.sample_rate,
147+
"speech_model": str(params.speech_model),
136148
}
137149

138150
assert actual_url == f"wss://api.example.com/v3/ws?{urlencode(expected_headers)}"
@@ -167,7 +179,10 @@ def mocked_websocket_connect(
167179
options = StreamingClientOptions(api_key="test", api_host="api.example.com")
168180
client = StreamingClient(options)
169181

170-
params = StreamingParameters(sample_rate=16000)
182+
params = StreamingParameters(
183+
sample_rate=16000,
184+
speech_model=SpeechModel.universal_streaming_english,
185+
)
171186
client.connect(params)
172187
client.stream(b"test audio data")
173188

@@ -200,6 +215,7 @@ def mocked_websocket_connect(
200215

201216
params = StreamingParameters(
202217
sample_rate=16000,
218+
speech_model=SpeechModel.universal_streaming_english,
203219
webhook_url="https://example.com/webhook",
204220
webhook_auth_header_name="X-Webhook-Secret",
205221
webhook_auth_header_value="my-secret",
@@ -209,6 +225,7 @@ def mocked_websocket_connect(
209225

210226
expected_params = {
211227
"sample_rate": params.sample_rate,
228+
"speech_model": str(params.speech_model),
212229
"webhook_url": params.webhook_url,
213230
"webhook_auth_header_name": params.webhook_auth_header_name,
214231
"webhook_auth_header_value": params.webhook_auth_header_value,
@@ -287,6 +304,7 @@ def mocked_websocket_connect(
287304

288305
params = StreamingParameters(
289306
sample_rate=16000,
307+
speech_model=SpeechModel.universal_streaming_english,
290308
speaker_labels=True,
291309
max_speakers=3,
292310
)
@@ -355,6 +373,12 @@ def test_turn_event_without_speaker_label():
355373
assert event.speaker_label is None
356374

357375

376+
def test_speech_model_required():
377+
"""Test that omitting speech_model raises a validation error."""
378+
with pytest.raises(ValidationError):
379+
StreamingParameters(sample_rate=16000)
380+
381+
358382
def test_speech_started_event():
359383
"""Test SpeechStarted event parsing (u3-rt-pro only)"""
360384
data = {

tests/unit/test_transcriber.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_upload_file_fails(httpx_mock: HTTPXMock):
6868
with pytest.raises(aai.TranscriptError) as excinfo:
6969
aai.Transcriber().upload_file(os.urandom(10))
7070

71-
# check wheter the TranscriptError contains the specified error message
71+
# check whether the TranscriptError contains the specified error message
7272
assert returned_error_message in str(excinfo.value)
7373
assert httpx.codes.INTERNAL_SERVER_ERROR == excinfo.value.status_code
7474

@@ -148,7 +148,7 @@ def test_submit_file_fails_due_api_error(httpx_mock: HTTPXMock):
148148
with pytest.raises(aai.TranscriptError) as excinfo:
149149
transcriber.transcribe("audio.wav")
150150

151-
# check wheter the Exception contains the specified error message
151+
# check whether the Exception contains the specified error message
152152
assert "something went wrong" in str(excinfo.value)
153153
assert httpx.codes.INTERNAL_SERVER_ERROR == excinfo.value.status_code
154154

tests/unit/test_transcript.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ def test_get_by_id_fails(httpx_mock: HTTPXMock):
379379
with pytest.raises(aai.TranscriptError) as excinfo:
380380
aai.Transcript.get_by_id(test_id)
381381

382-
# check wheter the TranscriptError contains the specified error message
382+
# check whether the TranscriptError contains the specified error message
383383
assert response_json["error"] in str(excinfo.value)
384384
assert len(httpx_mock.get_requests()) == 1
385385

0 commit comments

Comments
 (0)