Skip to content

Commit ba601c1

Browse files
mvanhorn and claude authored
fix: make audio_interface optional for text-only chat mode (#749)
* fix: make audio_interface optional for text-only chat mode

  When using chat-only (text_only) mode, requiring an AudioInterface is unnecessary. This makes audio_interface default to None in both Conversation and AsyncConversation, with None-guards around all audio operations (start, stop, output, interrupt).

  Fixes #632

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: auto-set text_only in config when audio_interface is omitted

  When audio_interface is None (text-only chat mode), automatically set text_only: True in conversation_config_override so the server skips audio generation. Uses setdefault to preserve explicit user overrides. Applied to both Conversation and AsyncConversation.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: avoid mutating shared config dict when setting text_only

  Replace setdefault (which mutates the caller's dict in-place) with a new dict spread. Prevents a reused ConversationInitiationData from inheriting text_only: True when creating a second Conversation with an audio_interface.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* test: add text-only mode tests for Conversation

  Verifies that:
  - text_only is auto-set when audio_interface is None
  - explicit text_only=False override is preserved
  - original config dict is not mutated (dict spread fix)

* refactor: move text_only auto-set to BaseConversation, deep-copy config

  Move the text_only auto-set logic from both Conversation and AsyncConversation into BaseConversation to eliminate duplication. Deep-copy the ConversationInitiationData so the caller's original object is never mutated.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 54f25ea commit ba601c1

2 files changed

Lines changed: 100 additions & 13 deletions

File tree

src/elevenlabs/conversational_ai/conversation.py

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -388,6 +388,7 @@ def __init__(
388388
user_id: Optional[str] = None,
389389
*,
390390
requires_auth: bool,
391+
audio_interface=None,
391392
config: Optional[ConversationInitiationData] = None,
392393
client_tools: Optional[ClientTools] = None,
393394
on_prem_config: Optional[OnPremInitiationData] = None,
@@ -397,7 +398,16 @@ def __init__(
397398
self.agent_id = agent_id
398399
self.user_id = user_id
399400
self.requires_auth = requires_auth
400-
self.config = config or ConversationInitiationData()
401+
# Copy the config so we never mutate the caller's original object
402+
src = config or ConversationInitiationData()
403+
self.config = ConversationInitiationData(
404+
extra_body=dict(src.extra_body),
405+
conversation_config_override=dict(src.conversation_config_override),
406+
dynamic_variables=dict(src.dynamic_variables),
407+
user_id=src.user_id,
408+
)
409+
if audio_interface is None:
410+
self.config.conversation_config_override.setdefault("text_only", True)
401411
self.client_tools = client_tools or ClientTools()
402412
self.on_prem_config = on_prem_config
403413
self.environment = environment
@@ -610,7 +620,7 @@ async def _handle_message_core_async(self, message, message_handler):
610620

611621

612622
class Conversation(BaseConversation):
613-
audio_interface: AudioInterface
623+
audio_interface: Optional[AudioInterface]
614624
callback_agent_response: Optional[Callable[[str], None]]
615625
callback_agent_response_correction: Optional[Callable[[str, str], None]]
616626
callback_agent_chat_response_part: Optional[Callable[[str, AgentChatResponsePartType], None]]
@@ -630,7 +640,7 @@ def __init__(
630640
user_id: Optional[str] = None,
631641
*,
632642
requires_auth: bool,
633-
audio_interface: AudioInterface,
643+
audio_interface: Optional[AudioInterface] = None,
634644
config: Optional[ConversationInitiationData] = None,
635645
client_tools: Optional[ClientTools] = None,
636646
callback_agent_response: Optional[Callable[[str], None]] = None,
@@ -653,6 +663,7 @@ def __init__(
653663
user_id: The ID of the user conversing with the agent.
654664
requires_auth: Whether the agent requires authentication.
655665
audio_interface: The audio interface to use for input and output.
666+
Can be omitted for text-only (chat) mode.
656667
client_tools: The client tools to use for the conversation.
657668
callback_agent_response: Callback for agent responses.
658669
callback_agent_response_correction: Callback for agent response corrections.
@@ -671,6 +682,7 @@ def __init__(
671682
agent_id=agent_id,
672683
user_id=user_id,
673684
requires_auth=requires_auth,
685+
audio_interface=audio_interface,
674686
config=config,
675687
client_tools=client_tools,
676688
on_prem_config=on_prem_config,
@@ -701,7 +713,8 @@ def start_session(self):
701713

702714
def end_session(self):
703715
"""Ends the conversation session and cleans up resources."""
704-
self.audio_interface.stop()
716+
if self.audio_interface is not None:
717+
self.audio_interface.stop()
705718
self.client_tools.stop()
706719
self._ws = None
707720
self._should_stop.set()
@@ -830,7 +843,8 @@ def input_callback(audio):
830843
logger.error(f"Error sending user audio chunk: {e}")
831844
self.end_session()
832845

833-
self.audio_interface.start(input_callback)
846+
if self.audio_interface is not None:
847+
self.audio_interface.start(input_callback)
834848
while not self._should_stop.is_set():
835849
try:
836850
message = json.loads(ws.recv(timeout=0.5))
@@ -860,7 +874,8 @@ def __init__(self, conversation, ws):
860874
self.callback_audio_alignment = conversation.callback_audio_alignment
861875

862876
def handle_audio_output(self, audio):
863-
self.conversation.audio_interface.output(audio)
877+
if self.conversation.audio_interface is not None:
878+
self.conversation.audio_interface.output(audio)
864879

865880
def handle_audio_alignment(self, alignment):
866881
self.conversation.callback_audio_alignment(alignment)
@@ -878,7 +893,8 @@ def handle_user_transcript(self, transcript):
878893
self.conversation.callback_user_transcript(transcript)
879894

880895
def handle_interruption(self):
881-
self.conversation.audio_interface.interrupt()
896+
if self.conversation.audio_interface is not None:
897+
self.conversation.audio_interface.interrupt()
882898

883899
def handle_ping(self, event):
884900
self.ws.send(
@@ -905,7 +921,7 @@ def send_response(response):
905921

906922

907923
class AsyncConversation(BaseConversation):
908-
audio_interface: AsyncAudioInterface
924+
audio_interface: Optional[AsyncAudioInterface]
909925
callback_agent_response: Optional[Callable[[str], Awaitable[None]]]
910926
callback_agent_response_correction: Optional[Callable[[str, str], Awaitable[None]]]
911927
callback_agent_chat_response_part: Optional[Callable[[str, AgentChatResponsePartType], Awaitable[None]]]
@@ -925,7 +941,7 @@ def __init__(
925941
user_id: Optional[str] = None,
926942
*,
927943
requires_auth: bool,
928-
audio_interface: AsyncAudioInterface,
944+
audio_interface: Optional[AsyncAudioInterface] = None,
929945
config: Optional[ConversationInitiationData] = None,
930946
client_tools: Optional[ClientTools] = None,
931947
callback_agent_response: Optional[Callable[[str], Awaitable[None]]] = None,
@@ -948,6 +964,7 @@ def __init__(
948964
user_id: The ID of the user conversing with the agent.
949965
requires_auth: Whether the agent requires authentication.
950966
audio_interface: The async audio interface to use for input and output.
967+
Can be omitted for text-only (chat) mode.
951968
client_tools: The client tools to use for the conversation.
952969
callback_agent_response: Async callback for agent responses.
953970
callback_agent_response_correction: Async callback for agent response corrections.
@@ -967,6 +984,7 @@ def __init__(
967984
agent_id=agent_id,
968985
user_id=user_id,
969986
requires_auth=requires_auth,
987+
audio_interface=audio_interface,
970988
config=config,
971989
client_tools=client_tools,
972990
on_prem_config=on_prem_config,
@@ -996,7 +1014,8 @@ async def start_session(self):
9961014

9971015
async def end_session(self):
9981016
"""Ends the conversation session and cleans up resources."""
999-
await self.audio_interface.stop()
1017+
if self.audio_interface is not None:
1018+
await self.audio_interface.stop()
10001019
self.client_tools.stop()
10011020
self._ws = None
10021021
self._should_stop.set()
@@ -1124,7 +1143,8 @@ async def input_callback(audio):
11241143
logger.error(f"Error sending user audio chunk: {e}")
11251144
await self.end_session()
11261145

1127-
await self.audio_interface.start(input_callback)
1146+
if self.audio_interface is not None:
1147+
await self.audio_interface.start(input_callback)
11281148

11291149
try:
11301150
while not self._should_stop.is_set():
@@ -1159,7 +1179,8 @@ def __init__(self, conversation, ws):
11591179
self.callback_audio_alignment = conversation.callback_audio_alignment
11601180

11611181
async def handle_audio_output(self, audio):
1162-
await self.conversation.audio_interface.output(audio)
1182+
if self.conversation.audio_interface is not None:
1183+
await self.conversation.audio_interface.output(audio)
11631184

11641185
async def handle_audio_alignment(self, alignment):
11651186
await self.conversation.callback_audio_alignment(alignment)
@@ -1177,7 +1198,8 @@ async def handle_user_transcript(self, transcript):
11771198
await self.conversation.callback_user_transcript(transcript)
11781199

11791200
async def handle_interruption(self):
1180-
await self.conversation.audio_interface.interrupt()
1201+
if self.conversation.audio_interface is not None:
1202+
await self.conversation.audio_interface.interrupt()
11811203

11821204
async def handle_ping(self, event):
11831205
await self.ws.send(

tests/test_convai.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,68 @@ def streaming_callback(text, part_type):
398398
assert streaming_calls[3] == ("!", AgentChatResponsePartType.DELTA)
399399

400400
assert streaming_calls[4] == ("", AgentChatResponsePartType.STOP)
401+
402+
403+
def test_text_only_mode_auto_sets_config():
404+
"""When audio_interface is None, text_only should be auto-set in config."""
405+
mock_ws = create_mock_websocket()
406+
mock_client = MagicMock()
407+
mock_client._client_wrapper.get_base_url.return_value = "https://api.elevenlabs.io"
408+
409+
conversation = Conversation(
410+
client=mock_client,
411+
agent_id=TEST_AGENT_ID,
412+
requires_auth=False,
413+
audio_interface=None,
414+
)
415+
416+
assert conversation.config.conversation_config_override.get("text_only") is True
417+
418+
with patch("elevenlabs.conversational_ai.conversation.connect") as mock_connect:
419+
mock_connect.return_value.__enter__.return_value = mock_ws
420+
conversation.start_session()
421+
conversation.end_session()
422+
conversation.wait_for_session_end()
423+
424+
send_calls = [call[0][0] for call in mock_ws.send.call_args_list]
425+
init_messages = [json.loads(call) for call in send_calls if 'conversation_initiation_client_data' in call]
426+
assert len(init_messages) == 1
427+
assert init_messages[0]["conversation_config_override"]["text_only"] is True
428+
429+
430+
def test_text_only_mode_preserves_explicit_override():
431+
"""When user explicitly sets text_only=False, the override should be preserved."""
432+
mock_client = MagicMock()
433+
mock_client._client_wrapper.get_base_url.return_value = "https://api.elevenlabs.io"
434+
435+
config = ConversationInitiationData(conversation_config_override={"text_only": False})
436+
conversation = Conversation(
437+
client=mock_client,
438+
config=config,
439+
agent_id=TEST_AGENT_ID,
440+
requires_auth=False,
441+
audio_interface=None,
442+
)
443+
444+
assert conversation.config.conversation_config_override["text_only"] is False
445+
446+
447+
def test_text_only_mode_does_not_mutate_original_config():
448+
"""Setting text_only should not mutate the caller's original config dict."""
449+
mock_client = MagicMock()
450+
mock_client._client_wrapper.get_base_url.return_value = "https://api.elevenlabs.io"
451+
452+
original_override = {"some_setting": "value"}
453+
config = ConversationInitiationData(conversation_config_override=original_override)
454+
455+
Conversation(
456+
client=mock_client,
457+
config=config,
458+
agent_id=TEST_AGENT_ID,
459+
requires_auth=False,
460+
audio_interface=None,
461+
)
462+
463+
assert "text_only" not in original_override
464+
# The ConversationInitiationData object itself should also be unmodified
465+
assert "text_only" not in config.conversation_config_override

0 commit comments

Comments
 (0)