Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class STTOptions:
keyterm: str | Sequence[str]
profanity_filter: bool
redact: str | list[str]
diarize_model: str
endpoint_url: str
vad_events: bool = True
numerals: bool = False
Expand All @@ -89,6 +90,7 @@ def __init__(
no_delay: bool = True,
endpointing_ms: int = 25,
enable_diarization: bool = False,
diarize_model: NotGivenOr[str] = NOT_GIVEN,
# enable filler words by default to improve turn detector accuracy
filler_words: bool = True,
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
Expand Down Expand Up @@ -117,6 +119,11 @@ def __init__(
sample_rate: The sample rate of the audio in Hz. Defaults to 16000.
no_delay: When smart_format is used, ensures it does not wait for sequence to be complete before returning results. Defaults to True.
endpointing_ms: Time in milliseconds of silence to consider end of speech. Set to 0 to disable. Defaults to 25.
diarize_model: Select the speaker diarization model version. Enabling this turns on
diarization without also needing ``enable_diarization``. Accepts "latest"
(newest GA diarizer), "v2" (improved batch diarizer, pre-recorded only) or "v1"
(original diarizer). The "v2" value is not supported for streaming requests.
See https://developers.deepgram.com/docs/diarization for details. Defaults to NOT_GIVEN.
filler_words: Whether to include filler words (um, uh, etc.) in transcription. Defaults to True.
keywords: List of tuples containing keywords and their boost values for improved recognition.
Each tuple should be (keyword: str, boost: float). Defaults to None.
Expand Down Expand Up @@ -144,11 +151,14 @@ def __init__(
the DEEPGRAM_API_KEY environmental variable.
""" # noqa: E501

# diarize_model implies diarization without also needing enable_diarization
_diarization_enabled = enable_diarization or is_given(diarize_model)

super().__init__(
capabilities=stt.STTCapabilities(
streaming=True,
interim_results=interim_results,
diarization=enable_diarization,
diarization=_diarization_enabled,
aligned_transcript="word",
)
)
Expand Down Expand Up @@ -186,6 +196,7 @@ def __init__(
keyterm=keyterm if is_given(keyterm) else [],
profanity_filter=profanity_filter,
redact=redact if is_given(redact) else [],
diarize_model=diarize_model if is_given(diarize_model) else "",
numerals=numerals,
mip_opt_out=mip_opt_out,
vad_events=vad_events,
Expand Down Expand Up @@ -232,6 +243,8 @@ async def _recognize_impl(
recognize_config["keyterm"] = self._opts.keyterm
if config.redact:
recognize_config["redact"] = config.redact
if config.diarize_model:
recognize_config["diarize_model"] = config.diarize_model
if config.enable_diarization:
logger.warning("speaker diarization is not supported in non-streaming mode, ignoring")
Comment on lines +246 to 249

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Pre-recorded path also missing diarize=True for diarize_model

In _recognize_impl at lines 243-246, diarize_model is added to recognize_config but diarize=True is not. The existing code for enable_diarization only logs a misleading warning that 'speaker diarization is not supported in non-streaming mode' (Deepgram does support pre-recorded diarization). For consistency with the streaming fix in BUG-0002, the pre-recorded path should also set recognize_config['diarize'] = True when diarize_model is specified. Additionally, the docstring mentions "v2" as a "pre-recorded only" model, confirming the intent that diarize_model should work in pre-recorded mode.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


Expand Down Expand Up @@ -299,6 +312,7 @@ def update_options(
no_delay: NotGivenOr[bool] = NOT_GIVEN,
endpointing_ms: NotGivenOr[int] = NOT_GIVEN,
enable_diarization: NotGivenOr[bool] = NOT_GIVEN,
diarize_model: NotGivenOr[str] = NOT_GIVEN,
filler_words: NotGivenOr[bool] = NOT_GIVEN,
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
keyterm: NotGivenOr[str | list[str]] = NOT_GIVEN,
Expand Down Expand Up @@ -332,6 +346,8 @@ def update_options(
self._opts.endpointing_ms = endpointing_ms
if is_given(enable_diarization):
self._opts.enable_diarization = enable_diarization
if is_given(diarize_model):
self._opts.diarize_model = diarize_model
if is_given(filler_words):
self._opts.filler_words = filler_words
if is_given(keywords):
Expand Down Expand Up @@ -368,6 +384,7 @@ def update_options(
sample_rate=sample_rate,
no_delay=no_delay,
endpointing_ms=endpointing_ms,
diarize_model=diarize_model,
filler_words=filler_words,
keywords=keywords,
keyterm=keyterm,
Comment on lines 384 to 390

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Pre-existing: enable_diarization not forwarded to streams in update_options

In STT.update_options at lines 374-394, the stream.update_options() call forwards diarize_model (newly added) but does NOT forward enable_diarization, even though SpeechStream.update_options accepts it as a parameter (stt.py:460). This omission predates this PR. If a user calls stt.update_options(enable_diarization=True), the STT's own _opts is updated (lines 344-345), but active streams won't pick up the change. The PR correctly adds forwarding for diarize_model but inherits this gap for enable_diarization.

(Refers to lines 374-394)

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Expand Down Expand Up @@ -444,6 +461,7 @@ def update_options(
no_delay: NotGivenOr[bool] = NOT_GIVEN,
endpointing_ms: NotGivenOr[int] = NOT_GIVEN,
enable_diarization: NotGivenOr[bool] = NOT_GIVEN,
diarize_model: NotGivenOr[str] = NOT_GIVEN,
filler_words: NotGivenOr[bool] = NOT_GIVEN,
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
keyterm: NotGivenOr[str | list[str]] = NOT_GIVEN,
Expand Down Expand Up @@ -477,6 +495,8 @@ def update_options(
self._opts.endpointing_ms = endpointing_ms
if is_given(enable_diarization):
self._opts.enable_diarization = enable_diarization
if is_given(diarize_model):
self._opts.diarize_model = diarize_model
if is_given(filler_words):
self._opts.filler_words = filler_words
if is_given(keywords):
Expand Down Expand Up @@ -653,6 +673,8 @@ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
}
if self._opts.enable_diarization:
live_config["diarize"] = True
if self._opts.diarize_model:
live_config["diarize_model"] = self._opts.diarize_model
Comment on lines 674 to +677

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Missing diarize=True in streaming config when diarize_model is set without enable_diarization

In _connect_ws, diarize=True is only added when self._opts.enable_diarization is True (stt.py:671-672). When a user sets only diarize_model (per the docstring: "Enabling this turns on diarization without also needing enable_diarization"), the request sends diarize_model=X but NOT diarize=true. According to Deepgram's API, the diarize boolean parameter is what enables diarization; diarize_model selects which model to use. Without diarize=true, the diarize_model parameter is likely ignored, making the feature silently non-functional for users who follow the docstring and set only diarize_model.

Suggested change
if self._opts.enable_diarization:
live_config["diarize"] = True
if self._opts.diarize_model:
live_config["diarize_model"] = self._opts.diarize_model
if self._opts.enable_diarization or self._opts.diarize_model:
live_config["diarize"] = True
if self._opts.diarize_model:
live_config["diarize_model"] = self._opts.diarize_model
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

if self._opts.keywords:
live_config["keywords"] = self._opts.keywords
if self._opts.keyterm:
Expand Down
91 changes: 91 additions & 0 deletions tests/test_plugin_deepgram_stt.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,101 @@
from __future__ import annotations

from unittest import mock

import pytest

# Module-level pytest mark: applies the custom ``plugin("deepgram")`` marker
# to every test defined in this file.
pytestmark = pytest.mark.plugin("deepgram")


def _capture_request_params(monkeypatch: pytest.MonkeyPatch) -> dict:
    """Replace the plugin's ``_to_deepgram_url`` with a recording stub.

    The stub copies the options dict it is called with into the dict returned
    here (discarding whatever an earlier call stored) and hands back a
    placeholder URL. The stub itself performs no I/O, so a test can inspect
    the request parameters the plugin assembled.

    Args:
        monkeypatch: pytest fixture used to install the stub.

    Returns:
        A dict that is filled in place each time the stub is invoked.
    """
    from livekit.plugins.deepgram import stt as deepgram_stt_module

    seen_params: dict = {}

    def _recording_url_builder(opts: dict, base_url: str, *, websocket: bool) -> str:
        # Only the most recent call's parameters are kept.
        seen_params.clear()
        seen_params.update(opts)
        return f"{base_url}?captured"

    monkeypatch.setattr(deepgram_stt_module, "_to_deepgram_url", _recording_url_builder)
    return seen_params


async def test_diarize_model_in_prerecorded_request_params(monkeypatch: pytest.MonkeyPatch):
    """A ``diarize_model`` given to the constructor appears in the parameters
    built by ``_recognize_impl``."""
    from livekit import rtc
    from livekit.agents import APIConnectionError
    from livekit.plugins.deepgram import STT

    seen_params = _capture_request_params(monkeypatch)
    recognizer = STT(api_key="test-key", diarize_model="v2")

    sample_count = 1600
    silent_frame = rtc.AudioFrame(
        data=b"\x00\x00" * sample_count,
        sample_rate=16000,
        num_channels=1,
        samples_per_channel=sample_count,
    )

    with mock.patch.object(recognizer, "_ensure_session") as session_factory:
        # Make the mocked session's post() fail; the test expects the call to
        # end in APIConnectionError.
        session_factory.return_value.post.side_effect = RuntimeError("stop-before-network")
        with pytest.raises(APIConnectionError):
            await recognizer._recognize_impl(silent_frame)

    assert seen_params.get("diarize_model") == "v2"


async def test_diarize_model_in_live_request_params(monkeypatch: pytest.MonkeyPatch):
    """A ``diarize_model`` given to the constructor appears in the parameters
    built by the stream's ``_connect_ws``.

    The mocked session's ``ws_connect`` raises ``APIConnectionError`` so the
    connection attempt stops right after the parameters are assembled.
    """
    from livekit.agents import APIConnectionError
    from livekit.plugins.deepgram import STT

    captured = _capture_request_params(monkeypatch)

    session = mock.MagicMock()

    async def fake_ws_connect(url, **kwargs):
        raise APIConnectionError("stop-before-network")

    session.ws_connect = fake_ws_connect

    stt = STT(api_key="test-key", diarize_model="latest", http_session=session)
    stream = stt.stream(language="en-US")

    # Close the stream even when an expectation below fails, so a failing
    # test does not leave the stream open.
    try:
        with pytest.raises(APIConnectionError):
            await stream._connect_ws()

        assert captured.get("diarize_model") == "latest"
    finally:
        await stream.aclose()


async def test_update_options_diarize_model():
    """``update_options(diarize_model=...)`` replaces the initially unset value."""
    from livekit.plugins.deepgram import STT

    recognizer = STT(api_key="test-key")

    # Before any update the option is unset: either None or an empty string.
    initial_model = recognizer._opts.diarize_model
    assert initial_model is None or initial_model == ""

    recognizer.update_options(diarize_model="v2")
    assert recognizer._opts.diarize_model == "v2"


async def test_diarize_model_reports_diarization_capability():
    """Passing only ``diarize_model`` makes the STT report diarization support."""
    from livekit.plugins.deepgram import STT

    recognizer = STT(api_key="test-key", diarize_model="latest")
    reported = recognizer.capabilities.diarization
    assert reported is True


async def test_enable_diarization_reports_diarization_capability():
    """``enable_diarization=True`` makes the STT report diarization support."""
    from livekit.plugins.deepgram import STT

    recognizer = STT(api_key="test-key", enable_diarization=True)
    reported = recognizer.capabilities.diarization
    assert reported is True


async def test_no_diarization_reports_no_capability():
    """With no diarization option given, the STT reports no diarization support."""
    from livekit.plugins.deepgram import STT

    recognizer = STT(api_key="test-key")
    reported = recognizer.capabilities.diarization
    assert reported is False


async def test_update_options_uses_stored_language_for_model_validation():
from livekit.plugins.deepgram import STT

Expand Down