Skip to content

Commit 10eb3cc

Browse files
committed
test(listen): comprehensive tests for formatters, captions, WS piping, and routing
formatters.py — previously zero coverage:
- format_diarized_words: speaker grouping, punctuated_word preference, empty/multi-speaker
- format_diarized_transcript: API result extraction, malformed input handling
- extract_plain_transcript: channels/paragraphs/fallback paths
- extract_summary: short/text field precedence
- extract_topics: confidence formatting, deduplication, empty inputs

captions.py — previously zero coverage:
- _fmt_webvtt / _fmt_srt: precision, separators, negative clamping
- StreamingCaptionWriter (WebVTT + SRT): timestamps, speaker tags, blank lines, sequential numbering, word accumulation, empty-words guard
- captions_from_words: empty→header, WebVTT/SRT batch output via library

WebSocket message handling (piping stdout out):
- Final transcript printed to stdout and accumulated
- Non-final not accumulated; interim printed with \r
- Non-Results type ignored; invalid JSON does not raise
- Diarized final uses [Speaker N] labels in output
- caption_writer receives entry and suppresses plain text
- Interim suppressed in caption mode
- Multiple messages accumulate correctly

WebSocket URL building:
- wss:// scheme, v1/v2 paths, model/language/diarize/interim params
- Custom base URL respected; https→wss conversion

Flux model auto-selection:
- flux-general-en / flux-* → api_version=2
- nova-3 / enhanced → api_version=1

Caption flag mutual exclusivity:
- --webvtt + --srt → error with "mutually exclusive" message
- Each flag individually passes correct caption_format to _prerecorded
- No flags → caption_format=None

TranscribeCommand alias (deepctl-cmd-transcribe):
- Subclasses ListenCommand, name="transcribe", hidden=True
- ListenCommand.hidden is False
- Identical argument list inherited
- Plugin manager: transcribe absent from dg --help output
- Plugin manager: transcribe --help still works when called directly
- listen remains visible in dg --help
1 parent fef1287 commit 10eb3cc

4 files changed

Lines changed: 867 additions & 0 deletions

File tree

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
"""Tests for the captions module — timestamp formatters, StreamingCaptionWriter, and batch helpers."""
2+
3+
from __future__ import annotations
4+
5+
import pytest
6+
7+
from deepctl_cmd_listen.captions import (
8+
StreamingCaptionWriter,
9+
_fmt_srt,
10+
_fmt_webvtt,
11+
captions_from_words,
12+
)
13+
14+
15+
def _words(*entries: tuple[str, float, float]) -> list[dict]:
16+
"""Build word dicts from (text, start, end) tuples."""
17+
return [
18+
{"word": t, "punctuated_word": t, "start": s, "end": e}
19+
for t, s, e in entries
20+
]
21+
22+
23+
def _words_with_speaker(*entries: tuple[str, float, float, int]) -> list[dict]:
24+
"""Build word dicts with speaker tags."""
25+
return [
26+
{"word": t, "punctuated_word": t, "start": s, "end": e, "speaker": sp}
27+
for t, s, e, sp in entries
28+
]
29+
30+
31+
# ── Timestamp formatters ──────────────────────────────────────────────────────
32+
33+
34+
class TestFmtWebvtt:
    """Exact-output and shape checks for the ``_fmt_webvtt`` timestamp formatter."""

    def test_zero(self):
        assert _fmt_webvtt(0.0) == "00:00:00.000"

    def test_subsecond(self):
        assert _fmt_webvtt(0.08) == "00:00:00.080"

    def test_seconds(self):
        assert _fmt_webvtt(1.5) == "00:00:01.500"

    def test_minutes(self):
        assert _fmt_webvtt(65.4) == "00:01:05.400"

    def test_hours(self):
        assert _fmt_webvtt(3661.0) == "01:01:01.000"

    def test_uses_dot_separator(self):
        ts = _fmt_webvtt(1.234)
        assert "." in ts
        assert "," not in ts

    def test_negative_clamped_to_zero(self):
        assert _fmt_webvtt(-5.0) == "00:00:00.000"

    def test_six_digit_format(self):
        """The timestamp has the shape HH:MM:SS.mmm with all-digit fields."""
        ts = _fmt_webvtt(1.0)
        parts = ts.split(":")
        assert len(parts) == 3
        hours, minutes, seconds_field = parts
        seconds, dot, millis = seconds_field.partition(".")
        assert dot == "."
        fields = (hours, minutes, seconds, millis)
        # Two digits each for HH, MM, SS; three for the millisecond part.
        assert [len(field) for field in fields] == [2, 2, 2, 3]
        assert all(field.isdigit() for field in fields)
64+
65+
66+
class TestFmtSrt:
    """Exact-output checks for the ``_fmt_srt`` timestamp formatter."""

    def test_zero(self):
        assert _fmt_srt(0.0) == "00:00:00,000"

    def test_subsecond(self):
        assert _fmt_srt(0.08) == "00:00:00,080"

    def test_seconds(self):
        assert _fmt_srt(1.5) == "00:00:01,500"

    def test_minutes(self):
        assert _fmt_srt(65.4) == "00:01:05,400"

    def test_hours(self):
        # Mirrors TestFmtWebvtt.test_hours so both formatters cover the
        # hour field; only the millisecond separator is expected to differ.
        assert _fmt_srt(3661.0) == "01:01:01,000"

    def test_uses_comma_separator(self):
        ts = _fmt_srt(1.234)
        assert "," in ts
        assert "." not in ts

    def test_negative_clamped_to_zero(self):
        assert _fmt_srt(-5.0) == "00:00:00,000"
86+
87+
88+
# ── StreamingCaptionWriter — WebVTT ──────────────────────────────────────────
89+
90+
91+
class TestStreamingCaptionWriterWebVTT:
    """Stdout and accumulation checks for ``StreamingCaptionWriter("webvtt")``."""

    def test_header_outputs_webvtt(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.print_header()
        assert "WEBVTT" in capsys.readouterr().out

    def test_entry_outputs_timestamp(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry(_words(("Hello", 0.08, 0.5)), 0.08, 0.5)
        out = capsys.readouterr().out
        assert "00:00:00.080 --> 00:00:00.500" in out

    def test_entry_outputs_text(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry(_words(("Hello", 0.0, 0.5)), 0.0, 0.5)
        assert "Hello" in capsys.readouterr().out

    def test_speaker_label_voice_tag(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry(_words_with_speaker(("Hi", 0.0, 0.5, 1)), 0.0, 0.5)
        assert "<v Speaker 1>" in capsys.readouterr().out

    def test_no_voice_tag_without_speaker(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry(_words(("Hi", 0.0, 0.5)), 0.0, 0.5)
        assert "<v" not in capsys.readouterr().out

    def test_blank_line_after_entry(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry(_words(("Hi", 0.0, 0.5)), 0.0, 0.5)
        out = capsys.readouterr().out
        assert out.endswith("\n\n")

    def test_empty_words_produces_no_output(self, capsys):
        writer = StreamingCaptionWriter("webvtt")
        writer.write_entry([], 0.0, 1.0)
        assert capsys.readouterr().out == ""

    def test_accumulates_words(self):
        writer = StreamingCaptionWriter("webvtt")
        words = _words(("hello", 0.0, 0.5), ("world", 0.6, 1.0))
        writer.write_entry(words, 0.0, 1.0)
        assert writer.accumulated_words == words

    def test_accumulates_across_entries(self, capsys):
        """Words from successive entries are kept, in the order written."""
        writer = StreamingCaptionWriter("webvtt")
        w1 = _words(("one", 0.0, 0.5))
        w2 = _words(("two", 1.0, 1.5))
        writer.write_entry(w1, 0.0, 0.5)
        writer.write_entry(w2, 1.0, 1.5)
        capsys.readouterr()  # discard output
        # Compare content and order, not just the count, so a writer that
        # duplicates or reorders words is caught as well.
        assert writer.accumulated_words == w1 + w2
143+
144+
145+
# ── StreamingCaptionWriter — SRT ─────────────────────────────────────────────
146+
147+
148+
class TestStreamingCaptionWriterSRT:
    """Stdout checks for ``StreamingCaptionWriter("srt")``."""

    def test_no_header_output(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.print_header()
        assert capsys.readouterr().out == ""

    def test_entry_starts_with_sequence_number(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.write_entry(_words(("Hi", 0.0, 0.5)), 0.0, 0.5)
        out = capsys.readouterr().out
        assert out.startswith("1\n")

    def test_sequential_numbering(self, capsys):
        """Two entries are numbered 1 and 2, each index on its own line."""
        writer = StreamingCaptionWriter("srt")
        words = _words(("A", 0.0, 0.5))
        writer.write_entry(words, 0.0, 0.5)
        writer.write_entry(words, 1.0, 1.5)
        out = capsys.readouterr().out
        # Entries are separated by a blank line and begin with their index.
        # Checking the first line of each entry avoids substring false
        # positives (e.g. "11\n" also contains "1\n").
        cues = [cue.strip() for cue in out.split("\n\n") if cue.strip()]
        assert [cue.splitlines()[0] for cue in cues] == ["1", "2"]

    def test_timestamp_uses_comma(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.write_entry(_words(("Hi", 0.08, 0.5)), 0.08, 0.5)
        out = capsys.readouterr().out
        assert "00:00:00,080 --> 00:00:00,500" in out

    def test_speaker_bracket_label(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.write_entry(_words_with_speaker(("Hi", 0.0, 0.5, 0)), 0.0, 0.5)
        assert "[Speaker 0]" in capsys.readouterr().out

    def test_no_speaker_label_without_speaker(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.write_entry(_words(("Hi", 0.0, 0.5)), 0.0, 0.5)
        assert "[Speaker" not in capsys.readouterr().out

    def test_blank_line_after_entry(self, capsys):
        writer = StreamingCaptionWriter("srt")
        writer.write_entry(_words(("Hi", 0.0, 0.5)), 0.0, 0.5)
        assert capsys.readouterr().out.endswith("\n\n")
189+
190+
191+
# ── captions_from_words (batch / end-of-stream) ───────────────────────────────
192+
193+
194+
class TestCaptionsFromWords:
    """Checks on the string returned by ``captions_from_words`` for both formats."""

    def test_empty_words_returns_webvtt_header(self):
        result = captions_from_words([], "webvtt")
        assert "WEBVTT" in result

    def test_empty_words_returns_empty_srt(self):
        result = captions_from_words([], "srt")
        assert result == ""

    def test_webvtt_output_contains_webvtt_header(self):
        words = [
            {"word": "hello", "punctuated_word": "Hello", "start": 0.08, "end": 0.5},
            {"word": "world", "punctuated_word": "world.", "start": 0.6, "end": 1.0},
        ]
        result = captions_from_words(words, "webvtt")
        assert result.startswith("WEBVTT")

    def test_webvtt_output_contains_timestamp(self):
        words = [{"word": "hi", "punctuated_word": "Hi", "start": 0.08, "end": 0.5}]
        result = captions_from_words(words, "webvtt")
        assert "-->" in result

    def test_srt_output_contains_sequence_number(self):
        """The first non-blank line of SRT output is exactly the index ``1``."""
        words = [{"word": "hi", "punctuated_word": "Hi", "start": 0.08, "end": 0.5}]
        result = captions_from_words(words, "srt")
        # Compare the whole first line: a bare startswith("1") would also
        # accept "10", "1x", and similar.
        first_line = result.strip().splitlines()[0]
        assert first_line.strip() == "1"

    def test_srt_timestamp_uses_comma(self):
        words = [{"word": "hi", "punctuated_word": "Hi", "start": 0.08, "end": 0.5}]
        result = captions_from_words(words, "srt")
        assert "00:00:00,080" in result

0 commit comments

Comments
 (0)