
Commit 101ce8c

fix: append resume message to session instead of yielding to UI
The resume nudge event was being yielded from run_async(), which sent it through the SSE stream to the frontend. Users saw "Your previous response was empty" as a visible chat message.

Fix: use session_service.append_event() to write the resume message directly to the session history. The model sees it on the next call (for better recovery), but it never reaches the UI/SSE stream.
1 parent e6289db commit 101ce8c

3 files changed

Lines changed: 71 additions & 85 deletions
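To make the fix concrete, here is a minimal sketch of the before/after pattern. This is not the actual ADK source: the retry loop is collapsed and make_resume_event() is a hypothetical helper standing in for the Event construction shown in the diff below.

# Sketch only: why append_event() hides the nudge from the UI while a
# yield would expose it. make_resume_event() is a hypothetical helper.
async def run_async_sketch(invocation_context):
  # ... an empty model response was detected; build the nudge ...
  resume_event = make_resume_event(invocation_context)

  # Before: yielding pushes the event into the output stream that the
  # runner forwards over SSE, so the frontend rendered it as a message.
  # yield resume_event

  # After: appending persists the event in session history only; the
  # next LLM call is built from that history, so the model sees the
  # nudge but the stream (and the UI) never does.
  await invocation_context.session_service.append_event(
      session=invocation_context.session,
      event=resume_event,
  )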


src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 7 additions & 1 deletion
@@ -821,7 +821,10 @@ async def run_async(
         )
         # Inject a resume nudge into the session so the next LLM call
         # sees it in its context and is more likely to continue.
+        # We append directly to the session (not yield) so that the
+        # message reaches the model but is NOT sent to the UI/SSE stream.
         resume_event = Event(
+            id=Event.new_id(),
             invocation_id=invocation_context.invocation_id,
             author='user',
             branch=invocation_context.branch,
@@ -832,7 +835,10 @@
                 ],
             ),
         )
-        yield resume_event
+        await invocation_context.session_service.append_event(
+            session=invocation_context.session,
+            event=resume_event,
+        )
         continue

         # Normal termination conditions.
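Why the appended event still reaches the model: on each call, the request contents are assembled from the session's event history. The sketch below is illustrative only; build_request_contents() and the session.events field are assumptions about the shape of that step, not ADK internals.

# Illustrative assumption: a contents builder that walks session history.
# Any event persisted via session_service.append_event(), including the
# resume nudge, becomes part of the next request's context.
def build_request_contents(session):
  return [
      event.content
      for event in session.events
      if event.content and event.content.parts
  ]

The explicit id=Event.new_id() in the hunk fits this path: an event written straight to storage no longer passes through the yield pipeline that previously handled it. That reading is an inference from the diff, not a documented contract.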

tests/unittests/flows/llm_flows/test_base_llm_flow_partial_handling.py

Lines changed: 6 additions & 5 deletions
@@ -116,11 +116,12 @@ async def test_run_async_retries_then_breaks_on_no_last_event():
   async for event in flow.run_async(invocation_context):
     events.append(event)

-  # Should have resume events from retry attempts (one per retry).
-  # The empty LlmResponse has content=None, so _postprocess_async filters
-  # it out — no model events are yielded, only resume nudge events.
-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
+  # Resume events are appended to session (not yielded), so no user
+  # events should appear in the output stream. Verify retries happened
+  # by checking how many responses were consumed.
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  leaked = [e for e in events if e.author == 'user']
+  assert len(leaked) == 0, 'Resume messages must not leak to output'


 @pytest.mark.asyncio
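The new assertions read mock_model.response_index as the index of the last response the model served: one call leaves it at 0, so N retries leave it at N. A minimal mock consistent with that contract might look like the sketch below; it is an assumed stand-in for the test suite's real fixture, not its code.

# Assumed fixture sketch: serves canned responses and tracks the index
# of the last one consumed (starts at -1, so the first call leaves it at 0).
class MockModel:

  def __init__(self, responses):
    self.responses = responses
    self.response_index = -1

  async def generate_content_async(self, llm_request, stream=False):
    self.response_index += 1
    yield self.responses[self.response_index]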

tests/unittests/flows/llm_flows/test_empty_response_all_scenarios.py

Lines changed: 58 additions & 79 deletions
@@ -14,15 +14,10 @@

 """Comprehensive tests for empty model response retry across all scenarios.

-Covers:
-Scenario 1: Non-streaming empty response (parts: [], is_final_response=True)
-Scenario 2: Streaming + thinking, thought-only final (is_final_response=True)
-Scenario 3a: No events yielded at all (last_event=None)
-Scenario 3b: Partial event with no meaningful content (last_event.partial=True)
-Scenario 4: Partial event WITH meaningful content (should NOT retry)
-Scenario 5: Empty response after max retries (should stop)
-Scenario 6: Empty then good response (recovery)
-Scenario 7: lite_llm streaming fallback (empty non-partial response yielded)
+The resume message is appended directly to the session (not yielded),
+so it reaches the model on retry but never leaks to the UI/SSE stream.
+We verify retries via mock_model.response_index and confirm no user
+events are yielded.
 """

 from google.adk.agents.llm_agent import Agent
@@ -41,11 +36,23 @@ class BaseLlmFlowForTesting(BaseLlmFlow):
   pass


+def _collect_resume_leaks(events):
+  """Return any resume nudge events that leaked to the output stream."""
+  return [
+      e
+      for e in events
+      if e.author == 'user'
+      and e.content
+      and e.content.parts
+      and any(
+          'previous response was empty' in (p.text or '')
+          for p in e.content.parts
+      )
+  ]
+
+
 # ---------------------------------------------------------------------------
-# Scenario 1: Non-streaming empty response (the original bug from adk_combined.log)
-# Model returns parts: [], partial=False, finish_reason=STOP
-# is_final_response() -> True, _has_meaningful_content() -> False
-# Expected: retry with resume message, then succeed
+# Scenario 1: Non-streaming empty response, then recovery
 # ---------------------------------------------------------------------------


@@ -73,25 +80,23 @@ async def test_scenario1_non_streaming_empty_then_recovery():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  # Should see: empty model event, resume nudge, good model event
-  resume_events = [e for e in events if e.author == 'user']
-  model_events = [
-      e
+  # Model called twice (empty + good)
+  assert mock_model.response_index == 1
+  # Resume message must NOT leak to UI
+  assert len(_collect_resume_leaks(events)) == 0
+  # Good response should be in output
+  good_texts = [
+      p.text
       for e in events
-      if e.author == 'test_agent' and e.content and e.content.parts
+      if e.content and e.content.parts
+      for p in e.content.parts
+      if p.text
   ]
-  assert len(resume_events) == 1, 'Expected exactly 1 resume nudge'
-  good_texts = [p.text for e in model_events for p in e.content.parts if p.text]
-  assert any(
-      'answer' in t for t in good_texts
-  ), 'Expected good response after retry'
+  assert any('answer' in t for t in good_texts)


 # ---------------------------------------------------------------------------
-# Scenario 2: Streaming + thinking, thought-only final response
-# Model returns thought parts only, partial=False
-# is_final_response() -> True, _has_meaningful_content() -> False (thought-only)
-# Expected: retry with resume message
+# Scenario 2: Thought-only final response triggers retry
 # ---------------------------------------------------------------------------


@@ -122,21 +127,18 @@ async def test_scenario2_thought_only_final_response_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on thought-only response'
+  assert mock_model.response_index == 1, 'Expected 2 LLM calls (retry)'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 3a: No events yielded at all (last_event=None)
-# _postprocess_async filters out LlmResponse with content=None
-# Expected: retry with resume message
 # ---------------------------------------------------------------------------


 @pytest.mark.asyncio
 async def test_scenario3a_no_events_at_all_retried():
   """When _run_one_step yields nothing, retry fires."""
-  # content=None means _postprocess_async returns without yielding
   empty_responses = [
       LlmResponse(content=None, partial=False)
       for _ in range(_MAX_EMPTY_RESPONSE_RETRIES + 1)
@@ -151,16 +153,13 @@ async def test_scenario3a_no_events_at_all_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert (
-      len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
-  ), f'Expected {_MAX_EMPTY_RESPONSE_RETRIES} resume nudges'
+  # Model called initial + retries times
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 3b: Partial event with no meaningful content
-# Streaming + thinking: last event is partial with thought-only content
-# Expected: retry with resume message
 # ---------------------------------------------------------------------------


@@ -188,8 +187,8 @@ async def test_scenario3b_partial_empty_content_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on partial empty event'
+  assert mock_model.response_index == 1, 'Expected retry on partial empty'
+  assert len(_collect_resume_leaks(events)) == 0


 @pytest.mark.asyncio
@@ -219,14 +218,12 @@ async def test_scenario3b_partial_thought_only_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on partial thought-only event'
+  assert mock_model.response_index == 1, 'Expected retry on partial thought'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 4: Partial event WITH meaningful content (should NOT retry)
-# This is a normal streaming state — partial + real text content.
-# Expected: break with warning, no retry
 # ---------------------------------------------------------------------------


@@ -250,19 +247,14 @@ async def test_scenario4_partial_with_meaningful_content_not_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert (
-      len(resume_events) == 0
-  ), 'Partial event with real content should NOT trigger retry'
-  # The partial event itself should be yielded
+  # Only 1 LLM call — no retry
+  assert mock_model.response_index == 0
   partial_events = [e for e in events if e.partial]
   assert len(partial_events) == 1


 # ---------------------------------------------------------------------------
 # Scenario 5: Empty response exhausts max retries
-# Model keeps returning empty — should stop after _MAX_EMPTY_RESPONSE_RETRIES
-# Expected: exactly _MAX_EMPTY_RESPONSE_RETRIES resume nudges, then break
 # ---------------------------------------------------------------------------


@@ -286,18 +278,13 @@ async def test_scenario5_empty_exhausts_max_retries():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
-
-  # Model should have been called initial + retries times
-  assert (
-      mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
-  ), f'Expected {_MAX_EMPTY_RESPONSE_RETRIES + 1} LLM calls total'
+  # Model called initial + retries = MAX_RETRIES + 1
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 6: Empty -> Empty -> Good (recovery after multiple retries)
-# Expected: 2 resume nudges, then good response
 # ---------------------------------------------------------------------------


@@ -331,9 +318,9 @@ async def test_scenario6_multiple_empty_then_recovery():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 2, 'Expected 2 retries before recovery'
-
+  # All 3 responses consumed
+  assert mock_model.response_index == 2
+  assert len(_collect_resume_leaks(events)) == 0
   final_texts = [
       p.text
       for e in events
@@ -345,33 +332,29 @@ async def test_scenario6_multiple_empty_then_recovery():


 # ---------------------------------------------------------------------------
-# Scenario 7: lite_llm streaming fallback — verify the empty non-partial
-# LlmResponse is what downstream code would see
+# Scenario 7: lite_llm streaming fallback
 # ---------------------------------------------------------------------------


 def test_scenario7_litellm_fallback_response_is_not_partial():
   """Verify the fallback LlmResponse from lite_llm has partial=False."""
-  # Simulates what lite_llm.py now produces when streaming yields nothing
   fallback = LlmResponse(
       content=types.Content(role='model', parts=[]),
       partial=False,
       finish_reason=types.FinishReason.STOP,
       model_version='test-model',
   )
-  # This should be treated as a final response
   event = Event(
       invocation_id='test',
       author='test_agent',
       content=fallback.content,
-      # partial comes from LlmResponse merge
   )
   assert event.is_final_response() is True
   assert _has_meaningful_content(event) is False


 # ---------------------------------------------------------------------------
-# Scenario 8: Whitespace-only text response (edge case)
+# Scenario 8: Whitespace-only text response
 # ---------------------------------------------------------------------------


@@ -402,12 +385,12 @@ async def test_scenario8_whitespace_only_response_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Whitespace-only should trigger retry'
+  assert mock_model.response_index == 1, 'Expected retry on whitespace'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
-# Scenario 9: Function call response is NOT retried (meaningful content)
+# Scenario 9: Function call is meaningful (not retried)
 # ---------------------------------------------------------------------------


@@ -428,13 +411,11 @@ def test_scenario9_function_call_is_meaningful():
       ),
   )
   assert _has_meaningful_content(event) is True
-  # is_final_response() would be False (has function calls), so the
-  # retry check would never fire for this event anyway.
   assert event.is_final_response() is False


 # ---------------------------------------------------------------------------
-# Scenario 10: Mixed partial+empty then partial+content (no false positive)
+# Scenario 10: Partial empty then partial with content
 # ---------------------------------------------------------------------------


@@ -464,8 +445,6 @@ async def test_scenario10_partial_empty_then_partial_with_content():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, (
-      'First partial empty should retry, second partial with content should'
-      ' break'
-  )
+  # Both responses consumed (retry on first, break on second)
+  assert mock_model.response_index == 1
+  assert len(_collect_resume_leaks(events)) == 0
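Each hunk above begins at the async for collection loop; the shared setup is elided by the diff. A representative wiring is sketched below, where create_invocation_context() and the MockModel constructor are assumed helpers rather than verbatim fixture code:

@pytest.mark.asyncio
async def test_scenario_template():
  # Assumed helpers: a canned-response model and a context factory.
  mock_model = MockModel(responses=[empty_response, good_response])
  agent = Agent(name='test_agent', model=mock_model)
  ctx = create_invocation_context(agent=agent, user_message='hi')

  events = []
  async for event in BaseLlmFlowForTesting().run_async(ctx):
    events.append(event)

  # Retries are observed on the model; the yielded stream stays clean.
  assert mock_model.response_index == 1
  assert len(_collect_resume_leaks(events)) == 0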
