
Commit 101ce8c

fix: append resume message to session instead of yielding to UI
The resume nudge event was being yielded from run_async(), which sent it through the SSE stream to the frontend. Users saw "Your previous response was empty" as a visible chat message.

Fix: use session_service.append_event() to write the resume message directly to the session history. The model sees it on the next call (for better recovery), but it never reaches the UI/SSE stream.
1 parent e6289db commit 101ce8c

3 files changed

Lines changed: 71 additions & 85 deletions
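To make the fix concrete, here is a minimal sketch of the before/after pattern. This is not the actual ADK source: the retry loop is collapsed and make_resume_event() is a hypothetical helper standing in for the Event construction shown in the diff below.

# Sketch only: why append_event() hides the nudge from the UI while a
# yield would expose it. make_resume_event() is a hypothetical helper.
async def run_async_sketch(invocation_context):
  # ... an empty model response was detected; build the nudge ...
  resume_event = make_resume_event(invocation_context)

  # Before: yielding pushes the event into the output stream that the
  # runner forwards over SSE, so the frontend rendered it as a message.
  # yield resume_event

  # After: appending persists the event in session history only; the
  # next LLM call is built from that history, so the model sees the
  # nudge but the stream (and the UI) never does.
  await invocation_context.session_service.append_event(
      session=invocation_context.session,
      event=resume_event,
  )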


src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 7 additions & 1 deletion
@@ -821,7 +821,10 @@ async def run_async(
         )
         # Inject a resume nudge into the session so the next LLM call
         # sees it in its context and is more likely to continue.
+        # We append directly to the session (not yield) so that the
+        # message reaches the model but is NOT sent to the UI/SSE stream.
         resume_event = Event(
+            id=Event.new_id(),
             invocation_id=invocation_context.invocation_id,
             author='user',
             branch=invocation_context.branch,
@@ -832,7 +835,10 @@
                 ],
             ),
         )
-        yield resume_event
+        await invocation_context.session_service.append_event(
+            session=invocation_context.session,
+            event=resume_event,
+        )
         continue

         # Normal termination conditions.
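Why the appended event still reaches the model: on each call, the request contents are assembled from the session's event history. The sketch below is illustrative only; build_request_contents() and the session.events field are assumptions about the shape of that step, not ADK internals.

# Illustrative assumption: a contents builder that walks session history.
# Any event persisted via session_service.append_event(), including the
# resume nudge, becomes part of the next request's context.
def build_request_contents(session):
  return [
      event.content
      for event in session.events
      if event.content and event.content.parts
  ]

The explicit id=Event.new_id() in the hunk fits this path: an event written straight to storage no longer passes through the yield pipeline that previously handled it. That reading is an inference from the diff, not a documented contract.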

tests/unittests/flows/llm_flows/test_base_llm_flow_partial_handling.py

Lines changed: 6 additions & 5 deletions
@@ -116,11 +116,12 @@ async def test_run_async_retries_then_breaks_on_no_last_event():
   async for event in flow.run_async(invocation_context):
     events.append(event)

-  # Should have resume events from retry attempts (one per retry).
-  # The empty LlmResponse has content=None, so _postprocess_async filters
-  # it out — no model events are yielded, only resume nudge events.
-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
+  # Resume events are appended to session (not yielded), so no user
+  # events should appear in the output stream. Verify retries happened
+  # by checking how many responses were consumed.
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  leaked = [e for e in events if e.author == 'user']
+  assert len(leaked) == 0, 'Resume messages must not leak to output'


 @pytest.mark.asyncio
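The new assertions read mock_model.response_index as the index of the last response the model served: one call leaves it at 0, so N retries leave it at N. A minimal mock consistent with that contract might look like the sketch below; it is an assumed stand-in for the test suite's real fixture, not its code.

# Assumed fixture sketch: serves canned responses and tracks the index
# of the last one consumed (starts at -1, so the first call leaves it at 0).
class MockModel:

  def __init__(self, responses):
    self.responses = responses
    self.response_index = -1

  async def generate_content_async(self, llm_request, stream=False):
    self.response_index += 1
    yield self.responses[self.response_index]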

tests/unittests/flows/llm_flows/test_empty_response_all_scenarios.py

Lines changed: 58 additions & 79 deletions
@@ -14,15 +14,10 @@

 """Comprehensive tests for empty model response retry across all scenarios.

-Covers:
-Scenario 1: Non-streaming empty response (parts: [], is_final_response=True)
-Scenario 2: Streaming + thinking, thought-only final (is_final_response=True)
-Scenario 3a: No events yielded at all (last_event=None)
-Scenario 3b: Partial event with no meaningful content (last_event.partial=True)
-Scenario 4: Partial event WITH meaningful content (should NOT retry)
-Scenario 5: Empty response after max retries (should stop)
-Scenario 6: Empty then good response (recovery)
-Scenario 7: lite_llm streaming fallback (empty non-partial response yielded)
+The resume message is appended directly to the session (not yielded),
+so it reaches the model on retry but never leaks to the UI/SSE stream.
+We verify retries via mock_model.response_index and confirm no user
+events are yielded.
 """

 from google.adk.agents.llm_agent import Agent
@@ -41,11 +36,23 @@ class BaseLlmFlowForTesting(BaseLlmFlow):
   pass


+def _collect_resume_leaks(events):
+  """Return any resume nudge events that leaked to the output stream."""
+  return [
+      e
+      for e in events
+      if e.author == 'user'
+      and e.content
+      and e.content.parts
+      and any(
+          'previous response was empty' in (p.text or '')
+          for p in e.content.parts
+      )
+  ]
+
+
 # ---------------------------------------------------------------------------
-# Scenario 1: Non-streaming empty response (the original bug from adk_combined.log)
-# Model returns parts: [], partial=False, finish_reason=STOP
-# is_final_response() -> True, _has_meaningful_content() -> False
-# Expected: retry with resume message, then succeed
+# Scenario 1: Non-streaming empty response, then recovery
 # ---------------------------------------------------------------------------


@@ -73,25 +80,23 @@ async def test_scenario1_non_streaming_empty_then_recovery():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  # Should see: empty model event, resume nudge, good model event
-  resume_events = [e for e in events if e.author == 'user']
-  model_events = [
-      e
+  # Model called twice (empty + good)
+  assert mock_model.response_index == 1
+  # Resume message must NOT leak to UI
+  assert len(_collect_resume_leaks(events)) == 0
+  # Good response should be in output
+  good_texts = [
+      p.text
       for e in events
-      if e.author == 'test_agent' and e.content and e.content.parts
+      if e.content and e.content.parts
+      for p in e.content.parts
+      if p.text
   ]
-  assert len(resume_events) == 1, 'Expected exactly 1 resume nudge'
-  good_texts = [p.text for e in model_events for p in e.content.parts if p.text]
-  assert any(
-      'answer' in t for t in good_texts
-  ), 'Expected good response after retry'
+  assert any('answer' in t for t in good_texts)


 # ---------------------------------------------------------------------------
-# Scenario 2: Streaming + thinking, thought-only final response
-# Model returns thought parts only, partial=False
-# is_final_response() -> True, _has_meaningful_content() -> False (thought-only)
-# Expected: retry with resume message
+# Scenario 2: Thought-only final response triggers retry
 # ---------------------------------------------------------------------------


@@ -122,21 +127,18 @@ async def test_scenario2_thought_only_final_response_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on thought-only response'
+  assert mock_model.response_index == 1, 'Expected 2 LLM calls (retry)'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 3a: No events yielded at all (last_event=None)
-# _postprocess_async filters out LlmResponse with content=None
-# Expected: retry with resume message
 # ---------------------------------------------------------------------------


 @pytest.mark.asyncio
 async def test_scenario3a_no_events_at_all_retried():
   """When _run_one_step yields nothing, retry fires."""
-  # content=None means _postprocess_async returns without yielding
   empty_responses = [
       LlmResponse(content=None, partial=False)
       for _ in range(_MAX_EMPTY_RESPONSE_RETRIES + 1)
@@ -151,16 +153,13 @@ async def test_scenario3a_no_events_at_all_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert (
-      len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
-  ), f'Expected {_MAX_EMPTY_RESPONSE_RETRIES} resume nudges'
+  # Model called initial + retries times
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 3b: Partial event with no meaningful content
-# Streaming + thinking: last event is partial with thought-only content
-# Expected: retry with resume message
 # ---------------------------------------------------------------------------


@@ -188,8 +187,8 @@ async def test_scenario3b_partial_empty_content_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on partial empty event'
+  assert mock_model.response_index == 1, 'Expected retry on partial empty'
+  assert len(_collect_resume_leaks(events)) == 0


 @pytest.mark.asyncio
@@ -219,14 +218,12 @@ async def test_scenario3b_partial_thought_only_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Expected retry on partial thought-only event'
+  assert mock_model.response_index == 1, 'Expected retry on partial thought'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 4: Partial event WITH meaningful content (should NOT retry)
-# This is a normal streaming state — partial + real text content.
-# Expected: break with warning, no retry
 # ---------------------------------------------------------------------------


@@ -250,19 +247,14 @@ async def test_scenario4_partial_with_meaningful_content_not_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert (
-      len(resume_events) == 0
-  ), 'Partial event with real content should NOT trigger retry'
-  # The partial event itself should be yielded
+  # Only 1 LLM call — no retry
+  assert mock_model.response_index == 0
   partial_events = [e for e in events if e.partial]
   assert len(partial_events) == 1


 # ---------------------------------------------------------------------------
 # Scenario 5: Empty response exhausts max retries
-# Model keeps returning empty — should stop after _MAX_EMPTY_RESPONSE_RETRIES
-# Expected: exactly _MAX_EMPTY_RESPONSE_RETRIES resume nudges, then break
 # ---------------------------------------------------------------------------


@@ -286,18 +278,13 @@ async def test_scenario5_empty_exhausts_max_retries():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES
-
-  # Model should have been called initial + retries times
-  assert (
-      mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
-  ), f'Expected {_MAX_EMPTY_RESPONSE_RETRIES + 1} LLM calls total'
+  # Model called initial + retries = MAX_RETRIES + 1
+  assert mock_model.response_index == _MAX_EMPTY_RESPONSE_RETRIES
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
 # Scenario 6: Empty -> Empty -> Good (recovery after multiple retries)
-# Expected: 2 resume nudges, then good response
 # ---------------------------------------------------------------------------


@@ -331,9 +318,9 @@ async def test_scenario6_multiple_empty_then_recovery():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 2, 'Expected 2 retries before recovery'
-
+  # All 3 responses consumed
+  assert mock_model.response_index == 2
+  assert len(_collect_resume_leaks(events)) == 0
   final_texts = [
       p.text
       for e in events
@@ -345,33 +332,29 @@ async def test_scenario6_multiple_empty_then_recovery():


 # ---------------------------------------------------------------------------
-# Scenario 7: lite_llm streaming fallback — verify the empty non-partial
-# LlmResponse is what downstream code would see
+# Scenario 7: lite_llm streaming fallback
 # ---------------------------------------------------------------------------


 def test_scenario7_litellm_fallback_response_is_not_partial():
   """Verify the fallback LlmResponse from lite_llm has partial=False."""
-  # Simulates what lite_llm.py now produces when streaming yields nothing
   fallback = LlmResponse(
       content=types.Content(role='model', parts=[]),
       partial=False,
       finish_reason=types.FinishReason.STOP,
       model_version='test-model',
   )
-  # This should be treated as a final response
   event = Event(
       invocation_id='test',
       author='test_agent',
       content=fallback.content,
-      # partial comes from LlmResponse merge
   )
   assert event.is_final_response() is True
   assert _has_meaningful_content(event) is False


 # ---------------------------------------------------------------------------
-# Scenario 8: Whitespace-only text response (edge case)
+# Scenario 8: Whitespace-only text response
 # ---------------------------------------------------------------------------


@@ -402,12 +385,12 @@ async def test_scenario8_whitespace_only_response_retried():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, 'Whitespace-only should trigger retry'
+  assert mock_model.response_index == 1, 'Expected retry on whitespace'
+  assert len(_collect_resume_leaks(events)) == 0


 # ---------------------------------------------------------------------------
-# Scenario 9: Function call response is NOT retried (meaningful content)
+# Scenario 9: Function call is meaningful (not retried)
 # ---------------------------------------------------------------------------


@@ -428,13 +411,11 @@ def test_scenario9_function_call_is_meaningful():
       ),
   )
   assert _has_meaningful_content(event) is True
-  # is_final_response() would be False (has function calls), so the
-  # retry check would never fire for this event anyway.
   assert event.is_final_response() is False


 # ---------------------------------------------------------------------------
-# Scenario 10: Mixed partial+empty then partial+content (no false positive)
+# Scenario 10: Partial empty then partial with content
 # ---------------------------------------------------------------------------


@@ -464,8 +445,6 @@ async def test_scenario10_partial_empty_then_partial_with_content():
   async for event in BaseLlmFlowForTesting().run_async(ctx):
     events.append(event)

-  resume_events = [e for e in events if e.author == 'user']
-  assert len(resume_events) == 1, (
-      'First partial empty should retry, second partial with content should'
-      ' break'
-  )
+  # Both responses consumed (retry on first, break on second)
+  assert mock_model.response_index == 1
+  assert len(_collect_resume_leaks(events)) == 0
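Each hunk above begins at the async for collection loop; the shared setup is elided by the diff. A representative wiring is sketched below, where create_invocation_context() and the MockModel constructor are assumed helpers rather than verbatim fixture code:

@pytest.mark.asyncio
async def test_scenario_template():
  # Assumed helpers: a canned-response model and a context factory.
  mock_model = MockModel(responses=[empty_response, good_response])
  agent = Agent(name='test_agent', model=mock_model)
  ctx = create_invocation_context(agent=agent, user_message='hi')

  events = []
  async for event in BaseLlmFlowForTesting().run_async(ctx):
    events.append(event)

  # Retries are observed on the model; the yielded stream stays clean.
  assert mock_model.response_index == 1
  assert len(_collect_resume_leaks(events)) == 0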
