Commit 10f9073

gurjot-05 and claude committed
fix: handle empty response retry for streaming+thinking and no-event cases
The original fix only retried when is_final_response() was True with empty content. This missed two scenarios observed in production: 1. Streaming + thinking: model streams thought chunks (partial=True) then stops with no text — the LiteLLM adapter dropped the response entirely, and the loop broke on last_event.partial without retry. 2. No events at all: model returned content=None which was filtered by _postprocess_async, leaving last_event=None — loop broke immediately. Changes: - lite_llm.py: Add fallback after streaming loop to yield an explicit empty non-partial LlmResponse when nothing was finalized, so downstream retry logic can detect and handle it. - base_llm_flow.py: Restructure run_async() to check for empty responses (None, partial+empty, final+empty) before normal termination, enabling retry across all three scenarios. - Update existing test for new retry-on-None behavior. - Add 12 comprehensive scenario tests covering all cases. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3acd2ac commit 10f9073

4 files changed

Lines changed: 554 additions & 37 deletions


src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 49 additions & 30 deletions
@@ -788,39 +788,58 @@ async def run_async(
       async for event in agen:
         last_event = event
         yield event
-      if not last_event or last_event.partial:
+
+      # Determine if the model returned an empty / useless response that
+      # should be retried. Three cases:
+      # 1. No event at all (model/adapter yielded nothing)
+      # 2. Last event is partial with no meaningful content (streaming +
+      #    thinking: only thought chunks arrived, no final response)
+      # 3. Last event is a final response with no meaningful content
+      #    (non-streaming empty response, or streaming empty aggregated)
+      is_empty_response = False
+      if not last_event:
+        is_empty_response = True
+      elif last_event.partial and not _has_meaningful_content(last_event):
+        is_empty_response = True
+      elif (
+          last_event.is_final_response()
+          and not _has_meaningful_content(last_event)
+          and last_event.author == invocation_context.agent.name
+      ):
+        is_empty_response = True
+
+      if (
+          is_empty_response
+          and empty_response_count < _MAX_EMPTY_RESPONSE_RETRIES
+      ):
+        empty_response_count += 1
+        logger.warning(
+            'Model returned an empty response (attempt %d/%d),'
+            ' injecting resume message and re-prompting.',
+            empty_response_count,
+            _MAX_EMPTY_RESPONSE_RETRIES,
+        )
+        # Inject a resume nudge into the session so the next LLM call
+        # sees it in its context and is more likely to continue.
+        resume_event = Event(
+            invocation_id=invocation_context.invocation_id,
+            author='user',
+            branch=invocation_context.branch,
+            content=types.Content(
+                role='user',
+                parts=[
+                    types.Part.from_text(text=_EMPTY_RESPONSE_RESUME_MESSAGE)
+                ],
+            ),
+        )
+        yield resume_event
+        continue
+
+      # Normal termination conditions.
+      if not last_event or last_event.is_final_response() or last_event.partial:
         if last_event and last_event.partial:
           logger.warning('The last event is partial, which is not expected.')
         break
-      if last_event.is_final_response():
-        if (
-            not _has_meaningful_content(last_event)
-            and last_event.author == invocation_context.agent.name
-            and empty_response_count < _MAX_EMPTY_RESPONSE_RETRIES
-        ):
-          empty_response_count += 1
-          logger.warning(
-              'Model returned an empty response (attempt %d/%d),'
-              ' injecting resume message and re-prompting.',
-              empty_response_count,
-              _MAX_EMPTY_RESPONSE_RETRIES,
-          )
-          # Inject a resume nudge into the session so the next LLM call
-          # sees it in its context and is more likely to continue.
-          resume_event = Event(
-              invocation_id=invocation_context.invocation_id,
-              author='user',
-              branch=invocation_context.branch,
-              content=types.Content(
-                  role='user',
-                  parts=[
-                      types.Part.from_text(text=_EMPTY_RESPONSE_RESUME_MESSAGE)
-                  ],
-              ),
-          )
-          yield resume_event
-          continue
-        break

   async def _run_one_step_async(
       self,
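The retry check relies on _has_meaningful_content, which this hunk does not show. A minimal sketch of what such a helper plausibly looks like, assuming thought-only and empty parts do not count as meaningful (the actual implementation in base_llm_flow.py may differ):

from google.adk.events.event import Event


def _has_meaningful_content(event: Event) -> bool:
  # An event counts as meaningful if any part carries visible text,
  # a function call, or a function response. Thought-only parts
  # (reasoning traces) and empty parts do not count.
  if not event.content or not event.content.parts:
    return False
  for part in event.content.parts:
    if part.text and not part.thought:
      return True
    if part.function_call or part.function_response:
      return True
  return False

Under this reading, a thought-only partial event and an empty final event both fail the check, which is exactly what routes them into the retry path above.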

src/google/adk/models/lite_llm.py

Lines changed: 18 additions & 0 deletions
@@ -2235,6 +2235,7 @@ async def generate_content_async(
      aggregated_llm_response_with_tool_call = None
      usage_metadata = None
      fallback_index = 0
+      last_model_version = None

      def _finalize_tool_call_response(
          *, model_version: str, finish_reason: str
@@ -2319,6 +2320,7 @@ def _reset_stream_buffers() -> None:
        function_calls.clear()

      async for part in await self.llm_client.acompletion(**completion_args):
+        last_model_version = part.model
        for chunk, finish_reason in _model_response_to_chunk(part):
          if isinstance(chunk, FunctionChunk):
            index = chunk.index or fallback_index
@@ -2413,6 +2415,22 @@ def _reset_stream_buffers() -> None:
            )
            _reset_stream_buffers()

+      # Fallback: if the model produced no meaningful output at all (no text,
+      # no reasoning, no tool calls), yield an explicit empty non-partial
+      # response so that downstream retry logic in run_async() can detect it
+      # and re-prompt instead of silently halting.
+      if (
+          not aggregated_llm_response
+          and not aggregated_llm_response_with_tool_call
+          and last_model_version is not None
+      ):
+        aggregated_llm_response = LlmResponse(
+            content=types.Content(role="model", parts=[]),
+            partial=False,
+            finish_reason=_map_finish_reason("stop"),
+            model_version=last_model_version,
+        )
+
      # waiting until streaming ends to yield the llm_response as litellm tends
      # to send chunk that contains usage_metadata after the chunk with
      # finish_reason set to tool_calls or stop.
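The fallback is an instance of a general streaming pattern: a generator that can consume an entire stream without finalizing anything should emit an explicit empty sentinel rather than ending silently, so callers can distinguish "empty result" from "no result yet". A self-contained sketch of the pattern, with illustrative names not taken from lite_llm.py:

import asyncio
from typing import AsyncIterator


async def aggregate_stream(chunks: AsyncIterator[str]) -> AsyncIterator[str]:
  # Collect meaningful chunks; remember whether the stream ran at all.
  buffer = []
  saw_any_chunk = False
  async for chunk in chunks:
    saw_any_chunk = True
    if chunk.strip():
      buffer.append(chunk)
  if buffer:
    yield ''.join(buffer)
  elif saw_any_chunk:
    # Mirrors the lite_llm.py fallback: the stream ran but nothing was
    # finalized, so yield an explicit empty value instead of ending
    # silently. Callers can then detect the empty result and retry.
    yield ''


async def main() -> None:
  async def thought_only() -> AsyncIterator[str]:
    yield '   '  # a whitespace-only chunk, like a thought-only stream

  async for result in aggregate_stream(thought_only()):
    print(repr(result))  # prints '' rather than nothing


asyncio.run(main())

Guarding on last_model_version in the real code serves the same role as saw_any_chunk here: the empty sentinel is only emitted when the provider actually responded.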

tests/unittests/flows/llm_flows/test_base_llm_flow_partial_handling.py

Lines changed: 16 additions & 7 deletions
@@ -91,12 +91,18 @@ async def test_run_async_breaks_on_final_response():


 @pytest.mark.asyncio
-async def test_run_async_breaks_on_no_last_event():
-  """Test that run_async breaks when there is no last event."""
-  # Create a mock model that returns an empty response (no content)
-  empty_response = LlmResponse(content=None, partial=False)
+async def test_run_async_retries_then_breaks_on_no_last_event():
+  """Test that run_async retries when there is no last event, then breaks."""
+  # Create a mock model that returns empty responses (no content).
+  # Need enough responses to cover initial call + max retries.
+  from google.adk.flows.llm_flows.base_llm_flow import _MAX_EMPTY_RESPONSE_RETRIES

-  mock_model = testing_utils.MockModel.create(responses=[empty_response])
+  empty_responses = [
+      LlmResponse(content=None, partial=False)
+      for _ in range(_MAX_EMPTY_RESPONSE_RETRIES + 1)
+  ]
+
+  mock_model = testing_utils.MockModel.create(responses=empty_responses)

   agent = Agent(name='test_agent', model=mock_model)
   invocation_context = await testing_utils.create_invocation_context(
@@ -110,8 +116,11 @@ async def test_run_async_breaks_on_no_last_event():
   async for event in flow.run_async(invocation_context):
     events.append(event)

-  # Should have no events because empty responses are filtered out
-  assert len(events) == 0
+  # Should have resume events from retry attempts (one per retry).
+  # The empty LlmResponse has content=None, so _postprocess_async filters
+  # it out — no model events are yielded, only resume nudge events.
+  resume_events = [e for e in events if e.author == 'user']
+  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES


 @pytest.mark.asyncio
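The 12 new scenario tests live in a file not shown in this view. As a hedged sketch of what the streaming + thinking case could look like, following the conventions of the test above; it assumes this file's existing imports (LlmResponse, types, Agent, testing_utils, and a SingleFlow-style flow), and the test name and mock responses are illustrative:

@pytest.mark.asyncio
async def test_run_async_retries_on_thought_only_partial_stream():
  """Sketch: a thought-only partial stream should hit the retry path
  instead of breaking on last_event.partial."""
  from google.adk.flows.llm_flows.base_llm_flow import _MAX_EMPTY_RESPONSE_RETRIES

  # A chunk with only reasoning content: partial=True, no visible text.
  thought_only = LlmResponse(
      content=types.Content(
          role='model',
          parts=[types.Part(text='thinking...', thought=True)],
      ),
      partial=True,
  )
  responses = [thought_only for _ in range(_MAX_EMPTY_RESPONSE_RETRIES + 1)]
  mock_model = testing_utils.MockModel.create(responses=responses)

  agent = Agent(name='test_agent', model=mock_model)
  invocation_context = await testing_utils.create_invocation_context(
      agent=agent
  )

  flow = SingleFlow()
  events = []
  async for event in flow.run_async(invocation_context):
    events.append(event)

  # One resume nudge (author='user') per retry attempt.
  resume_events = [e for e in events if e.author == 'user']
  assert len(resume_events) == _MAX_EMPTY_RESPONSE_RETRIES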
