Skip to content

Commit 6dcd247

Browse files
fix(litellm): handle missing usage attribute on ModelResponseStream (strands-agents#1520)
1 parent: 456b70a · commit: 6dcd247

3 files changed

Lines changed: 124 additions & 2 deletions

File tree

src/strands/models/litellm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -547,8 +547,8 @@ async def _handle_streaming_response(self, litellm_request: dict[str, Any]) -> A
547547
# Skip remaining events as we don't have use for anything except the final usage payload
548548
async for event in response:
549549
_ = event
550-
if event.usage:
551-
yield self.format_chunk({"chunk_type": "metadata", "data": event.usage})
550+
if usage := getattr(event, "usage", None):
551+
yield self.format_chunk({"chunk_type": "metadata", "data": usage})
552552

553553
logger.debug("finished streaming response from model")
554554

tests/strands/models/test_litellm.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,3 +711,104 @@ def test_stream_switch_content_different_type_no_prev():
711711
assert len(chunks) == 1
712712
assert chunks[0]["contentBlockStart"] == {"start": {}}
713713
assert data_type == "text"
714+
715+
716+
@pytest.mark.asyncio
async def test_stream_with_events_missing_usage_attribute(
    litellm_acompletion, api_key, model_id, model, agenerator, alist
):
    """Streaming must tolerate events that lack a ``usage`` attribute.

    Regression test for: 'ModelResponseStream' object has no attribute
    'usage'. Real ModelResponseStream chunks (unlike the non-streaming
    ModelResponse) carry no ``usage`` attribute, so the stream handler must
    probe for it with ``getattr`` instead of reading ``event.usage`` directly.
    """

    class ChunkWithoutUsage:
        """Stand-in for ModelResponseStream: deliberately has no ``usage``."""

        def __init__(self, choices=None):
            self.choices = [] if not choices else choices

    delta = unittest.mock.Mock(content="Hello", tool_calls=None, reasoning_content=None)
    stream_chunks = [
        ChunkWithoutUsage([unittest.mock.Mock(finish_reason=None, delta=delta)]),
        ChunkWithoutUsage([unittest.mock.Mock(finish_reason="stop", delta=delta)]),
        # Events after finish_reason also lack 'usage' on the real stream.
        ChunkWithoutUsage(),
        ChunkWithoutUsage(),
    ]
    litellm_acompletion.side_effect = unittest.mock.AsyncMock(
        return_value=agenerator(stream_chunks)
    )

    messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
    response = model.stream(messages)

    # Draining the stream must not raise AttributeError on the mock chunks.
    events = await alist(response)

    assert events[0] == {"messageStart": {"role": "assistant"}}
    assert {"messageStop": {"stopReason": "end_turn"}} in events
    # No usage was provided, so no metadata event may be emitted.
    assert all("metadata" not in event for event in events)
764+
765+
766+
@pytest.mark.asyncio
async def test_stream_with_usage_in_final_event(litellm_acompletion, api_key, model_id, model, agenerator, alist):
    """Usage present on the final stream event is surfaced as metadata.

    When usage data IS available (e.g. with stream_options.include_usage
    enabled), the final stream event carries token counts; the handler must
    translate it into exactly one metadata event with the mapped
    inputTokens/outputTokens/totalTokens values.
    """

    class PlainChunk:
        """Streaming chunk without a ``usage`` attribute."""

        def __init__(self, choices=None):
            self.choices = [] if not choices else choices

    class UsageChunk:
        """Terminal chunk that carries the usage payload."""

        def __init__(self, usage):
            self.choices = []
            self.usage = usage

    delta = unittest.mock.Mock(content="Hi", tool_calls=None, reasoning_content=None)

    # Final event's usage payload.
    usage = unittest.mock.Mock()
    usage.prompt_tokens = 10
    usage.completion_tokens = 5
    usage.total_tokens = 15
    usage.prompt_tokens_details = None
    usage.cache_creation_input_tokens = None

    incoming = [
        PlainChunk([unittest.mock.Mock(finish_reason=None, delta=delta)]),
        PlainChunk([unittest.mock.Mock(finish_reason="stop", delta=delta)]),
        UsageChunk(usage),
    ]
    litellm_acompletion.side_effect = unittest.mock.AsyncMock(
        return_value=agenerator(incoming)
    )

    messages = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
    outgoing = await alist(model.stream(messages))

    # Exactly one metadata event, with the usage counts mapped through.
    metadata_events = [e for e in outgoing if "metadata" in e]
    assert len(metadata_events) == 1
    reported = metadata_events[0]["metadata"]["usage"]
    assert reported["inputTokens"] == 10
    assert reported["outputTokens"] == 5
    assert reported["totalTokens"] == 15

tests_integ/models/test_model_litellm.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,27 @@ def test_structured_output_unsupported_model(model, nested_weather):
236236
mock_schema.assert_not_called()
237237

238238

239+
@pytest.mark.parametrize("model_fixture", ["streaming_model", "non_streaming_model"])
def test_streaming_returns_usage_metrics(model_fixture, request):
    """Both streaming and non-streaming flows must report usage metrics.

    Regression test for: 'ModelResponseStream' object has no attribute
    'usage'. Extracting usage from the response stream used to raise, so a
    populated accumulated_usage proves the streaming path stayed intact.
    """
    model = request.getfixturevalue(model_fixture)
    agent = Agent(model=model)
    result = agent("Say hello")

    # Would be empty/None if usage extraction broke during streaming.
    usage = result.metrics.accumulated_usage
    assert usage is not None
    for metric in ("inputTokens", "outputTokens", "totalTokens"):
        assert usage[metric] > 0
258+
259+
239260
@pytest.mark.asyncio
240261
async def test_cache_read_tokens_multi_turn(model):
241262
"""Integration test for cache read tokens in multi-turn conversation."""

0 commit comments

Comments (0)