From 4514d4dad9ff642d73864aec1e223f7b4a4d9ae4 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Mar 2026 11:13:22 +0100
Subject: [PATCH 1/3] test(openai): Replace mocks with httpx types for
 streaming Completions

---
 tests/integrations/openai/test_openai.py | 684 +++++++++++++----------
 1 file changed, 393 insertions(+), 291 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index d06bfecbc0..d4ac9d1bbe 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -489,7 +489,12 @@ def tiktoken_encoding_if_installed():
     ],
 )
 def test_streaming_chat_completion_no_prompts(
-    sentry_init, capture_events, send_default_pii, include_prompts
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    get_model_response,
+    server_side_event_chunks,
 ):
     sentry_init(
         integrations=[
@@ -504,61 +509,76 @@ def test_streaming_chat_completion_no_prompts(
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_stream = Stream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = [
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-    ]
-
-    client.chat.completions._post = mock.Mock(return_value=returned_stream)
-    with start_transaction(name="openai tx"):
-        response_stream = client.chat.completions.create(
-            model="some-model",
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": "hello"},
+    returned_stream = get_model_response(
+        server_side_event_chunks(
+            [
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=0,
+                            delta=ChoiceDelta(content="hel"),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=1,
+                            delta=ChoiceDelta(content="lo "),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=2,
+                            delta=ChoiceDelta(content="world"),
+                            finish_reason="stop",
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
             ],
-            stream=True,
-            max_tokens=100,
-            presence_penalty=0.1,
-            frequency_penalty=0.2,
-            temperature=0.7,
-            top_p=0.9,
-        )
-        response_string = "".join(
-            map(lambda x: x.choices[0].delta.content, response_stream)
+            include_event_type=False,
         )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = client.chat.completions.create(
+                model="some-model",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "hello"},
+                ],
+                stream=True,
+                max_tokens=100,
+                presence_penalty=0.1,
+                frequency_penalty=0.2,
+                temperature=0.7,
+                top_p=0.9,
+            )
+            response_string = "".join(
+                map(lambda x: x.choices[0].delta.content, response_stream)
+            )
+
     assert response_string == "hello world"
     tx = events[0]
     assert tx["type"] == "transaction"
@@ -634,7 +654,14 @@ def test_streaming_chat_completion_no_prompts(
         ),
     ],
 )
-def test_streaming_chat_completion(sentry_init, capture_events, messages, request):
+def test_streaming_chat_completion(
+    sentry_init,
+    capture_events,
+    messages,
+    request,
+    get_model_response,
+    server_side_event_chunks,
+):
     sentry_init(
         integrations=[
             OpenAIIntegration(
@@ -648,58 +675,72 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_stream = Stream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = [
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
-                )
+    returned_stream = get_model_response(
+        server_side_event_chunks(
+            [
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=0,
+                            delta=ChoiceDelta(content="hel"),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=1,
+                            delta=ChoiceDelta(content="lo "),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=2,
+                            delta=ChoiceDelta(content="world"),
+                            finish_reason="stop",
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
             ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-    ]
-
-    client.chat.completions._post = mock.Mock(return_value=returned_stream)
-    with start_transaction(name="openai tx"):
-        response_stream = client.chat.completions.create(
-            model="some-model",
-            messages=messages,
-            stream=True,
-            max_tokens=100,
-            presence_penalty=0.1,
-            frequency_penalty=0.2,
-            temperature=0.7,
-            top_p=0.9,
-        )
-        response_string = "".join(
-            map(lambda x: x.choices[0].delta.content, response_stream)
+            include_event_type=False,
         )
+    )
+
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = client.chat.completions.create(
+                model="some-model",
+                messages=messages,
+                stream=True,
+                max_tokens=100,
+                presence_penalty=0.1,
+                frequency_penalty=0.2,
+                temperature=0.7,
+                top_p=0.9,
+            )
+            response_string = "".join(
+                map(lambda x: x.choices[0].delta.content, response_stream)
+            )
     assert response_string == "hello world"
     tx = events[0]
     assert tx["type"] == "transaction"
@@ -766,7 +807,13 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques
     ],
 )
 async def test_streaming_chat_completion_async_no_prompts(
-    sentry_init, capture_events, send_default_pii, include_prompts, async_iterator
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
 ):
     sentry_init(
         integrations=[
@@ -781,66 +828,78 @@ async def test_streaming_chat_completion_async_no_prompts(
     events = capture_events()
 
     client = AsyncOpenAI(api_key="z")
-    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = async_iterator(
-        [
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=2,
-                        delta=ChoiceDelta(content="world"),
-                        finish_reason="stop",
-                    )
+    returned_stream = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                [
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=0,
+                                delta=ChoiceDelta(content="hel"),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=1,
+                                delta=ChoiceDelta(content="lo "),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=2,
+                                delta=ChoiceDelta(content="world"),
+                                finish_reason="stop",
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
                 ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-        ]
+                include_event_type=False,
+            )
+        )
     )
 
-    client.chat.completions._post = AsyncMock(return_value=returned_stream)
-    with start_transaction(name="openai tx"):
-        response_stream = await client.chat.completions.create(
-            model="some-model",
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": "hello"},
-            ],
-            stream=True,
-            max_tokens=100,
-            presence_penalty=0.1,
-            frequency_penalty=0.2,
-            temperature=0.7,
-            top_p=0.9,
-        )
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = await client.chat.completions.create(
+                model="some-model",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "hello"},
+                ],
+                stream=True,
+                max_tokens=100,
+                presence_penalty=0.1,
+                frequency_penalty=0.2,
+                temperature=0.7,
+                top_p=0.9,
+            )
 
-        response_string = ""
-        async for x in response_stream:
-            response_string += x.choices[0].delta.content
+            response_string = ""
+            async for x in response_stream:
+                response_string += x.choices[0].delta.content
 
     assert response_string == "hello world"
     tx = events[0]
@@ -920,7 +979,13 @@ async def test_streaming_chat_completion_async_no_prompts(
     ],
 )
 async def test_streaming_chat_completion_async(
-    sentry_init, capture_events, messages, request, async_iterator
+    sentry_init,
+    capture_events,
+    messages,
+    request,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
 ):
     sentry_init(
         integrations=[
@@ -935,63 +1000,76 @@ async def test_streaming_chat_completion_async(
     events = capture_events()
 
     client = AsyncOpenAI(api_key="z")
-    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = async_iterator(
-        [
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=2,
-                        delta=ChoiceDelta(content="world"),
-                        finish_reason="stop",
-                    )
+
+    returned_stream = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                [
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=0,
+                                delta=ChoiceDelta(content="hel"),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=1,
+                                delta=ChoiceDelta(content="lo "),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=2,
+                                delta=ChoiceDelta(content="world"),
+                                finish_reason="stop",
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
                 ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-        ]
+                include_event_type=False,
+            )
+        )
     )
 
-    client.chat.completions._post = AsyncMock(return_value=returned_stream)
-    with start_transaction(name="openai tx"):
-        response_stream = await client.chat.completions.create(
-            model="some-model",
-            messages=messages,
-            stream=True,
-            max_tokens=100,
-            presence_penalty=0.1,
-            frequency_penalty=0.2,
-            temperature=0.7,
-            top_p=0.9,
-        )
+    with mock.patch.object(
+        client.chat._client._client,
+        "send",
+        return_value=returned_stream,
+    ):
+        with start_transaction(name="openai tx"):
+            response_stream = await client.chat.completions.create(
+                model="some-model",
+                messages=messages,
+                stream=True,
+                max_tokens=100,
+                presence_penalty=0.1,
+                frequency_penalty=0.2,
+                temperature=0.7,
+                top_p=0.9,
+            )
 
-        response_string = ""
-        async for x in response_stream:
-            response_string += x.choices[0].delta.content
+            response_string = ""
+            async for x in response_stream:
+                response_string += x.choices[0].delta.content
 
     assert response_string == "hello world"
     tx = events[0]
@@ -1516,7 +1594,9 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events):
     assert event["spans"][0]["origin"] == "auto.ai.openai"
 
 
-def test_span_origin_streaming_chat(sentry_init, capture_events):
+def test_span_origin_streaming_chat(
+    sentry_init, capture_events, get_model_response, server_side_event_chunks
+):
     sentry_init(
         integrations=[OpenAIIntegration()],
         traces_sample_rate=1.0,
@@ -1524,42 +1604,52 @@ def test_span_origin_streaming_chat(sentry_init, capture_events):
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_stream = Stream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = [
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                )
-            ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-        ChatCompletionChunk(
-            id="1",
-            choices=[
-                DeltaChoice(
-                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
-                )
+    returned_stream = get_model_response(
+        server_side_event_chunks(
+            [
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=0,
+                            delta=ChoiceDelta(content="hel"),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=1,
+                            delta=ChoiceDelta(content="lo "),
+                            finish_reason=None,
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[
+                        DeltaChoice(
+                            index=2,
+                            delta=ChoiceDelta(content="world"),
+                            finish_reason="stop",
+                        )
+                    ],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                ),
             ],
-            created=100000,
-            model="model-id",
-            object="chat.completion.chunk",
-        ),
-    ]
+            include_event_type=False,
+        )
+    )
 
     client.chat.completions._post = mock.Mock(return_value=returned_stream)
     with start_transaction(name="openai tx"):
@@ -1577,7 +1667,11 @@ def test_span_origin_streaming_chat(sentry_init, capture_events):
 
 @pytest.mark.asyncio
 async def test_span_origin_streaming_chat_async(
-    sentry_init, capture_events, async_iterator
+    sentry_init,
+    capture_events,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
 ):
     sentry_init(
         integrations=[OpenAIIntegration()],
@@ -1586,45 +1680,53 @@ async def test_span_origin_streaming_chat_async(
     events = capture_events()
 
     client = AsyncOpenAI(api_key="z")
-    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = async_iterator(
-        [
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
-                    )
-                ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-            ChatCompletionChunk(
-                id="1",
-                choices=[
-                    DeltaChoice(
-                        index=2,
-                        delta=ChoiceDelta(content="world"),
-                        finish_reason="stop",
-                    )
+    returned_stream = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                [
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=0,
+                                delta=ChoiceDelta(content="hel"),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=1,
+                                delta=ChoiceDelta(content="lo "),
+                                finish_reason=None,
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
+                    ChatCompletionChunk(
+                        id="1",
+                        choices=[
+                            DeltaChoice(
+                                index=2,
+                                delta=ChoiceDelta(content="world"),
+                                finish_reason="stop",
+                            )
+                        ],
+                        created=100000,
+                        model="model-id",
+                        object="chat.completion.chunk",
+                    ),
                 ],
-                created=100000,
-                model="model-id",
-                object="chat.completion.chunk",
-            ),
-        ]
+                include_event_type=False,
+            )
+        )
     )
 
     client.chat.completions._post = AsyncMock(return_value=returned_stream)

From a2748fd4604aed3e6651baade1d7db706dd46e32 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Mar 2026 11:28:27 +0100
Subject: [PATCH 2/3] test(openai): Add usage chunks and token count
 assertions to streaming Completions tests

---
 tests/integrations/openai/test_openai.py | 52 ++++++++++++++++++++----
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index d4ac9d1bbe..5c78d6b552 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -16,6 +16,7 @@
 
 from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError
 from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
+from openai.types.completion_usage import CompletionTokensDetails, PromptTokensDetails
 from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
@@ -551,6 +552,28 @@ def test_streaming_chat_completion_no_prompts(
                     model="model-id",
                     object="chat.completion.chunk",
                 ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                    usage=CompletionUsage(
+                        prompt_tokens=10,
+                        completion_tokens=5,
+                        total_tokens=15,
+                        prompt_tokens_details=PromptTokensDetails(
+                            audio_tokens=10,
+                            cached_tokens=20,
+                        ),
+                        completion_tokens_details=CompletionTokensDetails(
+                            reasoning_tokens=5,
+                            audio_tokens=3,
+                            accepted_prediction_tokens=7,
+                            rejected_prediction_tokens=2,
+                        ),
+                    ),
+                ),
             ],
             include_event_type=False,
         )
@@ -575,11 +598,9 @@ def test_streaming_chat_completion_no_prompts(
                 temperature=0.7,
                 top_p=0.9,
             )
-            response_string = "".join(
-                map(lambda x: x.choices[0].delta.content, response_stream)
-            )
+            for _ in response_stream:
+                pass
 
-    assert response_string == "hello world"
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
@@ -596,6 +617,10 @@ def test_streaming_chat_completion_no_prompts(
 
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id"
 
+    assert span["data"]["gen_ai.usage.output_tokens"] == 2
+    assert span["data"]["gen_ai.usage.input_tokens"] == 7
+    assert span["data"]["gen_ai.usage.total_tokens"] == 9
+
     assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"]
     assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
     assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
@@ -717,6 +742,18 @@ def test_streaming_chat_completion(
                     model="model-id",
                     object="chat.completion.chunk",
                 ),
+                ChatCompletionChunk(
+                    id="1",
+                    choices=[],
+                    created=100000,
+                    model="model-id",
+                    object="chat.completion.chunk",
+                    usage=CompletionUsage(
+                        prompt_tokens=10,
+                        completion_tokens=5,
+                        total_tokens=15,
+                    ),
+                ),
             ],
             include_event_type=False,
         )
@@ -738,10 +775,9 @@ def test_streaming_chat_completion(
                 temperature=0.7,
                 top_p=0.9,
             )
-            response_string = "".join(
-                map(lambda x: x.choices[0].delta.content, response_stream)
-            )
-    assert response_string == "hello world"
+            for _ in response_stream:
+                pass
+
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]

From e05ea81bf3ec1c2e008dd9ae2f60b7aa6ae211f6 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Mar 2026 11:38:32 +0100
Subject: [PATCH 3/3] test(openai): Use Stream mocks in span origin streaming
 tests

---
 tests/integrations/openai/test_openai.py | 174 ++++++++++-------------
 1 file changed, 75 insertions(+), 99 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index d4ac9d1bbe..0ca1a17ead 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1594,9 +1594,7 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events):
     assert event["spans"][0]["origin"] == "auto.ai.openai"
 
 
-def test_span_origin_streaming_chat(
-    sentry_init, capture_events, get_model_response, server_side_event_chunks
-):
+def test_span_origin_streaming_chat(sentry_init, capture_events):
     sentry_init(
         integrations=[OpenAIIntegration()],
         traces_sample_rate=1.0,
@@ -1604,52 +1602,42 @@ def test_span_origin_streaming_chat(
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_stream = get_model_response(
-        server_side_event_chunks(
-            [
-                ChatCompletionChunk(
-                    id="1",
-                    choices=[
-                        DeltaChoice(
-                            index=0,
-                            delta=ChoiceDelta(content="hel"),
-                            finish_reason=None,
-                        )
-                    ],
-                    created=100000,
-                    model="model-id",
-                    object="chat.completion.chunk",
-                ),
-                ChatCompletionChunk(
-                    id="1",
-                    choices=[
-                        DeltaChoice(
-                            index=1,
-                            delta=ChoiceDelta(content="lo "),
-                            finish_reason=None,
-                        )
-                    ],
-                    created=100000,
-                    model="model-id",
-                    object="chat.completion.chunk",
-                ),
-                ChatCompletionChunk(
-                    id="1",
-                    choices=[
-                        DeltaChoice(
-                            index=2,
-                            delta=ChoiceDelta(content="world"),
-                            finish_reason="stop",
-                        )
-                    ],
-                    created=100000,
-                    model="model-id",
-                    object="chat.completion.chunk",
-                ),
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
+                )
             ],
-            include_event_type=False,
-        )
-    )
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=2, delta=ChoiceDelta(content="world"), finish_reason="stop"
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+    ]
 
     client.chat.completions._post = mock.Mock(return_value=returned_stream)
     with start_transaction(name="openai tx"):
@@ -1667,11 +1655,7 @@ def test_span_origin_streaming_chat(
 
 @pytest.mark.asyncio
 async def test_span_origin_streaming_chat_async(
-    sentry_init,
-    capture_events,
-    get_model_response,
-    async_iterator,
-    server_side_event_chunks,
+    sentry_init, capture_events, async_iterator
 ):
     sentry_init(
         integrations=[OpenAIIntegration()],
@@ -1680,53 +1664,45 @@ async def test_span_origin_streaming_chat_async(
     events = capture_events()
 
     client = AsyncOpenAI(api_key="z")
-    returned_stream = get_model_response(
-        async_iterator(
-            server_side_event_chunks(
-                [
-                    ChatCompletionChunk(
-                        id="1",
-                        choices=[
-                            DeltaChoice(
-                                index=0,
-                                delta=ChoiceDelta(content="hel"),
-                                finish_reason=None,
-                            )
-                        ],
-                        created=100000,
-                        model="model-id",
-                        object="chat.completion.chunk",
-                    ),
-                    ChatCompletionChunk(
-                        id="1",
-                        choices=[
-                            DeltaChoice(
-                                index=1,
-                                delta=ChoiceDelta(content="lo "),
-                                finish_reason=None,
-                            )
-                        ],
-                        created=100000,
-                        model="model-id",
-                        object="chat.completion.chunk",
-                    ),
-                    ChatCompletionChunk(
-                        id="1",
-                        choices=[
-                            DeltaChoice(
-                                index=2,
-                                delta=ChoiceDelta(content="world"),
-                                finish_reason="stop",
-                            )
-                        ],
-                        created=100000,
-                        model="model-id",
-                        object="chat.completion.chunk",
-                    ),
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(
+        [
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=0, delta=ChoiceDelta(content="hel"), finish_reason=None
+                    )
                 ],
-                include_event_type=False,
-            )
-        )
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=1, delta=ChoiceDelta(content="lo "), finish_reason=None
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=2,
+                        delta=ChoiceDelta(content="world"),
+                        finish_reason="stop",
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+        ]
     )
 
     client.chat.completions._post = AsyncMock(return_value=returned_stream)