From 29b8891f149769db9208e0acf7ebcc64f716d732 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 11:05:57 +0200
Subject: [PATCH 1/8] fix(openai): Only wrap types with _iterator for streamed
 responses

---
 sentry_sdk/integrations/openai.py | 436 +++++++++++++++++-------------
 1 file changed, 248 insertions(+), 188 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 073f4546c6..9e2c1809e2 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -51,6 +51,7 @@
     from sentry_sdk._types import TextPart
 
     from openai.types.responses import ResponseInputParam, SequenceNotStr
+    from openai.types.responses import ResponseStreamEvent
     from openai import Omit
 
 try:
@@ -67,6 +68,8 @@
     from openai.resources.chat.completions import Completions, AsyncCompletions
     from openai.resources import Embeddings, AsyncEmbeddings
 
+    from openai import Stream, AsyncStream
+
     if TYPE_CHECKING:
         from openai.types.chat import (
             ChatCompletionMessageParam,
@@ -607,116 +610,258 @@ def _set_completions_api_output_data(
     )
 
 
-def _set_streaming_completions_api_output_data(
+def _wrap_synchronous_completions_chunk_iterator(
     span: "Span",
-    response: "Any",
-    kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
-    finish_span: bool = True,
-) -> None:
-    messages = kwargs.get("messages")
+    start_time: "float",
+    messages: "Iterable[ChatCompletionMessageParam]",
+    response: "Stream[ChatCompletionChunk]",
+    old_iterator: "Iterator[ChatCompletionChunk]",
+    finish_span: "bool",
+):
+    ttft = None
+    data_buf: "list[list[str]]" = []  # one for each choice
 
-    if messages is not None and isinstance(messages, str):
-        messages = [messages]
+    for x in old_iterator:
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
 
-    ttft: "Optional[float]" = None
+        with capture_internal_exceptions():
+            if hasattr(x, "choices"):
+                choice_index = 0
+                for choice in x.choices:
+                    if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
+                        content = choice.delta.content
+                        if len(data_buf) <= choice_index:
+                            data_buf.append([])
+                        data_buf[choice_index].append(content or "")
+                    choice_index += 1
+
+        yield x
+
+    with capture_internal_exceptions():
+        if ttft is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            )
+        if len(data_buf) > 0:
+            all_responses = ["".join(chunk) for chunk in data_buf]
+            if should_send_default_pii() and integration.include_prompts:
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
+            _calculate_token_usage(
+                messages,
+                response,
+                span,
+                all_responses,
+                integration.count_tokens,
+            )
+
+    if finish_span:
+        span.__exit__(None, None, None)
+
+
+async def _wrap_asynchronous_completions_chunk_iterator(
+    span: "Span",
+    integration: "OpenAIIntegration",
+    start_time: "float",
+    messages: "Iterable[ChatCompletionMessageParam]",
+    response: "AsyncStream[ChatCompletionChunk]",
+    old_iterator: "AsyncIterator[ChatCompletionChunk]",
+    finish_span: "bool",
+):
+    start_time = time.perf_counter()
+    ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
 
-    old_iterator = response._iterator
-
-    def new_iterator() -> "Iterator[ChatCompletionChunk]":
-        nonlocal ttft
-        for x in old_iterator:
-            span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
-
-            with capture_internal_exceptions():
-                if hasattr(x, "choices"):
-                    choice_index = 0
-                    for choice in x.choices:
-                        if hasattr(choice, "delta") and hasattr(
-                            choice.delta, "content"
-                        ):
-                            if start_time is not None and ttft is None:
-                                ttft = time.perf_counter() - start_time
-                            content = choice.delta.content
-                            if len(data_buf) <= choice_index:
-                                data_buf.append([])
-                            data_buf[choice_index].append(content or "")
-                        choice_index += 1
-
-            yield x
+    async for x in old_iterator:
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
 
         with capture_internal_exceptions():
-            if ttft is not None:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            if hasattr(x, "choices"):
+                choice_index = 0
+                for choice in x.choices:
+                    if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
+                        content = choice.delta.content
+                        if len(data_buf) <= choice_index:
+                            data_buf.append([])
+                        data_buf[choice_index].append(content or "")
+                    choice_index += 1
+
+        yield x
+
+    with capture_internal_exceptions():
+        if ttft is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            )
+        if len(data_buf) > 0:
+            all_responses = ["".join(chunk) for chunk in data_buf]
+            if should_send_default_pii() and integration.include_prompts:
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
+            _calculate_token_usage(
+                messages,
+                response,
+                span,
+                all_responses,
+                integration.count_tokens,
+            )
+
+    if finish_span:
+        span.__exit__(None, None, None)
+
+
+def _wrap_synchronous_responses_event_iterator(
+    span: "Span",
+    integration: "OpenAIIntegration",
+    start_time: "float",
+    input: "Optional[Union[str, ResponseInputParam]]",
+    response: "Stream[ResponseStreamEvent]",
+    old_iterator: "Iterator[ResponseStreamEvent]",
+    finish_span: "bool",
+):
+    start_time = time.perf_counter()
+    ttft = None
+    data_buf: "list[list[str]]" = []  # one for each choice
+
+    count_tokens_manually = True
+    for x in old_iterator:
+        with capture_internal_exceptions():
+            if hasattr(x, "delta"):
+                if start_time is not None and ttft is None:
+                    ttft = time.perf_counter() - start_time
+                if len(data_buf) == 0:
+                    data_buf.append([])
+                data_buf[0].append(x.delta or "")
+
+            if isinstance(x, ResponseCompletedEvent):
+                span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
+
+                _calculate_token_usage(
+                    input,
+                    x.response,
+                    span,
+                    None,
+                    integration.count_tokens,
                 )
-            if len(data_buf) > 0:
-                all_responses = ["".join(chunk) for chunk in data_buf]
-                if should_send_default_pii() and integration.include_prompts:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                    )
+                count_tokens_manually = False
+
+        yield x
+
+    with capture_internal_exceptions():
+        if ttft is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            )
+        if len(data_buf) > 0:
+            all_responses = ["".join(chunk) for chunk in data_buf]
+            if should_send_default_pii() and integration.include_prompts:
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
+            if count_tokens_manually:
                 _calculate_token_usage(
-                    messages,
+                    input,
                     response,
                     span,
                     all_responses,
                     integration.count_tokens,
                 )
 
-        if finish_span:
-            span.__exit__(None, None, None)
+    if finish_span:
+        span.__exit__(None, None, None)
 
-    async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
-        nonlocal ttft
-        async for x in old_iterator:
-            span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
-
-            with capture_internal_exceptions():
-                if hasattr(x, "choices"):
-                    choice_index = 0
-                    for choice in x.choices:
-                        if hasattr(choice, "delta") and hasattr(
-                            choice.delta, "content"
-                        ):
-                            if start_time is not None and ttft is None:
-                                ttft = time.perf_counter() - start_time
-                            content = choice.delta.content
-                            if len(data_buf) <= choice_index:
-                                data_buf.append([])
-                            data_buf[choice_index].append(content or "")
-                        choice_index += 1
-
-            yield x
 
+async def _wrap_asynchronous_responses_event_iterator(
+    span: "Span",
+    integration: "OpenAIIntegration",
+    start_time: "float",
+    input: "Optional[Union[str, ResponseInputParam]]",
+    response: "AsyncStream[ResponseStreamEvent]",
+    old_iterator: "AsyncIterator[ResponseStreamEvent]",
+    finish_span: "bool",
+):
+    ttft: "Optional[float]" = None
+    data_buf: "list[list[str]]" = []  # one for each choice
+
+    count_tokens_manually = True
+    async for x in old_iterator:
         with capture_internal_exceptions():
-            if ttft is not None:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            if hasattr(x, "delta"):
+                if start_time is not None and ttft is None:
+                    ttft = time.perf_counter() - start_time
+                if len(data_buf) == 0:
+                    data_buf.append([])
+                data_buf[0].append(x.delta or "")
+
+            if isinstance(x, ResponseCompletedEvent):
+                span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
+
+                _calculate_token_usage(
+                    input,
+                    x.response,
+                    span,
+                    None,
+                    integration.count_tokens,
                 )
-            if len(data_buf) > 0:
-                all_responses = ["".join(chunk) for chunk in data_buf]
-                if should_send_default_pii() and integration.include_prompts:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                    )
+                count_tokens_manually = False
+
+        yield x
+
+    with capture_internal_exceptions():
+        if ttft is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+            )
+        if len(data_buf) > 0:
+            all_responses = ["".join(chunk) for chunk in data_buf]
+            if should_send_default_pii() and integration.include_prompts:
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses)
+            if count_tokens_manually:
                 _calculate_token_usage(
-                    messages,
+                    input,
                     response,
                     span,
                     all_responses,
                     integration.count_tokens,
                 )
+    if finish_span:
+        span.__exit__(None, None, None)
 
-        if finish_span:
-            span.__exit__(None, None, None)
 
-    if str(type(response._iterator)) == "<class 'async_generator'>":
-        response._iterator = new_iterator_async()
-    else:
-        response._iterator = new_iterator()
+def _set_streaming_completions_api_output_data(
+    span: "Span",
+    response: "Any",
+    kwargs: "dict[str, Any]",
+    integration: "OpenAIIntegration",
+    start_time: "float",
+    finish_span: bool = True,
+) -> None:
+    messages = kwargs.get("messages")
+
+    if messages is not None and isinstance(messages, str):
+        messages = [messages]
+
+    if isinstance(response, Stream):
+        response._iterator = _wrap_synchronous_completions_chunk_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            messages=messages,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=finish_span,
+        )
+    elif isinstance(response, AsyncStream):
+        response._iterator = _wrap_asynchronous_completions_chunk_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            messages=messages,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=finish_span,
+        )
 
 
 def _set_responses_api_output_data(
@@ -745,7 +890,7 @@ def _set_streaming_responses_api_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
+    start_time: "float",
     finish_span: bool = True,
 ) -> None:
     input = kwargs.get("input")
@@ -753,112 +898,27 @@ def _set_streaming_responses_api_output_data(
     if input is not None and isinstance(input, str):
         input = [input]
 
-    ttft: "Optional[float]" = None
-    data_buf: "list[list[str]]" = []  # one for each choice
-
-    old_iterator = response._iterator
-
-    def new_iterator() -> "Iterator[ChatCompletionChunk]":
-        nonlocal ttft
-        count_tokens_manually = True
-        for x in old_iterator:
-            with capture_internal_exceptions():
-                if hasattr(x, "delta"):
-                    if start_time is not None and ttft is None:
-                        ttft = time.perf_counter() - start_time
-                    if len(data_buf) == 0:
-                        data_buf.append([])
-                    data_buf[0].append(x.delta or "")
-
-                if isinstance(x, ResponseCompletedEvent):
-                    span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
-
-                    _calculate_token_usage(
-                        input,
-                        x.response,
-                        span,
-                        None,
-                        integration.count_tokens,
-                    )
-                    count_tokens_manually = False
-
-            yield x
-
-        with capture_internal_exceptions():
-            if ttft is not None:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
-                )
-            if len(data_buf) > 0:
-                all_responses = ["".join(chunk) for chunk in data_buf]
-                if should_send_default_pii() and integration.include_prompts:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                    )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        input,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
-
-        if finish_span:
-            span.__exit__(None, None, None)
-
-    async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
-        nonlocal ttft
-        count_tokens_manually = True
-        async for x in old_iterator:
-            with capture_internal_exceptions():
-                if hasattr(x, "delta"):
-                    if start_time is not None and ttft is None:
-                        ttft = time.perf_counter() - start_time
-                    if len(data_buf) == 0:
-                        data_buf.append([])
-                    data_buf[0].append(x.delta or "")
-
-                if isinstance(x, ResponseCompletedEvent):
-                    span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model)
-
-                    _calculate_token_usage(
-                        input,
-                        x.response,
-                        span,
-                        None,
-                        integration.count_tokens,
-                    )
-                    count_tokens_manually = False
-
-            yield x
-
-        with capture_internal_exceptions():
-            if ttft is not None:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
-                )
-            if len(data_buf) > 0:
-                all_responses = ["".join(chunk) for chunk in data_buf]
-                if should_send_default_pii() and integration.include_prompts:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                    )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        input,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
-        if finish_span:
-            span.__exit__(None, None, None)
+    if isinstance(response, Stream):
+        response._iterator = _wrap_synchronous_responses_event_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            input=input,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=finish_span,
+        )
 
-    if str(type(response._iterator)) == "<class 'async_generator'>":
-        response._iterator = new_iterator_async()
-    else:
-        response._iterator = new_iterator()
+    elif isinstance(response, AsyncStream):
+        response._iterator = _wrap_asynchronous_responses_event_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            input=input,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=finish_span,
+        )
 
 
 def _set_embeddings_output_data(

From 0ab8c46b8b15a881958f1d87d00b152dbee200f1 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 11:08:23 +0200
Subject: [PATCH 2/8] restore optional start_time default on streaming output setters

---
 sentry_sdk/integrations/openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9e2c1809e2..caef788301 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -834,7 +834,7 @@ def _set_streaming_completions_api_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     messages = kwargs.get("messages")
@@ -890,7 +890,7 @@ def _set_streaming_responses_api_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     input = kwargs.get("input")

From 510b32063b1cefd560224537b5b6342c1644f116 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 11:15:06 +0200
Subject: [PATCH 3/8] docs and stop overwriting start_time

---
 sentry_sdk/integrations/openai.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index caef788301..43163942e6 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -613,12 +613,17 @@ def _set_completions_api_output_data(
 def _wrap_synchronous_completions_chunk_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]",
     messages: "Iterable[ChatCompletionMessageParam]",
     response: "Stream[ChatCompletionChunk]",
     old_iterator: "Iterator[ChatCompletionChunk]",
     finish_span: "bool",
 ):
+    """
+    Sets information received while iterating the response stream on the AI Client Span.
+    Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
+    Responsible for closing the AI Client Span if instructed to by the `finish_span` argument.
+    """
     ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
 
@@ -664,13 +669,17 @@ def _wrap_synchronous_completions_chunk_iterator(
 async def _wrap_asynchronous_completions_chunk_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]",
     messages: "Iterable[ChatCompletionMessageParam]",
     response: "AsyncStream[ChatCompletionChunk]",
     old_iterator: "AsyncIterator[ChatCompletionChunk]",
     finish_span: "bool",
 ):
-    start_time = time.perf_counter()
+    """
+    Sets information received while iterating the response stream on the AI Client Span.
+    Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
+    Responsible for closing the AI Client Span if instructed to by the `finish_span` argument.
+    """
     ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
 
@@ -716,13 +725,17 @@ async def _wrap_asynchronous_completions_chunk_iterator(
 def _wrap_synchronous_responses_event_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]",
     input: "Optional[Union[str, ResponseInputParam]]",
     response: "Stream[ResponseStreamEvent]",
     old_iterator: "Iterator[ResponseStreamEvent]",
     finish_span: "bool",
 ):
-    start_time = time.perf_counter()
+    """
+    Sets information received while iterating the response stream on the AI Client Span.
+    Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
+    Responsible for closing the AI Client Span if instructed to by the `finish_span` argument.
+    """
     ttft = None
     data_buf: "list[list[str]]" = []  # one for each choice
 
@@ -775,12 +788,17 @@ def _wrap_synchronous_responses_event_iterator(
 async def _wrap_asynchronous_responses_event_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
-    start_time: "float",
+    start_time: "Optional[float]",
     input: "Optional[Union[str, ResponseInputParam]]",
     response: "AsyncStream[ResponseStreamEvent]",
     old_iterator: "AsyncIterator[ResponseStreamEvent]",
     finish_span: "bool",
 ):
+    """
+    Sets information received while iterating the response stream on the AI Client Span.
+    Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
+    Responsible for closing the AI Client Span if instructed to by the `finish_span` argument.
+    """
     ttft: "Optional[float]" = None
     data_buf: "list[list[str]]" = []  # one for each choice
 

From 1a24a66153699191f6cf52c143c6529eb8a7dc5b Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 11:19:35 +0200
Subject: [PATCH 4/8] annotate iterator wrapper return types and allow messages to be None

---
 sentry_sdk/integrations/openai.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 43163942e6..b8d0e94e03 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -614,11 +614,11 @@ def _wrap_synchronous_completions_chunk_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
     start_time: "Optional[float]",
-    messages: "Iterable[ChatCompletionMessageParam]",
+    messages: "Optional[Iterable[ChatCompletionMessageParam]]",
     response: "Stream[ChatCompletionChunk]",
     old_iterator: "Iterator[ChatCompletionChunk]",
     finish_span: "bool",
-):
+) -> "Iterator[ChatCompletionChunk]":
     """
     Sets information received while iterating the response stream on the AI Client Span.
     Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
@@ -670,11 +670,11 @@ async def _wrap_asynchronous_completions_chunk_iterator(
     span: "Span",
     integration: "OpenAIIntegration",
     start_time: "Optional[float]",
-    messages: "Iterable[ChatCompletionMessageParam]",
+    messages: "Optional[Iterable[ChatCompletionMessageParam]]",
     response: "AsyncStream[ChatCompletionChunk]",
     old_iterator: "AsyncIterator[ChatCompletionChunk]",
     finish_span: "bool",
-):
+) -> "AsyncIterator[ChatCompletionChunk]":
     """
     Sets information received while iterating the response stream on the AI Client Span.
     Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
@@ -730,7 +730,7 @@ def _wrap_synchronous_responses_event_iterator(
     response: "Stream[ResponseStreamEvent]",
     old_iterator: "Iterator[ResponseStreamEvent]",
     finish_span: "bool",
-):
+) -> "Iterator[ResponseStreamEvent]":
     """
     Sets information received while iterating the response stream on the AI Client Span.
     Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.
@@ -793,7 +793,7 @@ async def _wrap_asynchronous_responses_event_iterator(
     response: "AsyncStream[ResponseStreamEvent]",
     old_iterator: "AsyncIterator[ResponseStreamEvent]",
     finish_span: "bool",
-):
+) -> "AsyncIterator[ResponseStreamEvent]":
     """
     Sets information received while iterating the response stream on the AI Client Span.
     Compute token count based on inputs and outputs using tiktoken if token counts are not in the model response.

From 301b9c8496cc1b36e35fbb04f41a1b90607fb452 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 12:01:31 +0200
Subject: [PATCH 5/8] remove indirection

---
 sentry_sdk/integrations/openai.py | 137 +++++++++++++-----------------
 1 file changed, 60 insertions(+), 77 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index b8d0e94e03..8bacc7d932 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -577,9 +577,36 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
     start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    if is_streaming_response:
-        _set_streaming_completions_api_output_data(
-            span, response, kwargs, integration, start_time, finish_span=True
+    if isinstance(response, Stream):
+        messages = kwargs.get("messages")
+
+        if messages is not None and isinstance(messages, str):
+            messages = [messages]
+
+        if isinstance(response, Stream):
+            response._iterator = _wrap_synchronous_completions_chunk_iterator(
+                span=span,
+                integration=integration,
+                start_time=start_time,
+                messages=messages,
+                response=response,
+                old_iterator=response._iterator,
+                finish_span=True,
+            )
+    elif isinstance(response, AsyncStream):
+        messages = kwargs.get("messages")
+
+        if messages is not None and isinstance(messages, str):
+            messages = [messages]
+
+        response._iterator = _wrap_asynchronous_completions_chunk_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            messages=messages,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=True,
         )
     else:
         _set_completions_api_output_data(
@@ -847,41 +874,6 @@ async def _wrap_asynchronous_responses_event_iterator(
         span.__exit__(None, None, None)
 
 
-def _set_streaming_completions_api_output_data(
-    span: "Span",
-    response: "Any",
-    kwargs: "dict[str, Any]",
-    integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
-    finish_span: bool = True,
-) -> None:
-    messages = kwargs.get("messages")
-
-    if messages is not None and isinstance(messages, str):
-        messages = [messages]
-
-    if isinstance(response, Stream):
-        response._iterator = _wrap_synchronous_completions_chunk_iterator(
-            span=span,
-            integration=integration,
-            start_time=start_time,
-            messages=messages,
-            response=response,
-            old_iterator=response._iterator,
-            finish_span=finish_span,
-        )
-    elif isinstance(response, AsyncStream):
-        response._iterator = _wrap_asynchronous_completions_chunk_iterator(
-            span=span,
-            integration=integration,
-            start_time=start_time,
-            messages=messages,
-            response=response,
-            old_iterator=response._iterator,
-            finish_span=finish_span,
-        )
-
-
 def _set_responses_api_output_data(
     span: "Span",
     response: "Any",
@@ -903,42 +895,6 @@ def _set_responses_api_output_data(
     )
 
 
-def _set_streaming_responses_api_output_data(
-    span: "Span",
-    response: "Any",
-    kwargs: "dict[str, Any]",
-    integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
-    finish_span: bool = True,
-) -> None:
-    input = kwargs.get("input")
-
-    if input is not None and isinstance(input, str):
-        input = [input]
-
-    if isinstance(response, Stream):
-        response._iterator = _wrap_synchronous_responses_event_iterator(
-            span=span,
-            integration=integration,
-            start_time=start_time,
-            input=input,
-            response=response,
-            old_iterator=response._iterator,
-            finish_span=finish_span,
-        )
-
-    elif isinstance(response, AsyncStream):
-        response._iterator = _wrap_asynchronous_responses_event_iterator(
-            span=span,
-            integration=integration,
-            start_time=start_time,
-            input=input,
-            response=response,
-            old_iterator=response._iterator,
-            finish_span=finish_span,
-        )
-
-
 def _set_embeddings_output_data(
     span: "Span",
     response: "Any",
@@ -1143,9 +1099,36 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
     start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    if is_streaming_response:
-        _set_streaming_responses_api_output_data(
-            span, response, kwargs, integration, start_time, finish_span=True
+    if isinstance(response, Stream):
+        input = kwargs.get("input")
+
+        if input is not None and isinstance(input, str):
+            input = [input]
+
+        response._iterator = _wrap_synchronous_responses_event_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            input=input,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=True,
+        )
+
+    elif isinstance(response, AsyncStream):
+        input = kwargs.get("input")
+
+        if input is not None and isinstance(input, str):
+            input = [input]
+
+        response._iterator = _wrap_asynchronous_responses_event_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            input=input,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=True,
         )
     else:
         _set_responses_api_output_data(

From 7b7bd060b7b95d330f1bc1a2cf1d58a6e7f548c0 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Mon, 30 Mar 2026 13:36:30 +0200
Subject: [PATCH 6/8] stop checking isinstance twice

---
 sentry_sdk/integrations/openai.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 8bacc7d932..b106a1063d 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -583,16 +583,15 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
         if messages is not None and isinstance(messages, str):
             messages = [messages]
 
-        if isinstance(response, Stream):
-            response._iterator = _wrap_synchronous_completions_chunk_iterator(
-                span=span,
-                integration=integration,
-                start_time=start_time,
-                messages=messages,
-                response=response,
-                old_iterator=response._iterator,
-                finish_span=True,
-            )
+        response._iterator = _wrap_synchronous_completions_chunk_iterator(
+            span=span,
+            integration=integration,
+            start_time=start_time,
+            messages=messages,
+            response=response,
+            old_iterator=response._iterator,
+            finish_span=True,
+        )
     elif isinstance(response, AsyncStream):
         messages = kwargs.get("messages")
 

From e25afa76d13b2655903cfb5e1d2039625ee096ef Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 31 Mar 2026 10:57:32 +0200
Subject: [PATCH 7/8] add attribute check

---
 sentry_sdk/integrations/openai.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index b106a1063d..8d59863a6a 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -577,7 +577,8 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
     start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    if isinstance(response, Stream):
+    # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
+    if isinstance(response, Stream) and hasattr(response, "_iterator"):
         messages = kwargs.get("messages")
 
         if messages is not None and isinstance(messages, str):
@@ -592,7 +593,9 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
             old_iterator=response._iterator,
             finish_span=True,
         )
-    elif isinstance(response, AsyncStream):
+
+    # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
+    elif isinstance(response, AsyncStream) and hasattr(response, "_iterator"):
         messages = kwargs.get("messages")
 
         if messages is not None and isinstance(messages, str):
@@ -1098,6 +1101,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
     start_time = time.perf_counter()
     response = yield f, args, kwargs
 
+    # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
     if isinstance(response, Stream):
         input = kwargs.get("input")
 
@@ -1114,6 +1118,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
             finish_span=True,
         )
 
+    # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
     elif isinstance(response, AsyncStream):
         input = kwargs.get("input")
 

From 88edfcddaf7f729d8facee6ee75e3a6d4953850f Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Tue, 31 Mar 2026 10:58:43 +0200
Subject: [PATCH 8/8] add _iterator attribute check for Responses API streams

---
 sentry_sdk/integrations/openai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 8d59863a6a..6707f8194b 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -1102,7 +1102,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
     response = yield f, args, kwargs
 
     # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
-    if isinstance(response, Stream):
+    if isinstance(response, Stream) and hasattr(response, "_iterator"):
         input = kwargs.get("input")
 
         if input is not None and isinstance(input, str):
@@ -1119,7 +1119,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
         )
 
     # Attribute check to fail gracefully if the attribute is not present in future `openai` versions.
-    elif isinstance(response, AsyncStream):
+    elif isinstance(response, AsyncStream) and hasattr(response, "_iterator"):
         input = kwargs.get("input")
 
         if input is not None and isinstance(input, str):