getsentry · constantinius · May 5, 2026 · May 5, 2026 · May 18, 2026 · sentry
@@ -6,6 +6,7 @@
 from sentry_sdk.ai.monitoring import record_token_usage
 from sentry_sdk.ai.utils import (
     get_start_span_function,
+    normalize_message_roles,
     set_data_normalized,
     truncate_and_annotate_messages,
     transform_openai_content_part,
@@ -23,6 +24,11 @@
 try:
     import litellm  # type: ignore[import-not-found]
     from litellm import input_callback, success_callback, failure_callback
+    from litellm.types.llms.openai import (  # type: ignore[import-not-found]
+        ResponseAPIUsage,
+        ResponseCompletedEvent,
+        ResponsesAPIResponse,
+    )
 except ImportError:
     raise DidNotEnable("LiteLLM not installed")
 
@@ -66,6 +72,48 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str,
     return messages
 
 
+def _record_responses_conversation_id(
+    span: "Any", complete_input: "Dict[str, Any]"
+) -> None:
+    """Attach the Responses API conversation id to the span, if there is one.
+
+    The ``"conversation"`` entry of ``complete_input`` may be the id itself
+    (a string) or a dict carrying it under ``"id"``. A missing entry, an
+    entry of any other type, or a ``None`` id leaves the span untouched.
+    """
+    entry = complete_input.get("conversation")
+    if isinstance(entry, dict):
+        # Dict form: the id sits one level down.
+        entry = entry.get("id")
+    elif not isinstance(entry, str):
+        # Covers both "no conversation given" and unsupported value types.
+        return
+
+    if entry is not None:
+        set_data_normalized(span, SPANDATA.GEN_AI_CONVERSATION_ID, entry)
+
+
+def _record_responses_input_messages(
+    span: "Any", scope: "Any", responses_input: "Any"
+) -> None:
+    """Record the request messages of a Responses API call on the span.
+
+    ``responses_input`` is the request's ``input``: either a string or a list
+    of message dicts (same shape as the OpenAI Responses API). A string is
+    wrapped in a one-element list. Falsy input is skipped, and nothing is set
+    when ``truncate_and_annotate_messages`` returns ``None``.
+    """
+    if not responses_input:
+        return
+
+    if isinstance(responses_input, str):
+        input_messages = [responses_input]
+    else:
+        input_messages = list(responses_input)
+    normalized = normalize_message_roles(input_messages)  # type: ignore[arg-type]
+    messages_data = truncate_and_annotate_messages(normalized, span, scope)
+    if messages_data is not None:
+        # Written with set_data_normalized, as everywhere else in this
+        # module; `unpack` is an option of that helper.
+        set_data_normalized(
+            span,
+            SPANDATA.GEN_AI_REQUEST_MESSAGES,
+            messages_data,
+            unpack=False,
+        )
+
+
 def _input_callback(kwargs: "Dict[str, Any]") -> None:
     """Handle the start of a request."""
     integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
@@ -84,16 +132,17 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
     call_type = kwargs.get("call_type", None)
     if call_type == "embedding" or call_type == "aembedding":
         operation = "embeddings"
+        op = consts.OP.GEN_AI_EMBEDDINGS
+    elif call_type == "responses" or call_type == "aresponses":
+        operation = "responses"
+        op = consts.OP.GEN_AI_RESPONSES
     else:
         operation = "chat"
+        op = consts.OP.GEN_AI_CHAT
 
     # Start a new span/transaction
     span = get_start_span_function()(
-        op=(
-            consts.OP.GEN_AI_CHAT
-            if operation == "chat"
-            else consts.OP.GEN_AI_EMBEDDINGS
-        ),
+        op=op,
         name=f"{operation} {model}",
         origin=LiteLLMIntegration.origin,
     )
@@ -106,14 +155,15 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
     set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
 
-    # Record input/messages if allowed
-    if should_send_default_pii() and integration.include_prompts:
-        if operation == "embeddings":
-            # For embeddings, look for the 'input' parameter
+    # Per-operation request data. Conversation id (responses) is set
+    # unconditionally; user-content fields are gated on PII / include_prompts.
+    record_prompts = should_send_default_pii() and integration.include_prompts
+    scope = sentry_sdk.get_current_scope()
+
+    if operation == "embeddings":
+        if record_prompts:
             embedding_input = kwargs.get("input")
             if embedding_input:
-                scope = sentry_sdk.get_current_scope()
-                # Normalize to list format
                 input_list = (
                     embedding_input
                     if isinstance(embedding_input, list)
@@ -129,11 +179,23 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
                         messages_data,
                         unpack=False,
                     )
-        else:
-            # For chat, look for the 'messages' parameter
+
+    elif operation == "responses":
+        # litellm unpacks `extra_body` into the request body, so the
+        # `conversation` field shows up in additional_args.complete_input_dict
+        # rather than as a top-level kwarg.
+        complete_input = (kwargs.get("additional_args") or {}).get(
+            "complete_input_dict"
+        ) or {}
+        _record_responses_conversation_id(span, complete_input)
+        if record_prompts:
+            _record_responses_input_messages(span, scope, kwargs.get("input"))
+
+    else:
+        # Chat completions.
+        if record_prompts:
             messages = kwargs.get("messages", [])
             if messages:
-                scope = sentry_sdk.get_current_scope()
                 messages = _convert_message_parts(messages)
                 messages_data = truncate_and_annotate_messages(messages, span, scope)
                 if messages_data is not None:
@@ -164,13 +226,122 @@ async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
     return _input_callback(kwargs)
 
 
+def _record_chat_response_messages(span: "Any", response: "Any") -> None:
+    """Store the messages of a Chat Completions response as the response text.
+
+    Each choice's ``message`` is serialized with ``model_dump()`` or, failing
+    that, ``dict()``; objects offering neither are reduced to whichever of
+    ``role`` / ``content`` / ``tool_calls`` they expose. Choices without a
+    message are skipped, and nothing is recorded when no message was found.
+    """
+    collected = []
+    for message in (getattr(choice, "message", None) for choice in response.choices):
+        if message is None:
+            continue
+
+        for dumper in ("model_dump", "dict"):
+            if hasattr(message, dumper):
+                collected.append(getattr(message, dumper)())
+                break
+        else:
+            # Plain message object: copy over the attributes it actually has.
+            fields = ("role", "content", "tool_calls")
+            collected.append(
+                {key: getattr(message, key) for key in fields if hasattr(message, key)}
+            )
+
+    if collected:
+        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, collected)
+
+
+def _record_responses_output(span: "Any", response: "ResponsesAPIResponse") -> None:
+    """Store the text and tool calls of a Responses API response on the span.
+
+    ``function_call`` output items are collected as tool calls. For
+    ``message`` items, each content part contributes its ``text`` when that
+    is not ``None``, otherwise its serialized form. Output items of any other
+    type are ignored, and each span attribute is only set when something was
+    collected for it.
+    """
+
+    def _append_dump(target: "List[Any]", item: "Any") -> None:
+        # Serialize with model_dump(), else dict(); items offering neither
+        # are dropped.
+        if hasattr(item, "model_dump"):
+            target.append(item.model_dump())
+        elif hasattr(item, "dict"):
+            target.append(item.dict())
+
+    texts = []  # type: List[Any]
+    calls = []  # type: List[Any]
+    for item in response.output:
+        kind = getattr(item, "type", None)
+        if kind == "function_call":
+            _append_dump(calls, item)
+        elif kind == "message":
+            # `content` may be absent or None; treat both as "no parts".
+            for part in getattr(item, "content", []) or []:
+                text = getattr(part, "text", None)
+                if text is None:
+                    _append_dump(texts, part)
+                else:
+                    texts.append(text)
+
+    if calls:
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, calls, unpack=False
+        )
+    if texts:
+        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, texts)
+
+
+def _record_token_usage_from_response(span: "Any", response: "Any") -> None:
+    """Record the token counts found in ``response.usage`` on the span.
+
+    Nothing is recorded when the response has no ``usage`` attribute or it is
+    ``None``. Which shape ``usage`` takes depends on the litellm processing
+    pipeline rather than on the API path:
+
+    - ``ResponseAPIUsage``: raw Responses API usage (``input_tokens`` /
+      ``output_tokens``). Seen when litellm has not yet normalized the value.
+    - ``dict``: chat-style dict (``prompt_tokens`` / ``completion_tokens``).
+      litellm assembles streaming Responses API usage as a dict.
+    - Anything else: chat-style Pydantic ``Usage`` (``prompt_tokens`` /
+      ``completion_tokens``). Used for Chat Completions, Embeddings, and
+      non-streaming Responses API after litellm's post-processing. Missing
+      attributes are reported as ``None``.
+    """
+    usage = getattr(response, "usage", None)
+    if usage is None:
+        return
+
+    chat_keys = ("prompt_tokens", "completion_tokens", "total_tokens")
+    if isinstance(usage, ResponseAPIUsage):
+        counts = (usage.input_tokens, usage.output_tokens, usage.total_tokens)
+    elif isinstance(usage, dict):
+        counts = tuple(usage.get(key) for key in chat_keys)
+    else:
+        counts = tuple(getattr(usage, key, None) for key in chat_keys)
+
+    # All three shapes funnel into the same call, in (input, output, total)
+    # order.
+    input_tokens, output_tokens, total_tokens = counts
+    record_token_usage(
+        span,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        total_tokens=total_tokens,
+    )
+
+
 def _success_callback(
     kwargs: "Dict[str, Any]",
-    completion_response: "Any",
+    response: "Any",
     start_time: "datetime",
     end_time: "datetime",
 ) -> None:
-    """Handle successful completion."""
+    """Handle a successful chat completion, embeddings, or Responses API call.
+
+    The shape of `response` differs between API paths:
+      - Chat Completions: ModelResponse with ``.choices[].message`` and
+        ``.usage`` carrying ``prompt_tokens`` / ``completion_tokens``.
+      - Responses API (non-streaming): ResponsesAPIResponse with ``.output[]``
+        items (``message`` / ``function_call``) and ``.usage`` carrying
+        ``input_tokens`` / ``output_tokens``.
+      - Responses API (streaming): a ResponseCompletedEvent wrapping a
+        ``ResponsesAPIResponse``, which we unwrap below.
+      - Embeddings: CreateEmbeddingResponse with ``.usage`` only (no choices
+        or output).
+    """
 
     metadata = _get_metadata_dict(kwargs)
     span = metadata.get("_sentry_span")
@@ -181,48 +352,23 @@ def _success_callback(
     if integration is None:
         return
 
+    # Streaming Responses API: unwrap the ResponseCompletedEvent so the rest of
+    # the function sees the assembled ResponsesAPIResponse directly.
+    if isinstance(response, ResponseCompletedEvent):
+        response = response.response
+
     try:
-        # Record model information
-        if hasattr(completion_response, "model"):
-            set_data_normalized(
-                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
-            )
+        # `model` is set by all API shapes (chat / responses / embeddings).
+        if hasattr(response, "model"):
+            set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)
 
-        # Record response content if allowed
         if should_send_default_pii() and integration.include_prompts:
-            if hasattr(completion_response, "choices"):
-                response_messages = []
-                for choice in completion_response.choices:
-                    if hasattr(choice, "message"):
-                        if hasattr(choice.message, "model_dump"):
-                            response_messages.append(choice.message.model_dump())
-                        elif hasattr(choice.message, "dict"):
-                            response_messages.append(choice.message.dict())
-                        else:
-                            # Fallback for basic message objects
-                            msg = {}
-                            if hasattr(choice.message, "role"):
-                                msg["role"] = choice.message.role
-                            if hasattr(choice.message, "content"):
-                                msg["content"] = choice.message.content
-                            if hasattr(choice.message, "tool_calls"):
-                                msg["tool_calls"] = choice.message.tool_calls
-                            response_messages.append(msg)
-
-                if response_messages:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
-                    )
+            if isinstance(response, ResponsesAPIResponse):
+                _record_responses_output(span, response)
+            elif hasattr(response, "choices"):
+                _record_chat_response_messages(span, response)
 
-        # Record token usage
-        if hasattr(completion_response, "usage"):
-            usage = completion_response.usage
-            record_token_usage(
-                span,
-                input_tokens=getattr(usage, "prompt_tokens", None),
-                output_tokens=getattr(usage, "completion_tokens", None),
-                total_tokens=getattr(usage, "total_tokens", None),
-            )
+        _record_token_usage_from_response(span, response)
 
     finally:
         is_streaming = kwargs.get("stream")