Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 200 additions & 54 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import (
get_start_span_function,
normalize_message_roles,
set_data_normalized,
truncate_and_annotate_messages,
transform_openai_content_part,
Expand All @@ -23,6 +24,11 @@
try:
import litellm # type: ignore[import-not-found]
from litellm import input_callback, success_callback, failure_callback
from litellm.types.llms.openai import ( # type: ignore[import-not-found]
ResponseAPIUsage,
ResponseCompletedEvent,
ResponsesAPIResponse,
)
except ImportError:
raise DidNotEnable("LiteLLM not installed")

Expand Down Expand Up @@ -66,6 +72,48 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str,
return messages


def _record_responses_conversation_id(
    span: "Any", complete_input: "Dict[str, Any]"
) -> None:
    """Attach the Responses API conversation id to ``span`` when one is present.

    The ``"conversation"`` entry of ``complete_input`` is accepted either as a
    plain string (used as the id directly) or as a dict (its ``"id"`` entry is
    used). A missing entry, any other type, or a dict without an ``"id"``
    leaves the span untouched.
    """
    raw = complete_input.get("conversation")

    # Resolve the id first; every unsupported shape (including a missing
    # entry) collapses to ``None`` so a single guard below covers them all.
    if isinstance(raw, dict):
        resolved = raw.get("id")
    elif isinstance(raw, str):
        resolved = raw
    else:
        resolved = None

    if resolved is None:
        return

    set_data_normalized(span, SPANDATA.GEN_AI_CONVERSATION_ID, resolved)


def _record_responses_input_messages(
span: "Any", scope: "Any", responses_input: "Any"
) -> None:
"""Record the request messages for a Responses API call."""
if not responses_input:
return

# `input` is either a string or a list of message dicts (same shape as
# the OpenAI Responses API).
if isinstance(responses_input, str):
input_messages = [responses_input]
else:
input_messages = list(responses_input)
normalized = normalize_message_roles(input_messages) # type: ignore[arg-type]
messages_data = truncate_and_annotate_messages(normalized, span, scope)
if messages_data is not None:
span.set_data(
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)
Comment on lines +110 to +114
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The function _record_responses_input_messages calls span.set_data with an unsupported unpack argument, which will cause a TypeError.
Severity: HIGH

Suggested Fix

Replace the call to span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False) with set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False). This aligns with the pattern used elsewhere in the file for setting data with the unpack option.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: sentry_sdk/integrations/litellm.py#L110-L114

Potential issue: In the `_record_responses_input_messages` function, `span.set_data` is
called with an `unpack=False` keyword argument. The `set_data` method on a `Span` object
does not accept this argument, which will cause a `TypeError` at runtime when this code
is executed. This path is triggered when using `litellm.responses()` or
`litellm.aresponses()` with prompt recording enabled. Other parts of the code correctly
use `set_data_normalized` when the `unpack` parameter is needed.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

span.set_data() called with unsupported unpack keyword argument

High Severity

_record_responses_input_messages calls span.set_data() with unpack=False, but Span.set_data(self, key, value) only accepts key and value — it has no unpack parameter. This raises a TypeError at runtime whenever Responses API input messages are recorded (i.e., when send_default_pii=True and include_prompts=True). The likely intent was to call set_data_normalized() (which does accept unpack) or to drop the unpack argument. Since the crash occurs inside _input_callback with no surrounding try/except, the already-entered span will never be exited, causing a resource leak.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit b3d837c. Configure here.



def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
Expand All @@ -84,16 +132,17 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
call_type = kwargs.get("call_type", None)
if call_type == "embedding" or call_type == "aembedding":
operation = "embeddings"
op = consts.OP.GEN_AI_EMBEDDINGS
elif call_type == "responses" or call_type == "aresponses":
operation = "responses"
op = consts.OP.GEN_AI_RESPONSES
else:
operation = "chat"
op = consts.OP.GEN_AI_CHAT

# Start a new span/transaction
span = get_start_span_function()(
op=(
consts.OP.GEN_AI_CHAT
if operation == "chat"
else consts.OP.GEN_AI_EMBEDDINGS
),
op=op,
name=f"{operation} {model}",
origin=LiteLLMIntegration.origin,
)
Expand All @@ -106,14 +155,15 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

# Record input/messages if allowed
if should_send_default_pii() and integration.include_prompts:
if operation == "embeddings":
# For embeddings, look for the 'input' parameter
# Per-operation request data. Conversation id (responses) is set
# unconditionally; user-content fields are gated on PII / include_prompts.
record_prompts = should_send_default_pii() and integration.include_prompts
scope = sentry_sdk.get_current_scope()

if operation == "embeddings":
if record_prompts:
embedding_input = kwargs.get("input")
if embedding_input:
scope = sentry_sdk.get_current_scope()
# Normalize to list format
input_list = (
embedding_input
if isinstance(embedding_input, list)
Expand All @@ -129,11 +179,23 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages_data,
unpack=False,
)
else:
# For chat, look for the 'messages' parameter

elif operation == "responses":
# litellm unpacks `extra_body` into the request body, so the
# `conversation` field shows up in additional_args.complete_input_dict
# rather than as a top-level kwarg.
complete_input = (kwargs.get("additional_args") or {}).get(
"complete_input_dict"
) or {}
_record_responses_conversation_id(span, complete_input)
if record_prompts:
_record_responses_input_messages(span, scope, kwargs.get("input"))

else:
# Chat completions.
if record_prompts:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
Expand Down Expand Up @@ -164,13 +226,122 @@ async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
return _input_callback(kwargs)


def _record_chat_response_messages(span: "Any", response: "Any") -> None:
    """Store the messages of a Chat Completions response on ``span``.

    Each entry of ``response.choices`` that has a non-``None`` ``message`` is
    converted to a plain value: via ``model_dump()`` when available, else via
    ``dict()``, else by copying whichever of ``role`` / ``content`` /
    ``tool_calls`` the message object exposes. The collected list is written
    under ``SPANDATA.GEN_AI_RESPONSE_TEXT``; nothing is written when no
    message was collected.
    """
    collected = []

    for choice in response.choices:
        message = getattr(choice, "message", None)
        if message is None:
            continue

        if hasattr(message, "model_dump"):
            collected.append(message.model_dump())
            continue

        if hasattr(message, "dict"):
            collected.append(message.dict())
            continue

        # Fallback for basic message objects: copy only the attributes that
        # actually exist, in a fixed order.
        collected.append(
            {
                field: getattr(message, field)
                for field in ("role", "content", "tool_calls")
                if hasattr(message, field)
            }
        )

    if not collected:
        return

    set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, collected)


def _record_responses_output(span: "Any", response: "ResponsesAPIResponse") -> None:
    """Store text and tool calls of a Responses API response on ``span``.

    Items of ``response.output`` are dispatched on their ``type`` attribute:

    - ``"function_call"``: the item is dumped (``model_dump()`` or, failing
      that, ``dict()``) and collected as a tool call; items offering neither
      method are skipped.
    - ``"message"``: for every entry of the item's ``content``, the ``text``
      attribute is collected when it is not ``None``; otherwise the entry is
      dumped the same way as above.

    Any other item type is ignored. Tool calls are written under
    ``SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS`` (with ``unpack=False``) and text
    under ``SPANDATA.GEN_AI_RESPONSE_TEXT``; each is written only when
    non-empty.
    """
    texts = []  # type: List[Any]
    calls = []  # type: List[Any]

    for item in response.output:
        kind = getattr(item, "type", None)

        if kind == "function_call":
            if hasattr(item, "model_dump"):
                calls.append(item.model_dump())
            elif hasattr(item, "dict"):
                calls.append(item.dict())
            continue

        if kind == "message":
            # `content` may be absent or None; treat both as "no parts".
            parts = getattr(item, "content", []) or []
            for part in parts:
                text = getattr(part, "text", None)
                if text is not None:
                    texts.append(text)
                elif hasattr(part, "model_dump"):
                    texts.append(part.model_dump())
                elif hasattr(part, "dict"):
                    texts.append(part.dict())

    if calls:
        set_data_normalized(
            span,
            SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
            calls,
            unpack=False,
        )

    if texts:
        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, texts)


def _record_token_usage_from_response(span: "Any", response: "Any") -> None:
    """Record token usage found on ``response.usage``, whatever its shape.

    Nothing is recorded when ``response`` has no ``usage`` attribute or it is
    ``None``. Otherwise three shapes are handled:

    - ``ResponseAPIUsage``: read ``input_tokens`` / ``output_tokens`` /
      ``total_tokens`` attributes directly (raw Responses API usage).
    - ``dict``: read the ``prompt_tokens`` / ``completion_tokens`` /
      ``total_tokens`` keys (chat-style dict; per the original notes this is
      what litellm produces for streaming Responses API calls).
    - anything else: read ``prompt_tokens`` / ``completion_tokens`` /
      ``total_tokens`` attributes, defaulting each to ``None`` (chat-style
      usage object, as seen for Chat Completions and Embeddings).
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return

    # Extract the three counters per shape, then report them once.
    if isinstance(usage, ResponseAPIUsage):
        tokens_in = usage.input_tokens
        tokens_out = usage.output_tokens
        tokens_total = usage.total_tokens
    elif isinstance(usage, dict):
        tokens_in = usage.get("prompt_tokens")
        tokens_out = usage.get("completion_tokens")
        tokens_total = usage.get("total_tokens")
    else:
        tokens_in = getattr(usage, "prompt_tokens", None)
        tokens_out = getattr(usage, "completion_tokens", None)
        tokens_total = getattr(usage, "total_tokens", None)

    record_token_usage(
        span,
        input_tokens=tokens_in,
        output_tokens=tokens_out,
        total_tokens=tokens_total,
    )


def _success_callback(
kwargs: "Dict[str, Any]",
completion_response: "Any",
response: "Any",
start_time: "datetime",
end_time: "datetime",
) -> None:
"""Handle successful completion."""
"""Handle a successful chat completion, embeddings, or Responses API call.

The shape of `response` differs between API paths:
- Chat Completions: ModelResponse with ``.choices[].message`` and
``.usage`` carrying ``prompt_tokens`` / ``completion_tokens``.
- Responses API (non-streaming): ResponsesAPIResponse with ``.output[]``
items (``message`` / ``function_call``) and ``.usage`` carrying
``input_tokens`` / ``output_tokens``.
- Responses API (streaming): a ResponseCompletedEvent wrapping a
``ResponsesAPIResponse``, which we unwrap below.
- Embeddings: CreateEmbeddingResponse with ``.usage`` only (no choices
or output).
"""

metadata = _get_metadata_dict(kwargs)
span = metadata.get("_sentry_span")
Expand All @@ -181,48 +352,23 @@ def _success_callback(
if integration is None:
Comment thread
sentry[bot] marked this conversation as resolved.
return

# Streaming Responses API: unwrap the ResponseCompletedEvent so the rest of
# the function sees the assembled ResponsesAPIResponse directly.
if isinstance(response, ResponseCompletedEvent):
response = response.response

try:
# Record model information
if hasattr(completion_response, "model"):
set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
)
# `model` is set by all API shapes (chat / responses / embeddings).
if hasattr(response, "model"):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)

# Record response content if allowed
if should_send_default_pii() and integration.include_prompts:
if hasattr(completion_response, "choices"):
response_messages = []
for choice in completion_response.choices:
if hasattr(choice, "message"):
if hasattr(choice.message, "model_dump"):
response_messages.append(choice.message.model_dump())
elif hasattr(choice.message, "dict"):
response_messages.append(choice.message.dict())
else:
# Fallback for basic message objects
msg = {}
if hasattr(choice.message, "role"):
msg["role"] = choice.message.role
if hasattr(choice.message, "content"):
msg["content"] = choice.message.content
if hasattr(choice.message, "tool_calls"):
msg["tool_calls"] = choice.message.tool_calls
response_messages.append(msg)

if response_messages:
set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
)
if isinstance(response, ResponsesAPIResponse):
_record_responses_output(span, response)
elif hasattr(response, "choices"):
_record_chat_response_messages(span, response)

# Record token usage
if hasattr(completion_response, "usage"):
usage = completion_response.usage
record_token_usage(
span,
input_tokens=getattr(usage, "prompt_tokens", None),
output_tokens=getattr(usage, "completion_tokens", None),
total_tokens=getattr(usage, "total_tokens", None),
)
_record_token_usage_from_response(span, response)

finally:
is_streaming = kwargs.get("stream")
Expand Down
Loading