-
Notifications
You must be signed in to change notification settings - Fork 614
feat(integrations): add support for the litellm responses/aresponses APIs
#6205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
| from sentry_sdk.ai.monitoring import record_token_usage | ||
| from sentry_sdk.ai.utils import ( | ||
| get_start_span_function, | ||
| normalize_message_roles, | ||
| set_data_normalized, | ||
| truncate_and_annotate_messages, | ||
| transform_openai_content_part, | ||
|
|
@@ -23,6 +24,11 @@ | |
| try: | ||
| import litellm # type: ignore[import-not-found] | ||
| from litellm import input_callback, success_callback, failure_callback | ||
| from litellm.types.llms.openai import ( # type: ignore[import-not-found] | ||
| ResponseAPIUsage, | ||
| ResponseCompletedEvent, | ||
| ResponsesAPIResponse, | ||
| ) | ||
| except ImportError: | ||
| raise DidNotEnable("LiteLLM not installed") | ||
|
|
||
|
|
@@ -66,6 +72,48 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, | |
| return messages | ||
|
|
||
|
|
||
def _record_responses_conversation_id(
    span: "Any", complete_input: "Dict[str, Any]"
) -> None:
    """Attach the Responses API conversation id to ``span`` when one is present.

    ``complete_input["conversation"]`` may be a plain string (used as the id
    directly) or a dict (its ``"id"`` entry is used). A missing key, any other
    type, or a ``None`` id leaves the span untouched.
    """
    conversation = complete_input.get("conversation")

    if isinstance(conversation, str):
        conversation_id = conversation
    elif isinstance(conversation, dict):
        conversation_id = conversation.get("id")
    else:
        # Absent (None) or an unsupported shape: nothing to record.
        return

    if conversation_id is None:
        return

    set_data_normalized(span, SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id)
|
|
||
|
|
||
def _record_responses_input_messages(
    span: "Any", scope: "Any", responses_input: "Any"
) -> None:
    """Record the request messages for a Responses API call.

    ``responses_input`` is the ``input`` argument of the request: either a
    single string or a list of message dicts (same shape as the OpenAI
    Responses API). Falsy input (``None``, ``""``, ``[]``) records nothing.

    The messages are role-normalized, truncated/annotated against ``span`` and
    ``scope``, and stored under ``SPANDATA.GEN_AI_REQUEST_MESSAGES``.
    """
    if not responses_input:
        return

    # A bare string is wrapped so both input shapes flow through the same
    # list-based normalization below.
    if isinstance(responses_input, str):
        input_messages = [responses_input]
    else:
        input_messages = list(responses_input)

    normalized = normalize_message_roles(input_messages)  # type: ignore[arg-type]
    messages_data = truncate_and_annotate_messages(normalized, span, scope)
    if messages_data is None:
        return

    # Use set_data_normalized rather than span.set_data: the span method takes
    # only (key, value), so passing ``unpack`` to it raises TypeError. The
    # helper accepts ``unpack`` and is what the rest of this module uses.
    set_data_normalized(
        span,
        SPANDATA.GEN_AI_REQUEST_MESSAGES,
        messages_data,
        unpack=False,
    )
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
|
|
||
| def _input_callback(kwargs: "Dict[str, Any]") -> None: | ||
| """Handle the start of a request.""" | ||
| integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration) | ||
|
|
@@ -84,16 +132,17 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: | |
| call_type = kwargs.get("call_type", None) | ||
| if call_type == "embedding" or call_type == "aembedding": | ||
| operation = "embeddings" | ||
| op = consts.OP.GEN_AI_EMBEDDINGS | ||
| elif call_type == "responses" or call_type == "aresponses": | ||
| operation = "responses" | ||
| op = consts.OP.GEN_AI_RESPONSES | ||
| else: | ||
| operation = "chat" | ||
| op = consts.OP.GEN_AI_CHAT | ||
|
|
||
| # Start a new span/transaction | ||
| span = get_start_span_function()( | ||
| op=( | ||
| consts.OP.GEN_AI_CHAT | ||
| if operation == "chat" | ||
| else consts.OP.GEN_AI_EMBEDDINGS | ||
| ), | ||
| op=op, | ||
| name=f"{operation} {model}", | ||
| origin=LiteLLMIntegration.origin, | ||
| ) | ||
|
|
@@ -106,14 +155,15 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: | |
| set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider) | ||
| set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation) | ||
|
|
||
| # Record input/messages if allowed | ||
| if should_send_default_pii() and integration.include_prompts: | ||
| if operation == "embeddings": | ||
| # For embeddings, look for the 'input' parameter | ||
| # Per-operation request data. Conversation id (responses) is set | ||
| # unconditionally; user-content fields are gated on PII / include_prompts. | ||
| record_prompts = should_send_default_pii() and integration.include_prompts | ||
| scope = sentry_sdk.get_current_scope() | ||
|
|
||
| if operation == "embeddings": | ||
| if record_prompts: | ||
| embedding_input = kwargs.get("input") | ||
| if embedding_input: | ||
| scope = sentry_sdk.get_current_scope() | ||
| # Normalize to list format | ||
| input_list = ( | ||
| embedding_input | ||
| if isinstance(embedding_input, list) | ||
|
|
@@ -129,11 +179,23 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: | |
| messages_data, | ||
| unpack=False, | ||
| ) | ||
| else: | ||
| # For chat, look for the 'messages' parameter | ||
|
|
||
| elif operation == "responses": | ||
| # litellm unpacks `extra_body` into the request body, so the | ||
| # `conversation` field shows up in additional_args.complete_input_dict | ||
| # rather than as a top-level kwarg. | ||
| complete_input = (kwargs.get("additional_args") or {}).get( | ||
| "complete_input_dict" | ||
| ) or {} | ||
| _record_responses_conversation_id(span, complete_input) | ||
| if record_prompts: | ||
| _record_responses_input_messages(span, scope, kwargs.get("input")) | ||
|
|
||
| else: | ||
| # Chat completions. | ||
| if record_prompts: | ||
| messages = kwargs.get("messages", []) | ||
| if messages: | ||
| scope = sentry_sdk.get_current_scope() | ||
| messages = _convert_message_parts(messages) | ||
| messages_data = truncate_and_annotate_messages(messages, span, scope) | ||
| if messages_data is not None: | ||
|
|
@@ -164,13 +226,122 @@ async def _async_input_callback(kwargs: "Dict[str, Any]") -> None: | |
| return _input_callback(kwargs) | ||
|
|
||
|
|
||
def _record_chat_response_messages(span: "Any", response: "Any") -> None:
    """Store the messages of a Chat Completions response on ``span``.

    Each choice's ``message`` is serialized via ``model_dump()`` if available,
    else ``dict()``, else a hand-built dict holding whichever of ``role``,
    ``content`` and ``tool_calls`` the message exposes. Choices without a
    message are skipped. The collected list is written to
    ``SPANDATA.GEN_AI_RESPONSE_TEXT`` only when it is non-empty.
    """
    collected = []
    for choice in response.choices:
        message = getattr(choice, "message", None)
        if message is None:
            continue

        for dumper_name in ("model_dump", "dict"):
            if hasattr(message, dumper_name):
                collected.append(getattr(message, dumper_name)())
                break
        else:
            # Fallback for basic message objects with no serializer method.
            collected.append(
                {
                    field: getattr(message, field)
                    for field in ("role", "content", "tool_calls")
                    if hasattr(message, field)
                }
            )

    if not collected:
        return
    set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, collected)
|
|
||
|
|
||
def _record_responses_output(span: "Any", response: "ResponsesAPIResponse") -> None:
    """Store response text and tool calls from a Responses API response.

    Walks ``response.output``:

    - ``function_call`` items are serialized (``model_dump()``, else
      ``dict()``) and collected as tool calls; items offering neither method
      are ignored.
    - ``message`` items contribute each content part's ``text`` when it is not
      ``None``; otherwise the part is serialized the same way, or ignored if
      it cannot be.
    - Any other item type is ignored.

    Tool calls go to ``SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS`` and text to
    ``SPANDATA.GEN_AI_RESPONSE_TEXT``, each only when non-empty.
    """
    texts = []  # type: List[Any]
    calls = []  # type: List[Any]
    serializers = ("model_dump", "dict")

    for item in response.output:
        kind = getattr(item, "type", None)

        if kind == "function_call":
            for method_name in serializers:
                if hasattr(item, method_name):
                    calls.append(getattr(item, method_name)())
                    break
            continue

        if kind != "message":
            continue

        for part in getattr(item, "content", []) or []:
            text = getattr(part, "text", None)
            if text is not None:
                texts.append(text)
                continue
            for method_name in serializers:
                if hasattr(part, method_name):
                    texts.append(getattr(part, method_name)())
                    break

    if calls:
        set_data_normalized(
            span,
            SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
            calls,
            unpack=False,
        )
    if texts:
        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, texts)
|
|
||
|
|
||
def _record_token_usage_from_response(span: "Any", response: "Any") -> None:
    """Record token counts from ``response.usage`` on ``span``.

    Nothing is recorded when the response has no ``usage`` attribute or it is
    ``None``. Otherwise the counts are read according to the usage object's
    shape, which depends on the litellm processing pipeline rather than the
    API path:

    - ``ResponseAPIUsage``: raw Responses API usage; reads ``input_tokens`` /
      ``output_tokens`` / ``total_tokens`` attributes. Seen when litellm has
      not yet normalized the value.
    - ``dict``: chat-style keys ``prompt_tokens`` / ``completion_tokens`` /
      ``total_tokens``. litellm assembles streaming Responses API usage as a
      dict.
    - Anything else: chat-style Pydantic ``Usage``; the same three names are
      read as attributes, defaulting to ``None``. Used for Chat Completions,
      Embeddings, and non-streaming Responses API after litellm's
      post-processing.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return

    if isinstance(usage, ResponseAPIUsage):
        counts = (usage.input_tokens, usage.output_tokens, usage.total_tokens)
    elif isinstance(usage, dict):
        counts = (
            usage.get("prompt_tokens"),
            usage.get("completion_tokens"),
            usage.get("total_tokens"),
        )
    else:
        counts = (
            getattr(usage, "prompt_tokens", None),
            getattr(usage, "completion_tokens", None),
            getattr(usage, "total_tokens", None),
        )

    input_tokens, output_tokens, total_tokens = counts
    record_token_usage(
        span,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=total_tokens,
    )
|
|
||
|
|
||
| def _success_callback( | ||
| kwargs: "Dict[str, Any]", | ||
| completion_response: "Any", | ||
| response: "Any", | ||
| start_time: "datetime", | ||
| end_time: "datetime", | ||
| ) -> None: | ||
| """Handle successful completion.""" | ||
| """Handle a successful chat completion, embeddings, or Responses API call. | ||
|
|
||
| The shape of `response` differs between API paths: | ||
| - Chat Completions: ModelResponse with ``.choices[].message`` and | ||
| ``.usage`` carrying ``prompt_tokens`` / ``completion_tokens``. | ||
| - Responses API (non-streaming): ResponsesAPIResponse with ``.output[]`` | ||
| items (``message`` / ``function_call``) and ``.usage`` carrying | ||
| ``input_tokens`` / ``output_tokens``. | ||
| - Responses API (streaming): a ResponseCompletedEvent wrapping a | ||
| ``ResponsesAPIResponse``, which we unwrap below. | ||
| - Embeddings: CreateEmbeddingResponse with ``.usage`` only (no choices | ||
| or output). | ||
| """ | ||
|
|
||
| metadata = _get_metadata_dict(kwargs) | ||
| span = metadata.get("_sentry_span") | ||
|
|
@@ -181,48 +352,23 @@ def _success_callback( | |
| if integration is None: | ||
|
sentry[bot] marked this conversation as resolved.
|
||
| return | ||
|
|
||
| # Streaming Responses API: unwrap the ResponseCompletedEvent so the rest of | ||
| # the function sees the assembled ResponsesAPIResponse directly. | ||
| if isinstance(response, ResponseCompletedEvent): | ||
| response = response.response | ||
|
|
||
| try: | ||
| # Record model information | ||
| if hasattr(completion_response, "model"): | ||
| set_data_normalized( | ||
| span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model | ||
| ) | ||
| # `model` is set by all API shapes (chat / responses / embeddings). | ||
| if hasattr(response, "model"): | ||
| set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) | ||
|
|
||
| # Record response content if allowed | ||
| if should_send_default_pii() and integration.include_prompts: | ||
| if hasattr(completion_response, "choices"): | ||
| response_messages = [] | ||
| for choice in completion_response.choices: | ||
| if hasattr(choice, "message"): | ||
| if hasattr(choice.message, "model_dump"): | ||
| response_messages.append(choice.message.model_dump()) | ||
| elif hasattr(choice.message, "dict"): | ||
| response_messages.append(choice.message.dict()) | ||
| else: | ||
| # Fallback for basic message objects | ||
| msg = {} | ||
| if hasattr(choice.message, "role"): | ||
| msg["role"] = choice.message.role | ||
| if hasattr(choice.message, "content"): | ||
| msg["content"] = choice.message.content | ||
| if hasattr(choice.message, "tool_calls"): | ||
| msg["tool_calls"] = choice.message.tool_calls | ||
| response_messages.append(msg) | ||
|
|
||
| if response_messages: | ||
| set_data_normalized( | ||
| span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages | ||
| ) | ||
| if isinstance(response, ResponsesAPIResponse): | ||
| _record_responses_output(span, response) | ||
| elif hasattr(response, "choices"): | ||
| _record_chat_response_messages(span, response) | ||
|
|
||
| # Record token usage | ||
| if hasattr(completion_response, "usage"): | ||
| usage = completion_response.usage | ||
| record_token_usage( | ||
| span, | ||
| input_tokens=getattr(usage, "prompt_tokens", None), | ||
| output_tokens=getattr(usage, "completion_tokens", None), | ||
| total_tokens=getattr(usage, "total_tokens", None), | ||
| ) | ||
| _record_token_usage_from_response(span, response) | ||
|
|
||
| finally: | ||
| is_streaming = kwargs.get("stream") | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: The function `_record_responses_input_messages` calls `span.set_data` with an unsupported `unpack` argument, which will cause a `TypeError`.
Severity: HIGH
Suggested Fix
Replace the call to `span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False)` with `set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False)`. This aligns with the pattern used elsewhere in the file for setting data with the `unpack` option.
Prompt for AI Agent