diff --git a/chatlas/_chat.py b/chatlas/_chat.py index edfeabdd..85547c65 100644 --- a/chatlas/_chat.py +++ b/chatlas/_chat.py @@ -2571,47 +2571,54 @@ def _chat_impl( *, controller: StreamController, ) -> Generator[str | Content, None, None]: - user_turn_result: UserTurn | None = user_turn - while user_turn_result is not None: - for chunk in self._submit_turns( - user_turn_result, - echo=echo, - stream=stream, - data_model=data_model, - kwargs=kwargs, - content_mode=content, - controller=controller, - ): - yield chunk + from ._otel import end_span, start_agent_span + + agent_span = start_agent_span(self.provider) + try: + user_turn_result: UserTurn | None = user_turn + while user_turn_result is not None: + for chunk in self._submit_turns( + user_turn_result, + echo=echo, + stream=stream, + data_model=data_model, + kwargs=kwargs, + content_mode=content, + controller=controller, + _otel_parent=agent_span, + ): + yield chunk - turn = self.get_last_turn(role="assistant") - assert turn is not None - user_turn_result = None + turn = self.get_last_turn(role="assistant") + assert turn is not None + user_turn_result = None - # Don't invoke tools if the stream was cancelled - if controller.cancelled: - break + # Don't invoke tools if the stream was cancelled + if controller.cancelled: + break - all_results: list[ContentToolResult] = [] - for x in turn.contents: - if isinstance(x, ContentToolRequest): - tool = self._tools.get(x.name) - if tool is not None: - x.tool = ToolInfo.from_tool(tool) - if echo == "output": - self._echo_content(f"\n\n{x}\n\n") - if content == "all": - yield x - results = self._invoke_tool(x) - for res in results: + all_results: list[ContentToolResult] = [] + for x in turn.contents: + if isinstance(x, ContentToolRequest): + tool = self._tools.get(x.name) + if tool is not None: + x.tool = ToolInfo.from_tool(tool) if echo == "output": - self._echo_content(f"\n\n{res}\n\n") + self._echo_content(f"\n\n{x}\n\n") if content == "all": - yield res - 
all_results.append(res) - - if all_results: - user_turn_result = UserTurn(all_results) + yield x + results = self._invoke_tool(x, _otel_parent=agent_span) + for res in results: + if echo == "output": + self._echo_content(f"\n\n{res}\n\n") + if content == "all": + yield res + all_results.append(res) + + if all_results: + user_turn_result = UserTurn(all_results) + finally: + end_span(agent_span) @overload def _chat_impl_async( @@ -2652,53 +2659,60 @@ async def _chat_impl_async( *, controller: StreamController, ) -> AsyncGenerator[str | Content, None]: - user_turn_result: UserTurn | None = user_turn - while user_turn_result is not None: - turn_generator = self._submit_turns_async( - user_turn_result, - echo=echo, - stream=stream, - data_model=data_model, - kwargs=kwargs, - content_mode=content, - controller=controller, - ) - try: - async for chunk in turn_generator: - yield chunk - finally: - await turn_generator.aclose() - - turn = self.get_last_turn(role="assistant") - assert turn is not None - user_turn_result = None + from ._otel import end_span, start_agent_span - # Don't invoke tools if the stream was cancelled - if controller.cancelled: - break - - all_results: list[ContentToolResult] = [] - for x in turn.contents: - if isinstance(x, ContentToolRequest): - tool = self._tools.get(x.name) - if tool is not None: - x.tool = ToolInfo.from_tool(tool) - if echo == "output": - self._echo_content(f"\n\n{x}\n\n") - if content == "all": - yield x - results = self._invoke_tool_async(x) - async for res in results: + agent_span = start_agent_span(self.provider) + try: + user_turn_result: UserTurn | None = user_turn + while user_turn_result is not None: + turn_generator = self._submit_turns_async( + user_turn_result, + echo=echo, + stream=stream, + data_model=data_model, + kwargs=kwargs, + content_mode=content, + controller=controller, + _otel_parent=agent_span, + ) + try: + async for chunk in turn_generator: + yield chunk + finally: + await turn_generator.aclose() + + turn = 
self.get_last_turn(role="assistant") + assert turn is not None + user_turn_result = None + + # Don't invoke tools if the stream was cancelled + if controller.cancelled: + break + + all_results: list[ContentToolResult] = [] + for x in turn.contents: + if isinstance(x, ContentToolRequest): + tool = self._tools.get(x.name) + if tool is not None: + x.tool = ToolInfo.from_tool(tool) if echo == "output": - self._echo_content(f"\n\n{res}\n\n") + self._echo_content(f"\n\n{x}\n\n") if content == "all": - yield res - else: - yield "\n\n" - all_results.append(res) - - if all_results: - user_turn_result = UserTurn(all_results) + yield x + results = self._invoke_tool_async(x, _otel_parent=agent_span) + async for res in results: + if echo == "output": + self._echo_content(f"\n\n{res}\n\n") + if content == "all": + yield res + else: + yield "\n\n" + all_results.append(res) + + if all_results: + user_turn_result = UserTurn(all_results) + finally: + end_span(agent_span) @overload def _submit_turns( @@ -2711,6 +2725,7 @@ def _submit_turns( content_mode: Literal["text"] = "text", *, controller: StreamController, + _otel_parent: Any = None, ) -> Generator[str, None, None]: ... @overload @@ -2724,6 +2739,7 @@ def _submit_turns( *, content_mode: Literal["all"], controller: StreamController, + _otel_parent: Any = None, ) -> Generator[str | Content, None, None]: ... 
def _submit_turns( @@ -2734,82 +2750,105 @@ def _submit_turns( data_model: type[BaseModel] | None = None, kwargs: Optional[SubmitInputArgsT] = None, content_mode: Literal["text", "all"] = "text", + _otel_parent: Any = None, *, controller: StreamController, ) -> Generator[str | Content, None, None]: + from ._otel import end_span, record_chat_result, start_chat_span + if any(isinstance(x, Tool) and x._is_async for x in self._tools.values()): raise ValueError("Cannot use async tools in a synchronous chat") - def emit(text: str | Content): - self._echo_content(str(text)) + system_turn = ( + self._turns[0] + if self._turns and isinstance(self._turns[0], SystemTurn) + else None + ) + history = self._turns[1:] if system_turn is not None else self._turns + chat_span = start_chat_span( + self.provider, + turns=[*history, user_turn], + system_turn=system_turn, + parent=_otel_parent, + ) + try: - emit("
\n\n") + def emit(text: str | Content): + self._echo_content(str(text)) - if echo == "all": - emit_user_contents(user_turn, emit) + emit("
\n\n") - # Start collecting additional keyword args (from model parameters) - all_kwargs = self._collect_all_kwargs(kwargs) + if echo == "all": + emit_user_contents(user_turn, emit) + + # Start collecting additional keyword args (from model parameters) + all_kwargs = self._collect_all_kwargs(kwargs) + + if stream: + response = self.provider.chat_perform( + stream=True, + turns=[*self._turns, user_turn], + tools=self._tools, + data_model=data_model, + kwargs=all_kwargs, + ) - if stream: - response = self.provider.chat_perform( - stream=True, - turns=[*self._turns, user_turn], - tools=self._tools, - data_model=data_model, - kwargs=all_kwargs, - ) + acc = TurnAccumulator(self._turns, controller) + acc.begin_turn(user_turn) + + try: + result = None + for chunk in response: + if controller.cancelled: + break + content = self.provider.stream_content(chunk) + if content is not None: + text = self.provider.stream_text(chunk) + yield from acc.process_content( + content, text, content_mode, emit + ) + result = self.provider.stream_merge_chunks(result, chunk) - acc = TurnAccumulator(self._turns, controller) - acc.begin_turn(user_turn) + yield from acc.flush_thinking(content_mode, emit) - try: - result = None - for chunk in response: - if controller.cancelled: - break - content = self.provider.stream_content(chunk) - if content is not None: - text = self.provider.stream_text(chunk) - yield from acc.process_content(content, text, content_mode, emit) - result = self.provider.stream_merge_chunks(result, chunk) - - yield from acc.flush_thinking(content_mode, emit) - - if not controller.cancelled: - turn = self.provider.stream_turn( - result, - has_data_model=data_model is not None, - ) - if echo == "all": - emit_other_contents(turn, emit) - turn = finalize_assistant_turn(self.provider, turn) - acc.complete_turn(turn) - finally: - acc.finalize_turn() - close_response(response) + if not controller.cancelled: + turn = self.provider.stream_turn( + result, + has_data_model=data_model 
is not None, + ) + if echo == "all": + emit_other_contents(turn, emit) + turn = finalize_assistant_turn(self.provider, turn) + record_chat_result(chat_span, turn) + acc.complete_turn(turn) + finally: + acc.finalize_turn() + close_response(response) - else: - response = self.provider.chat_perform( - stream=False, - turns=[*self._turns, user_turn], - tools=self._tools, - data_model=data_model, - kwargs=all_kwargs, - ) + else: + response = self.provider.chat_perform( + stream=False, + turns=[*self._turns, user_turn], + tools=self._tools, + data_model=data_model, + kwargs=all_kwargs, + ) - turn = self.provider.value_turn( - response, has_data_model=data_model is not None - ) - if turn.text: - emit(turn.text) - yield turn.text + turn = self.provider.value_turn( + response, has_data_model=data_model is not None + ) + if turn.text: + emit(turn.text) + yield turn.text - if echo == "all": - emit_other_contents(turn, emit) + if echo == "all": + emit_other_contents(turn, emit) - turn = finalize_assistant_turn(self.provider, turn) - self._turns.extend([user_turn, turn]) + turn = finalize_assistant_turn(self.provider, turn) + record_chat_result(chat_span, turn) + self._turns.extend([user_turn, turn]) + finally: + end_span(chat_span) @overload def _submit_turns_async( @@ -2822,6 +2861,7 @@ def _submit_turns_async( content_mode: Literal["text"] = "text", *, controller: StreamController, + _otel_parent: Any = None, ) -> AsyncGenerator[str, None]: ... @overload @@ -2835,6 +2875,7 @@ def _submit_turns_async( *, content_mode: Literal["all"], controller: StreamController, + _otel_parent: Any = None, ) -> AsyncGenerator[str | Content, None]: ... 
async def _submit_turns_async( @@ -2845,81 +2886,104 @@ async def _submit_turns_async( data_model: type[BaseModel] | None = None, kwargs: Optional[SubmitInputArgsT] = None, content_mode: Literal["text", "all"] = "text", + _otel_parent: Any = None, *, controller: StreamController, ) -> AsyncGenerator[str | Content, None]: - def emit(text: str | Content): - self._echo_content(str(text)) - - emit("
\n\n") + from ._otel import end_span, record_chat_result, start_chat_span - if echo == "all": - emit_user_contents(user_turn, emit) + system_turn = ( + self._turns[0] + if self._turns and isinstance(self._turns[0], SystemTurn) + else None + ) + history = self._turns[1:] if system_turn is not None else self._turns + chat_span = start_chat_span( + self.provider, + turns=[*history, user_turn], + system_turn=system_turn, + parent=_otel_parent, + ) + try: - # Start collecting additional keyword args (from model parameters) - all_kwargs = self._collect_all_kwargs(kwargs) + def emit(text: str | Content): + self._echo_content(str(text)) - if stream: - response = await self.provider.chat_perform_async( - stream=True, - turns=[*self._turns, user_turn], - tools=self._tools, - data_model=data_model, - kwargs=all_kwargs, - ) + emit("
\n\n") - acc = TurnAccumulator(self._turns, controller) - acc.begin_turn(user_turn) + if echo == "all": + emit_user_contents(user_turn, emit) + + # Start collecting additional keyword args (from model parameters) + all_kwargs = self._collect_all_kwargs(kwargs) + + if stream: + response = await self.provider.chat_perform_async( + stream=True, + turns=[*self._turns, user_turn], + tools=self._tools, + data_model=data_model, + kwargs=all_kwargs, + ) - try: - result = None - async for chunk in response: - if controller.cancelled: - break - content = self.provider.stream_content(chunk) - if content is not None: - text = self.provider.stream_text(chunk) - for item in acc.process_content(content, text, content_mode, emit): - yield item - result = self.provider.stream_merge_chunks(result, chunk) - - for item in acc.flush_thinking(content_mode, emit): - yield item - - if not controller.cancelled: - turn = self.provider.stream_turn( - result, - has_data_model=data_model is not None, - ) - if echo == "all": - emit_other_contents(turn, emit) - turn = finalize_assistant_turn(self.provider, turn) - acc.complete_turn(turn) - finally: - acc.finalize_turn() - await aclose_response(response) + acc = TurnAccumulator(self._turns, controller) + acc.begin_turn(user_turn) + + try: + result = None + async for chunk in response: + if controller.cancelled: + break + content = self.provider.stream_content(chunk) + if content is not None: + text = self.provider.stream_text(chunk) + for item in acc.process_content( + content, text, content_mode, emit + ): + yield item + result = self.provider.stream_merge_chunks(result, chunk) + + for item in acc.flush_thinking(content_mode, emit): + yield item + + if not controller.cancelled: + turn = self.provider.stream_turn( + result, + has_data_model=data_model is not None, + ) + if echo == "all": + emit_other_contents(turn, emit) + turn = finalize_assistant_turn(self.provider, turn) + record_chat_result(chat_span, turn) + acc.complete_turn(turn) + 
finally: + acc.finalize_turn() + await aclose_response(response) - else: - response = await self.provider.chat_perform_async( - stream=False, - turns=[*self._turns, user_turn], - tools=self._tools, - data_model=data_model, - kwargs=all_kwargs, - ) + else: + response = await self.provider.chat_perform_async( + stream=False, + turns=[*self._turns, user_turn], + tools=self._tools, + data_model=data_model, + kwargs=all_kwargs, + ) - turn = self.provider.value_turn( - response, has_data_model=data_model is not None - ) - if turn.text: - emit(turn.text) - yield turn.text + turn = self.provider.value_turn( + response, has_data_model=data_model is not None + ) + if turn.text: + emit(turn.text) + yield turn.text - if echo == "all": - emit_other_contents(turn, emit) + if echo == "all": + emit_other_contents(turn, emit) - turn = finalize_assistant_turn(self.provider, turn) - self._turns.extend([user_turn, turn]) + turn = finalize_assistant_turn(self.provider, turn) + record_chat_result(chat_span, turn) + self._turns.extend([user_turn, turn]) + finally: + end_span(chat_span) def _collect_all_kwargs( self, @@ -2939,7 +3003,9 @@ def _collect_all_kwargs( return all_kwargs - def _invoke_tool(self, request: ContentToolRequest): + def _invoke_tool(self, request: ContentToolRequest, _otel_parent: Any = None): + from ._otel import end_span, record_tool_error, start_tool_span + tool = self._tools.get(request.name) if tool is None: @@ -2959,44 +3025,54 @@ def _invoke_tool(self, request: ContentToolRequest): ) return - # First, invoke the request callbacks. If a ToolRejectError is raised, - # treat it like a tool failure (i.e., gracefully handle it). - result: ContentToolResult | None = None + tool_span = start_tool_span(request, parent=_otel_parent) try: - self._on_tool_request_callbacks.invoke(request) - except ToolRejectError as e: - yield self._handle_tool_error_result(request, e) - return + # First, invoke the request callbacks. 
If a ToolRejectError is raised, + # treat it like a tool failure (i.e., gracefully handle it). + result: ContentToolResult | None = None + try: + self._on_tool_request_callbacks.invoke(request) + except ToolRejectError as e: + record_tool_error(tool_span, e) + yield self._handle_tool_error_result(request, e) + return - try: - if isinstance(request.arguments, dict): - res = tool.func(**request.arguments) - else: - res = tool.func(request.arguments) + try: + if isinstance(request.arguments, dict): + res = tool.func(**request.arguments) + else: + res = tool.func(request.arguments) - # Normalize res as a generator of results. - if not inspect.isgenerator(res): + # Normalize res as a generator of results. + if not inspect.isgenerator(res): - def _as_generator(res): - yield res + def _as_generator(res): + yield res - res = _as_generator(res) + res = _as_generator(res) - for x in res: - if isinstance(x, ContentToolResult): - result = x - else: - result = ContentToolResult(value=x) + for x in res: + if isinstance(x, ContentToolResult): + result = x + else: + result = ContentToolResult(value=x) - result.request = request + result.request = request - self._on_tool_result_callbacks.invoke(result) - yield result + self._on_tool_result_callbacks.invoke(result) + yield result - except Exception as e: - yield self._handle_tool_error_result(request, e) + except Exception as e: + record_tool_error(tool_span, e) + yield self._handle_tool_error_result(request, e) + finally: + end_span(tool_span) + + async def _invoke_tool_async( + self, request: ContentToolRequest, _otel_parent: Any = None + ): + from ._otel import end_span, record_tool_error, start_tool_span - async def _invoke_tool_async(self, request: ContentToolRequest): tool = self._tools.get(request.name) if tool is None: @@ -3016,47 +3092,53 @@ async def _invoke_tool_async(self, request: ContentToolRequest): ) return - # First, invoke the request callbacks. 
If a ToolRejectError is raised, - # treat it like a tool failure (i.e., gracefully handle it). - result: ContentToolResult | None = None + tool_span = start_tool_span(request, parent=_otel_parent) try: - await self._on_tool_request_callbacks.invoke_async(request) - except ToolRejectError as e: - yield self._handle_tool_error_result(request, e) - return - - if tool._is_async: - func = tool.func - else: - func = wrap_async(tool.func) - - # Invoke the tool (if it hasn't been rejected). - try: - if isinstance(request.arguments, dict): - res = await func(**request.arguments) + # First, invoke the request callbacks. If a ToolRejectError is raised, + # treat it like a tool failure (i.e., gracefully handle it). + result: ContentToolResult | None = None + try: + await self._on_tool_request_callbacks.invoke_async(request) + except ToolRejectError as e: + record_tool_error(tool_span, e) + yield self._handle_tool_error_result(request, e) + return + + if tool._is_async: + func = tool.func else: - res = await func(request.arguments) + func = wrap_async(tool.func) - # Normalize res into a generator of results. - if not inspect.isasyncgen(res): + # Invoke the tool (if it hasn't been rejected). + try: + if isinstance(request.arguments, dict): + res = await func(**request.arguments) + else: + res = await func(request.arguments) - async def _as_async_generator(res): - yield res + # Normalize res into a generator of results. 
+ if not inspect.isasyncgen(res): - res = _as_async_generator(res) + async def _as_async_generator(res): + yield res - async for x in res: - if isinstance(x, ContentToolResult): - result = x - else: - result = ContentToolResult(value=x) + res = _as_async_generator(res) + + async for x in res: + if isinstance(x, ContentToolResult): + result = x + else: + result = ContentToolResult(value=x) - result.request = request - await self._on_tool_result_callbacks.invoke_async(result) - yield result + result.request = request + await self._on_tool_result_callbacks.invoke_async(result) + yield result - except Exception as e: - yield self._handle_tool_error_result(request, e) + except Exception as e: + record_tool_error(tool_span, e) + yield self._handle_tool_error_result(request, e) + finally: + end_span(tool_span) def _handle_tool_error_result(self, request: ContentToolRequest, error: Exception): warnings.warn( diff --git a/chatlas/_otel.py b/chatlas/_otel.py new file mode 100644 index 00000000..f67f447a --- /dev/null +++ b/chatlas/_otel.py @@ -0,0 +1,197 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Any, Optional + +from opentelemetry import trace +from opentelemetry.trace import SpanKind, StatusCode + +if TYPE_CHECKING: + from opentelemetry.trace import Span + + from ._content import Content, ContentToolRequest + from ._provider import Provider + from ._turn import AssistantTurn, SystemTurn, Turn + + +tracer = trace.get_tracer("com.posit.python-package.chatlas") + +capture_content: bool = os.environ.get( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "" +).lower() in ("true", "1") + + +def start_agent_span(provider: Provider[Any, Any, Any, Any]) -> Span: + return tracer.start_span( + "invoke_agent", + kind=SpanKind.CLIENT, + attributes={ + "gen_ai.operation.name": "invoke_agent", + "gen_ai.provider.name": provider.name.lower(), + "gen_ai.request.model": provider.model, + }, + ) + + +def start_chat_span( + provider: 
Provider[Any, Any, Any, Any], + turns: list[Turn], + system_turn: Optional[SystemTurn], + parent: Span, +) -> Span: + ctx = trace.set_span_in_context(parent) + span = tracer.start_span( + f"chat {provider.model}", + kind=SpanKind.CLIENT, + attributes={ + "gen_ai.operation.name": "chat", + "gen_ai.provider.name": provider.name.lower(), + "gen_ai.request.model": provider.model, + }, + context=ctx, + ) + + if capture_content and span.is_recording(): + record_input_content(span, turns, system_turn) + + return span + + +def start_tool_span( + request: ContentToolRequest, + parent: Span, +) -> Span: + ctx = trace.set_span_in_context(parent) + + attrs: dict[str, Any] = { + "gen_ai.operation.name": "execute_tool", + "gen_ai.tool.name": request.name, + "gen_ai.tool.call.id": request.id, + } + if request.tool is not None and request.tool.description: + attrs["gen_ai.tool.description"] = request.tool.description + + return tracer.start_span( + f"execute_tool {request.name}", + attributes=attrs, + context=ctx, + ) + + +def record_chat_result( + span: Span, + turn: AssistantTurn[Any], +) -> None: + if not span.is_recording(): + return + + if turn.tokens is not None: + input_tokens, output_tokens, cached_input = turn.tokens + span.set_attribute("gen_ai.usage.input_tokens", input_tokens + cached_input) + span.set_attribute("gen_ai.usage.output_tokens", output_tokens) + + completion = turn.completion + if completion is not None: + response_model = getattr(completion, "model", None) or getattr( + completion, "model_version", None + ) + if response_model is not None: + span.set_attribute("gen_ai.response.model", str(response_model)) + + response_id = getattr(completion, "id", None) or getattr( + completion, "response_id", None + ) + if response_id is not None: + span.set_attribute("gen_ai.response.id", str(response_id)) + + if capture_content: + try: + msg = as_otel_message(turn) + span.set_attribute("gen_ai.output.messages", to_json([msg])) + except Exception: + pass + + 
span.set_status(StatusCode.OK) + + +def record_tool_error(span: Span, error: Exception) -> None: + if not span.is_recording(): + return + + span.record_exception(error) + span.set_attribute("error.type", type(error).__name__) + span.set_status(StatusCode.ERROR, str(error)) + + +def end_span(span: Span) -> None: + span.end() + + +def as_otel_message(turn: Turn) -> dict[str, Any]: + from ._content import ContentToolResult + from ._turn import UserTurn + + is_tool_turn = ( + isinstance(turn, UserTurn) + and len(turn.contents) > 0 + and all(isinstance(c, ContentToolResult) for c in turn.contents) + ) + + return { + "role": "tool" if is_tool_turn else turn.role, + "parts": [as_otel_part(c) for c in turn.contents], + } + + +def as_otel_part(content: Content) -> dict[str, Any]: + from ._content import ContentText, ContentToolRequest, ContentToolResult + + if isinstance(content, ContentText): + return {"type": "text", "content": content.text} + + if isinstance(content, ContentToolRequest): + return { + "type": "tool_call", + "id": content.id, + "name": content.name, + "arguments": content.arguments, + } + + if isinstance(content, ContentToolResult): + part: dict[str, Any] = {"type": "tool_call_response"} + if content.request is not None: + part["id"] = content.request.id + if content.error is not None: + part["response"] = str(content.error) + elif isinstance(content.value, str): + part["response"] = content.value + else: + try: + part["response"] = to_json(content.value) + except Exception: + part["response"] = str(content.value) + return part + + return {"type": "generic", "class": type(content).__name__} + + +def to_json(obj: Any) -> str: + import orjson + + return orjson.dumps(obj).decode("utf-8") + + +def record_input_content( + span: Span, + turns: list[Turn], + system_turn: Optional[SystemTurn], +) -> None: + try: + if system_turn is not None: + parts = [as_otel_part(c) for c in system_turn.contents] + span.set_attribute("gen_ai.system_instructions", to_json(parts)) 
+ + msgs = [as_otel_message(t) for t in turns] + span.set_attribute("gen_ai.input.messages", to_json(msgs)) + except Exception: + pass diff --git a/docs/get-started/monitor.qmd b/docs/get-started/monitor.qmd index bffe8753..fa7170bb 100644 --- a/docs/get-started/monitor.qmd +++ b/docs/get-started/monitor.qmd @@ -5,14 +5,111 @@ callout-appearance: simple As mentioned in the [debugging](debug.qmd) section, chatlas has support for gaining more insight into the behavior of your application through things like [logging](debug.qmd#logging). -However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.io/), etc., to monitor your application. +However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools like [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.pydantic.dev/), etc., to monitor your application. These tools can give you a more structured way to view and monitor app performance, including things like latency, error rates, and other metrics. These tools tend to integrate well with open standards like [OpenTelemetry](https://opentelemetry.io/) (OTel), meaning if you "instrument" your app with OTel, you can view your app's telemetry data in any observability tool that supports OTel. -There are at least a few different ways to do this, but we'll cover some of the more simpler approaches here. 
-## OpenLLMetry +## Framework-Level Tracing -The simplest (and most model agnostic) way to instrument your app with OTel is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app: +Chatlas can emit OpenTelemetry spans that capture the full structure of multi-turn conversations, including tool execution: + +``` +invoke_agent # wraps the full chat loop +├── chat gpt-4o # each model API call +├── execute_tool get_weather # each tool invocation +├── chat gpt-4o # follow-up model call +└── ... +``` + +### Quick Start: Console Output + +The simplest way to see chatlas spans is to print them to the console. Install the OTel SDK: + +```bash +pip install opentelemetry-sdk +``` + +Then configure a console exporter before creating any chats: + +```python +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + +provider = TracerProvider() +provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) +trace.set_tracer_provider(provider) +``` + +That's it — chatlas will automatically emit spans for every `chat()`, `stream()`, and tool invocation. + +### Production: Logfire + +For production observability, we recommend [Pydantic Logfire](https://logfire.pydantic.dev/), which provides a dashboard for exploring traces with minimal setup: + +```bash +pip install logfire +logfire auth +``` + +```python +import logfire + +logfire.configure() +``` + +Other OpenTelemetry-compatible backends (Datadog, Honeycomb, Jaeger, etc.) work too — just configure the appropriate exporter via the standard `opentelemetry-sdk` and `opentelemetry-exporter-*` packages. 
+ +### Configuration Module Pattern + +For apps (especially Shiny apps), extract OTel setup into a dedicated module that runs before any other imports: + +```python +# otel_config.py +import logfire +logfire.configure() +``` + +```python +# app.py +import otel_config # noqa: F401 — side-effect import +from chatlas import ChatOpenAI +# ... +``` + +### What's Captured + +Each **chat span** records: + +- Token usage (`gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`) +- Response model and ID +- Optionally: system prompt, input messages, output messages + +Each **tool span** records: + +- Tool name, description, and call ID +- Errors (if the tool raises) + +### Content Capture + +Message content is **not** captured by default (it may contain sensitive data). To enable: + +```bash +export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +``` + +### Combining with Provider Instrumentors + +Framework spans complement the provider-specific instrumentors described below. If you have both enabled, you get visibility at both the application and HTTP levels. Note that chatlas starts its spans with explicit parents and does not make them the current span in the OpenTelemetry context, so a provider instrumentor's HTTP spans are parented to whatever span is current in your application rather than nested under chatlas's chat spans. + + +## Provider-Specific Instrumentors + +In addition to (or instead of) chatlas's built-in framework-level tracing, you can use provider-specific OpenTelemetry instrumentor libraries. These operate at the HTTP/SDK level and capture lower-level details about each API call. 
+ +### OpenLLMetry {#openllmetry} + +The simplest (and most model agnostic) way to add provider-level instrumentation is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app: ```bash pip install traceloop-sdk @@ -32,7 +129,7 @@ From here, a quick and easy way to get started visualizing your app's telemetry If you want to avoid the Traceloop Python SDK, you can also use these OTel instrumentation libraries from the openllmetry project more directly (e.g., [openai](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai) and [anthropic](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-anthropic)). -## OpenTelemetry +### Official OpenTelemetry Libraries To use OpenTelemetry's "official" instrumentation libraries, you'll need to first install the relevant instrumentation packages for the model providers you are using. diff --git a/pyproject.toml b/pyproject.toml index 9b9ab8d7..c180d218 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,8 @@ dependencies = [ "jinja2", "orjson", "rich", - "openai" + "openai", + "opentelemetry-api>=1.0" ] classifiers = [ "Development Status :: 4 - Beta", @@ -43,6 +44,7 @@ test = [ "syrupy>=4", "vcrpy>=6.0.0", "pytest-recording>=0.13", + "opentelemetry-sdk>=1.0", ] dev = [ "ruff>=0.6.5", @@ -64,7 +66,8 @@ dev = [ "snowflake-ml-python>=1.8.4", # torch (a dependency of snowflake-ml-python) is not yet compatible with Python >3.11 "torch;python_version<='3.11'", - "tenacity" + "tenacity", + "opentelemetry-sdk>=1.0", ] docs = [ "griffe>=1.3.2,<2.0.0", diff --git a/tests/_vcr/test_otel/test_content_capture_enabled.yaml b/tests/_vcr/test_otel/test_content_capture_enabled.yaml new file mode 100644 index 00000000..dc53c602 --- /dev/null +++ b/tests/_vcr/test_otel/test_content_capture_enabled.yaml @@ -0,0 +1,111 @@ +interactions: +- request: + body: '{"input": [{"role": "system", 
"content": [{"type": "input_text", "text": + "Be terse."}]}, {"role": "user", "content": [{"type": "input_text", "text": + "Say hello."}]}], "model": "gpt-4o-mini", "store": false, "stream": true}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '202' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: {"type":"response.created","response":{"id":"resp_01dcaa7663ed9c7b016a038b49d7b081978b9da12d5c1a24f7","object":"response","created_at":1778617161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: 
{"type":"response.in_progress","response":{"id":"resp_01dcaa7663ed9c7b016a038b49d7b081978b9da12d5c1a24f7","object":"response","created_at":1778617161,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"Hello","item_id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","logprobs":[],"obfuscation":"pY80aBEPuLV","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","logprobs":[],"obfuscation":"AluhTtUbF646oMu","output_index":0,"sequence_number":5} + + + event: response.output_text.done + + 
data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","logprobs":[],"output_index":0,"sequence_number":6,"text":"Hello!"} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello!"},"sequence_number":7} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello!"}],"role":"assistant"},"output_index":0,"sequence_number":8} + + + event: response.completed + + data: {"type":"response.completed","response":{"id":"resp_01dcaa7663ed9c7b016a038b49d7b081978b9da12d5c1a24f7","object":"response","created_at":1778617161,"status":"completed","background":false,"completed_at":1778617162,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_01dcaa7663ed9c7b016a038b4a9948819791480bec3982fa7c","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello!"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":17,"input_tokens_details":{"cached_tokens":0},"output_tokens":3,"output_tokens_details":{"reasoning_tokens
":0},"total_tokens":20},"user":null,"metadata":{}},"sequence_number":9} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e2cf87facdc-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:21 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '131' + openai-version: + - '2020-10-01' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/_vcr/test_otel/test_content_capture_off_by_default.yaml b/tests/_vcr/test_otel/test_content_capture_off_by_default.yaml new file mode 100644 index 00000000..da523763 --- /dev/null +++ b/tests/_vcr/test_otel/test_content_capture_off_by_default.yaml @@ -0,0 +1,149 @@ +interactions: +- request: + body: '{"input": [{"role": "user", "content": [{"type": "input_text", "text": + "Say hello."}]}], "model": "gpt-4o-mini", "store": false, "stream": true}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '131' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: 
{"type":"response.created","response":{"id":"resp_04171bfa9c19f237016a038b48edec8196a762b5f7d5361ed2","object":"response","created_at":1778617160,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_04171bfa9c19f237016a038b48edec8196a762b5f7d5361ed2","object":"response","created_at":1778617160,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: 
{"type":"response.output_item.added","item":{"id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"Hello","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"STRAZclEFyO","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"Eq1D7a62WN7o0t0","output_index":0,"sequence_number":5} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" How","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"28lmttDG6zKa","output_index":0,"sequence_number":6} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" can","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"VjCOoCQOUXlY","output_index":0,"sequence_number":7} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" I","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"bvZ3AVBCzs4mAx","output_index":0,"sequence_number":8} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" 
assist","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"HylOQ36Tf","output_index":0,"sequence_number":9} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" you","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"5Vj05rykRNi7","output_index":0,"sequence_number":10} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" today","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"J8c3J9RGwi","output_index":0,"sequence_number":11} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"?","item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"obfuscation":"rcwYTs66B9Jrp0f","output_index":0,"sequence_number":12} + + + event: response.output_text.done + + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","logprobs":[],"output_index":0,"sequence_number":13,"text":"Hello! + How can I assist you today?"} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"},"sequence_number":14} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! 
+ How can I assist you today?"}],"role":"assistant"},"output_index":0,"sequence_number":15} + + + event: response.completed + + data: {"type":"response.completed","response":{"id":"resp_04171bfa9c19f237016a038b48edec8196a762b5f7d5361ed2","object":"response","created_at":1778617160,"status":"completed","background":false,"completed_at":1778617161,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_04171bfa9c19f237016a038b49590481969cffd96137fa028e","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":10,"input_tokens_details":{"cached_tokens":0},"output_tokens":10,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":20},"user":null,"metadata":{}},"sequence_number":16} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e271dfe511a-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:21 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '157' + openai-version: + - '2020-10-01' + status: + code: 200 + 
message: OK +version: 1 diff --git a/tests/_vcr/test_otel/test_span_hierarchy_with_tools.yaml b/tests/_vcr/test_otel/test_span_hierarchy_with_tools.yaml new file mode 100644 index 00000000..f39bf805 --- /dev/null +++ b/tests/_vcr/test_otel/test_span_hierarchy_with_tools.yaml @@ -0,0 +1,271 @@ +interactions: +- request: + body: '{"input": [{"role": "system", "content": [{"type": "input_text", "text": + "Always use the get_date tool to answer questions about the date."}]}, {"role": + "user", "content": [{"type": "input_text", "text": "What is today''s date?"}]}], + "model": "gpt-4o-mini", "store": false, "stream": true, "tools": [{"type": "function", + "name": "get_date", "description": "Return the current date", "parameters": + {"properties": {}, "type": "object", "additionalProperties": false, "required": + []}, "strict": true}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '460' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: 
{"type":"response.created","response":{"id":"resp_0178fd92505350bf016a038b45068881948bdd5cfa9d2ff088","object":"response","created_at":1778617157,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return + the current date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_0178fd92505350bf016a038b45068881948bdd5cfa9d2ff088","object":"response","created_at":1778617157,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return + the current 
date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97","type":"function_call","status":"in_progress","arguments":"","call_id":"call_SYeKC74NjV5qSZ1mrgCFU6Xh","name":"get_date"},"output_index":0,"sequence_number":2} + + + event: response.function_call_arguments.delta + + data: {"type":"response.function_call_arguments.delta","delta":"{}","item_id":"fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97","obfuscation":"MzyDvaK6muJ1l8","output_index":0,"sequence_number":3} + + + event: response.function_call_arguments.done + + data: {"type":"response.function_call_arguments.done","arguments":"{}","item_id":"fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97","output_index":0,"sequence_number":4} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97","type":"function_call","status":"completed","arguments":"{}","call_id":"call_SYeKC74NjV5qSZ1mrgCFU6Xh","name":"get_date"},"output_index":0,"sequence_number":5} + + + event: response.completed + + data: 
{"type":"response.completed","response":{"id":"resp_0178fd92505350bf016a038b45068881948bdd5cfa9d2ff088","object":"response","created_at":1778617157,"status":"completed","background":false,"completed_at":1778617158,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97","type":"function_call","status":"completed","arguments":"{}","call_id":"call_SYeKC74NjV5qSZ1mrgCFU6Xh","name":"get_date"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return + the current date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":52,"input_tokens_details":{"cached_tokens":0},"output_tokens":11,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":63},"user":null,"metadata":{}},"sequence_number":6} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e0acea84c9e-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:17 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '281' + openai-version: + - '2020-10-01' + status: + code: 200 + 
message: OK +- request: + body: '{"input": [{"role": "system", "content": [{"type": "input_text", "text": + "Always use the get_date tool to answer questions about the date."}]}, {"role": + "user", "content": [{"type": "input_text", "text": "What is today''s date?"}]}, + {"type": "function_call", "call_id": "fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97", + "name": "get_date", "arguments": "{}"}, {"type": "function_call_output", "call_id": + "fc_0178fd92505350bf016a038b4646348194b3c5853d39913b97", "output": "2026-05-12"}], + "model": "gpt-4o-mini", "store": false, "stream": true, "tools": [{"type": "function", + "name": "get_date", "description": "Return the current date", "parameters": + {"properties": {}, "type": "object", "additionalProperties": false, "required": + []}, "strict": true}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '706' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: {"type":"response.created","response":{"id":"resp_016561209ff2495e016a038b46d49c819fa8415e118d9e080b","object":"response","created_at":1778617158,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return 
+ the current date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_016561209ff2495e016a038b46d49c819fa8415e118d9e080b","object":"response","created_at":1778617158,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return + the current date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: 
{"type":"response.output_text.delta","content_index":0,"delta":"Today''s","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"XBGbVr60A","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" date","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"hufd6GVoGUi","output_index":0,"sequence_number":5} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" is","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"anh3qrRO9rO19","output_index":0,"sequence_number":6} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" May","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"uruiS2eWhDs7","output_index":0,"sequence_number":7} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" ","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"8uMjiiq6EPxahnO","output_index":0,"sequence_number":8} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"12","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"gOm7VfIkcnfzlF","output_index":0,"sequence_number":9} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":",","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"gsgAh38lRuq1f4u","output_index":0,"sequence_number":10} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" 
","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"SEUG83vKDokH5kv","output_index":0,"sequence_number":11} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"202","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"DULCuUWwN130Y","output_index":0,"sequence_number":12} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"6","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"GV45AS53EQ1NcGb","output_index":0,"sequence_number":13} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":".","item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"obfuscation":"NHEiDA50h3gEpxS","output_index":0,"sequence_number":14} + + + event: response.output_text.done + + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","logprobs":[],"output_index":0,"sequence_number":15,"text":"Today''s + date is May 12, 2026."} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Today''s + date is May 12, 2026."},"sequence_number":16} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Today''s + date is May 12, 2026."}],"role":"assistant"},"output_index":0,"sequence_number":17} + + + event: response.completed + + data: 
{"type":"response.completed","response":{"id":"resp_016561209ff2495e016a038b46d49c819fa8415e118d9e080b","object":"response","created_at":1778617158,"status":"completed","background":false,"completed_at":1778617159,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_016561209ff2495e016a038b475b30819fbbf40ac9bb1a9670","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Today''s + date is May 12, 2026."}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Return + the current date","name":"get_date","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":74,"input_tokens_details":{"cached_tokens":0},"output_tokens":13,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":87},"user":null,"metadata":{}},"sequence_number":18} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e1a1affacc9-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:19 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '193' + 
openai-version: + - '2020-10-01' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/_vcr/test_otel/test_streaming_span_lifecycle.yaml b/tests/_vcr/test_otel/test_streaming_span_lifecycle.yaml new file mode 100644 index 00000000..3d2f3f39 --- /dev/null +++ b/tests/_vcr/test_otel/test_streaming_span_lifecycle.yaml @@ -0,0 +1,149 @@ +interactions: +- request: + body: '{"input": [{"role": "user", "content": [{"type": "input_text", "text": + "Say hello."}]}], "model": "gpt-4o-mini", "store": false, "stream": true}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '131' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: {"type":"response.created","response":{"id":"resp_0f5666f658f87b93016a038b4e7678819d9983b09c32aa61a6","object":"response","created_at":1778617166,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: 
{"type":"response.in_progress","response":{"id":"resp_0f5666f658f87b93016a038b4e7678819d9983b09c32aa61a6","object":"response","created_at":1778617166,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"Hello","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"S7BXdc8CQKB","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"6HXXDD9qRcJn8aU","output_index":0,"sequence_number":5} + + + event: response.output_text.delta + + 
data: {"type":"response.output_text.delta","content_index":0,"delta":" How","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"NRxbg9QUoQHm","output_index":0,"sequence_number":6} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" can","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"rFv3UOraBC58","output_index":0,"sequence_number":7} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" I","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"IiYggymehLkuvT","output_index":0,"sequence_number":8} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" assist","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"EByPAkYvh","output_index":0,"sequence_number":9} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" you","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"SydVVchyJrP5","output_index":0,"sequence_number":10} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" today","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"JBkA4qmLSf","output_index":0,"sequence_number":11} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"?","item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"obfuscation":"J7cpMBcPyuNWjb0","output_index":0,"sequence_number":12} + + + event: response.output_text.done + + data: 
{"type":"response.output_text.done","content_index":0,"item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","logprobs":[],"output_index":0,"sequence_number":13,"text":"Hello! + How can I assist you today?"} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"},"sequence_number":14} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"}],"role":"assistant"},"output_index":0,"sequence_number":15} + + + event: response.completed + + data: {"type":"response.completed","response":{"id":"resp_0f5666f658f87b93016a038b4e7678819d9983b09c32aa61a6","object":"response","created_at":1778617166,"status":"completed","background":false,"completed_at":1778617168,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_0f5666f658f87b93016a038b504320819da578a58f67ea87ba","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! 
+ How can I assist you today?"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":10,"input_tokens_details":{"cached_tokens":0},"output_tokens":10,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":20},"user":null,"metadata":{}},"sequence_number":16} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e49aa98acea-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:26 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '154' + openai-version: + - '2020-10-01' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/_vcr/test_otel/test_token_usage_recorded.yaml b/tests/_vcr/test_otel/test_token_usage_recorded.yaml new file mode 100644 index 00000000..594a1dc1 --- /dev/null +++ b/tests/_vcr/test_otel/test_token_usage_recorded.yaml @@ -0,0 +1,149 @@ +interactions: +- request: + body: '{"input": [{"role": "user", "content": [{"type": "input_text", "text": + "Say hello."}]}], "model": "gpt-4o-mini", "store": false, "stream": true}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '131' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + 
x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: {"type":"response.created","response":{"id":"resp_02336e1e704f9374016a038b481fb88196acbba53888345668","object":"response","created_at":1778617160,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_02336e1e704f9374016a038b481fb88196acbba53888345668","object":"response","created_at":1778617160,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: 
{"type":"response.output_item.added","item":{"id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"Hello","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"bYBxFnuljlc","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"!","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"dkyifaAVe7gNxS9","output_index":0,"sequence_number":5} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" How","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"zvCMVAfX43DL","output_index":0,"sequence_number":6} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" can","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"XUU832qcWgZT","output_index":0,"sequence_number":7} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" I","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"agA6ckcGOsKrMS","output_index":0,"sequence_number":8} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" 
assist","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"QZidHeGlo","output_index":0,"sequence_number":9} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" you","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"jdjWoscOAv9g","output_index":0,"sequence_number":10} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" today","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"JtsTBbOX4E","output_index":0,"sequence_number":11} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"?","item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"obfuscation":"yW61oPXP0nQrktC","output_index":0,"sequence_number":12} + + + event: response.output_text.done + + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","logprobs":[],"output_index":0,"sequence_number":13,"text":"Hello! + How can I assist you today?"} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"},"sequence_number":14} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! 
+ How can I assist you today?"}],"role":"assistant"},"output_index":0,"sequence_number":15} + + + event: response.completed + + data: {"type":"response.completed","response":{"id":"resp_02336e1e704f9374016a038b481fb88196acbba53888345668","object":"response","created_at":1778617160,"status":"completed","background":false,"completed_at":1778617160,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_02336e1e704f9374016a038b4873dc8196ae70aebd80a30540","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"Hello! + How can I assist you today?"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":10,"input_tokens_details":{"cached_tokens":0},"output_tokens":10,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":20},"user":null,"metadata":{}},"sequence_number":16} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e2018c6ace1-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:20 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '113' + openai-version: + - '2020-10-01' + status: + code: 200 + 
message: OK +version: 1 diff --git a/tests/_vcr/test_otel/test_tool_error_recorded.yaml b/tests/_vcr/test_otel/test_tool_error_recorded.yaml new file mode 100644 index 00000000..c0745125 --- /dev/null +++ b/tests/_vcr/test_otel/test_tool_error_recorded.yaml @@ -0,0 +1,286 @@ +interactions: +- request: + body: '{"input": [{"role": "system", "content": [{"type": "input_text", "text": + "Always use the fail_tool to answer. Don''t retry if it errors."}]}, {"role": + "user", "content": [{"type": "input_text", "text": "Please call the fail_tool."}]}], + "model": "gpt-4o-mini", "store": false, "stream": true, "tools": [{"type": "function", + "name": "fail_tool", "description": "A tool that always fails", "parameters": + {"properties": {}, "type": "object", "additionalProperties": false, "required": + []}, "strict": true}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '464' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: 
{"type":"response.created","response":{"id":"resp_0c4c334dab0d2a62016a038b4b029c81a29ca6f8fab42c4cb9","object":"response","created_at":1778617163,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_0c4c334dab0d2a62016a038b4b029c81a29ca6f8fab42c4cb9","object":"response","created_at":1778617163,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always 
fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34","type":"function_call","status":"in_progress","arguments":"","call_id":"call_WbQo0WgzkowanWgKwNFKhLQ2","name":"fail_tool"},"output_index":0,"sequence_number":2} + + + event: response.function_call_arguments.delta + + data: {"type":"response.function_call_arguments.delta","delta":"{}","item_id":"fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34","obfuscation":"ALov6E6edPT9bD","output_index":0,"sequence_number":3} + + + event: response.function_call_arguments.done + + data: {"type":"response.function_call_arguments.done","arguments":"{}","item_id":"fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34","output_index":0,"sequence_number":4} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34","type":"function_call","status":"completed","arguments":"{}","call_id":"call_WbQo0WgzkowanWgKwNFKhLQ2","name":"fail_tool"},"output_index":0,"sequence_number":5} + + + event: response.completed + + data: 
{"type":"response.completed","response":{"id":"resp_0c4c334dab0d2a62016a038b4b029c81a29ca6f8fab42c4cb9","object":"response","created_at":1778617163,"status":"completed","background":false,"completed_at":1778617163,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34","type":"function_call","status":"completed","arguments":"{}","call_id":"call_WbQo0WgzkowanWgKwNFKhLQ2","name":"fail_tool"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":55,"input_tokens_details":{"cached_tokens":0},"output_tokens":11,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":66},"user":null,"metadata":{}},"sequence_number":6} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e346dbe511a-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:23 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '384' + openai-version: + - '2020-10-01' + status: + code: 200 + 
message: OK +- request: + body: '{"input": [{"role": "system", "content": [{"type": "input_text", "text": + "Always use the fail_tool to answer. Don''t retry if it errors."}]}, {"role": + "user", "content": [{"type": "input_text", "text": "Please call the fail_tool."}]}, + {"type": "function_call", "call_id": "fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34", + "name": "fail_tool", "arguments": "{}"}, {"type": "function_call_output", "call_id": + "fc_0c4c334dab0d2a62016a038b4bbf7c81a2b16db917b4f4ca34", "output": "Tool call + failed with error: ''intentional test error''"}], "model": "gpt-4o-mini", "store": + false, "stream": true, "tools": [{"type": "function", "name": "fail_tool", "description": + "A tool that always fails", "parameters": {"properties": {}, "type": "object", + "additionalProperties": false, "required": []}, "strict": true}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '754' + Content-Type: + - application/json + Host: + - api.openai.com + X-Stainless-Async: + - 'false' + x-stainless-read-timeout: + - '600' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: 
{"type":"response.created","response":{"id":"resp_0b2f0c3ab1d5cf5b016a038b4c10248191add5178da84f4073","object":"response","created_at":1778617164,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + + event: response.in_progress + + data: {"type":"response.in_progress","response":{"id":"resp_0b2f0c3ab1d5cf5b016a038b4c10248191add5178da84f4073","object":"response","created_at":1778617164,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always 
fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","item":{"id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2} + + + event: response.content_part.added + + data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"The","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"QASPq1ZiU20jp","output_index":0,"sequence_number":4} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" tool","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"i1Zoh0leQLo","output_index":0,"sequence_number":5} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" call","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"taVTtDNKPw9","output_index":0,"sequence_number":6} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" resulted","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"m2L5G98","output_index":0,"sequence_number":7} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" 
in","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"CkkVWsofQWudO","output_index":0,"sequence_number":8} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" an","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"SMF32SpLYdFwH","output_index":0,"sequence_number":9} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" error","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"jWR4o9KOaE","output_index":0,"sequence_number":10} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":":","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"SvuiEjlwmfPqzhx","output_index":0,"sequence_number":11} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" \"","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"EpOJqtTqh5YbjT","output_index":0,"sequence_number":12} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"intent","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"YiB8la8Rsh","output_index":0,"sequence_number":13} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":"ional","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"ML9XxoUNtZf","output_index":0,"sequence_number":14} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" 
test","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"HgHEpCa4WYB","output_index":0,"sequence_number":15} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":" error","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"Tojca2FUmK","output_index":0,"sequence_number":16} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","content_index":0,"delta":".\"","item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"obfuscation":"kzIHfW6ncfiYvD","output_index":0,"sequence_number":17} + + + event: response.output_text.done + + data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","logprobs":[],"output_index":0,"sequence_number":18,"text":"The + tool call resulted in an error: \"intentional test error.\""} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"The + tool call resulted in an error: \"intentional test error.\""},"sequence_number":19} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","item":{"id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"The + tool call resulted in an error: \"intentional test error.\""}],"role":"assistant"},"output_index":0,"sequence_number":20} + + + event: response.completed + + data: 
{"type":"response.completed","response":{"id":"resp_0b2f0c3ab1d5cf5b016a038b4c10248191add5178da84f4073","object":"response","created_at":1778617164,"status":"completed","background":false,"completed_at":1778617166,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","moderation":null,"output":[{"id":"msg_0b2f0c3ab1d5cf5b016a038b4dba188191bd41ca069d753f0c","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"The + tool call resulted in an error: \"intentional test error.\""}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"A + tool that always fails","name":"fail_tool","parameters":{"properties":{},"type":"object","additionalProperties":false,"required":[]},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":83,"input_tokens_details":{"cached_tokens":0},"output_tokens":16,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":99},"user":null,"metadata":{}},"sequence_number":21} + + + ' + headers: + Access-Control-Expose-Headers: + - CF-Ray + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9fac1e3ab93ea203-MSP + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 12 May 2026 20:19:24 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + - CF-Ray + alt-svc: + - h3=":443"; ma=86400 + 
"""Tests for chatlas' OpenTelemetry instrumentation.

Every test runs with ``chatlas._otel.tracer`` swapped for a tracer backed by
an in-memory exporter (see ``otel_setup``), then inspects the finished spans.
"""

from __future__ import annotations

import json
from collections.abc import Iterator
from typing import cast

import pytest

pytest.importorskip("opentelemetry.sdk")

from chatlas import ChatOpenAI, _otel
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace import SpanContext


@pytest.fixture(autouse=True)
def otel_setup() -> Iterator[InMemorySpanExporter]:
    """Route chatlas spans to an in-memory exporter for the duration of a test.

    Replaces the module-level ``_otel.tracer`` with one obtained from a fresh
    ``TracerProvider`` and yields the exporter so tests can read finished
    spans. The original tracer is put back afterwards.
    """
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))

    orig_tracer = _otel.tracer
    _otel.tracer = provider.get_tracer("com.posit.python-package.chatlas")

    yield exporter

    exporter.clear()
    _otel.tracer = orig_tracer


@pytest.mark.vcr
def test_span_hierarchy_with_tools(otel_setup: InMemorySpanExporter):
    """Chat and tool spans are direct children of a single invoke_agent span."""
    chat = ChatOpenAI(
        model="gpt-4o-mini",
        system_prompt="Always use the get_date tool to answer questions about the date.",
    )

    def get_date() -> str:
        "Return the current date"
        return "2026-05-12"

    chat.register_tool(get_date)
    chat.chat("What is today's date?")

    spans = otel_setup.get_finished_spans()
    span_names = [s.name for s in spans]

    agent_spans = [s for s in spans if s.name == "invoke_agent"]
    chat_spans = [s for s in spans if s.name.startswith("chat ")]
    tool_spans = [s for s in spans if s.name.startswith("execute_tool ")]

    assert len(agent_spans) == 1, (
        f"Expected 1 invoke_agent span, got {len(agent_spans)}. Spans: {span_names}"
    )
    assert len(chat_spans) >= 2, (
        f"Expected >=2 chat spans, got {len(chat_spans)}. Spans: {span_names}"
    )
    assert len(tool_spans) >= 1, (
        f"Expected >=1 execute_tool span, got {len(tool_spans)}. Spans: {span_names}"
    )

    agent_span = agent_spans[0]
    assert agent_span.context is not None
    agent_ctx = cast(SpanContext, agent_span.context)

    # Model calls and tool executions must both hang directly off the agent span.
    for kind, group in (("Chat", chat_spans), ("Tool", tool_spans)):
        for s in group:
            assert s.parent is not None, f"{kind} span {s.name!r} has no parent"
            parent = cast(SpanContext, s.parent)
            assert parent.span_id == agent_ctx.span_id, (
                f"{kind} span {s.name!r} parent span_id mismatch"
            )


@pytest.mark.vcr
def test_token_usage_recorded(otel_setup: InMemorySpanExporter):
    """The chat span carries positive input/output token counts."""
    chat = ChatOpenAI(model="gpt-4o-mini")
    chat.chat("Say hello.")

    spans = otel_setup.get_finished_spans()
    chat_spans = [s for s in spans if s.name.startswith("chat ")]
    assert len(chat_spans) >= 1

    # Check the last chat span (which has usage info from the final response)
    chat_span = chat_spans[-1]
    attrs = chat_span.attributes or {}
    input_tokens = attrs.get("gen_ai.usage.input_tokens")
    output_tokens = attrs.get("gen_ai.usage.output_tokens")
    assert isinstance(input_tokens, (int, float)) and input_tokens > 0, (
        f"Expected input_tokens > 0, got {input_tokens!r}"
    )
    assert isinstance(output_tokens, (int, float)) and output_tokens > 0, (
        f"Expected output_tokens > 0, got {output_tokens!r}"
    )


@pytest.mark.vcr
def test_content_capture_off_by_default(otel_setup: InMemorySpanExporter):
    """Without opting in, no prompt/response content is attached to chat spans."""
    chat = ChatOpenAI(model="gpt-4o-mini")
    chat.chat("Say hello.")

    spans = otel_setup.get_finished_spans()
    chat_spans = [s for s in spans if s.name.startswith("chat ")]
    assert len(chat_spans) >= 1

    for chat_span in chat_spans:
        attrs = chat_span.attributes or {}
        assert "gen_ai.system_instructions" not in attrs, (
            "gen_ai.system_instructions should not be recorded by default"
        )
        assert "gen_ai.input.messages" not in attrs, (
            "gen_ai.input.messages should not be recorded by default"
        )
        assert "gen_ai.output.messages" not in attrs, (
            "gen_ai.output.messages should not be recorded by default"
        )


@pytest.mark.vcr
def test_content_capture_enabled(
    otel_setup: InMemorySpanExporter, monkeypatch: pytest.MonkeyPatch
):
    """With content capture on, chat spans record instructions and messages."""
    monkeypatch.setenv("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "true")
    # Flip the module-level flag for this test (env var is only read at import time).
    # monkeypatch restores the previous value on teardown, even if the chat
    # call below raises, so the flag cannot leak into other tests.
    monkeypatch.setattr(_otel, "capture_content", True)

    chat = ChatOpenAI(model="gpt-4o-mini", system_prompt="Be terse.")
    chat.chat("Say hello.")

    spans = otel_setup.get_finished_spans()
    chat_spans = [s for s in spans if s.name.startswith("chat ")]
    assert len(chat_spans) >= 1

    # The first chat span should capture input content (system instructions + messages)
    first_chat_span = chat_spans[0]
    attrs = first_chat_span.attributes or {}

    assert "gen_ai.system_instructions" in attrs, (
        "gen_ai.system_instructions should be recorded when content capture is enabled"
    )
    assert "gen_ai.input.messages" in attrs, (
        "gen_ai.input.messages should be recorded when content capture is enabled"
    )

    # All chat spans should have output messages
    for chat_span in chat_spans:
        span_attrs = chat_span.attributes or {}
        assert "gen_ai.output.messages" in span_attrs, (
            f"gen_ai.output.messages should be recorded; span: {chat_span.name}"
        )

    # Verify structure of system instructions
    sys_instructions = json.loads(str(attrs["gen_ai.system_instructions"]))
    assert isinstance(sys_instructions, list)
    assert len(sys_instructions) >= 1
    assert sys_instructions[0]["type"] == "text"

    # Verify structure of input messages
    input_msgs = json.loads(str(attrs["gen_ai.input.messages"]))
    assert isinstance(input_msgs, list)
    assert len(input_msgs) >= 1
    assert input_msgs[0]["role"] == "user"
    assert input_msgs[0]["parts"][0]["type"] == "text"

    # Verify structure of output messages
    output_msgs = json.loads(str(chat_spans[-1].attributes["gen_ai.output.messages"]))  # type: ignore[index]
    assert isinstance(output_msgs, list)
    assert len(output_msgs) >= 1
    assert output_msgs[0]["role"] == "assistant"


@pytest.mark.vcr
def test_tool_error_recorded(otel_setup: InMemorySpanExporter):
    """A tool that raises yields an ERROR execute_tool span with error.type set."""
    chat = ChatOpenAI(
        model="gpt-4o-mini",
        system_prompt="Always use the fail_tool to answer. Don't retry if it errors.",
    )

    def fail_tool() -> str:
        "A tool that always fails"
        raise ValueError("intentional test error")

    chat.register_tool(fail_tool)
    chat.chat("Please call the fail_tool.")

    spans = otel_setup.get_finished_spans()
    tool_spans = [s for s in spans if s.name.startswith("execute_tool ")]
    assert len(tool_spans) >= 1

    error_spans = [s for s in tool_spans if s.status.status_code.name == "ERROR"]
    assert len(error_spans) >= 1, (
        f"Expected at least 1 ERROR tool span. Tool spans: {[(s.name, s.status.status_code) for s in tool_spans]}"
    )

    error_span = error_spans[0]
    attrs = error_span.attributes or {}
    assert attrs.get("error.type") == "ValueError", (
        f"Expected error.type == 'ValueError', got {attrs.get('error.type')!r}"
    )


@pytest.mark.vcr
def test_streaming_span_lifecycle(otel_setup: InMemorySpanExporter):
    """A fully consumed stream produces exactly one finished chat span."""
    chat = ChatOpenAI(model="gpt-4o-mini")
    result = chat.stream("Say hello.")
    # Consume the stream fully
    "".join(result)

    spans = otel_setup.get_finished_spans()
    chat_spans = [s for s in spans if s.name.startswith("chat ")]
    assert len(chat_spans) == 1, (
        f"Expected exactly 1 chat span for a simple stream, got {len(chat_spans)}"
    )
    # Verify the span is finished (it should be in finished spans already)
    chat_span = chat_spans[0]
    assert chat_span.end_time is not None, (
        "Chat span should be finished after stream is consumed"
    )


def test_noop_without_provider():
    """A tracer from the default (unconfigured) API yields non-recording spans."""
    from opentelemetry import trace
    from opentelemetry.trace import NonRecordingSpan

    # The default module-level tracer (no SDK TracerProvider configured)
    # should produce non-recording spans that are effectively no-ops.
    default_tracer = trace.get_tracer("test-noop")
    span = default_tracer.start_span("test")
    assert isinstance(span, NonRecordingSpan)
    assert not span.is_recording()
    span.end()