posit-dev · cpsievert · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/chatlas/_chat.py b/chatlas/_chat.py
diff --git a/chatlas/_otel.py b/chatlas/_otel.py
@@ -0,0 +1,197 @@
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING, Any, Optional
+
+from opentelemetry import trace
+from opentelemetry.trace import SpanKind, StatusCode
+
+if TYPE_CHECKING:
+    from opentelemetry.trace import Span
+
+    from ._content import Content, ContentToolRequest
+    from ._provider import Provider
+    from ._turn import AssistantTurn, SystemTurn, Turn
+
+
+# Tracer used by the span helpers in this module; the string is the
+# instrumentation name handed to the OpenTelemetry API.
+tracer = trace.get_tracer("com.posit.python-package.chatlas")
+
+# Opt-in switch for recording message content on spans. The environment
+# variable is read once, when this module is imported, and the flag is true
+# only when its value is "true" or "1" (compared case-insensitively).
+capture_content: bool = os.environ.get(
+    "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", ""
+).lower() in ("true", "1")
+
+
+def start_agent_span(provider: Provider[Any, Any, Any, Any]) -> Span:
+    return tracer.start_span(
+        "invoke_agent",
+        kind=SpanKind.CLIENT,
+        attributes={
+            "gen_ai.operation.name": "invoke_agent",
+            "gen_ai.provider.name": provider.name.lower(),
+            "gen_ai.request.model": provider.model,
+        },
+    )
+
+
+def start_chat_span(
+    provider: Provider[Any, Any, Any, Any],
+    turns: list[Turn],
+    system_turn: Optional[SystemTurn],
+    parent: Span,
+) -> Span:
+    ctx = trace.set_span_in_context(parent)
+    span = tracer.start_span(
+        f"chat {provider.model}",
+        kind=SpanKind.CLIENT,
+        attributes={
+            "gen_ai.operation.name": "chat",
+            "gen_ai.provider.name": provider.name.lower(),
+            "gen_ai.request.model": provider.model,
+        },
+        context=ctx,
+    )
+
+    if capture_content and span.is_recording():
+        record_input_content(span, turns, system_turn)
+
+    return span
+
+
+def start_tool_span(
+    request: ContentToolRequest,
+    parent: Span,
+) -> Span:
+    ctx = trace.set_span_in_context(parent)
+
+    attrs: dict[str, Any] = {
+        "gen_ai.operation.name": "execute_tool",
+        "gen_ai.tool.name": request.name,
+        "gen_ai.tool.call.id": request.id,
+    }
+    if request.tool is not None and request.tool.description:
+        attrs["gen_ai.tool.description"] = request.tool.description
+
+    return tracer.start_span(
+        f"execute_tool {request.name}",
+        attributes=attrs,
+        context=ctx,
+    )
+
+
+def record_chat_result(
+    span: Span,
+    turn: AssistantTurn[Any],
+) -> None:
+    if not span.is_recording():
+        return
+
+    if turn.tokens is not None:
+        input_tokens, output_tokens, cached_input = turn.tokens
+        span.set_attribute("gen_ai.usage.input_tokens", input_tokens + cached_input)
+        span.set_attribute("gen_ai.usage.output_tokens", output_tokens)
+
+    completion = turn.completion
+    if completion is not None:
+        response_model = getattr(completion, "model", None) or getattr(
+            completion, "model_version", None
+        )
+        if response_model is not None:
+            span.set_attribute("gen_ai.response.model", str(response_model))
+
+        response_id = getattr(completion, "id", None) or getattr(
+            completion, "response_id", None
+        )
+        if response_id is not None:
+            span.set_attribute("gen_ai.response.id", str(response_id))
+
+    if capture_content:
+        try:
+            msg = as_otel_message(turn)
+            span.set_attribute("gen_ai.output.messages", to_json([msg]))
+        except Exception:
+            pass
+
+    span.set_status(StatusCode.OK)
+
+
+def record_tool_error(span: Span, error: Exception) -> None:
+    """
+    Mark a span as failed because of ``error``.
+
+    When the span is recording, the exception is recorded on it, the
+    ``error.type`` attribute is set to the exception's class name, and the
+    status becomes ``ERROR`` with the exception's string form as the
+    description. A non-recording span is left untouched.
+
+    Parameters
+    ----------
+    span
+        The span to annotate.
+    error
+        The exception to record on the span.
+    """
+    if span.is_recording():
+        error_type = type(error).__name__
+        span.record_exception(error)
+        span.set_attribute("error.type", error_type)
+        span.set_status(StatusCode.ERROR, str(error))
+
+
+def end_span(span: Span) -> None:
+    """
+    End ``span`` by calling its ``end()`` method.
+
+    Parameters
+    ----------
+    span
+        The span to end.
+    """
+    span.end()
+
+
+def as_otel_message(turn: Turn) -> dict[str, Any]:
+    from ._content import ContentToolResult
+    from ._turn import UserTurn
+
+    is_tool_turn = (
+        isinstance(turn, UserTurn)
+        and len(turn.contents) > 0
+        and all(isinstance(c, ContentToolResult) for c in turn.contents)
+    )
+
+    return {
+        "role": "tool" if is_tool_turn else turn.role,
+        "parts": [as_otel_part(c) for c in turn.contents],
+    }
+
+
+def as_otel_part(content: Content) -> dict[str, Any]:
+    from ._content import ContentText, ContentToolRequest, ContentToolResult
+
+    if isinstance(content, ContentText):
+        return {"type": "text", "content": content.text}
+
+    if isinstance(content, ContentToolRequest):
+        return {
+            "type": "tool_call",
+            "id": content.id,
+            "name": content.name,
+            "arguments": content.arguments,
+        }
+
+    if isinstance(content, ContentToolResult):
+        part: dict[str, Any] = {"type": "tool_call_response"}
+        if content.request is not None:
+            part["id"] = content.request.id
+        if content.error is not None:
+            part["response"] = str(content.error)
+        elif isinstance(content.value, str):
+            part["response"] = content.value
+        else:
+            try:
+                part["response"] = to_json(content.value)
+            except Exception:
+                part["response"] = str(content.value)
+        return part
+
+    return {"type": "generic", "class": type(content).__name__}
+
+
+def to_json(obj: Any) -> str:
+    import orjson
+
+    return orjson.dumps(obj).decode("utf-8")
+
+
+def record_input_content(
+    span: Span,
+    turns: list[Turn],
+    system_turn: Optional[SystemTurn],
+) -> None:
+    try:
+        if system_turn is not None:
+            parts = [as_otel_part(c) for c in system_turn.contents]
+            span.set_attribute("gen_ai.system_instructions", to_json(parts))
+
+        msgs = [as_otel_message(t) for t in turns]
+        span.set_attribute("gen_ai.input.messages", to_json(msgs))
+    except Exception:
+        pass
diff --git a/docs/get-started/monitor.qmd b/docs/get-started/monitor.qmd
@@ -5,14 +5,111 @@ callout-appearance: simple
 
 As mentioned in the [debugging](debug.qmd) section, chatlas has support for gaining more insight into the behavior of your application through things like [logging](debug.qmd#logging).
 
-However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.io/), etc., to monitor your application.
+However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools like [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.pydantic.dev/), etc., to monitor your application.
 These tools can give you a more structured way to view and monitor app performance, including things like latency, error rates, and other metrics.
 These tools tend to integrate well with open standards like [OpenTelemetry](https://opentelemetry.io/) (OTel), meaning if you "instrument" your app with OTel, you can view your app's telemetry data in any observability tool that supports OTel.
-There are at least a few different ways to do this, but we'll cover some of the more simpler approaches here.
 
-## OpenLLMetry
+## Framework-Level Tracing
 
-The simplest (and most model agnostic) way to instrument your app with OTel is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app:
+Chatlas can emit OpenTelemetry spans that capture the full structure of multi-turn conversations, including tool execution:
+
+```
+invoke_agent                      # wraps the full chat loop
+├── chat gpt-4o                   # each model API call
+├── execute_tool get_weather      # each tool invocation
+├── chat gpt-4o                   # follow-up model call
+└── ...
+```
+
+### Quick Start: Console Output
+
+The simplest way to see chatlas spans is to print them to the console. Install the OTel SDK:
+
+```bash
+pip install opentelemetry-sdk
+```
+
+Then configure a console exporter before creating any chats:
+
+```python
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
+
+provider = TracerProvider()
+provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
+trace.set_tracer_provider(provider)
+```
+
+That's it — chatlas will automatically emit spans for every `chat()`, `stream()`, and tool invocation.
+
+### Production: Logfire
+
+For production observability, we recommend [Pydantic Logfire](https://logfire.pydantic.dev/), which provides a dashboard for exploring traces with minimal setup:
+
+```bash
+pip install logfire
+logfire auth
+```
+
+```python
+import logfire
+
+logfire.configure()
+```
+
+Other OpenTelemetry-compatible backends (Datadog, Honeycomb, Jaeger, etc.) work too — just configure the appropriate exporter via the standard `opentelemetry-sdk` and `opentelemetry-exporter-*` packages.
+
+### Configuration Module Pattern
+
+For apps (especially Shiny apps), extract the OTel setup into a dedicated module and import that module before anything else, so tracing is configured before any chats are created:
+
+```python
+# otel_config.py
+import logfire
+logfire.configure()
+```
+
+```python
+# app.py
+import otel_config  # noqa: F401 — side-effect import
+from chatlas import ChatOpenAI
+# ...
+```
+
+### What's Captured
+
+Each **chat span** records:
+
+- Token usage (`gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`)
+- Response model and ID
+- Optionally: system prompt, input messages, output messages
+
+Each **tool span** records:
+
+- Tool name, description, and call ID
+- Errors (if the tool raises)
+
+### Content Capture
+
+Message content is **not** captured by default (it may contain sensitive data). To enable:
+
+```bash
+export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
+```
+
+### Combining with Provider Instrumentors
+
+Framework spans complement the provider-specific instrumentors described below. If you have both enabled, the provider instrumentor's HTTP spans will appear as children of chatlas's chat spans, giving you visibility at both the application and HTTP levels.
+
+
+## Provider-Specific Instrumentors
+
+In addition to (or instead of) chatlas's built-in framework-level tracing, you can use provider-specific OpenTelemetry instrumentor libraries. These operate at the HTTP/SDK level and capture lower-level details about each API call.
+
+### OpenLLMetry {#openllmetry}
+
+The simplest (and most model-agnostic) way to add provider-level instrumentation is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app:
 
 ```bash
 pip install traceloop-sdk
@@ -32,7 +129,7 @@ From here, a quick and easy way to get started visualizing your app's telemetry
 If you want to avoid the Traceloop Python SDK, you can also use these OTel instrumentation libraries from the openllmetry project more directly (e.g., [openai](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai) and [anthropic](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-anthropic)).
 
 
-## OpenTelemetry
+### Official OpenTelemetry Libraries
 
 To use OpenTelemetry's "official" instrumentation libraries, you'll need to first install the relevant instrumentation packages for the model providers you are using.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -11,7 +11,8 @@ dependencies = [
   "jinja2",
   "orjson",
   "rich",
-  "openai"
+  "openai",
+  "opentelemetry-api>=1.0"
 ]
 classifiers = [
   "Development Status :: 4 - Beta",
@@ -43,6 +44,7 @@ test = [
     "syrupy>=4",
     "vcrpy>=6.0.0",
     "pytest-recording>=0.13",
+    "opentelemetry-sdk>=1.0",
 ]
 dev = [
     "ruff>=0.6.5",
@@ -64,7 +66,8 @@ dev = [
     "snowflake-ml-python>=1.8.4",
     # torch (a dependency of snowflake-ml-python) is not yet compatible with Python >3.11
     "torch;python_version<='3.11'",
-    "tenacity"
+    "tenacity",
+    "opentelemetry-sdk>=1.0",
 ]
 docs = [
     "griffe>=1.3.2,<2.0.0",