Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
612 changes: 347 additions & 265 deletions chatlas/_chat.py

Large diffs are not rendered by default.

197 changes: 197 additions & 0 deletions chatlas/_otel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any, Optional

from opentelemetry import trace
from opentelemetry.trace import SpanKind, StatusCode

if TYPE_CHECKING:
from opentelemetry.trace import Span

from ._content import Content, ContentToolRequest
from ._provider import Provider
from ._turn import AssistantTurn, SystemTurn, Turn


# Shared tracer used by every span helper in this module. The string passed to
# get_tracer() names chatlas as the instrumentation source of these spans.
tracer = trace.get_tracer("com.posit.python-package.chatlas")

# Opt-in switch for recording message content on spans. It is read from the
# environment once, at import time; only "true" or "1" (compared
# case-insensitively) turn it on, and an unset variable leaves it off.
capture_content: bool = os.getenv(
    "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", ""
).lower() in {"true", "1"}


def start_agent_span(provider: Provider[Any, Any, Any, Any]) -> Span:
    """Start the ``invoke_agent`` span for a provider.

    The span is created with CLIENT kind and tagged with the operation name,
    the lower-cased provider name, and the requested model. No explicit
    parent context is passed. The span is returned un-ended; the caller is
    responsible for ending it.
    """
    agent_span = tracer.start_span(
        "invoke_agent",
        kind=SpanKind.CLIENT,
        attributes={
            "gen_ai.operation.name": "invoke_agent",
            "gen_ai.provider.name": provider.name.lower(),
            "gen_ai.request.model": provider.model,
        },
    )
    return agent_span


def start_chat_span(
    provider: Provider[Any, Any, Any, Any],
    turns: list[Turn],
    system_turn: Optional[SystemTurn],
    parent: Span,
) -> Span:
    """Start a ``chat <model>`` span as a child of ``parent``.

    The span is created with CLIENT kind and carries the operation name, the
    lower-cased provider name, and the requested model. When content capture
    is enabled and the span is recording, the system turn and input turns are
    also recorded on it. The span is returned un-ended.
    """
    parent_ctx = trace.set_span_in_context(parent)
    chat_span = tracer.start_span(
        f"chat {provider.model}",
        kind=SpanKind.CLIENT,
        attributes={
            "gen_ai.operation.name": "chat",
            "gen_ai.provider.name": provider.name.lower(),
            "gen_ai.request.model": provider.model,
        },
        context=parent_ctx,
    )

    # Message content is opt-in; skip the serialization work unless asked for.
    if not capture_content:
        return chat_span

    if chat_span.is_recording():
        record_input_content(chat_span, turns, system_turn)
    return chat_span


def start_tool_span(
    request: ContentToolRequest,
    parent: Span,
) -> Span:
    """Start an ``execute_tool <name>`` span as a child of ``parent``.

    The span records the tool name and call id from ``request``. The tool
    description is added only when the request has an attached tool with a
    non-empty description. No span kind is passed, so the tracer's default
    applies. The span is returned un-ended.
    """
    parent_ctx = trace.set_span_in_context(parent)

    span_attrs: dict[str, Any] = {
        "gen_ai.operation.name": "execute_tool",
        "gen_ai.tool.name": request.name,
        "gen_ai.tool.call.id": request.id,
    }

    tool = request.tool
    if tool is not None:
        description = tool.description
        if description:
            span_attrs["gen_ai.tool.description"] = description

    return tracer.start_span(
        f"execute_tool {request.name}",
        attributes=span_attrs,
        context=parent_ctx,
    )


def record_chat_result(
    span: Span,
    turn: AssistantTurn[Any],
) -> None:
    """Record the outcome of a model call on its chat span.

    Does nothing when ``span`` is not recording. Otherwise it sets, when
    available:

    - ``gen_ai.usage.input_tokens`` / ``gen_ai.usage.output_tokens`` from
      ``turn.tokens``;
    - ``gen_ai.response.model`` from the completion's ``model`` (falling back
      to ``model_version``);
    - ``gen_ai.response.id`` from the completion's ``id`` (falling back to
      ``response_id``);
    - ``gen_ai.output.messages`` when content capture is enabled.

    Finally the span status is set to OK.

    Content capture is best-effort: a failure to convert or serialize the
    turn never propagates to the caller, but it is logged at DEBUG level so
    that missing output messages can be diagnosed.
    """
    import logging

    if not span.is_recording():
        return

    if turn.tokens is not None:
        input_tokens, output_tokens, cached_input = turn.tokens
        # Reported input usage is the sum of the first and third token counts.
        # NOTE(review): presumably the first count excludes cached tokens, so
        # the sum is the full prompt size -- confirm against the providers.
        span.set_attribute("gen_ai.usage.input_tokens", input_tokens + cached_input)
        span.set_attribute("gen_ai.usage.output_tokens", output_tokens)

    completion = turn.completion
    if completion is not None:
        # Completion objects differ between providers, so probe the attribute
        # names with getattr instead of assuming one shape.
        response_model = getattr(completion, "model", None) or getattr(
            completion, "model_version", None
        )
        if response_model is not None:
            span.set_attribute("gen_ai.response.model", str(response_model))

        response_id = getattr(completion, "id", None) or getattr(
            completion, "response_id", None
        )
        if response_id is not None:
            span.set_attribute("gen_ai.response.id", str(response_id))

    if capture_content:
        try:
            msg = as_otel_message(turn)
            span.set_attribute("gen_ai.output.messages", to_json([msg]))
        except Exception:
            # Telemetry must never break the chat itself, so swallow the
            # error -- but leave a trace instead of failing silently.
            logging.getLogger(__name__).debug(
                "Failed to record gen_ai.output.messages on chat span",
                exc_info=True,
            )

    span.set_status(StatusCode.OK)


def record_tool_error(span: Span, error: Exception) -> None:
    """Mark a tool span as failed because of ``error``.

    When the span is recording, the exception is recorded on it, the
    ``error.type`` attribute is set to the exception's class name, and the
    span status is set to ERROR with the exception message as description.
    A non-recording span is left untouched.
    """
    if span.is_recording():
        span.record_exception(error)
        span.set_attribute("error.type", type(error).__name__)
        span.set_status(StatusCode.ERROR, str(error))


def end_span(span: Span) -> None:
    """End ``span`` by calling its ``end()`` method."""
    span.end()
    return None


def as_otel_message(turn: Turn) -> dict[str, Any]:
    """Convert a turn into a message dict with ``role`` and ``parts`` keys.

    A user turn whose contents are non-empty and consist solely of tool
    results is reported with role ``"tool"``; every other turn keeps its own
    role. Each content item becomes one entry in ``parts``.
    """
    # Imported locally, as in the rest of this module's runtime code paths.
    from ._content import ContentToolResult
    from ._turn import UserTurn

    only_tool_results = False
    if isinstance(turn, UserTurn) and len(turn.contents) > 0:
        only_tool_results = all(
            isinstance(item, ContentToolResult) for item in turn.contents
        )

    role = "tool" if only_tool_results else turn.role
    parts = [as_otel_part(item) for item in turn.contents]
    return {"role": role, "parts": parts}


def as_otel_part(content: Content) -> dict[str, Any]:
    """Convert one content item into a message-part dict.

    - Text becomes ``{"type": "text", "content": ...}``.
    - A tool request becomes a ``tool_call`` part with id, name and arguments.
    - A tool result becomes a ``tool_call_response`` part. It carries the
      originating request id when there is one. The ``response`` is the error
      text if the result has an error, the value itself if it is a string,
      and otherwise the value as JSON (falling back to ``str()`` if JSON
      encoding fails).
    - Anything else becomes a ``generic`` part naming the content class.
    """
    from ._content import ContentText, ContentToolRequest, ContentToolResult

    if isinstance(content, ContentText):
        return {"type": "text", "content": content.text}

    if isinstance(content, ContentToolRequest):
        return {
            "type": "tool_call",
            "id": content.id,
            "name": content.name,
            "arguments": content.arguments,
        }

    if isinstance(content, ContentToolResult):
        result_part: dict[str, Any] = {"type": "tool_call_response"}
        if content.request is not None:
            result_part["id"] = content.request.id

        if content.error is not None:
            response = str(content.error)
        elif isinstance(content.value, str):
            response = content.value
        else:
            try:
                response = to_json(content.value)
            except Exception:
                # Not JSON-encodable: degrade to the plain string form.
                response = str(content.value)

        result_part["response"] = response
        return result_part

    return {"type": "generic", "class": type(content).__name__}


def to_json(obj: Any) -> str:
    """Serialize ``obj`` to a JSON string using orjson.

    orjson produces bytes, which are decoded as UTF-8 before being returned.
    """
    import orjson

    encoded = orjson.dumps(obj)
    return encoded.decode("utf-8")


def record_input_content(
    span: Span,
    turns: list[Turn],
    system_turn: Optional[SystemTurn],
) -> None:
    """Record the system prompt and input messages on ``span``.

    Sets ``gen_ai.system_instructions`` (only when ``system_turn`` is given)
    and ``gen_ai.input.messages``, both as JSON strings.

    Recording is best-effort: a conversion or serialization failure never
    propagates to the caller. Each attribute is handled independently, so a
    problem with the system prompt does not prevent the input messages from
    being recorded. Failures are logged at DEBUG level.
    """
    import logging

    logger = logging.getLogger(__name__)

    if system_turn is not None:
        try:
            parts = [as_otel_part(c) for c in system_turn.contents]
            span.set_attribute("gen_ai.system_instructions", to_json(parts))
        except Exception:
            # Telemetry must never break the chat; note the failure and go on.
            logger.debug(
                "Failed to record gen_ai.system_instructions on chat span",
                exc_info=True,
            )

    try:
        msgs = [as_otel_message(t) for t in turns]
        span.set_attribute("gen_ai.input.messages", to_json(msgs))
    except Exception:
        logger.debug(
            "Failed to record gen_ai.input.messages on chat span",
            exc_info=True,
        )
107 changes: 102 additions & 5 deletions docs/get-started/monitor.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,111 @@ callout-appearance: simple

As mentioned in the [debugging](debug.qmd) section, chatlas has support for gaining more insight into the behavior of your application through things like [logging](debug.qmd#logging).

However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.io/), etc., to monitor your application.
However, in a production setting, you may want to go beyond simple logging and use more sophisticated observability tools like [Datadog](https://www.datadoghq.com/), [Logfire](https://logfire.pydantic.dev/), etc., to monitor your application.
These tools can give you a more structured way to view and monitor app performance, including things like latency, error rates, and other metrics.
These tools tend to integrate well with open standards like [OpenTelemetry](https://opentelemetry.io/) (OTel), meaning if you "instrument" your app with OTel, you can view your app's telemetry data in any observability tool that supports OTel.
There are at least a few different ways to do this, but we'll cover some of the simpler approaches here.

## OpenLLMetry
## Framework-Level Tracing

The simplest (and most model agnostic) way to instrument your app with OTel is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app:
Chatlas can emit OpenTelemetry spans that capture the full structure of multi-turn conversations, including tool execution:

```
invoke_agent # wraps the full chat loop
├── chat gpt-4o # each model API call
├── execute_tool get_weather # each tool invocation
├── chat gpt-4o # follow-up model call
└── ...
```

### Quick Start: Console Output

The simplest way to see chatlas spans is to print them to the console. Install the OTel SDK:

```bash
pip install opentelemetry-sdk
```

Then configure a console exporter before creating any chats:

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
```

That's it — chatlas will automatically emit spans for every `chat()`, `stream()`, and tool invocation.

### Production: Logfire

For production observability, we recommend [Pydantic Logfire](https://logfire.pydantic.dev/), which provides a dashboard for exploring traces with minimal setup:

```bash
pip install logfire
logfire auth
```

```python
import logfire

logfire.configure()
```

Other OpenTelemetry-compatible backends (Datadog, Honeycomb, Jaeger, etc.) work too — just configure the appropriate exporter via the standard `opentelemetry-sdk` and `opentelemetry-exporter-*` packages.

### Configuration Module Pattern

For apps (especially Shiny apps), extract OTel setup into a dedicated module that runs before any other imports:

```python
# otel_config.py
import logfire
logfire.configure()
```

```python
# app.py
import otel_config # noqa: F401 — side-effect import
from chatlas import ChatOpenAI
# ...
```

### What's Captured

Each **chat span** records:

- Token usage (`gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`)
- Response model and ID
- Optionally: system prompt, input messages, output messages

Each **tool span** records:

- Tool name, description, and call ID
- Errors (if the tool raises)

### Content Capture

Message content is **not** captured by default (it may contain sensitive data). To enable:

```bash
export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
```

### Combining with Provider Instrumentors

Framework spans complement the provider-specific instrumentors described below. If you have both enabled, the provider instrumentor's HTTP spans will appear as children of chatlas's chat spans, giving you visibility at both the application and HTTP levels.


## Provider-Specific Instrumentors

In addition to (or instead of) chatlas's built-in framework-level tracing, you can use provider-specific OpenTelemetry instrumentor libraries. These operate at the HTTP/SDK level and capture lower-level details about each API call.

### OpenLLMetry {#openllmetry}

The simplest (and most model-agnostic) way to add provider-level instrumentation is to leverage [openllmetry](https://github.com/traceloop/openllmetry), which can be as easy as adding the following code to your app:

```bash
pip install traceloop-sdk
Expand All @@ -32,7 +129,7 @@ From here, a quick and easy way to get started visualizing your app's telemetry
If you want to avoid the Traceloop Python SDK, you can also use these OTel instrumentation libraries from the openllmetry project more directly (e.g., [openai](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai) and [anthropic](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-anthropic)).


## OpenTelemetry
### Official OpenTelemetry Libraries

To use OpenTelemetry's "official" instrumentation libraries, you'll need to first install the relevant instrumentation packages for the model providers you are using.

Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ dependencies = [
"jinja2",
"orjson",
"rich",
"openai"
"openai",
"opentelemetry-api>=1.0"
]
classifiers = [
"Development Status :: 4 - Beta",
Expand Down Expand Up @@ -43,6 +44,7 @@ test = [
"syrupy>=4",
"vcrpy>=6.0.0",
"pytest-recording>=0.13",
"opentelemetry-sdk>=1.0",
]
dev = [
"ruff>=0.6.5",
Expand All @@ -64,7 +66,8 @@ dev = [
"snowflake-ml-python>=1.8.4",
# torch (a dependency of snowflake-ml-python) is not yet compatible with Python >3.11
"torch;python_version<='3.11'",
"tenacity"
"tenacity",
"opentelemetry-sdk>=1.0",
]
docs = [
"griffe>=1.3.2,<2.0.0",
Expand Down
Loading
Loading