Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies = [
"python-dateutil >=2.8.2",
"typing-inspection >=0.4.0",
"opentelemetry-api (>=1.33.1,<2.0.0)",
"opentelemetry-semantic-conventions (>=0.60b1,<0.61)",
"opentelemetry-semantic-conventions (>=0.61b0,<0.62)",
"jsonpath-python >=1.0.6", # required for speakeasy generated path with pagination
]

Expand Down
42 changes: 31 additions & 11 deletions src/mistralai/extra/observability/otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,20 +251,41 @@ def _enrich_response_genai_attrs(
# Usage
usage = response_data.get("usage", {})
if usage:
attributes.update(
{
gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: usage.get(
"prompt_tokens", 0
),
gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS: usage.get(
"completion_tokens", 0
),
}
attributes[gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS] = usage.get(
"prompt_tokens", 0
)
attributes[gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS] = usage.get(
"completion_tokens", 0
)

cached_input_tokens = _extract_cached_input_tokens(usage)
if cached_input_tokens is not None:
attributes[
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
] = cached_input_tokens

set_available_attributes(span, attributes)


def _extract_cached_input_tokens(usage: dict[str, Any]) -> int | None:
# The generated usage schema currently exposes both plural/singular
# prompt token details names, plus the legacy top-level cached token count.
# Prefer the nested cached_tokens value when present.
prompt_token_details = usage.get("prompt_tokens_details") or usage.get(
"prompt_token_details"
)
if isinstance(prompt_token_details, dict):
cached_tokens = prompt_token_details.get("cached_tokens")
if isinstance(cached_tokens, int):
return cached_tokens

num_cached_tokens = usage.get("num_cached_tokens")
if isinstance(num_cached_tokens, int):
return num_cached_tokens
Comment on lines +277 to +284
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you arbitrate the priority between the two (prompt token details and number of cached tokens)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made the priority explicit in code in b6cce3d: prefer prompt_tokens_details.cached_tokens when present, and only fall back to top-level num_cached_tokens for payloads that expose the legacy field instead.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, maybe the spec is not settled yet. The UsageInfo model I linked in the Linear ticket is specific to a voice endpoint, and all other endpoints (chat completion, conversation, etc.) do not have the cached-token attributes defined in their models yet.

Let's put this PR on hold for now; I will come back to it later.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood. I am not making a follow-up code change from this comment. The current branch only records gen_ai.usage.cache_read.input_tokens when the raw usage payload actually contains one of the cache-token fields, so endpoints whose generated models do not expose those fields today remain unaffected. I will leave the PR here and wait for your follow-up on whether you want to keep or revert this behavior once the models/spec settle.


return None


def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
"""Set agent-specific attributes from create_agent response.

Expand All @@ -274,8 +295,7 @@ def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
gen_ai_attributes.GEN_AI_AGENT_DESCRIPTION: response_data.get("description"),
gen_ai_attributes.GEN_AI_AGENT_ID: response_data.get("id"),
gen_ai_attributes.GEN_AI_AGENT_NAME: response_data.get("name"),
# As of 2026-03-02: in convention, but not yet in opentelemetry-semantic-conventions
"gen_ai.agent.version": str(response_data.get("version")),
gen_ai_attributes.GEN_AI_AGENT_VERSION: str(response_data.get("version")),
gen_ai_attributes.GEN_AI_REQUEST_MODEL: response_data.get("model"),
gen_ai_attributes.GEN_AI_SYSTEM_INSTRUCTIONS: response_data.get("instructions"),
}
Expand Down
23 changes: 19 additions & 4 deletions src/mistralai/extra/tests/test_otel_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,16 @@ def test_simple_chat_completion(self):
finish_reason="stop",
),
],
usage=UsageInfo(prompt_tokens=20, completion_tokens=25, total_tokens=45),
usage=UsageInfo.model_validate(
{
"prompt_tokens": 20,
"completion_tokens": 25,
"total_tokens": 45,
"prompt_tokens_details": {
"cached_tokens": 12,
},
}
),
)

self._run_hook_lifecycle(
Expand Down Expand Up @@ -301,6 +310,7 @@ def test_simple_chat_completion(self):
"gen_ai.response.finish_reasons": ("stop",),
"gen_ai.usage.input_tokens": 20,
"gen_ai.usage.output_tokens": 25,
"gen_ai.usage.cache_read.input_tokens": 12,
},
)

Expand Down Expand Up @@ -1390,8 +1400,13 @@ def test_streaming_chat_completion_enriches_span(self):
finish_reason="stop",
),
],
usage=UsageInfo(
prompt_tokens=20, completion_tokens=8, total_tokens=28
usage=UsageInfo.model_validate(
{
"prompt_tokens": 20,
"completion_tokens": 8,
"total_tokens": 28,
"num_cached_tokens": 10,
}
),
),
),
Expand Down Expand Up @@ -1419,6 +1434,7 @@ def test_streaming_chat_completion_enriches_span(self):
"gen_ai.response.model": "mistral-large-latest",
"gen_ai.usage.input_tokens": 20,
"gen_ai.usage.output_tokens": 8,
"gen_ai.usage.cache_read.input_tokens": 10,
"gen_ai.response.finish_reasons": ("stop",),
},
)
Expand Down Expand Up @@ -1526,7 +1542,6 @@ def failing_tool(x: int) -> str:
"Expected an exception event on the span",
)


# -- Baggage propagation: gen_ai.conversation.id ---------------------------

def test_conversation_id_from_baggage(self):
Expand Down
20 changes: 10 additions & 10 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading