Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions src/mistralai/extra/observability/otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
os.getenv("MISTRAL_SDK_DEBUG_TRACING", "false").lower() == "true"
)
DEBUG_HINT: str = "To see detailed tracing logs, set MISTRAL_SDK_DEBUG_TRACING=true."
# As of 2026-03-27: in GenAI semantic conventions, but not yet in
Comment thread
simonvdk-mistral marked this conversation as resolved.
Outdated
# opentelemetry-semantic-conventions for Python.
GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read.input_tokens"


class MistralAIAttributes:
Expand Down Expand Up @@ -251,20 +254,36 @@ def _enrich_response_genai_attrs(
# Usage
usage = response_data.get("usage", {})
if usage:
attributes.update(
{
gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: usage.get(
"prompt_tokens", 0
),
gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS: usage.get(
"completion_tokens", 0
),
}
attributes[gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS] = usage.get(
"prompt_tokens", 0
)
attributes[gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS] = usage.get(
"completion_tokens", 0
)

cached_input_tokens = _extract_cached_input_tokens(usage)
if cached_input_tokens is not None:
attributes[GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS] = cached_input_tokens

set_available_attributes(span, attributes)


def _extract_cached_input_tokens(usage: dict[str, Any]) -> int | None:
prompt_token_details = usage.get("prompt_tokens_details") or usage.get(
"prompt_token_details"
)
Comment thread
simonvdk-mistral marked this conversation as resolved.
if isinstance(prompt_token_details, dict):
cached_tokens = prompt_token_details.get("cached_tokens")
if isinstance(cached_tokens, int):
return cached_tokens

num_cached_tokens = usage.get("num_cached_tokens")
if isinstance(num_cached_tokens, int):
return num_cached_tokens
Comment on lines +277 to +284
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you arbitrate the priority between the two (prompt token details and number of cached tokens) ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made the priority explicit in code in b6cce3d: prefer prompt_tokens_details.cached_tokens when present, and only fall back to top-level num_cached_tokens for payloads that expose the legacy field instead.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok maybe the spec is not dry yet. The UsageInfo model I linked in the linear ticket is specific to a voice endpoint, and all other endpoints (chat completion, conversation, etc) do not have the cache tokens attributes defined yet in the models.

Let's wait a bit for this PR, will come back later

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood. I am not making a follow-up code change from this comment. The current branch only records gen_ai.usage.cache_read.input_tokens when the raw usage payload actually contains one of the cache-token fields, so endpoints whose generated models do not expose those fields today remain unaffected. I will leave the PR here and wait for your follow-up on whether you want to keep or revert this behavior once the models/spec settle.


return None


def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
"""Set agent-specific attributes from create_agent response.

Expand Down
114 changes: 113 additions & 1 deletion src/mistralai/extra/tests/test_otel_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,54 @@ def test_chat_completion_with_tool_calls(self):
],
)

def test_chat_completion_with_cached_prompt_tokens(self):
    """Non-streaming usage with prompt_tokens_details should yield cache_read attr."""
    request = ChatCompletionRequest(
        model="mistral-large-latest",
        messages=[UserMessage(content="Summarize this document.")],
    )
    # Usage payload carries the nested cached-token details.
    usage_payload = {
        "prompt_tokens": 42,
        "completion_tokens": 9,
        "total_tokens": 51,
        "prompt_tokens_details": {"cached_tokens": 12},
    }
    response = {
        "id": "cmpl-cache-001",
        "object": "chat.completion",
        "model": "mistral-large-latest",
        "created": 1700000002,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Here is the summary.",
                },
                "finish_reason": "stop",
            }
        ],
        "usage": usage_payload,
    }

    self._run_hook_lifecycle(
        "chat_completion_v1_chat_completions_post",
        request,
        response,
    )

    self.assertSpanAttributes(
        self._get_single_span(),
        {
            "gen_ai.usage.input_tokens": 42,
            "gen_ai.usage.output_tokens": 9,
            "gen_ai.usage.cache_read.input_tokens": 12,
        },
    )

# -- Embeddings ------------------------------------------------------------

def test_embeddings(self):
Expand Down Expand Up @@ -1440,6 +1488,71 @@ def test_streaming_chat_completion_enriches_span(self):
],
)

def test_streaming_chat_completion_with_num_cached_tokens(self):
    """Streaming usage with legacy num_cached_tokens should yield cache_read attr."""
    request = ChatCompletionRequest(
        model="mistral-large-latest",
        messages=[UserMessage(content="Continue.")],
    )
    # First chunk: assistant content, no usage yet.
    opening_chunk = CompletionEvent(
        data=CompletionChunk(
            id="cmpl-stream-cache-001",
            model="mistral-large-latest",
            object="chat.completion.chunk",
            created=1700000000,
            choices=[
                CompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(role="assistant", content="Done."),
                    finish_reason=None,
                ),
            ],
        ),
    )
    # Final chunk: carries the usage payload with the legacy flat field.
    closing_chunk = CompletionEvent(
        data=CompletionChunk(
            id="cmpl-stream-cache-001",
            model="mistral-large-latest",
            object="chat.completion.chunk",
            created=1700000000,
            choices=[
                CompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(content=""),
                    finish_reason="stop",
                ),
            ],
            usage=UsageInfo.model_validate(
                {
                    "prompt_tokens": 24,
                    "completion_tokens": 3,
                    "total_tokens": 27,
                    "num_cached_tokens": 10,
                }
            ),
        ),
    )

    self._run_hook_lifecycle(
        "chat_completion_v1_chat_completions_post",
        request,
        [opening_chunk, closing_chunk],
        streaming=True,
    )

    self.assertSpanAttributes(
        self._get_single_span(),
        {
            "gen_ai.usage.input_tokens": 24,
            "gen_ai.usage.output_tokens": 3,
            "gen_ai.usage.cache_read.input_tokens": 10,
        },
    )

# -- create_function_result (client-side tool execution) -------------------

def test_create_function_result_span_attributes(self):
Expand Down Expand Up @@ -1526,7 +1639,6 @@ def failing_tool(x: int) -> str:
"Expected an exception event on the span",
)


# -- Baggage propagation: gen_ai.conversation.id ---------------------------

def test_conversation_id_from_baggage(self):
Expand Down
Loading