Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions src/mistralai/extra/observability/otel.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
os.getenv("MISTRAL_SDK_DEBUG_TRACING", "false").lower() == "true"
)
DEBUG_HINT: str = "To see detailed tracing logs, set MISTRAL_SDK_DEBUG_TRACING=true."
# As of 2026-03-27: in GenAI semantic conventions, but not yet in
Comment thread
simonvdk-mistral marked this conversation as resolved.
Outdated
# opentelemetry-semantic-conventions for Python.
GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read.input_tokens"


class MistralAIAttributes:
Expand Down Expand Up @@ -251,20 +254,36 @@ def _enrich_response_genai_attrs(
# Usage
usage = response_data.get("usage", {})
if usage:
attributes.update(
{
gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: usage.get(
"prompt_tokens", 0
),
gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS: usage.get(
"completion_tokens", 0
),
}
attributes[gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS] = usage.get(
"prompt_tokens", 0
)
attributes[gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS] = usage.get(
"completion_tokens", 0
)

cached_input_tokens = _extract_cached_input_tokens(usage)
if cached_input_tokens is not None:
attributes[GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS] = cached_input_tokens

set_available_attributes(span, attributes)


def _extract_cached_input_tokens(usage: dict[str, Any]) -> int | None:
prompt_token_details = usage.get("prompt_tokens_details") or usage.get(
"prompt_token_details"
)
Comment thread
simonvdk-mistral marked this conversation as resolved.
if isinstance(prompt_token_details, dict):
cached_tokens = prompt_token_details.get("cached_tokens")
if isinstance(cached_tokens, int):
return cached_tokens

num_cached_tokens = usage.get("num_cached_tokens")
if isinstance(num_cached_tokens, int):
return num_cached_tokens
Comment on lines +277 to +284
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you arbitrate the priority between the two (prompt token details and number of cached tokens) ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made the priority explicit in code in b6cce3d: prefer prompt_tokens_details.cached_tokens when present, and only fall back to top-level num_cached_tokens for payloads that expose the legacy field instead.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok maybe the spec is not dry yet. The UsageInfo model I linked in the linear ticket is specific to a voice endpoint, and all other endpoints (chat completion, conversation, etc) do not have the cache tokens attributes defined yet in the models.

Let's wait a bit for this PR, will come back later

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood. I am not making a follow-up code change from this comment. The current branch only records gen_ai.usage.cache_read.input_tokens when the raw usage payload actually contains one of the cache-token fields, so endpoints whose generated models do not expose those fields today remain unaffected. I will leave the PR here and wait for your follow-up on whether you want to keep or revert this behavior once the models/spec settle.


return None


def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
"""Set agent-specific attributes from create_agent response.

Expand Down
114 changes: 113 additions & 1 deletion src/mistralai/extra/tests/test_otel_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,54 @@ def test_chat_completion_with_tool_calls(self):
],
)

def test_chat_completion_with_cached_prompt_tokens(self):
    """Non-streaming usage with prompt_tokens_details should yield cache_read attr."""
    request = ChatCompletionRequest(
        model="mistral-large-latest",
        messages=[UserMessage(content="Summarize this document.")],
    )
    # Usage payload carries the nested cached-token details.
    usage_payload = {
        "prompt_tokens": 42,
        "completion_tokens": 9,
        "total_tokens": 51,
        "prompt_tokens_details": {"cached_tokens": 12},
    }
    response = {
        "id": "cmpl-cache-001",
        "object": "chat.completion",
        "model": "mistral-large-latest",
        "created": 1700000002,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Here is the summary.",
                },
                "finish_reason": "stop",
            }
        ],
        "usage": usage_payload,
    }

    self._run_hook_lifecycle(
        "chat_completion_v1_chat_completions_post",
        request,
        response,
    )

    self.assertSpanAttributes(
        self._get_single_span(),
        {
            "gen_ai.usage.input_tokens": 42,
            "gen_ai.usage.output_tokens": 9,
            "gen_ai.usage.cache_read.input_tokens": 12,
        },
    )

# -- Embeddings ------------------------------------------------------------

def test_embeddings(self):
Expand Down Expand Up @@ -1440,6 +1488,71 @@ def test_streaming_chat_completion_enriches_span(self):
],
)

def test_streaming_chat_completion_with_num_cached_tokens(self):
    """Streaming usage with legacy num_cached_tokens should yield cache_read attr."""
    request = ChatCompletionRequest(
        model="mistral-large-latest",
        messages=[UserMessage(content="Continue.")],
    )
    # First chunk: assistant content, no usage yet.
    opening_chunk = CompletionEvent(
        data=CompletionChunk(
            id="cmpl-stream-cache-001",
            model="mistral-large-latest",
            object="chat.completion.chunk",
            created=1700000000,
            choices=[
                CompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(role="assistant", content="Done."),
                    finish_reason=None,
                ),
            ],
        ),
    )
    # Final chunk: carries the usage payload with the legacy flat field.
    closing_chunk = CompletionEvent(
        data=CompletionChunk(
            id="cmpl-stream-cache-001",
            model="mistral-large-latest",
            object="chat.completion.chunk",
            created=1700000000,
            choices=[
                CompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(content=""),
                    finish_reason="stop",
                ),
            ],
            usage=UsageInfo.model_validate(
                {
                    "prompt_tokens": 24,
                    "completion_tokens": 3,
                    "total_tokens": 27,
                    "num_cached_tokens": 10,
                }
            ),
        ),
    )

    self._run_hook_lifecycle(
        "chat_completion_v1_chat_completions_post",
        request,
        [opening_chunk, closing_chunk],
        streaming=True,
    )

    self.assertSpanAttributes(
        self._get_single_span(),
        {
            "gen_ai.usage.input_tokens": 24,
            "gen_ai.usage.output_tokens": 3,
            "gen_ai.usage.cache_read.input_tokens": 10,
        },
    )

# -- create_function_result (client-side tool execution) -------------------

def test_create_function_result_span_attributes(self):
Expand Down Expand Up @@ -1526,7 +1639,6 @@ def failing_tool(x: int) -> str:
"Expected an exception event on the span",
)


# -- Baggage propagation: gen_ai.conversation.id ---------------------------

def test_conversation_id_from_baggage(self):
Expand Down
Loading