mistralai · simonvdk-mistral · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 30, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
     "python-dateutil >=2.8.2",
     "typing-inspection >=0.4.0",
     "opentelemetry-api (>=1.33.1,<2.0.0)",
-    "opentelemetry-semantic-conventions (>=0.60b1,<0.61)",
+    "opentelemetry-semantic-conventions (>=0.61b0,<0.62)",
     "jsonpath-python >=1.0.6", # required for speakeasy generated path with pagination
 ]
 

diff --git a/src/mistralai/extra/observability/otel.py b/src/mistralai/extra/observability/otel.py
@@ -251,20 +251,41 @@ def _enrich_response_genai_attrs(
     # Usage
     usage = response_data.get("usage", {})
     if usage:
-        attributes.update(
-            {
-                gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: usage.get(
-                    "prompt_tokens", 0
-                ),
-                gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS: usage.get(
-                    "completion_tokens", 0
-                ),
-            }
+        attributes[gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS] = usage.get(
+            "prompt_tokens", 0
         )
+        attributes[gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS] = usage.get(
+            "completion_tokens", 0
+        )
+
+        cached_input_tokens = _extract_cached_input_tokens(usage)
+        if cached_input_tokens is not None:
+            attributes[
+                gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
+            ] = cached_input_tokens
 
     set_available_attributes(span, attributes)
 
 
+def _extract_cached_input_tokens(usage: dict[str, Any]) -> int | None:
+    # Return the cached prompt token count found in `usage`, or None if absent.
+    # The generated usage schema currently exposes both plural and singular
+    # prompt token details keys, plus a legacy top-level cached token count.
+    prompt_token_details = usage.get("prompt_tokens_details") or usage.get(
+        "prompt_token_details"
+    )  # a missing or falsy plural entry falls back to the singular key
+    if isinstance(prompt_token_details, dict):
+        cached_tokens = prompt_token_details.get("cached_tokens")
+        if isinstance(cached_tokens, int):  # prefer the nested value when present
+            return cached_tokens
+
+    num_cached_tokens = usage.get("num_cached_tokens")  # legacy top-level count
+    if isinstance(num_cached_tokens, int):  # non-int values (e.g. None) are ignored
+        return num_cached_tokens
+
+    return None  # no usable cached token count under any of the known keys
+
+
 def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
     """Set agent-specific attributes from create_agent response.
 
@@ -274,8 +295,7 @@ def _enrich_create_agent(span: Span, response_data: dict[str, Any]) -> None:
         gen_ai_attributes.GEN_AI_AGENT_DESCRIPTION: response_data.get("description"),
         gen_ai_attributes.GEN_AI_AGENT_ID: response_data.get("id"),
         gen_ai_attributes.GEN_AI_AGENT_NAME: response_data.get("name"),
-        # As of 2026-03-02: in convention, but not yet in opentelemetry-semantic-conventions
-        "gen_ai.agent.version": str(response_data.get("version")),
+        gen_ai_attributes.GEN_AI_AGENT_VERSION: str(response_data.get("version")),
         gen_ai_attributes.GEN_AI_REQUEST_MODEL: response_data.get("model"),
         gen_ai_attributes.GEN_AI_SYSTEM_INSTRUCTIONS: response_data.get("instructions"),
     }

diff --git a/src/mistralai/extra/tests/test_otel_tracing.py b/src/mistralai/extra/tests/test_otel_tracing.py
@@ -272,7 +272,16 @@ def test_simple_chat_completion(self):
                     finish_reason="stop",
                 ),
             ],
-            usage=UsageInfo(prompt_tokens=20, completion_tokens=25, total_tokens=45),
+            usage=UsageInfo.model_validate(
+                {
+                    "prompt_tokens": 20,
+                    "completion_tokens": 25,
+                    "total_tokens": 45,
+                    "prompt_tokens_details": {
+                        "cached_tokens": 12,
+                    },
+                }
+            ),
         )
 
         self._run_hook_lifecycle(
@@ -301,6 +310,7 @@ def test_simple_chat_completion(self):
                 "gen_ai.response.finish_reasons": ("stop",),
                 "gen_ai.usage.input_tokens": 20,
                 "gen_ai.usage.output_tokens": 25,
+                "gen_ai.usage.cache_read.input_tokens": 12,
             },
         )
 
@@ -1390,8 +1400,13 @@ def test_streaming_chat_completion_enriches_span(self):
                             finish_reason="stop",
                         ),
                     ],
-                    usage=UsageInfo(
-                        prompt_tokens=20, completion_tokens=8, total_tokens=28
+                    usage=UsageInfo.model_validate(
+                        {
+                            "prompt_tokens": 20,
+                            "completion_tokens": 8,
+                            "total_tokens": 28,
+                            "num_cached_tokens": 10,
+                        }
                     ),
                 ),
             ),
@@ -1419,6 +1434,7 @@ def test_streaming_chat_completion_enriches_span(self):
                 "gen_ai.response.model": "mistral-large-latest",
                 "gen_ai.usage.input_tokens": 20,
                 "gen_ai.usage.output_tokens": 8,
+                "gen_ai.usage.cache_read.input_tokens": 10,
                 "gen_ai.response.finish_reasons": ("stop",),
             },
         )
@@ -1526,7 +1542,6 @@ def failing_tool(x: int) -> str:
             "Expected an exception event on the span",
         )
 
-
     # -- Baggage propagation: gen_ai.conversation.id ---------------------------
 
     def test_conversation_id_from_baggage(self):

diff --git a/uv.lock b/uv.lock