Skip to content

Commit bd21958

Browse files
committed
feat: Add thinking parameter support for Command A Reasoning models
Supports the thinking/reasoning feature for command-a-reasoning-08-2025 on OCI. Transforms Cohere's thinking parameter (type, token_budget) to OCI format and handles thinking content in both non-streaming and streaming responses.
1 parent 951bba7 commit bd21958

2 files changed

Lines changed: 188 additions & 9 deletions

File tree

src/cohere/oci_client.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,16 @@ def transform_request_to_oci(
684684
chat_request["citationOptions"] = cohere_body["citation_options"]
685685
if "safety_mode" in cohere_body:
686686
chat_request["safetyMode"] = cohere_body["safety_mode"]
687+
# Thinking parameter for Command A Reasoning models
688+
if "thinking" in cohere_body:
689+
thinking = cohere_body["thinking"]
690+
oci_thinking: typing.Dict[str, typing.Any] = {}
691+
if "type" in thinking:
692+
oci_thinking["type"] = thinking["type"].upper()
693+
if "token_budget" in thinking and thinking["token_budget"] is not None:
694+
oci_thinking["token_budget"] = thinking["token_budget"]
695+
if oci_thinking:
696+
chat_request["thinking"] = oci_thinking
687697
else:
688698
# V1 API: uses single message string
689699
chat_request["message"] = cohere_body["message"]
@@ -830,9 +840,23 @@ def transform_oci_response_to_cohere(
830840
"output_tokens": usage_data.get("completionTokens", 0),
831841
}
832842

843+
# Transform message content types from OCI (uppercase) to Cohere (lowercase)
844+
message = chat_response.get("message", {})
845+
if "content" in message and isinstance(message["content"], list):
846+
transformed_content = []
847+
for item in message["content"]:
848+
if isinstance(item, dict):
849+
transformed_item = item.copy()
850+
if "type" in transformed_item:
851+
transformed_item["type"] = transformed_item["type"].lower()
852+
transformed_content.append(transformed_item)
853+
else:
854+
transformed_content.append(item)
855+
message = {**message, "content": transformed_content}
856+
833857
return {
834858
"id": chat_response.get("id", str(uuid.uuid4())),
835-
"message": chat_response.get("message", {}),
859+
"message": message,
836860
"finish_reason": chat_response.get("finishReason", "COMPLETE").lower(),
837861
"usage": usage,
838862
}
@@ -987,14 +1011,22 @@ def transform_stream_event(
9871011
if endpoint in ["chat_stream", "chat"]:
9881012
if is_v2:
9891013
# V2 API format: OCI returns full message structure in each event
990-
# Extract text from nested structure: message.content[0].text
991-
text = ""
1014+
# Extract content from nested structure: message.content[0]
1015+
content_type = "text"
1016+
content_value = ""
1017+
9921018
if "message" in oci_event and "content" in oci_event["message"]:
9931019
content_list = oci_event["message"]["content"]
9941020
if content_list and isinstance(content_list, list) and len(content_list) > 0:
9951021
first_content = content_list[0]
996-
if "text" in first_content:
997-
text = first_content["text"]
1022+
# Detect content type (TEXT or THINKING)
1023+
oci_type = first_content.get("type", "TEXT").upper()
1024+
if oci_type == "THINKING":
1025+
content_type = "thinking"
1026+
content_value = first_content.get("thinking", "")
1027+
else:
1028+
content_type = "text"
1029+
content_value = first_content.get("text", "")
9981030

9991031
is_finished = "finishReason" in oci_event
10001032

@@ -1005,15 +1037,19 @@ def transform_stream_event(
10051037
"index": 0,
10061038
}
10071039
else:
1008-
# Content delta event
1040+
# Content delta event - include type for thinking vs text
1041+
delta_content: typing.Dict[str, typing.Any] = {}
1042+
if content_type == "thinking":
1043+
delta_content["thinking"] = content_value
1044+
else:
1045+
delta_content["text"] = content_value
1046+
10091047
return {
10101048
"type": "content-delta",
10111049
"index": 0,
10121050
"delta": {
10131051
"message": {
1014-
"content": {
1015-
"text": text,
1016-
}
1052+
"content": delta_content,
10171053
}
10181054
},
10191055
}

tests/test_oci_client.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,46 @@ def test_chat_v2(self):
236236
self.assertIsNotNone(response)
237237
self.assertIsNotNone(response.message)
238238

239+
@unittest.skip(
240+
"Command A Reasoning model (command-a-reasoning-08-2025) may not be available in all regions. "
241+
"Enable this test when the reasoning model is available in your OCI region."
242+
)
243+
def test_chat_v2_with_thinking(self):
244+
"""Test chat with thinking parameter for Command A Reasoning model."""
245+
from cohere.types import Thinking
246+
247+
response = self.client.chat(
248+
model="command-a-reasoning-08-2025",
249+
messages=[{"role": "user", "content": "What is 15 * 27? Think step by step."}],
250+
thinking=Thinking(type="enabled", token_budget=5000),
251+
)
252+
253+
self.assertIsNotNone(response)
254+
self.assertIsNotNone(response.message)
255+
# The response should contain content (may include thinking content)
256+
self.assertIsNotNone(response.message.content)
257+
258+
@unittest.skip(
259+
"Command A Reasoning model (command-a-reasoning-08-2025) may not be available in all regions. "
260+
"Enable this test when the reasoning model is available in your OCI region."
261+
)
262+
def test_chat_stream_v2_with_thinking(self):
263+
"""Test streaming chat with thinking parameter for Command A Reasoning model."""
264+
from cohere.types import Thinking
265+
266+
events = []
267+
for event in self.client.chat_stream(
268+
model="command-a-reasoning-08-2025",
269+
messages=[{"role": "user", "content": "What is 15 * 27? Think step by step."}],
270+
thinking=Thinking(type="enabled", token_budget=5000),
271+
):
272+
events.append(event)
273+
274+
self.assertTrue(len(events) > 0)
275+
# Verify we received content-delta events
276+
content_delta_events = [e for e in events if hasattr(e, "type") and e.type == "content-delta"]
277+
self.assertTrue(len(content_delta_events) > 0)
278+
239279
def test_chat_stream_v2(self):
240280
"""Test streaming chat with v2 client."""
241281
events = []
@@ -455,5 +495,108 @@ def test_rerank_v3(self):
455495
self.assertIsNotNone(response.results)
456496

457497

498+
class TestOciClientTransformations(unittest.TestCase):
499+
"""Unit tests for OCI request/response transformations (no OCI credentials required)."""
500+
501+
def test_thinking_parameter_transformation(self):
502+
"""Test that thinking parameter is correctly transformed to OCI format."""
503+
from cohere.oci_client import transform_request_to_oci
504+
505+
cohere_body = {
506+
"model": "command-a-reasoning-08-2025",
507+
"messages": [{"role": "user", "content": "What is 2+2?"}],
508+
"thinking": {
509+
"type": "enabled",
510+
"token_budget": 10000,
511+
},
512+
}
513+
514+
result = transform_request_to_oci("chat", cohere_body, "compartment-123")
515+
516+
# Verify thinking parameter is transformed
517+
chat_request = result["chatRequest"]
518+
self.assertIn("thinking", chat_request)
519+
self.assertEqual(chat_request["thinking"]["type"], "ENABLED")
520+
self.assertEqual(chat_request["thinking"]["token_budget"], 10000)
521+
522+
def test_thinking_parameter_disabled(self):
523+
"""Test that disabled thinking is correctly transformed."""
524+
from cohere.oci_client import transform_request_to_oci
525+
526+
cohere_body = {
527+
"model": "command-a-reasoning-08-2025",
528+
"messages": [{"role": "user", "content": "Hello"}],
529+
"thinking": {
530+
"type": "disabled",
531+
},
532+
}
533+
534+
result = transform_request_to_oci("chat", cohere_body, "compartment-123")
535+
536+
chat_request = result["chatRequest"]
537+
self.assertIn("thinking", chat_request)
538+
self.assertEqual(chat_request["thinking"]["type"], "DISABLED")
539+
self.assertNotIn("token_budget", chat_request["thinking"])
540+
541+
def test_thinking_response_transformation(self):
542+
"""Test that thinking content in response is correctly transformed."""
543+
from cohere.oci_client import transform_oci_response_to_cohere
544+
545+
oci_response = {
546+
"chatResponse": {
547+
"id": "test-id",
548+
"message": {
549+
"role": "ASSISTANT",
550+
"content": [
551+
{"type": "THINKING", "thinking": "Let me think about this..."},
552+
{"type": "TEXT", "text": "The answer is 4."},
553+
],
554+
},
555+
"finishReason": "COMPLETE",
556+
"usage": {"inputTokens": 10, "completionTokens": 20},
557+
}
558+
}
559+
560+
result = transform_oci_response_to_cohere("chat", oci_response, is_v2=True)
561+
562+
# Verify content types are lowercased
563+
self.assertEqual(result["message"]["content"][0]["type"], "thinking")
564+
self.assertEqual(result["message"]["content"][1]["type"], "text")
565+
566+
def test_stream_event_thinking_transformation(self):
567+
"""Test that thinking content in stream events is correctly transformed."""
568+
from cohere.oci_client import transform_stream_event
569+
570+
# OCI thinking event
571+
oci_event = {
572+
"message": {
573+
"content": [{"type": "THINKING", "thinking": "Reasoning step..."}]
574+
}
575+
}
576+
577+
result = transform_stream_event("chat", oci_event, is_v2=True)
578+
579+
self.assertEqual(result["type"], "content-delta")
580+
self.assertIn("thinking", result["delta"]["message"]["content"])
581+
self.assertEqual(result["delta"]["message"]["content"]["thinking"], "Reasoning step...")
582+
583+
def test_stream_event_text_transformation(self):
584+
"""Test that text content in stream events is correctly transformed."""
585+
from cohere.oci_client import transform_stream_event
586+
587+
# OCI text event
588+
oci_event = {
589+
"message": {
590+
"content": [{"type": "TEXT", "text": "The answer is..."}]
591+
}
592+
}
593+
594+
result = transform_stream_event("chat", oci_event, is_v2=True)
595+
596+
self.assertEqual(result["type"], "content-delta")
597+
self.assertIn("text", result["delta"]["message"]["content"])
598+
self.assertEqual(result["delta"]["message"]["content"]["text"], "The answer is...")
599+
600+
458601
if __name__ == "__main__":
459602
unittest.main()

0 commit comments

Comments
 (0)