fix: limit orphan validation to paired reasoning items, strip SDK-only fields

majiayu000 · majiayu000 · commit 63485fa8f735 · 2026-03-28T22:01:17.000+08:00
- validate_response_input now checks from the reasoning side: each
  reasoning item must be immediately followed by an assistant message.
  Standalone assistant messages are no longer flagged, fixing false
  positives in mixed histories.
- get_response_input_items now uses __api_exclude__ and strips nested
  'parsed' fields so ParsedResponse objects don't leak SDK-only data
  into next-turn input.

Signed-off-by: majiayu000 <1835304752@qq.com>
diff --git a/src/openai/lib/_response_input_builder.py b/src/openai/lib/_response_input_builder.py
@@ -18,6 +18,9 @@ def get_response_input_items(response: Response) -> List[ResponseInputItemParam]
     consecutive pair when building the ``input`` for the next turn.  Filtering
     out reasoning items (or re-ordering them) causes a 400 error from the API.
 
+    SDK-only fields (e.g. ``parsed`` from ``ParsedResponseOutputText``) are
+    excluded so the returned dicts conform to ``ResponseInputItemParam``.
+
     Example usage::
 
         from openai.lib import get_response_input_items
@@ -35,17 +38,29 @@ def get_response_input_items(response: Response) -> List[ResponseInputItemParam]
     """
     items: List[ResponseInputItemParam] = []
     for output_item in response.output:
-        items.append(output_item.model_dump(exclude_unset=True))  # type: ignore[arg-type]
+        data = output_item.model_dump(
+            exclude_unset=True,
+            exclude=getattr(output_item, "__api_exclude__", None),
+        )
+        # Strip SDK-only fields from nested content items
+        # (e.g. ParsedResponseOutputText.parsed is not part of the API schema)
+        for content_item in data.get("content", []):
+            if isinstance(content_item, dict):
+                content_item.pop("parsed", None)
+        items.append(data)  # type: ignore[arg-type]
     return items
 
 
 def validate_response_input(items: Sequence[Union[ResponseInputItemParam, object]]) -> None:
-    """Validate that reasoning+message pairs are not orphaned in an input list.
+    """Validate that reasoning+message pairs are intact in an input list.
+
+    Walks ``items`` and raises ``ValueError`` when it detects a ``reasoning``-type
+    item that is NOT immediately followed by a ``message``-type item with
+    role=assistant — the classic broken-pair pattern that causes a 400 from the
+    API.
 
-    Walks ``items`` and raises ``ValueError`` when it detects a ``message``-type
-    item (role=assistant) that is NOT immediately preceded by a ``reasoning``-type
-    item, but where a ``reasoning`` item exists elsewhere in the list — the classic
-    orphaning pattern that causes a 400 from the API.
+    Standalone assistant messages (those not part of a reasoning pair) are allowed
+    and will not trigger a validation error.
 
     This validator is a standalone opt-in helper.  The primary recommendation is
     to build the input list with :func:`get_response_input_items` instead of
@@ -59,37 +74,30 @@ def validate_response_input(items: Sequence[Union[ResponseInputItemParam, object
 
         from openai.lib import validate_response_input
 
-        validate_response_input(conversation)  # raises ValueError if orphaned
+        validate_response_input(conversation)  # raises ValueError if broken pair
         response = client.responses.create(model="o3", input=conversation)
     """
-    has_reasoning = any(_item_type(item) == "reasoning" for item in items)
-    if not has_reasoning:
-        # No reasoning items at all — nothing to validate.
-        return
-
     for i, item in enumerate(items):
-        if _item_type(item) != "message":
+        if _item_type(item) != "reasoning":
             continue
-        role = _item_role(item)
-        if role != "assistant":
-            continue
-        # This is an assistant message.  It must be immediately preceded by a
-        # reasoning item when reasoning items exist in the list.
-        preceded_by_reasoning = i > 0 and _item_type(items[i - 1]) == "reasoning"
-        if not preceded_by_reasoning:
-            item_id = _item_id(item)
-            id_hint = f" (id={item_id!r})" if item_id else ""
-            raise ValueError(
-                f"Orphaned assistant message{id_hint} detected: a 'message' item with "
-                f"role='assistant' must be immediately preceded by its paired 'reasoning' "
-                f"item when reasoning items are present in the input. "
-                f"The OpenAI Responses API requires that reasoning and the immediately "
-                f"following assistant message are always passed together as a consecutive "
-                f"pair. Either include the paired reasoning item directly before this "
-                f"message, use 'previous_response_id' to let the API manage context, or "
-                f"build the input list with get_response_input_items() which preserves "
-                f"pairs automatically."
-            )
+        # Each reasoning item must be immediately followed by an assistant message.
+        next_idx = i + 1
+        if next_idx < len(items):
+            next_item = items[next_idx]
+            if _item_type(next_item) == "message" and _item_role(next_item) == "assistant":
+                continue
+        item_id = _item_id(item)
+        id_hint = f" (id={item_id!r})" if item_id else ""
+        raise ValueError(
+            f"Orphaned reasoning item{id_hint} detected: a 'reasoning' item "
+            f"must be immediately followed by its paired 'message' item with "
+            f"role='assistant'. The OpenAI Responses API requires that reasoning "
+            f"and the immediately following assistant message are always passed "
+            f"together as a consecutive pair. Either include the paired assistant "
+            f"message directly after this reasoning item, or remove the reasoning "
+            f"item. Use get_response_input_items() to build input lists that "
+            f"preserve pairs automatically."
+        )
 
 
 def _item_type(item: object) -> str:
diff --git a/tests/lib/test_response_input_builder.py b/tests/lib/test_response_input_builder.py
@@ -81,15 +81,15 @@ def test_validate_passes_for_consecutive_pair_dicts() -> None:
     validate_response_input(items)  # should not raise
 
 
-def test_validate_raises_for_orphaned_message_dicts() -> None:
-    """ValueError raised when assistant message is not preceded by reasoning (dict form)."""
+def test_validate_raises_for_orphaned_reasoning_dicts() -> None:
+    """ValueError raised when reasoning is not followed by assistant message (dict form)."""
     items = [
         {"type": "message", "role": "user", "content": "hello"},
         {"type": "reasoning", "id": "rs_1", "summary": []},
         {"type": "message", "role": "user", "content": "follow-up"},
         {"type": "message", "role": "assistant", "id": "msg_orphan", "content": []},
     ]
-    with pytest.raises(ValueError, match="msg_orphan"):
+    with pytest.raises(ValueError, match="rs_1"):
         validate_response_input(items)
 
 
@@ -133,7 +133,7 @@ def test_validate_passes_with_object_form() -> None:
 
 
 def test_validate_raises_with_object_form_orphaned() -> None:
-    """Raises ValueError with object-form items when message is orphaned."""
+    """Raises ValueError with object-form items when reasoning is not followed by assistant."""
     reasoning = MagicMock()
     reasoning.type = "reasoning"
     reasoning.id = "rs_obj"
@@ -147,5 +147,66 @@ def test_validate_raises_with_object_form_orphaned() -> None:
     asst_msg.role = "assistant"
     asst_msg.id = "msg_orphan_obj"
 
-    with pytest.raises(ValueError, match="msg_orphan_obj"):
+    with pytest.raises(ValueError, match="rs_obj"):
         validate_response_input([reasoning, user_msg, asst_msg])
+
+
+def test_validate_passes_for_standalone_assistant_with_later_pair() -> None:
+    """Standalone assistant message is valid even when a reasoning+assistant pair exists later."""
+    items = [
+        {"type": "message", "role": "assistant", "id": "msg_standalone", "content": []},
+        {"type": "message", "role": "user", "content": "follow-up"},
+        {"type": "reasoning", "id": "rs_1", "summary": []},
+        {"type": "message", "role": "assistant", "id": "msg_paired", "content": []},
+    ]
+    validate_response_input(items)  # should not raise
+
+
+def test_validate_raises_for_reasoning_at_end() -> None:
+    """ValueError raised when reasoning item is the last item with no following message."""
+    items = [
+        {"type": "message", "role": "user", "content": "hello"},
+        {"type": "reasoning", "id": "rs_trailing", "summary": []},
+    ]
+    with pytest.raises(ValueError, match="rs_trailing"):
+        validate_response_input(items)
+
+
+def test_get_response_input_items_excludes_parsed_fields() -> None:
+    """SDK-only 'parsed' field is stripped from nested content items."""
+    message = MagicMock()
+    message.type = "message"
+    message.model_dump.return_value = {
+        "type": "message",
+        "role": "assistant",
+        "id": "msg_parsed",
+        "content": [
+            {"type": "output_text", "text": "hello", "parsed": {"key": "value"}},
+        ],
+    }
+    response = _make_response([message])
+    result = get_response_input_items(response)
+    assert len(result) == 1
+    assert "parsed" not in result[0]["content"][0]  # type: ignore[index]
+
+
+def test_get_response_input_items_respects_api_exclude() -> None:
+    """SDK-only fields listed in __api_exclude__ are excluded from output."""
+    full_data = {
+        "type": "function_call",
+        "id": "fc_1",
+        "name": "my_func",
+        "arguments": "{}",
+        "parsed_arguments": {"key": "value"},
+    }
+    tool_call = MagicMock()
+    tool_call.type = "function_call"
+    tool_call.__api_exclude__ = {"parsed_arguments"}
+    tool_call.model_dump.side_effect = lambda **kwargs: {
+        k: v for k, v in full_data.items()
+        if k not in (kwargs.get("exclude") or set())
+    }
+    response = _make_response([tool_call])
+    result = get_response_input_items(response)
+    assert len(result) == 1
+    assert "parsed_arguments" not in result[0]