Skip to content

Commit 47c0c4f

Browse files
rgambee and github-actions[bot]
authored and committed
Preserve array item schema (#5200)
When CC submits a schema to the MCP server, our manual conversion from JSON schema to Pydantic model was dropping important details. When submitting a schema containing an array, the schema for the array's items was lost completely. This meant the array items could be anything: strings, numbers, objects with arbitrary fields. This gives the agents far too much freedom to return their results in whatever format they want. And it caused serialization errors when writing the results since the backend would make incorrect assumptions about the array item type. This PR lets the MCP tools bypass the JSON schema -> Pydantic -> JSON schema chain. Sourced from commit dda388ec445ee7e681b68bed303a320c56334aa7
1 parent ac04a47 commit 47c0c4f

8 files changed

Lines changed: 318 additions & 199 deletions

File tree

futuresearch-mcp/src/futuresearch_mcp/models.py

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,13 @@
1717
BaseModel,
1818
ConfigDict,
1919
Field,
20-
create_model,
2120
field_validator,
2221
model_validator,
2322
)
2423

2524
from futuresearch_mcp.config import settings
2625
from futuresearch_mcp.utils import is_url, validate_csv_path, validate_url
2726

28-
JSON_TYPE_MAP = {
29-
"string": str,
30-
"integer": int,
31-
"number": float,
32-
"boolean": bool,
33-
"array": list,
34-
"object": dict,
35-
}
36-
3727

3828
class InputDataMode(StrEnum):
3929
dataframe = "DATAFRAME"
@@ -80,37 +70,6 @@ def _validate_response_schema(schema: dict[str, Any] | None) -> dict[str, Any] |
8070
return schema
8171

8272

83-
def _schema_to_model(name: str, schema: dict[str, Any]) -> type[BaseModel]:
84-
"""Convert a JSON schema dict to a dynamic Pydantic model.
85-
86-
This allows the MCP client to pass arbitrary response schemas without
87-
needing to define Python classes.
88-
"""
89-
properties = schema["properties"]
90-
required = set(schema.get("required", []))
91-
92-
fields: dict[str, Any] = {}
93-
for field_name, field_def in properties.items():
94-
if not isinstance(field_def, dict):
95-
raise ValueError(
96-
f"Invalid property schema for '{field_name}': expected an object."
97-
)
98-
99-
field_type_str = field_def.get("type", "string")
100-
python_type = JSON_TYPE_MAP.get(field_type_str, str)
101-
description = field_def.get("description", "")
102-
103-
if field_name in required:
104-
fields[field_name] = (python_type, Field(..., description=description))
105-
else:
106-
fields[field_name] = (
107-
python_type | None,
108-
Field(default=None, description=description),
109-
)
110-
111-
return create_model(name, **fields)
112-
113-
11473
def _check_exactly_one(
11574
*,
11675
values: tuple[Any | None, ...],

futuresearch-mcp/src/futuresearch_mcp/tools.py

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
from futuresearch.generated.api.tasks import get_task_status_tasks_task_id_status_get
1818
from futuresearch.generated.models.task_status import TaskStatus
1919
from futuresearch.ops import (
20-
agent_map_async,
20+
_submit_agent_map,
21+
_submit_rank,
22+
_submit_single_agent,
2123
classify_async,
2224
create_table_artifact,
2325
dedupe_async,
2426
forecast_async,
2527
merge_async,
26-
rank_async,
27-
single_agent_async,
2828
)
2929
from futuresearch.session import list_sessions
3030
from futuresearch.task import cancel_task
@@ -51,7 +51,6 @@
5151
StdioResultsInput,
5252
UploadDataInput,
5353
UseListInput,
54-
_schema_to_model,
5554
)
5655
from futuresearch_mcp.result_store import (
5756
_build_result_response,
@@ -270,10 +269,6 @@ async def futuresearch_agent(
270269

271270
input_data = params._aid_or_dataframe
272271

273-
response_model: type[BaseModel] | None = None
274-
if params.response_schema:
275-
response_model = _schema_to_model("AgentResult", params.response_schema)
276-
277272
async with create_linked_session(
278273
client=client, session_id=params.session_id, name=params.session_name
279274
) as session:
@@ -284,8 +279,8 @@ async def futuresearch_agent(
284279
"input": input_data,
285280
"enforce_row_independence": params.enforce_row_independence,
286281
}
287-
if response_model:
288-
kwargs["response_model"] = response_model
282+
if params.response_schema:
283+
kwargs["response_schema"] = params.response_schema
289284
kwargs["effort_level"] = params.effort_level
290285
if params.effort_level is None:
291286
if params.llm is not None:
@@ -294,8 +289,8 @@ async def futuresearch_agent(
294289
kwargs["iteration_budget"] = params.iteration_budget
295290
if params.include_reasoning is not None:
296291
kwargs["include_reasoning"] = params.include_reasoning
297-
cohort_task = await agent_map_async(**kwargs)
298-
task_id = str(cohort_task.task_id)
292+
submitted = await _submit_agent_map(**kwargs)
293+
task_id = str(submitted.task_id)
299294
total = len(input_data) if isinstance(input_data, pd.DataFrame) else 0
300295

301296
return await create_tool_response(
@@ -356,10 +351,6 @@ async def futuresearch_single_agent(
356351
log_client_info(ctx, "futuresearch_single_agent")
357352
client = _get_client(ctx)
358353

359-
response_model: type[BaseModel] | None = None
360-
if params.response_schema:
361-
response_model = _schema_to_model("SingleAgentResult", params.response_schema)
362-
363354
# Convert input_data dict to a BaseModel if provided
364355
input_model: BaseModel | None = None
365356
if params.input_data:
@@ -378,8 +369,8 @@ async def futuresearch_single_agent(
378369
}
379370
if input_model is not None:
380371
kwargs["input"] = input_model
381-
if response_model is not None:
382-
kwargs["response_model"] = response_model
372+
if params.response_schema:
373+
kwargs["response_schema"] = params.response_schema
383374
kwargs["effort_level"] = params.effort_level
384375
if params.effort_level is None:
385376
if params.llm is not None:
@@ -388,8 +379,8 @@ async def futuresearch_single_agent(
388379
kwargs["iteration_budget"] = params.iteration_budget
389380
if params.include_reasoning is not None:
390381
kwargs["include_reasoning"] = params.include_reasoning
391-
cohort_task = await single_agent_async(**kwargs)
392-
task_id = str(cohort_task.task_id)
382+
submitted = await _submit_single_agent(**kwargs)
383+
task_id = str(submitted.task_id)
393384

394385
return await create_tool_response(
395386
task_id=task_id,
@@ -445,24 +436,20 @@ async def futuresearch_rank(
445436

446437
input_data = params._aid_or_dataframe
447438

448-
response_model: type[BaseModel] | None = None
449-
if params.response_schema:
450-
response_model = _schema_to_model("RankResult", params.response_schema)
451-
452439
async with create_linked_session(
453440
client=client, session_id=params.session_id, name=params.session_name
454441
) as session:
455442
session_id_str = str(session.session_id)
456-
cohort_task = await rank_async(
443+
submitted = await _submit_rank(
457444
task=params.task,
458445
session=session,
459446
input=input_data,
460447
field_name=params.field_name,
461448
field_type=params.field_type,
462-
response_model=response_model,
449+
response_schema=params.response_schema,
463450
ascending_order=params.ascending_order,
464451
)
465-
task_id = str(cohort_task.task_id)
452+
task_id = str(submitted.task_id)
466453
total = len(input_data) if isinstance(input_data, pd.DataFrame) else 0
467454

468455
return await create_tool_response(

futuresearch-mcp/tests/test_mcp_e2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ async def test_call_agent_tool(self, _http_state):
234234
return_value=MagicMock(token="fake-token"),
235235
),
236236
patch(
237-
"futuresearch_mcp.tools.agent_map_async",
237+
"futuresearch_mcp.tools._submit_agent_map",
238238
new_callable=AsyncMock,
239239
return_value=mock_task,
240240
),
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import json
2+
3+
from futuresearch.generated.models.agent_map_operation_response_schema_type_0 import (
4+
AgentMapOperationResponseSchemaType0,
5+
)
6+
7+
# ---------------------------------------------------------------------------
8+
# Helpers
9+
# ---------------------------------------------------------------------------
10+
11+
EBAY_DEALS_SCHEMA = {
12+
"type": "object",
13+
"properties": {
14+
"deals": {
15+
"type": "array",
16+
"description": "List of eBay deals found where per-item cost is below avg_90d",
17+
"items": {
18+
"type": "object",
19+
"properties": {
20+
"title": {
21+
"type": "string",
22+
"description": "Full eBay listing title",
23+
},
24+
"per_item_cost": {
25+
"type": "number",
26+
"description": "Price per item",
27+
},
28+
"shipping_cost": {"type": "string", "description": "Shipping cost"},
29+
"margin": {
30+
"type": "number",
31+
"description": "avg_90d minus per_item_cost",
32+
},
33+
"url": {
34+
"type": "string",
35+
"description": "Full URL to the eBay listing",
36+
},
37+
},
38+
"required": [
39+
"title",
40+
"per_item_cost",
41+
"shipping_cost",
42+
"margin",
43+
"url",
44+
],
45+
},
46+
},
47+
"no_deals_reason": {
48+
"type": "string",
49+
"description": "If no deals found, explain why",
50+
},
51+
},
52+
"required": ["deals"],
53+
}
54+
55+
56+
def _roundtrip_via_sdk(schema: dict) -> dict:
57+
"""Simulate the SDK path: schema → generated model → dict."""
58+
sdk_obj = AgentMapOperationResponseSchemaType0.from_dict(schema)
59+
return sdk_obj.to_dict()
60+
61+
62+
class TestSchemaPassthrough:
63+
"""The SDK should pass the schema dict through without modification."""
64+
65+
def test_ebay_deals_schema_preserved(self):
66+
"""The exact schema from the failing eBay CPAP session."""
67+
result = _roundtrip_via_sdk(EBAY_DEALS_SCHEMA)
68+
deals = result["properties"]["deals"]
69+
assert deals["type"] == "array"
70+
assert deals["items"]["type"] == "object"
71+
assert "title" in deals["items"]["properties"]
72+
assert "url" in deals["items"]["properties"]
73+
assert deals["items"]["required"] == [
74+
"title",
75+
"per_item_cost",
76+
"shipping_cost",
77+
"margin",
78+
"url",
79+
]
80+
81+
def test_array_of_objects_items_preserved(self):
82+
schema = {
83+
"type": "object",
84+
"properties": {
85+
"items": {
86+
"type": "array",
87+
"items": {
88+
"type": "object",
89+
"properties": {
90+
"name": {"type": "string"},
91+
"value": {"type": "number"},
92+
},
93+
"required": ["name"],
94+
},
95+
}
96+
},
97+
"required": ["items"],
98+
}
99+
result = _roundtrip_via_sdk(schema)
100+
assert (
101+
result["properties"]["items"]["items"]["properties"]["name"]["type"]
102+
== "string"
103+
)
104+
assert result["properties"]["items"]["items"]["required"] == ["name"]
105+
106+
def test_array_of_primitives_preserved(self):
107+
schema = {
108+
"type": "object",
109+
"properties": {
110+
"tags": {"type": "array", "items": {"type": "string"}},
111+
},
112+
"required": ["tags"],
113+
}
114+
result = _roundtrip_via_sdk(schema)
115+
assert result["properties"]["tags"]["items"]["type"] == "string"
116+
117+
def test_nested_object_properties_preserved(self):
118+
schema = {
119+
"type": "object",
120+
"properties": {
121+
"metadata": {
122+
"type": "object",
123+
"properties": {
124+
"source": {"type": "string"},
125+
"confidence": {"type": "number"},
126+
},
127+
}
128+
},
129+
"required": ["metadata"],
130+
}
131+
result = _roundtrip_via_sdk(schema)
132+
assert (
133+
result["properties"]["metadata"]["properties"]["source"]["type"] == "string"
134+
)
135+
136+
def test_schema_is_json_serializable(self):
137+
"""The schema must be JSON-serializable for the HTTP request."""
138+
result = _roundtrip_via_sdk(EBAY_DEALS_SCHEMA)
139+
json.dumps(result) # should not raise

0 commit comments

Comments
 (0)