
Commit cee6ff5

RafaelPo authored and github-actions[bot] committed
feat(futuresearch-mcp): add futuresearch_status tool with unified progress widget (#5013)
## Summary

- New `futuresearch_status` MCP tool with unified tabbed widget (Activity + Results) that auto-polls progress and auto-fetches results on completion
- `get_task_result` unified to parquet-first with artifact fallback for failed/legacy tasks — consistent `[title](url)` citation format via `process_rows`
- Fixed `linkify()` double-escaping bug that rendered `<a>` tags as visible text
- Removed ~930 lines of dead widget templates (`RESULTS_HTML`, `SESSION_HTML`)
- Simplified MCP download endpoint (poll token auth, no separate download token)

Supersedes #5002 (squashed).

## Test plan

- [ ] `uv run pytest tests/server/api_v0/test_tasks.py` — 43 tests pass
- [ ] Run a task via Claude.ai MCP → verify widget shows progress + results with clickable citation links
- [ ] Verify paginated (`?offset=0&limit=10`) and non-paginated paths return same citation format
- [ ] Verify failed tasks return data via artifact fallback
- [ ] Verify CSV/JSON download from widget works

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Sourced from commit a9b3e0cdfd7543c404bffc0fe05b04626455278c
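The `linkify()` fix itself lives in templates.py, which is not among the files shown below. As an illustration of the bug class the summary describes (escaping the HTML after inserting anchor tags, so the generated `<a>` markup is escaped and shows up as literal text), here is a minimal standalone sketch; the function names and regex are assumptions, not the project's actual implementation.

```python
from html import escape
import re

URL_RE = re.compile(r"https?://\S+")


def linkify_buggy(text: str) -> str:
    # Bug pattern: anchor tags are inserted first, then the whole string is
    # escaped, so the generated "<a>" markup renders as visible text.
    linked = URL_RE.sub(lambda m: f'<a href="{m.group(0)}">{m.group(0)}</a>', text)
    return escape(linked)


def linkify_fixed(text: str) -> str:
    # Fix pattern: escape the raw text first, then insert anchor tags so the
    # markup survives untouched.
    escaped = escape(text)
    return URL_RE.sub(lambda m: f'<a href="{m.group(0)}">{m.group(0)}</a>', escaped)


sample = "See https://futuresearch.ai for details"
print(linkify_buggy(sample))  # &lt;a href=...&gt; shows up as literal text
print(linkify_fixed(sample))  # <a href="https://futuresearch.ai">...</a> renders as a link
```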
1 parent 825bd88 commit cee6ff5

13 files changed

Lines changed: 691 additions & 682 deletions

futuresearch-mcp/manifest.json

Lines changed: 28 additions & 4 deletions
@@ -22,7 +22,10 @@
     "entry_point": "src/futuresearch_mcp/server.py",
     "mcp_config": {
       "command": "uv",
-      "args": ["run", "${__dirname}/src/futuresearch_mcp/server.py"],
+      "args": [
+        "run",
+        "${__dirname}/src/futuresearch_mcp/server.py"
+      ],
       "env": {
         "FUTURESEARCH_API_KEY": "${user_config.api_key}"
       }
@@ -61,6 +64,10 @@
       "name": "futuresearch_progress",
       "description": "Check progress of a running task. Blocks briefly to limit the polling rate."
     },
+    {
+      "name": "futuresearch_status",
+      "description": "Check task status and display a live progress widget."
+    },
     {
       "name": "futuresearch_results",
       "description": "Retrieve results from a completed futuresearch task and save them to a CSV."
@@ -104,12 +111,29 @@
     }
   },
   "compatibility": {
-    "platforms": ["darwin", "linux", "win32"],
+    "platforms": [
+      "darwin",
+      "linux",
+      "win32"
+    ],
     "runtimes": {
       "python": ">=3.12"
     }
   },
-  "keywords": ["futuresearch", "dataframe", "csv", "ai", "data-processing", "classify", "dedupe", "merge", "rank", "forecast"],
+  "keywords": [
+    "futuresearch",
+    "dataframe",
+    "csv",
+    "ai",
+    "data-processing",
+    "classify",
+    "dedupe",
+    "merge",
+    "rank",
+    "forecast"
+  ],
   "license": "MIT",
-  "privacy_policies": ["https://futuresearch.ai/privacy/"]
+  "privacy_policies": [
+    "https://futuresearch.ai/privacy/"
+  ]
 }

futuresearch-mcp/src/futuresearch_mcp/http_config.py

Lines changed: 3 additions & 14 deletions
@@ -28,8 +28,8 @@
     SecurityHeadersMiddleware,
 )
 from futuresearch_mcp.redis_store import get_redis_client
-from futuresearch_mcp.routes import api_download, api_download_url, api_progress
-from futuresearch_mcp.templates import RESULTS_HTML, SESSION_HTML
+from futuresearch_mcp.routes import api_download, api_progress
+from futuresearch_mcp.templates import UNIFIED_HTML
 from futuresearch_mcp.uploads import proxy_upload

 logger = logging.getLogger(__name__)
@@ -136,15 +136,7 @@ def _register_widgets(
         meta={"ui": {"csp": widget_csp}},
     )
     def _session_ui_http() -> str:
-        return SESSION_HTML
-
-    @mcp.resource(
-        "ui://futuresearch/results.html",
-        mime_type="text/html;profile=mcp-app",
-        meta={"ui": {"csp": widget_csp}},
-    )
-    def _results_ui_http() -> str:
-        return RESULTS_HTML
+        return UNIFIED_HTML


 def _register_routes(
@@ -157,9 +149,6 @@ def _register_routes(
     mcp.custom_route("/api/results/{task_id}/download", ["GET", "OPTIONS"])(
         api_download
     )
-    mcp.custom_route("/api/results/{task_id}/download-token", ["GET", "OPTIONS"])(
-        api_download_url
-    )

     async def _health(_request: Request) -> Response:
         try:

futuresearch-mcp/src/futuresearch_mcp/result_store.py

Lines changed: 0 additions & 24 deletions
@@ -91,12 +91,9 @@ def _build_result_response(
     columns: list[str],
     offset: int,
     page_size: int,
-    poll_token: str = "",
-    mcp_server_url: str = "",
     artifact_id: str = "",
     *,
     requested_page_size: int | None = None,
-    skip_widget: bool = False,
 ) -> CallToolResult:
     """Build a CallToolResult with separate content and structuredContent.

@@ -116,26 +113,6 @@
     has_more = offset + page_size < total
     next_offset = offset + page_size if has_more else None

-    # ── Widget data → structuredContent (client only, NOT the LLM) ───
-    #
-    # Only emit on the first page — the widget fetches the full dataset
-    # independently, so subsequent pages only need the text summary.
-    structured: dict[str, Any] | None = None
-    if offset == 0 and not skip_widget:
-        structured = {
-            "csv_url": csv_url,
-            "preview": preview_records,
-            "total": total,
-            "fetch_full_results": True,
-        }
-        if artifact_id:
-            structured["artifact_id"] = artifact_id
-        if poll_token:
-            structured["poll_token"] = poll_token
-            structured["download_token_url"] = (
-                f"{mcp_server_url}/api/results/{task_id}/download-token"
-            )
-
     # ── Summary + inline data → content (for the LLM) ───────────────
     if has_more:
         page_size_arg = f", page_size={hint_page_size}"
@@ -175,7 +152,6 @@

     return CallToolResult(
         content=[TextContent(type="text", text=summary)],  # pyright: ignore[reportArgumentType]  # list invariance
-        structuredContent=structured,
         isError=False,
     )

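The parquet-first result path and the `process_rows` citation handling named in the summary live outside the files shown in this commit. The sketch below only illustrates the `[title](url)` citation shape that the paginated, non-paginated, and download paths are meant to share; the row layout and the helper name are assumptions, not the repository's actual `process_rows`.

```python
# Illustrative only: the "[title](url)" citation shape described in the
# commit summary. The row layout and this helper are assumptions.
def render_citations(rows: list[dict]) -> list[dict]:
    out = []
    for row in rows:
        row = dict(row)
        cites = row.pop("citations", [])
        # Collapse each citation dict into a markdown link.
        row["citations"] = "; ".join(
            f"[{c['title']}]({c['url']})" for c in cites
        )
        out.append(row)
    return out


rows = [
    {
        "company": "Acme",
        "citations": [{"title": "Acme homepage", "url": "https://acme.example"}],
    }
]
print(render_citations(rows))
# [{'company': 'Acme', 'citations': '[Acme homepage](https://acme.example)'}]
```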

futuresearch-mcp/src/futuresearch_mcp/routes.py

Lines changed: 99 additions & 62 deletions
@@ -8,7 +8,6 @@
 import secrets
 from uuid import UUID

-import httpx
 import pandas as pd
 from futuresearch.api_utils import handle_response
 from futuresearch.generated.api.tasks import get_task_status_tasks_task_id_status_get
@@ -18,7 +17,8 @@

 from futuresearch_mcp import redis_store
 from futuresearch_mcp.config import settings
-from futuresearch_mcp.tool_helpers import _UI_EXCLUDE, TaskState
+from futuresearch_mcp.result_store import _sanitize_records
+from futuresearch_mcp.tool_helpers import _UI_EXCLUDE, TaskState, _fetch_task_result

 logger = logging.getLogger(__name__)

@@ -81,6 +81,61 @@ async def _validate_poll_token(task_id: str, request: Request) -> JSONResponse | None:
     return None


+async def _fetch_summaries_rest(
+    client: AuthenticatedClient, task_id: str, cursor: str | None
+) -> tuple[list[dict] | None, str | None]:
+    """Fetch agent summaries from the Engine API for the REST progress endpoint."""
+    try:
+        params: dict[str, str] = {}
+        if cursor:
+            params["cursor"] = cursor
+        httpx_client = client.get_async_httpx_client()
+        resp = await httpx_client.request(
+            method="get",
+            url=f"/tasks/{task_id}/summaries",
+            params=params,
+        )
+        if resp.status_code == 200:
+            data = resp.json()
+            return data.get("summaries") or None, data.get("cursor") or cursor
+    except Exception:
+        logger.debug("Failed to fetch summaries for task %s via REST", task_id)
+    return None, cursor
+
+
+async def _fetch_aggregate_rest(
+    client: AuthenticatedClient, task_id: str, cursor: str | None
+) -> tuple[str | None, list[dict] | None, str | None]:
+    """Fetch aggregate + micro-summaries from the Engine API.
+
+    Returns (aggregate_text, micro_summaries, updated_cursor).
+    Falls back to plain summaries when the aggregate endpoint is unavailable.
+    """
+    try:
+        params: dict[str, str] = {}
+        if cursor:
+            params["cursor"] = cursor
+        httpx_client = client.get_async_httpx_client()
+        resp = await httpx_client.request(
+            method="get",
+            url=f"/tasks/{task_id}/summaries/aggregate",
+            params=params,
+        )
+        if resp.status_code == 200:
+            data = resp.json()
+            return (
+                data.get("aggregate") or None,
+                data.get("micro_summaries") or None,
+                data.get("cursor") or cursor,
+            )
+    except Exception:
+        pass
+
+    # Fallback: plain summaries without aggregate
+    summaries, new_cursor = await _fetch_summaries_rest(client, task_id, cursor)
+    return None, summaries, new_cursor
+
+
 async def api_progress(request: Request) -> Response:
     """REST endpoint for the session widget to poll task progress."""
     cors = _cors_headers()
@@ -119,12 +174,28 @@ async def api_progress(request: Request) -> Response:

         ts = TaskState(status_response)

-        # Don't pop the token on completion — the download route needs it.
-        # Let the Redis TTL expire it naturally.
+        if ts.is_terminal:
+            # Don't pop the token immediately — the widget's autoFetchResults
+            # needs it to call /download-token after task completion.
+            # The token will expire naturally via Redis TTL.
+            pass

-        return JSONResponse(
-            ts.model_dump(mode="json", exclude=_UI_EXCLUDE), headers=cors
-        )
+        payload = ts.model_dump(mode="json", exclude=_UI_EXCLUDE)
+
+        # Fetch aggregate + micro-summaries + partial rows for non-terminal tasks
+        if not ts.is_terminal:
+            cursor = request.query_params.get("cursor")
+            aggregate, summaries, new_cursor = await _fetch_aggregate_rest(
+                client, task_id, cursor
+            )
+            if aggregate:
+                payload["aggregate_summary"] = aggregate
+            if summaries:
+                payload["summaries"] = summaries
+            if new_cursor:
+                payload["cursor"] = new_cursor
+
+        return JSONResponse(payload, headers=cors)
     except Exception as exc:
         logger.error(
             "Progress poll failed for task %s: %s", task_id, type(exc).__name__
@@ -153,12 +224,11 @@ async def _validate_poll_token_bearer_only(
     return None


-async def api_download_url(request: Request) -> Response:
-    """Return the download URL for a task.
+async def api_download(request: Request) -> Response:  # noqa: PLR0911
+    """REST endpoint to download task results as CSV or JSON.

-    The widget calls this to get the download URL. Validates the poll
-    token so only the session owner gets the URL (the download itself
-    is open by task ID).
+    Authenticates via the poll token (Authorization: Bearer header or
+    ?token= query param). No separate download token needed.
     """
     cors = _cors_headers()
     if request.method == "OPTIONS":
@@ -172,72 +242,38 @@
     if err := _validate_uuid(task_id):
         return err

-    if err := await _validate_poll_token_bearer_only(task_id, request):
-        return err
-
-    download_url = f"{settings.mcp_server_url}/api/results/{task_id}/download"
-    return JSONResponse({"download_url": download_url}, headers=cors)
-
-
-async def api_download(request: Request) -> Response:  # noqa: PLR0911
-    """Download task results as CSV or JSON.
-
-    Fetches results from the public Engine API using the per-task API key
-    stored in Redis.
-
-    Query params:
-        format: "csv" (default) or "json"
-    """
-    cors = _cors_headers()
-    if request.method == "OPTIONS":
-        return Response(
-            status_code=204,
-            headers={**cors, "Access-Control-Max-Age": "3600"},
-        )
-
-    task_id = request.path_params["task_id"]
-
-    if err := _validate_uuid(task_id):
+    if err := await _validate_poll_token(task_id, request):
         return err

     fmt = request.query_params.get("format", "csv")
     if fmt not in ("csv", "json"):
         return JSONResponse(
             {"error": "Unsupported format"}, status_code=400, headers=cors
         )
-    # Fetch results via the public API (paginated path handles citation
+
+    # Fetch results via the public API (parquet-first path handles citation
     # resolution and internal column stripping automatically).
     api_key = await redis_store.get_task_token(task_id)
     if not api_key:
         return JSONResponse(
-            {"error": "Unknown task or expired session"},
-            status_code=404,
-            headers=cors,
+            {"error": "Results not found or expired"}, status_code=404, headers=cors
         )
-
     try:
-        # Trailing slash on base_url is required for httpx to append
-        # relative paths correctly (RFC 3986). A leading slash on the
-        # request path would *replace* the base path, sending the
-        # request to https://host/tasks/… instead of …/api/v0/tasks/….
-        base = settings.futuresearch_api_url.rstrip("/") + "/"
-        async with httpx.AsyncClient(
-            base_url=base,
-            headers={"Authorization": f"Bearer {api_key}"},
-        ) as http:
-            resp = await http.get(
-                f"tasks/{task_id}/result",
-                params={"offset": 0, "limit": 100000},
-            )
-            resp.raise_for_status()
-            body = resp.json()
-        records: list[dict] = body.get("data") or []
+        client = AuthenticatedClient(
+            base_url=settings.futuresearch_api_url,
+            token=api_key,
+            raise_on_unexpected_status=True,
+            follow_redirects=True,
+        )
+        rows, _total, _session_id, _artifact_id = await _fetch_task_result(
+            client, task_id
+        )
+        records: list[dict] = _sanitize_records(rows)
     except Exception:
-        logger.exception("Failed to fetch results for download, task %s", task_id)
+        logger.warning("Failed to fetch results for task %s", task_id, exc_info=True)
         return JSONResponse(
-            {"error": "Failed to fetch results"}, status_code=500, headers=cors
+            {"error": "Results not found or expired"}, status_code=404, headers=cors
         )
-
     safe_prefix = "".join(c for c in task_id[:8] if c.isalnum() or c == "-")

     if fmt == "json":
@@ -251,6 +287,7 @@ async def api_download(request: Request) -> Response:  # noqa: PLR0911
         },
     )

+    # CSV generated on-the-fly from the already-resolved records.
     csv_text = pd.DataFrame(records).to_csv(index=False, quoting=csv.QUOTE_ALL)
     return Response(
         content=csv_text,
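To exercise the simplified download route by hand, a client only needs the task ID and the poll token: per the docstring above, the token goes in an Authorization: Bearer header or a ?token= query parameter. The sketch below uses placeholder values for the server URL, task ID, and token.

```python
# Minimal sketch of calling the simplified download endpoint with the poll
# token. MCP_SERVER_URL, TASK_ID, and POLL_TOKEN are placeholders.
import asyncio

import httpx

MCP_SERVER_URL = "https://mcp.example.com"  # placeholder
TASK_ID = "00000000-0000-0000-0000-000000000000"  # placeholder
POLL_TOKEN = "example-poll-token"  # placeholder


async def download_csv() -> str:
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            f"{MCP_SERVER_URL}/api/results/{TASK_ID}/download",
            params={"format": "csv"},
            headers={"Authorization": f"Bearer {POLL_TOKEN}"},
        )
        resp.raise_for_status()  # 404 if the task or poll token has expired
        return resp.text


if __name__ == "__main__":
    print(asyncio.run(download_csv()))
```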

futuresearch-mcp/src/futuresearch_mcp/server.py

Lines changed: 5 additions & 2 deletions
@@ -19,7 +19,6 @@
 from futuresearch_mcp.redis_store import Transport
 from futuresearch_mcp.tools import (
     _RESULTS_ANNOTATIONS,
-    _RESULTS_META,
     futuresearch_results_http,
 )
 from futuresearch_mcp.uploads import register_upload_tool
@@ -98,6 +97,11 @@ def main():
     settings.transport = transport.value
     mcp._mcp_server.instructions = get_instructions(is_http=input_args.http)

+    # futuresearch_status is only useful for widget-capable clients (HTTP mode).
+    # Remove it in stdio mode so Claude Code never sees it.
+    if transport != Transport.HTTP:
+        mcp._tool_manager.remove_tool("futuresearch_status")
+
     # tools.py registers futuresearch_results_stdio by default.
     # Override with the HTTP variant when running in HTTP mode.
     # ToolManager.add_tool() is a no-op for existing names, so remove first.
@@ -107,7 +111,6 @@ def main():
         name="futuresearch_results",
         structured_output=False,
         annotations=_RESULTS_ANNOTATIONS,
-        meta=_RESULTS_META,
     )(futuresearch_results_http)

     if input_args.http:

0 commit comments
