
Commit 90f2494

RafaelPo authored and github-actions[bot] committed
feat: paginate result endpoint, remove Redis result cache (#4957)
## Summary

- **Phase A**: `GET /api/v0/tasks/{id}/result` gains optional `offset`/`limit` query params. It reads paginated rows from ClickHouse with citation resolution and internal column stripping, and sets the `X-Total-Row-Count` header. Without params, behavior is unchanged (backward compatible); a client-side request sketch of the paginated call follows the change stats below.
- **Phase B**: the MCP server no longer caches results in Redis. `futuresearch_results` fetches paginated rows directly from the Engine, and the download endpoint calls the Engine's internal API (no auth). Deletes ~400 lines of Redis caching logic.
- Makes `MCP_SERVER_URL`, `MCP_SANDBOX_URL`, and `TRUST_PROXY_HEADERS` configurable in the everyrow-cc compose file (needed for CF tunnel testing).

## What gets deleted (net simplification)

| Component | Lines |
|-----------|-------|
| `try_cached_result()` | ~65 |
| `try_store_result()` | ~70 |
| Redis result cache functions | ~50 |
| Test classes for cache logic | ~200+ |
| **Net** | **-528 lines** |

## Test plan

- [x] Engine unit tests (40 passed)
- [x] MCP unit tests (392 passed)
- [x] Local E2E: submit rank task → paginated result → download CSV
- [x] Claude.ai E2E via CF tunnel: submit task, progress polling, results
- [ ] Deploy Engine to staging, verify backward compat
- [ ] Deploy MCP to staging, verify no `mcp:result:*` Redis keys (a key-scan sketch follows the `redis_store.py` diff below)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Sourced from commit 58ea2dd6c73bdfa517e974fad3573712508203d4
1 parent a3942ae commit 90f2494

14 files changed

Lines changed: 451 additions & 1601 deletions
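
The Phase A endpoint described in the summary can be driven from any HTTP client. The sketch below is illustrative only, not code from this commit: the Engine base URL, auth header, page size, and response body shape (assumed to be a JSON array of row objects) are placeholders; only the `offset`/`limit` query params and the `X-Total-Row-Count` response header come from the commit message.

```python
# Hypothetical pager for GET /api/v0/tasks/{id}/result (Phase A).
# ENGINE_URL, AUTH_TOKEN, and PAGE_SIZE are placeholders, not values from this commit;
# the response is assumed to be a JSON array of row objects.
import requests

ENGINE_URL = "https://engine.example.com"  # assumed base URL
AUTH_TOKEN = "..."                         # assumed bearer-token auth
PAGE_SIZE = 500                            # arbitrary page size


def fetch_all_rows(task_id: str) -> list[dict]:
    """Page through the result endpoint until X-Total-Row-Count rows are collected."""
    rows: list[dict] = []
    offset = 0
    total: int | None = None
    while total is None or offset < total:
        resp = requests.get(
            f"{ENGINE_URL}/api/v0/tasks/{task_id}/result",
            params={"offset": offset, "limit": PAGE_SIZE},
            headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
            timeout=30,
        )
        resp.raise_for_status()
        # Total row count is reported in the X-Total-Row-Count header (per the summary).
        total = int(resp.headers["X-Total-Row-Count"])
        page = resp.json()
        rows.extend(page)
        if not page:  # defensive stop if the server returns an empty page
            break
        offset += len(page)
    return rows
```

Calling the endpoint with no `offset`/`limit` keeps the previous single-response behavior, per the backward-compatibility note in the summary.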

futuresearch-mcp/src/futuresearch_mcp/http_config.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -28,7 +28,7 @@
     SecurityHeadersMiddleware,
 )
 from futuresearch_mcp.redis_store import get_redis_client
-from futuresearch_mcp.routes import api_download, api_download_token, api_progress
+from futuresearch_mcp.routes import api_download, api_download_url, api_progress
 from futuresearch_mcp.templates import RESULTS_HTML, SESSION_HTML
 from futuresearch_mcp.uploads import proxy_upload
@@ -151,7 +151,7 @@ def _register_routes(
         api_download
     )
     mcp.custom_route("/api/results/{task_id}/download-token", ["GET", "OPTIONS"])(
-        api_download_token
+        api_download_url
     )
 
     async def _health(_request: Request) -> Response:
```

futuresearch-mcp/src/futuresearch_mcp/redis_store.py

Lines changed: 0 additions & 89 deletions
```diff
@@ -22,10 +22,7 @@
 HEALTH_CHECK_INTERVAL = 30
 
 PROGRESS_POLL_DELAY = 12
-RESULT_CACHE_TTL = 600
-RESULT_DATA_TTL = 3600  # 1 hour — full result data stored in Redis for download
 TOKEN_TTL = 86400  # 24 hours — must outlive the longest possible task
-DOWNLOAD_TOKEN_TTL = 3600  # 1 hour — single-use download tokens (match RESULT_DATA_TTL)
 
 
 class Transport(StrEnum):
@@ -175,65 +172,6 @@ def set_redis_client(client: Redis | None) -> None:
     _redis_client = client
 
 
-async def get_result_meta(task_id: str) -> str | None:
-    return await get_redis_client().get(build_key("result", task_id))
-
-
-async def store_result_meta(task_id: str, meta_json: str) -> None:
-    await get_redis_client().setex(
-        build_key("result", task_id), RESULT_CACHE_TTL, meta_json
-    )
-
-
-# ── Result pages ──────────────────────────────────────────────
-
-
-async def get_result_page(task_id: str, offset: int, page_size: int) -> str | None:
-    key = build_key("result", task_id, "page", str(offset), str(page_size))
-    return await get_redis_client().get(key)
-
-
-async def store_result_page(
-    task_id: str, offset: int, page_size: int, preview_json: str
-) -> None:
-    await get_redis_client().setex(
-        build_key("result", task_id, "page", str(offset), str(page_size)),
-        RESULT_CACHE_TTL,
-        preview_json,
-    )
-
-
-# ── JSON result storage ────────────────────────────────────────
-
-
-MAX_JSON_CACHE_CHARS = (
-    80 * 1024 * 1024
-)  # 80M characters — skip Redis cache for oversized results (JSON is ~1.5x larger than CSV)
-
-
-async def store_result_json(task_id: str, json_text: str) -> None:
-    if len(json_text) > MAX_JSON_CACHE_CHARS:
-        logger.warning(
-            "Skipping Redis cache for task %s: JSON is %d chars (limit %d)",
-            task_id,
-            len(json_text),
-            MAX_JSON_CACHE_CHARS,
-        )
-        return
-    await get_redis_client().setex(
-        name=build_key("result", task_id, "json"), time=RESULT_DATA_TTL, value=json_text
-    )
-
-
-async def get_result_json(task_id: str) -> str | None:
-    return await get_redis_client().get(name=build_key("result", task_id, "json"))
-
-
-async def result_json_exists(task_id: str) -> bool:
-    """O(1) existence check — avoids reading the full JSON into memory."""
-    return await get_redis_client().exists(build_key("result", task_id, "json")) > 0
-
-
 async def store_task_token(task_id: str, token: str) -> None:
     await get_redis_client().setex(
         build_key("task_token", task_id), TOKEN_TTL, encrypt_value(token)
@@ -293,30 +231,3 @@ async def store_task_owner(task_id: str, user_id: str) -> None:
 async def get_task_owner(task_id: str) -> str | None:
     """Return the user_id that owns a task, or None if not recorded."""
     return await get_redis_client().get(build_key("task_owner", task_id))
-
-
-# ── Download tokens (reusable, TTL-limited) ──────────────────
-
-
-async def store_download_token(download_token: str, task_id: str) -> None:
-    """Store a download token that maps back to a task_id.
-
-    Keyed by token (reverse-lookup) so multiple concurrent download tokens
-    can exist for the same task. Tokens are reusable until they expire.
-    """
-    await get_redis_client().setex(
-        name=build_key("dl_token", download_token),
-        time=DOWNLOAD_TOKEN_TTL,
-        value=encrypt_value(task_id),
-    )
-
-
-async def get_download_token(download_token: str) -> str | None:
-    """Look up a download token without consuming it.
-
-    Returns the task_id or None if the token does not exist / has expired.
-    """
-    encrypted = await get_redis_client().get(build_key("dl_token", download_token))
-    if encrypted is None:
-        return None
-    return decrypt_value(encrypted)
```
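
The last test-plan item asks for verifying that no `mcp:result:*` keys remain in Redis once the MCP stops caching results. Below is a minimal scan sketch, assuming the standard redis-py client and that the deleted helpers produced keys under an `mcp:result:` prefix; the actual key layout comes from `build_key()`, which this diff does not show, and the connection URL is a placeholder.

```python
# Hypothetical staging check: confirm the removed result cache left no keys behind.
# The Redis URL and the "mcp:result:*" pattern are assumptions taken from the test plan.
import redis

r = redis.Redis.from_url("redis://localhost:6379/0", decode_responses=True)

stale = list(r.scan_iter(match="mcp:result:*", count=1000))
if stale:
    print(f"Found {len(stale)} stale result-cache keys, e.g. {stale[:5]}")
else:
    print("No mcp:result:* keys found; the result cache removal looks clean.")
```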
