Skip to content

Commit 21e9137

Browse files
RafaelPo and Claude authored
Skip widget, session URL, and page_size prompt for internal clients (#240)
Skip widget, session URL, and page_size prompt for internal clients (#240)

* Skip widget, session URL, and page_size prompt for internal clients

Detect requests from EveryRow CC (via User-Agent) and strip widget JSON, session URL references, and submission widget from responses. This saves bandwidth and context tokens for our own app which has its own UI.

Also adds auto_page_size_threshold setting (default 50, configurable via AUTO_PAGE_SIZE_THRESHOLD env var) — when total rows are at or below the threshold, the agent loads all rows directly instead of asking the user.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix page_size=0 bug and add everyrow-cc to widget blocklist

- Guard against total=0 producing page_size=0 (fails ge=1 validation)
- Add "everyrow-cc" to _NO_WIDGET_UA_SUBSTRINGS so client_supports_widgets is self-consistent without needing an override in the results handler

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee88975 commit 21e9137

7 files changed

Lines changed: 92 additions & 39 deletions

File tree

everyrow-mcp/src/everyrow_mcp/app.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async def no_auth_http_lifespan(_server: FastMCP):
101101
4. **Results** — call `everyrow_results(task_id)` to retrieve the output.
102102
103103
## Key rules
104-
- Always share the session_url with the user after submitting a task.
104+
- If a session_url appears in the submission response, share it with the user. If none is present, do not mention it.
105105
- Never guess or fabricate results — always wait for the task to complete.
106106
- For small datasets (< 50 rows), prefer passing `data` directly.
107107
- For larger datasets, use `everyrow_upload_data` to get an artifact_id first.
@@ -120,31 +120,35 @@ async def no_auth_http_lifespan(_server: FastMCP):
120120
"""
121121
)
122122

123-
_INSTRUCTIONS_HTTP = (
124-
_INSTRUCTIONS_COMMON
125-
+ """\
123+
124+
def _build_instructions_http() -> str:
125+
threshold = settings.auto_page_size_threshold
126+
return (
127+
_INSTRUCTIONS_COMMON
128+
+ f"""\
126129
## Data ingestion (remote mode)
127130
- `everyrow_upload_data(source="https://...")` — upload from a URL (Google Sheets supported).
128131
- For local/sandbox files, use `everyrow_request_upload_url(filename="data.csv")`, \
129132
then execute the returned curl command, then use the artifact_id from the response.
130-
- Or pass `data=[{"col": "val"}, ...]` directly to any processing tool.
133+
- Or pass `data=[{{"col": "val"}}, ...]` directly to any processing tool.
131134
- Do NOT pass local file paths to `everyrow_upload_data` — it will fail in remote mode.
132135
133136
## Results
134-
- IMPORTANT: When a task completes, you MUST ask the user how many rows they want loaded into \
135-
your context BEFORE calling everyrow_results. Do NOT call everyrow_results without asking first.
137+
- IMPORTANT: When a task completes with more than {threshold} rows, you MUST ask the user how many rows \
138+
they want loaded into your context BEFORE calling everyrow_results. Do NOT call everyrow_results \
139+
without asking first. If the task produced {threshold} or fewer rows, skip asking and load all rows directly.
136140
- `everyrow_results(task_id, page_size=N)` loads N rows into your context so you can read them. \
137141
The user always has access to all rows via the widget and download link.
138142
- After retrieving results, tell the user how many rows you can see vs the total, and that \
139143
they have access to the full dataset via the widget above and the download link.
140144
- Use offset to paginate through larger datasets.
141145
"""
142-
)
146+
)
143147

144148

145149
def get_instructions(is_http: bool) -> str:
146150
"""Return server instructions appropriate for the transport mode."""
147-
return _INSTRUCTIONS_HTTP if is_http else _INSTRUCTIONS_STDIO
151+
return _build_instructions_http() if is_http else _INSTRUCTIONS_STDIO
148152

149153

150154
mcp = FastMCP(

everyrow-mcp/src/everyrow_mcp/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ class Settings(BaseSettings):
8888
default=5000,
8989
description="Maximum rows allowed in inline data (list[dict]).",
9090
)
91+
auto_page_size_threshold: int = Field(
92+
default=50,
93+
description="If total rows <= this value, skip asking the user for page_size and load all rows directly.",
94+
)
9195

9296
# Upload settings (HTTP mode only)
9397
upload_secret: str = Field(

everyrow-mcp/src/everyrow_mcp/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,10 @@ def validate_task_id(cls, v: str) -> str:
658658
description=(
659659
"Number of result rows to load into your context so you can read them. "
660660
"The user has access to all rows via the widget regardless of this value. "
661-
"REQUIRED: You must ask the user how many rows they want before calling this tool. "
661+
f"REQUIRED: If the task produced more than {settings.auto_page_size_threshold} rows, "
662+
"you must ask the user how many rows they want before calling this tool. "
662663
"Do not use the default without asking. "
664+
f"If {settings.auto_page_size_threshold} or fewer rows, skip asking and set page_size to the total. "
663665
"Use offset to paginate through larger datasets."
664666
),
665667
ge=1,

everyrow-mcp/src/everyrow_mcp/result_store.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def _build_result_response(
9898
*,
9999
requested_page_size: int | None = None,
100100
skip_widget: bool = False,
101+
skip_session: bool = False,
101102
) -> list[TextContent]:
102103
"""Build MCP TextContent response for Redis-backed results.
103104
@@ -110,6 +111,8 @@ def _build_result_response(
110111
token via the ``download-token`` endpoint — no pre-minted URL is baked
111112
into the response, avoiding stale-token issues on re-render.
112113
"""
114+
if skip_session:
115+
session_url = ""
113116
col_names = _format_columns(columns)
114117
hint_page_size = (
115118
requested_page_size if requested_page_size is not None else page_size
@@ -209,6 +212,7 @@ async def try_cached_result(
209212
mcp_server_url: str = "",
210213
*,
211214
skip_widget: bool = False,
215+
skip_session: bool = False,
212216
) -> list[TextContent] | None:
213217
cached_meta_raw = await redis_store.get_result_meta(task_id)
214218
if not cached_meta_raw:
@@ -263,6 +267,7 @@ async def try_cached_result(
263267
mcp_server_url=mcp_server_url,
264268
requested_page_size=page_size,
265269
skip_widget=skip_widget,
270+
skip_session=skip_session,
266271
)
267272

268273

@@ -275,6 +280,7 @@ async def try_store_result(
275280
mcp_server_url: str = "",
276281
*,
277282
skip_widget: bool = False,
283+
skip_session: bool = False,
278284
) -> list[TextContent]:
279285
"""Store a DataFrame in Redis and return a paginated response."""
280286
try:
@@ -324,6 +330,7 @@ async def try_store_result(
324330
mcp_server_url=mcp_server_url,
325331
requested_page_size=page_size,
326332
skip_widget=skip_widget,
333+
skip_session=skip_session,
327334
)
328335
except Exception:
329336
logger.exception("Failed to store results in Redis for task %s", task_id)

everyrow-mcp/src/everyrow_mcp/tool_helpers.py

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _widgets_from_user_agent() -> bool:
177177
# Claude Code: "claude-code/2.1.59 (cli)"
178178
# MCP SDK: "python-httpx/0.28.1" (test client)
179179
# OAuth flow: "Bun/1.3.10" (Claude Code's OAuth helper)
180-
_NO_WIDGET_UA_SUBSTRINGS = {"claude-code"}
180+
_NO_WIDGET_UA_SUBSTRINGS = {"claude-code", "everyrow-cc"}
181181

182182
if any(pattern in ua for pattern in _NO_WIDGET_UA_SUBSTRINGS):
183183
return False
@@ -186,6 +186,13 @@ def _widgets_from_user_agent() -> bool:
186186
return True
187187

188188

189+
def is_internal_client() -> bool:
190+
"""Return True if the request comes from EveryRow's own app (CC)."""
191+
from everyrow_mcp.http_config import get_user_agent # noqa: PLC0415
192+
193+
return "everyrow-cc" in get_user_agent().lower()
194+
195+
189196
def _submission_text(
190197
label: str, session_url: str, task_id: str, session_id: str = ""
191198
) -> str:
@@ -198,6 +205,12 @@ def _submission_text(
198205
Task ID: {task_id}
199206
200207
Share the session_url with the user, then immediately call everyrow_progress(task_id='{task_id}').""")
208+
if is_internal_client():
209+
return dedent(f"""\
210+
{label}
211+
Task ID: {task_id}
212+
213+
Immediately call everyrow_progress(task_id='{task_id}').""")
201214
session_line = f"\nSession ID: {session_id}" if session_id else ""
202215
return dedent(f"""\
203216
{label}{session_line}
@@ -206,15 +219,14 @@ def _submission_text(
206219
Immediately call everyrow_progress(task_id='{task_id}').""")
207220

208221

209-
async def _submission_ui_json(
210-
session_url: str,
211-
task_id: str,
212-
total: int,
213-
token: str,
214-
mcp_server_url: str = "",
215-
session_id: str = "",
216-
) -> str:
217-
"""Build JSON for the session MCP App widget, and store the token for polling."""
222+
async def _record_task_ownership(task_id: str, token: str) -> str:
223+
"""Record task ownership and create a poll token.
224+
225+
Must run for every HTTP submission (including internal clients) so that
226+
downstream ownership checks in progress/results don't fail.
227+
228+
Returns the poll_token.
229+
"""
218230
poll_token = secrets.token_urlsafe(32)
219231
await redis_store.store_task_token(task_id, token)
220232

@@ -235,6 +247,18 @@ async def _submission_ui_json(
235247
# Bind the poll token to the same user identity so the REST layer
236248
# can cross-check poll_owner == task_owner.
237249
await redis_store.store_poll_token(task_id, poll_token, user_id=user_id)
250+
return poll_token
251+
252+
253+
async def _submission_ui_json(
254+
session_url: str,
255+
task_id: str,
256+
total: int,
257+
poll_token: str,
258+
mcp_server_url: str = "",
259+
session_id: str = "",
260+
) -> str:
261+
"""Build JSON for the session MCP App widget."""
238262
data: dict[str, Any] = {
239263
"session_url": session_url,
240264
"task_id": task_id,
@@ -267,15 +291,17 @@ async def create_tool_response(
267291
text = _submission_text(label, session_url, task_id, session_id=session_id)
268292
main_content = TextContent(type="text", text=text)
269293
if settings.is_http:
270-
ui_json = await _submission_ui_json(
271-
session_url=session_url,
272-
task_id=task_id,
273-
total=total,
274-
token=token,
275-
mcp_server_url=mcp_server_url,
276-
session_id=session_id,
277-
)
278-
return [TextContent(type="text", text=ui_json), main_content]
294+
poll_token = await _record_task_ownership(task_id, token)
295+
if not is_internal_client():
296+
ui_json = await _submission_ui_json(
297+
session_url=session_url,
298+
task_id=task_id,
299+
total=total,
300+
poll_token=poll_token,
301+
mcp_server_url=mcp_server_url,
302+
session_id=session_id,
303+
)
304+
return [TextContent(type="text", text=ui_json), main_content]
279305
return [main_content]
280306

281307

@@ -407,11 +433,15 @@ def progress_message(self, task_id: str) -> str:
407433
else:
408434
completed_msg = f"Completed: {self.completed}/{self.total} ({self.failed} failed) in {self.elapsed_s}s."
409435
if settings.is_http:
410-
next_call = dedent(f"""\
411-
IMPORTANT: Do NOT call everyrow_results yet.\
412-
First, ask the user: "The task produced {self.total} rows. How many would you like me to load into my context so I can read them? (default: 50). You will have access to all of them via the widget.".\
413-
The answer the user provides will correspond to the `page_size`.\
414-
After the user responds, call everyrow_results(task_id='{task_id}', page_size=N).""")
436+
if self.total <= settings.auto_page_size_threshold:
437+
next_call = dedent(f"""\
438+
Call everyrow_results(task_id='{task_id}', page_size={max(self.total, 1)}) to load all rows.""")
439+
else:
440+
next_call = dedent(f"""\
441+
IMPORTANT: Do NOT call everyrow_results yet.\
442+
First, ask the user: "The task produced {self.total} rows. How many would you like me to load into my context so I can read them? (default: 50). You will have access to all of them via the widget.".\
443+
The answer the user provides will correspond to the `page_size`.\
444+
After the user responds, call everyrow_results(task_id='{task_id}', page_size=N).""")
415445
else:
416446
next_call = f"Call everyrow_results(task_id='{task_id}', output_path='<choose_a_path>.csv') to save the output."
417447
return f"{completed_msg}\n{next_call}"

everyrow-mcp/src/everyrow_mcp/tools.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
_get_client,
6262
client_supports_widgets,
6363
create_tool_response,
64+
is_internal_client,
6465
log_client_info,
6566
write_initial_task_state,
6667
)
@@ -875,18 +876,19 @@ async def everyrow_results_http(
875876
"""Retrieve results from a completed everyrow task.
876877
877878
Only call this after everyrow_progress reports status 'completed'.
878-
IMPORTANT: You MUST ask the user how many rows they want loaded into your
879-
context BEFORE calling this tool. Do NOT call with the default — always
880-
ask first and use their answer as page_size.
881879
The user always has access to all rows via the widget — page_size only
882-
controls how many rows you can read.
880+
controls how many rows _you_ can read.
883881
After results load, tell the user how many rows you can see vs the total.
884882
"""
885883
client = _get_client(ctx)
886884
task_id = params.task_id
887885
mcp_server_url = ctx.request_context.lifespan_context.mcp_server_url
888886
log_client_info(ctx, "everyrow_results")
889887
skip_widget = not client_supports_widgets(ctx)
888+
skip_session = False
889+
if is_internal_client():
890+
skip_widget = True
891+
skip_session = True
890892

891893
# ── Cross-user access check ──────────────────────────────────
892894
try:
@@ -908,6 +910,7 @@ async def everyrow_results_http(
908910
params.page_size,
909911
mcp_server_url=mcp_server_url,
910912
skip_widget=skip_widget,
913+
skip_session=skip_session,
911914
)
912915
if cached is not None:
913916
return cached
@@ -946,6 +949,7 @@ async def everyrow_results_http(
946949
session_url,
947950
mcp_server_url=mcp_server_url,
948951
skip_widget=skip_widget,
952+
skip_session=skip_session,
949953
)
950954

951955

everyrow-mcp/tests/test_stdio_content.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,9 @@ async def test_progress_http_completed_no_output_path_hint(self):
703703
human_text = result[-1].text
704704
assert "output_path" not in human_text
705705
assert "everyrow_results" in human_text
706-
assert "ask the user" in human_text.lower()
706+
# total=5 is below auto_page_size_threshold (50), so the model
707+
# should be told to load all rows directly instead of asking.
708+
assert "load all rows" in human_text.lower()
707709

708710

709711
# ── MCP protocol integration tests (real API) ────────────────────────

0 commit comments

Comments (0)