Skip to content

Commit 21e9137

Browse files
RafaelPo and Claude authored
Skip widget, session URL, and page_size prompt for internal clients (#240)
Skip widget, session URL, and page_size prompt for internal clients (#240)

* Skip widget, session URL, and page_size prompt for internal clients

Detect requests from EveryRow CC (via User-Agent) and strip widget JSON, session URL references, and submission widget from responses. This saves bandwidth and context tokens for our own app which has its own UI.

Also adds auto_page_size_threshold setting (default 50, configurable via AUTO_PAGE_SIZE_THRESHOLD env var) — when total rows are at or below the threshold, the agent loads all rows directly instead of asking the user.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix page_size=0 bug and add everyrow-cc to widget blocklist

- Guard against total=0 producing page_size=0 (fails ge=1 validation)
- Add "everyrow-cc" to _NO_WIDGET_UA_SUBSTRINGS so client_supports_widgets is self-consistent without needing an override in the results handler

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee88975 commit 21e9137

7 files changed

Lines changed: 92 additions & 39 deletions

File tree

everyrow-mcp/src/everyrow_mcp/app.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async def no_auth_http_lifespan(_server: FastMCP):
101101
4. **Results** — call `everyrow_results(task_id)` to retrieve the output.
102102
103103
## Key rules
104-
- Always share the session_url with the user after submitting a task.
104+
- If a session_url appears in the submission response, share it with the user. If none is present, do not mention it.
105105
- Never guess or fabricate results — always wait for the task to complete.
106106
- For small datasets (< 50 rows), prefer passing `data` directly.
107107
- For larger datasets, use `everyrow_upload_data` to get an artifact_id first.
@@ -120,31 +120,35 @@ async def no_auth_http_lifespan(_server: FastMCP):
120120
"""
121121
)
122122

123-
_INSTRUCTIONS_HTTP = (
124-
_INSTRUCTIONS_COMMON
125-
+ """\
123+
124+
def _build_instructions_http() -> str:
125+
threshold = settings.auto_page_size_threshold
126+
return (
127+
_INSTRUCTIONS_COMMON
128+
+ f"""\
126129
## Data ingestion (remote mode)
127130
- `everyrow_upload_data(source="https://...")` — upload from a URL (Google Sheets supported).
128131
- For local/sandbox files, use `everyrow_request_upload_url(filename="data.csv")`, \
129132
then execute the returned curl command, then use the artifact_id from the response.
130-
- Or pass `data=[{"col": "val"}, ...]` directly to any processing tool.
133+
- Or pass `data=[{{"col": "val"}}, ...]` directly to any processing tool.
131134
- Do NOT pass local file paths to `everyrow_upload_data` — it will fail in remote mode.
132135
133136
## Results
134-
- IMPORTANT: When a task completes, you MUST ask the user how many rows they want loaded into \
135-
your context BEFORE calling everyrow_results. Do NOT call everyrow_results without asking first.
137+
- IMPORTANT: When a task completes with more than {threshold} rows, you MUST ask the user how many rows \
138+
they want loaded into your context BEFORE calling everyrow_results. Do NOT call everyrow_results \
139+
without asking first. If the task produced {threshold} or fewer rows, skip asking and load all rows directly.
136140
- `everyrow_results(task_id, page_size=N)` loads N rows into your context so you can read them. \
137141
The user always has access to all rows via the widget and download link.
138142
- After retrieving results, tell the user how many rows you can see vs the total, and that \
139143
they have access to the full dataset via the widget above and the download link.
140144
- Use offset to paginate through larger datasets.
141145
"""
142-
)
146+
)
143147

144148

145149
def get_instructions(is_http: bool) -> str:
146150
"""Return server instructions appropriate for the transport mode."""
147-
return _INSTRUCTIONS_HTTP if is_http else _INSTRUCTIONS_STDIO
151+
return _build_instructions_http() if is_http else _INSTRUCTIONS_STDIO
148152

149153

150154
mcp = FastMCP(

everyrow-mcp/src/everyrow_mcp/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ class Settings(BaseSettings):
8888
default=5000,
8989
description="Maximum rows allowed in inline data (list[dict]).",
9090
)
91+
auto_page_size_threshold: int = Field(
92+
default=50,
93+
description="If total rows <= this value, skip asking the user for page_size and load all rows directly.",
94+
)
9195

9296
# Upload settings (HTTP mode only)
9397
upload_secret: str = Field(

everyrow-mcp/src/everyrow_mcp/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,10 @@ def validate_task_id(cls, v: str) -> str:
658658
description=(
659659
"Number of result rows to load into your context so you can read them. "
660660
"The user has access to all rows via the widget regardless of this value. "
661-
"REQUIRED: You must ask the user how many rows they want before calling this tool. "
661+
f"REQUIRED: If the task produced more than {settings.auto_page_size_threshold} rows, "
662+
"you must ask the user how many rows they want before calling this tool. "
662663
"Do not use the default without asking. "
664+
f"If {settings.auto_page_size_threshold} or fewer rows, skip asking and set page_size to the total. "
663665
"Use offset to paginate through larger datasets."
664666
),
665667
ge=1,

everyrow-mcp/src/everyrow_mcp/result_store.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def _build_result_response(
9898
*,
9999
requested_page_size: int | None = None,
100100
skip_widget: bool = False,
101+
skip_session: bool = False,
101102
) -> list[TextContent]:
102103
"""Build MCP TextContent response for Redis-backed results.
103104
@@ -110,6 +111,8 @@ def _build_result_response(
110111
token via the ``download-token`` endpoint — no pre-minted URL is baked
111112
into the response, avoiding stale-token issues on re-render.
112113
"""
114+
if skip_session:
115+
session_url = ""
113116
col_names = _format_columns(columns)
114117
hint_page_size = (
115118
requested_page_size if requested_page_size is not None else page_size
@@ -209,6 +212,7 @@ async def try_cached_result(
209212
mcp_server_url: str = "",
210213
*,
211214
skip_widget: bool = False,
215+
skip_session: bool = False,
212216
) -> list[TextContent] | None:
213217
cached_meta_raw = await redis_store.get_result_meta(task_id)
214218
if not cached_meta_raw:
@@ -263,6 +267,7 @@ async def try_cached_result(
263267
mcp_server_url=mcp_server_url,
264268
requested_page_size=page_size,
265269
skip_widget=skip_widget,
270+
skip_session=skip_session,
266271
)
267272

268273

@@ -275,6 +280,7 @@ async def try_store_result(
275280
mcp_server_url: str = "",
276281
*,
277282
skip_widget: bool = False,
283+
skip_session: bool = False,
278284
) -> list[TextContent]:
279285
"""Store a DataFrame in Redis and return a paginated response."""
280286
try:
@@ -324,6 +330,7 @@ async def try_store_result(
324330
mcp_server_url=mcp_server_url,
325331
requested_page_size=page_size,
326332
skip_widget=skip_widget,
333+
skip_session=skip_session,
327334
)
328335
except Exception:
329336
logger.exception("Failed to store results in Redis for task %s", task_id)

everyrow-mcp/src/everyrow_mcp/tool_helpers.py

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _widgets_from_user_agent() -> bool:
177177
# Claude Code: "claude-code/2.1.59 (cli)"
178178
# MCP SDK: "python-httpx/0.28.1" (test client)
179179
# OAuth flow: "Bun/1.3.10" (Claude Code's OAuth helper)
180-
_NO_WIDGET_UA_SUBSTRINGS = {"claude-code"}
180+
_NO_WIDGET_UA_SUBSTRINGS = {"claude-code", "everyrow-cc"}
181181

182182
if any(pattern in ua for pattern in _NO_WIDGET_UA_SUBSTRINGS):
183183
return False
@@ -186,6 +186,13 @@ def _widgets_from_user_agent() -> bool:
186186
return True
187187

188188

189+
def is_internal_client() -> bool:
190+
"""Return True if the request comes from EveryRow's own app (CC)."""
191+
from everyrow_mcp.http_config import get_user_agent # noqa: PLC0415
192+
193+
return "everyrow-cc" in get_user_agent().lower()
194+
195+
189196
def _submission_text(
190197
label: str, session_url: str, task_id: str, session_id: str = ""
191198
) -> str:
@@ -198,6 +205,12 @@ def _submission_text(
198205
Task ID: {task_id}
199206
200207
Share the session_url with the user, then immediately call everyrow_progress(task_id='{task_id}').""")
208+
if is_internal_client():
209+
return dedent(f"""\
210+
{label}
211+
Task ID: {task_id}
212+
213+
Immediately call everyrow_progress(task_id='{task_id}').""")
201214
session_line = f"\nSession ID: {session_id}" if session_id else ""
202215
return dedent(f"""\
203216
{label}{session_line}
@@ -206,15 +219,14 @@ def _submission_text(
206219
Immediately call everyrow_progress(task_id='{task_id}').""")
207220

208221

209-
async def _submission_ui_json(
210-
session_url: str,
211-
task_id: str,
212-
total: int,
213-
token: str,
214-
mcp_server_url: str = "",
215-
session_id: str = "",
216-
) -> str:
217-
"""Build JSON for the session MCP App widget, and store the token for polling."""
222+
async def _record_task_ownership(task_id: str, token: str) -> str:
223+
"""Record task ownership and create a poll token.
224+
225+
Must run for every HTTP submission (including internal clients) so that
226+
downstream ownership checks in progress/results don't fail.
227+
228+
Returns the poll_token.
229+
"""
218230
poll_token = secrets.token_urlsafe(32)
219231
await redis_store.store_task_token(task_id, token)
220232

@@ -235,6 +247,18 @@ async def _submission_ui_json(
235247
# Bind the poll token to the same user identity so the REST layer
236248
# can cross-check poll_owner == task_owner.
237249
await redis_store.store_poll_token(task_id, poll_token, user_id=user_id)
250+
return poll_token
251+
252+
253+
async def _submission_ui_json(
254+
session_url: str,
255+
task_id: str,
256+
total: int,
257+
poll_token: str,
258+
mcp_server_url: str = "",
259+
session_id: str = "",
260+
) -> str:
261+
"""Build JSON for the session MCP App widget."""
238262
data: dict[str, Any] = {
239263
"session_url": session_url,
240264
"task_id": task_id,
@@ -267,15 +291,17 @@ async def create_tool_response(
267291
text = _submission_text(label, session_url, task_id, session_id=session_id)
268292
main_content = TextContent(type="text", text=text)
269293
if settings.is_http:
270-
ui_json = await _submission_ui_json(
271-
session_url=session_url,
272-
task_id=task_id,
273-
total=total,
274-
token=token,
275-
mcp_server_url=mcp_server_url,
276-
session_id=session_id,
277-
)
278-
return [TextContent(type="text", text=ui_json), main_content]
294+
poll_token = await _record_task_ownership(task_id, token)
295+
if not is_internal_client():
296+
ui_json = await _submission_ui_json(
297+
session_url=session_url,
298+
task_id=task_id,
299+
total=total,
300+
poll_token=poll_token,
301+
mcp_server_url=mcp_server_url,
302+
session_id=session_id,
303+
)
304+
return [TextContent(type="text", text=ui_json), main_content]
279305
return [main_content]
280306

281307

@@ -407,11 +433,15 @@ def progress_message(self, task_id: str) -> str:
407433
else:
408434
completed_msg = f"Completed: {self.completed}/{self.total} ({self.failed} failed) in {self.elapsed_s}s."
409435
if settings.is_http:
410-
next_call = dedent(f"""\
411-
IMPORTANT: Do NOT call everyrow_results yet.\
412-
First, ask the user: "The task produced {self.total} rows. How many would you like me to load into my context so I can read them? (default: 50). You will have access to all of them via the widget.".\
413-
The answer the user provides will correspond to the `page_size`.\
414-
After the user responds, call everyrow_results(task_id='{task_id}', page_size=N).""")
436+
if self.total <= settings.auto_page_size_threshold:
437+
next_call = dedent(f"""\
438+
Call everyrow_results(task_id='{task_id}', page_size={max(self.total, 1)}) to load all rows.""")
439+
else:
440+
next_call = dedent(f"""\
441+
IMPORTANT: Do NOT call everyrow_results yet.\
442+
First, ask the user: "The task produced {self.total} rows. How many would you like me to load into my context so I can read them? (default: 50). You will have access to all of them via the widget.".\
443+
The answer the user provides will correspond to the `page_size`.\
444+
After the user responds, call everyrow_results(task_id='{task_id}', page_size=N).""")
415445
else:
416446
next_call = f"Call everyrow_results(task_id='{task_id}', output_path='<choose_a_path>.csv') to save the output."
417447
return f"{completed_msg}\n{next_call}"

everyrow-mcp/src/everyrow_mcp/tools.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
_get_client,
6262
client_supports_widgets,
6363
create_tool_response,
64+
is_internal_client,
6465
log_client_info,
6566
write_initial_task_state,
6667
)
@@ -875,18 +876,19 @@ async def everyrow_results_http(
875876
"""Retrieve results from a completed everyrow task.
876877
877878
Only call this after everyrow_progress reports status 'completed'.
878-
IMPORTANT: You MUST ask the user how many rows they want loaded into your
879-
context BEFORE calling this tool. Do NOT call with the default — always
880-
ask first and use their answer as page_size.
881879
The user always has access to all rows via the widget — page_size only
882-
controls how many rows you can read.
880+
controls how many rows _you_ can read.
883881
After results load, tell the user how many rows you can see vs the total.
884882
"""
885883
client = _get_client(ctx)
886884
task_id = params.task_id
887885
mcp_server_url = ctx.request_context.lifespan_context.mcp_server_url
888886
log_client_info(ctx, "everyrow_results")
889887
skip_widget = not client_supports_widgets(ctx)
888+
skip_session = False
889+
if is_internal_client():
890+
skip_widget = True
891+
skip_session = True
890892

891893
# ── Cross-user access check ──────────────────────────────────
892894
try:
@@ -908,6 +910,7 @@ async def everyrow_results_http(
908910
params.page_size,
909911
mcp_server_url=mcp_server_url,
910912
skip_widget=skip_widget,
913+
skip_session=skip_session,
911914
)
912915
if cached is not None:
913916
return cached
@@ -946,6 +949,7 @@ async def everyrow_results_http(
946949
session_url,
947950
mcp_server_url=mcp_server_url,
948951
skip_widget=skip_widget,
952+
skip_session=skip_session,
949953
)
950954

951955

everyrow-mcp/tests/test_stdio_content.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,9 @@ async def test_progress_http_completed_no_output_path_hint(self):
703703
human_text = result[-1].text
704704
assert "output_path" not in human_text
705705
assert "everyrow_results" in human_text
706-
assert "ask the user" in human_text.lower()
706+
# total=5 is below auto_page_size_threshold (50), so the model
707+
# should be told to load all rows directly instead of asking.
708+
assert "load all rows" in human_text.lower()
707709

708710

709711
# ── MCP protocol integration tests (real API) ────────────────────────

0 commit comments

Comments (0)