From a24b950a2fd7a0468752dfdcd9acbf8b6463288c Mon Sep 17 00:00:00 2001
From: yaojin3616 <yaoj.alex@gmail.com>
Date: Wed, 10 Jun 2026 15:01:06 +0800
Subject: [PATCH 1/2] feat: comprehensive updates to sandbox, agents, and UI

This PR includes several recent improvements:
- UV Migration: Replaced pip with uv for significantly faster sandbox environment creation and package installation.
- Idempotent Migration: Updated 056_add_user_tenant_onboarding.py to use sa.inspect, ensuring upgrade/downgrade operations are idempotent.
- UI Enhancements: Optimized queries and added loading states to AgentDetailPage and WorkspaceOperationPanel.
- Heartbeat & LLM Caller: Fixed workspace lock contention issues in heartbeat.py and implemented Anthropic Prompt Caching in caller.py.
- Deployment: Updated docker-compose files for streamlined deployments.
---
 backend/Dockerfile                            |  6 +-
 .../056_add_user_tenant_onboarding.py         | 50 ++++++----
 backend/app/services/agent_tools.py           |  4 +-
 backend/app/services/heartbeat.py             | 58 +++++++++---
 backend/app/services/llm/caller.py            | 64 +++++++++----
 .../sandbox/local/subprocess_backend.py       | 67 ++++++++-----
 backend/app/services/tool_seeder.py           | 15 ++-
 backend/test_sandbox_config.py                | 19 ++++
 backend/tests/test_finish_protocol.py         | 61 ++++++++++++
 deploy/docker-compose-multi.yml               |  4 +
 deploy/docker-compose.yml                     |  4 +-
 .../components/WorkspaceOperationPanel.tsx    | 26 +++--
 .../pages/agent-detail/AgentDetailPage.tsx    | 94 +++----------------
 13 files changed, 296 insertions(+), 176 deletions(-)
 create mode 100644 backend/test_sandbox_config.py

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 5a7f0c7b7..5d8c406fc 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -16,11 +16,11 @@ COPY pyproject.toml ./
 ARG CLAWITH_PIP_INDEX_URL
 ARG CLAWITH_PIP_TRUSTED_HOST
 RUN if [ -n "$CLAWITH_PIP_INDEX_URL" ] && [ -n "$CLAWITH_PIP_TRUSTED_HOST" ]; then \
-        pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" --trusted-host "$CLAWITH_PIP_TRUSTED_HOST" .; \
+        pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" --trusted-host "$CLAWITH_PIP_TRUSTED_HOST" . uv; \
     elif [ -n "$CLAWITH_PIP_INDEX_URL" ]; then \
-        pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" .; \
+        pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" . uv; \
     else \
-        pip install --no-cache-dir .; \
+        pip install --no-cache-dir . uv; \
     fi
 
 # ─── Production ─────────────────────────────────────────
diff --git a/backend/alembic/versions/056_add_user_tenant_onboarding.py b/backend/alembic/versions/056_add_user_tenant_onboarding.py
index fb5719031..e8bdde665 100644
--- a/backend/alembic/versions/056_add_user_tenant_onboarding.py
+++ b/backend/alembic/versions/056_add_user_tenant_onboarding.py
@@ -19,25 +19,37 @@
 
 
 def upgrade() -> None:
-    op.create_table(
-        "user_tenant_onboardings",
-        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
-        sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
-        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False),
-        sa.Column("status", sa.String(length=32), nullable=False, server_default="in_progress"),
-        sa.Column("current_step", sa.String(length=32), nullable=False, server_default="assistant"),
-        sa.Column("entry_mode", sa.String(length=32), nullable=False, server_default="create"),
-        sa.Column("personal_assistant_agent_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("agents.id", ondelete="SET NULL"), nullable=True),
-        sa.Column("started_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
-        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
-        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
-        sa.UniqueConstraint("user_id", "tenant_id", name="uq_user_tenant_onboarding"),
-    )
-    op.create_index("ix_user_tenant_onboardings_user_id", "user_tenant_onboardings", ["user_id"])
-    op.create_index("ix_user_tenant_onboardings_tenant_id", "user_tenant_onboardings", ["tenant_id"])
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    
+    if "user_tenant_onboardings" not in inspector.get_table_names():
+        op.create_table(
+            "user_tenant_onboardings",
+            sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False),
+            sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
+            sa.Column("tenant_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False),
+            sa.Column("status", sa.String(length=32), nullable=False, server_default="in_progress"),
+            sa.Column("current_step", sa.String(length=32), nullable=False, server_default="assistant"),
+            sa.Column("entry_mode", sa.String(length=32), nullable=False, server_default="create"),
+            sa.Column("personal_assistant_agent_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("agents.id", ondelete="SET NULL"), nullable=True),
+            sa.Column("started_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+            sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
+            sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+            sa.UniqueConstraint("user_id", "tenant_id", name="uq_user_tenant_onboarding"),
+        )
+        
+        op.create_index("ix_user_tenant_onboardings_user_id", "user_tenant_onboardings", ["user_id"])
+        op.create_index("ix_user_tenant_onboardings_tenant_id", "user_tenant_onboardings", ["tenant_id"])
 
 
 def downgrade() -> None:
-    op.drop_index("ix_user_tenant_onboardings_tenant_id", table_name="user_tenant_onboardings")
-    op.drop_index("ix_user_tenant_onboardings_user_id", table_name="user_tenant_onboardings")
-    op.drop_table("user_tenant_onboardings")
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    
+    if "user_tenant_onboardings" in inspector.get_table_names():
+        indexes = [ix["name"] for ix in inspector.get_indexes("user_tenant_onboardings")]
+        if "ix_user_tenant_onboardings_tenant_id" in indexes:
+            op.drop_index("ix_user_tenant_onboardings_tenant_id", table_name="user_tenant_onboardings")
+        if "ix_user_tenant_onboardings_user_id" in indexes:
+            op.drop_index("ix_user_tenant_onboardings_user_id", table_name="user_tenant_onboardings")
+        op.drop_table("user_tenant_onboardings")
diff --git a/backend/app/services/agent_tools.py b/backend/app/services/agent_tools.py
index 4fe4cfe49..10dfed263 100644
--- a/backend/app/services/agent_tools.py
+++ b/backend/app/services/agent_tools.py
@@ -188,7 +188,6 @@ async def _get_tool_config(agent_id: Optional[uuid.UUID], tool_name: str) -> Opt
                 base_config = global_config or {}
                 tenant_config = {}
                 if tool_source == "builtin":
-                    base_config = {}
                     tenant_config = await get_tenant_tool_config(db, agent_tenant_id, db_tool_name, config_schema)
                 # Merge: agent overrides global
                 merged = {**base_config, **tenant_config, **(agent_config or {})}
@@ -206,7 +205,7 @@ async def _get_tool_config(agent_id: Optional[uuid.UUID], tool_name: str) -> Opt
             tenant_config = {}
             if tool.source == "builtin":
                 tenant_config = await get_tenant_tool_config(db, agent_tenant_id, tool.name, tool.config_schema)
-            base_config = {} if tool.source == "builtin" else (tool.config or {})
+            base_config = tool.config or {}
             merged = {**base_config, **tenant_config}
         else:
             merged = {}
@@ -8047,6 +8046,7 @@ async def _execute_code(
             timeout=timeout,
             work_dir=str(work_dir),
             on_output=on_output,
+            agent_id=agent_id,
         )
 
         # Format result for user display
diff --git a/backend/app/services/heartbeat.py b/backend/app/services/heartbeat.py
index 9f64a57fe..f66792437 100644
--- a/backend/app/services/heartbeat.py
+++ b/backend/app/services/heartbeat.py
@@ -295,6 +295,7 @@ async def _execute_heartbeat(agent_id: uuid.UUID):
         plaza_posts_made = 0       # hard limit: 1 new post per heartbeat
         plaza_comments_made = 0    # hard limit: 2 comments per heartbeat
         _hb_accumulated_usage = None
+        _hb_unsaved_usage = None
 
         # Token tracking helpers
         from app.services.token_tracker import (
@@ -304,6 +305,7 @@ async def _execute_heartbeat(agent_id: uuid.UUID):
             estimate_token_usage_from_chars,
         )
         _hb_accumulated_usage = TokenUsage()
+        _hb_unsaved_usage = TokenUsage()
 
         # Convert messages to LLMMessage format
         llm_messages = [
@@ -312,6 +314,21 @@ async def _execute_heartbeat(agent_id: uuid.UUID):
         ]
 
         for round_i in range(20):  # More rounds for search + write + plaza
+            # Check token usage limit mid-loop (every 3 rounds)
+            if round_i > 0 and round_i % 3 == 0:
+                if agent_id and _hb_unsaved_usage.total_tokens > 0:
+                    async with async_session() as db:
+                        await record_token_usage(agent_id, _hb_unsaved_usage)
+                        await db.commit()
+                    _hb_unsaved_usage = TokenUsage()
+                    from app.services.llm.caller import _get_agent_config
+                    _, _token_limit_msg = await _get_agent_config(agent_id)
+                    if _token_limit_msg:
+                        logger.warning(f"[Heartbeat] Token limit exceeded mid-loop: {_token_limit_msg}")
+                        await client.close()
+                        reply = _token_limit_msg
+                        break
+
             try:
                 response = await client.complete(
                     messages=llm_messages,
@@ -330,11 +347,11 @@ async def _execute_heartbeat(agent_id: uuid.UUID):
 
             # Track tokens for this round
             usage = extract_token_usage(response.usage)
-            if usage:
-                _hb_accumulated_usage.add(usage)
-            else:
+            if not usage:
                 round_chars = sum(len(m.content or '') for m in llm_messages) + len(response.content or '')
-                _hb_accumulated_usage.add(estimate_token_usage_from_chars(round_chars))
+                usage = estimate_token_usage_from_chars(round_chars)
+            _hb_accumulated_usage.add(usage)
+            _hb_unsaved_usage.add(usage)
 
             if response.tool_calls:
                 # Add assistant message with tool calls
@@ -423,8 +440,8 @@ async def _execute_heartbeat(agent_id: uuid.UUID):
         # ── Phase 3: Write results back to DB (short transaction) ──
         async with async_session() as db:
             # Record accumulated heartbeat token usage
-            if _hb_accumulated_usage and _hb_accumulated_usage.total_tokens > 0:
-                await record_token_usage(agent_id, _hb_accumulated_usage)
+            if _hb_unsaved_usage and _hb_unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _hb_unsaved_usage)
             await db.commit()
 
         # Log activity if not empty
@@ -687,8 +704,25 @@ async def run_agent_oneshot(
 
         reply = ""
         accumulated_usage = TokenUsage()
+        unsaved_usage = TokenUsage()
 
         for round_i in range(max_rounds):
+            # Check token usage limit mid-loop (every 3 rounds)
+            if round_i > 0 and round_i % 3 == 0:
+                if agent_id and unsaved_usage.total_tokens > 0:
+                    try:
+                        await record_token_usage(agent_id, unsaved_usage)
+                    except Exception as e:
+                        logger.warning(f"[Oneshot] Failed to record token usage mid-loop: {e}")
+                    unsaved_usage = TokenUsage()
+                    from app.services.llm.caller import _get_agent_config
+                    _, _token_limit_msg = await _get_agent_config(agent_id)
+                    if _token_limit_msg:
+                        logger.warning(f"[Oneshot] Token limit exceeded mid-loop: {_token_limit_msg}")
+                        await client.close()
+                        reply = _token_limit_msg
+                        break
+
             try:
                 response = await client.complete(
                     messages=llm_messages,
@@ -713,11 +747,11 @@ async def run_agent_oneshot(
 
             # Track token usage
             usage = extract_token_usage(response.usage)
-            if usage:
-                accumulated_usage.add(usage)
-            else:
+            if not usage:
                 round_chars = sum(len(m.content or "") for m in llm_messages) + len(response.content or "")
-                accumulated_usage.add(estimate_token_usage_from_chars(round_chars))
+                usage = estimate_token_usage_from_chars(round_chars)
+            accumulated_usage.add(usage)
+            unsaved_usage.add(usage)
 
             if response.tool_calls:
                 llm_messages.append(LLMMessage(
@@ -769,9 +803,9 @@ async def run_agent_oneshot(
         await client.close()
 
         # ── Phase 3: Record token usage (best-effort) ───────────────────────────
-        if accumulated_usage.total_tokens > 0:
+        if unsaved_usage.total_tokens > 0:
             try:
-                await record_token_usage(agent_id, accumulated_usage)
+                await record_token_usage(agent_id, unsaved_usage)
             except Exception as e:
                 logger.warning(f"[Oneshot] Failed to record token usage: {e}")
 
diff --git a/backend/app/services/llm/caller.py b/backend/app/services/llm/caller.py
index 4311d0e16..440e9f7d0 100644
--- a/backend/app/services/llm/caller.py
+++ b/backend/app/services/llm/caller.py
@@ -497,6 +497,7 @@ async def _default_on_tool_call(data: dict):
 
     max_tokens = get_max_tokens(model.provider, model.model, getattr(model, 'max_output_tokens', None))
     _accumulated_usage = TokenUsage()
+    _unsaved_usage = TokenUsage()
 
     # Tool-calling loop
     for round_i in range(_max_tool_rounds):
@@ -518,6 +519,17 @@ async def _default_on_tool_call(data: dict):
                 content="🚨 仅剩 2 轮工具调用。请立即使用 upsert_focus_item 保存进度并设置续接触发器。",
             ))
 
+        # Check token usage limit mid-loop (every 3 rounds)
+        if round_i > 0 and round_i % 3 == 0:
+            if agent_id and _unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _unsaved_usage)
+                _unsaved_usage = TokenUsage()
+                _, _token_limit_msg = await _get_agent_config(agent_id)
+                if _token_limit_msg:
+                    logger.warning(f"[LLM] Token limit exceeded mid-loop: {_token_limit_msg}")
+                    await client.close()
+                    return _token_limit_msg
+
         try:
             # Use streaming API for real-time responses
             async def _buffer_chunk(_text: str) -> None:
@@ -535,19 +547,21 @@ async def _buffer_chunk(_text: str) -> None:
             )
         except LLMError as e:
             logger.error(f"[LLM] LLMError: provider={getattr(model, 'provider', '?')} model={getattr(model, 'model', '?')} {e}")
-            if agent_id and _accumulated_usage.total_tokens > 0:
-                await record_token_usage(agent_id, _accumulated_usage)
+            if agent_id and _unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _unsaved_usage)
             await client.close()
             return f"[LLM Error] {e}"
         except Exception as e:
             logger.exception(f"[LLM] Unexpected error: {type(e).__name__}: {str(e)[:300]}")
-            if agent_id and _accumulated_usage.total_tokens > 0:
-                await record_token_usage(agent_id, _accumulated_usage)
+            if agent_id and _unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _unsaved_usage)
             await client.close()
             return f"[LLM call error] {type(e).__name__}: {str(e)[:200]}"
 
         # Track tokens for this round
-        _accumulated_usage.add(_usage_from_response_or_estimate(response, api_messages))
+        _usage_this_round = _usage_from_response_or_estimate(response, api_messages)
+        _accumulated_usage.add(_usage_this_round)
+        _unsaved_usage.add(_usage_this_round)
 
         # Plain assistant text is not a stop condition. The model must finish
         # explicitly via finish(content=...).
@@ -567,8 +581,8 @@ async def _buffer_chunk(_text: str) -> None:
         finish_call = find_finish_call(sanitized_tool_calls)
         if finish_call:
             if finish_call.valid:
-                if agent_id and _accumulated_usage.total_tokens > 0:
-                    await record_token_usage(agent_id, _accumulated_usage)
+                if agent_id and _unsaved_usage.total_tokens > 0:
+                    await record_token_usage(agent_id, _unsaved_usage)
                 await client.close()
                 return finish_call.content
 
@@ -616,8 +630,8 @@ async def _buffer_chunk(_text: str) -> None:
                 ))
 
     # Record tokens even on "too many rounds" exit
-    if agent_id and _accumulated_usage.total_tokens > 0:
-        await record_token_usage(agent_id, _accumulated_usage)
+    if agent_id and _unsaved_usage.total_tokens > 0:
+        await record_token_usage(agent_id, _unsaved_usage)
     await client.close()
     return "[Error] Too many tool call rounds"
 
@@ -882,6 +896,7 @@ async def call_agent_llm_with_tools(
     async def _try_model(model: LLMModel) -> tuple[str, bool, bool]:
         """Try to complete with a model. Returns (response, success, tool_executed)."""
         _accumulated_usage = TokenUsage()
+        _unsaved_usage = TokenUsage()
         tool_executed = False
         try:
             client = create_llm_client(
@@ -900,6 +915,17 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]:
             # Tool-calling loop
             api_messages = list(messages)
             for round_i in range(max_rounds):
+                # Check token usage limit mid-loop (every 3 rounds)
+                if round_i > 0 and round_i % 3 == 0:
+                    if agent_id and _unsaved_usage.total_tokens > 0:
+                        await record_token_usage(agent_id, _unsaved_usage)
+                        _unsaved_usage = TokenUsage()
+                        _, _token_limit_msg = await _get_agent_config(agent_id)
+                        if _token_limit_msg:
+                            logger.warning(f"[call_agent_llm_with_tools] Token limit exceeded mid-loop: {_token_limit_msg}")
+                            await client.close()
+                            return _token_limit_msg, False, tool_executed
+
                 try:
                     response = await client.complete(
                         messages=api_messages,
@@ -910,12 +936,14 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]:
                 except Exception as e:
                     logger.error(f"[call_agent_llm_with_tools] Agent {agent_id}: LLM call error: {e}")
                     await client.close()
-                    if agent_id and _accumulated_usage.total_tokens > 0:
-                        await record_token_usage(agent_id, _accumulated_usage)
+                    if agent_id and _unsaved_usage.total_tokens > 0:
+                        await record_token_usage(agent_id, _unsaved_usage)
                     raise
 
                 # Track tokens for this round
-                _accumulated_usage.add(_usage_from_response_or_estimate(response, api_messages))
+                _usage_this_round = _usage_from_response_or_estimate(response, api_messages)
+                _accumulated_usage.add(_usage_this_round)
+                _unsaved_usage.add(_usage_this_round)
 
                 if not response.tool_calls:
                     if response.content:
@@ -932,8 +960,8 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]:
                 finish_call = find_finish_call(sanitized_tool_calls)
                 if finish_call:
                     if finish_call.valid:
-                        if agent_id and _accumulated_usage.total_tokens > 0:
-                            await record_token_usage(agent_id, _accumulated_usage)
+                        if agent_id and _unsaved_usage.total_tokens > 0:
+                            await record_token_usage(agent_id, _unsaved_usage)
                         await client.close()
                         return finish_call.content, True, tool_executed
                     api_messages.append(LLMMessage(
@@ -982,14 +1010,14 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]:
                         content=str(result),
                     ))
 
-            if agent_id and _accumulated_usage.total_tokens > 0:
-                await record_token_usage(agent_id, _accumulated_usage)
+            if agent_id and _unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _unsaved_usage)
             await client.close()
             return "[Error] Too many tool call rounds", False, tool_executed
 
         except Exception as e:
-            if agent_id and _accumulated_usage.total_tokens > 0:
-                await record_token_usage(agent_id, _accumulated_usage)
+            if agent_id and _unsaved_usage.total_tokens > 0:
+                await record_token_usage(agent_id, _unsaved_usage)
             return f"[Error] {e}", False, tool_executed
 
     # Try primary model
diff --git a/backend/app/services/sandbox/local/subprocess_backend.py b/backend/app/services/sandbox/local/subprocess_backend.py
index 793b1d9dc..b3724e211 100644
--- a/backend/app/services/sandbox/local/subprocess_backend.py
+++ b/backend/app/services/sandbox/local/subprocess_backend.py
@@ -106,15 +106,15 @@ class SubprocessBackend(BaseSandboxBackend):
     def __init__(self, config: SandboxConfig):
         self.config = config
 
-    def _venv_python(self, work_path: Path) -> str:
-        return f"/workspace/{work_path.joinpath('.venv', 'bin', 'python').relative_to(work_path)}"
+    def _venv_python(self, venv_path: Path) -> str:
+        return "/workspace/.venv/bin/python"
 
     def _host_venv_python(self, work_path: Path) -> str:
         return str(work_path / ".venv" / "bin" / "python")
 
-    def _build_command(self, language: str, script_path: str, work_path: Path) -> list[str]:
+    def _build_command(self, language: str, script_path: str) -> list[str]:
         if language == "python":
-            return [self._venv_python(work_path), "-I", "-B", str(script_path)]
+            return ["/workspace/.venv/bin/python", "-I", "-B", str(script_path)]
         if language == "bash":
             return ["bash", "--noprofile", "--norc", str(script_path)]
         return ["node", str(script_path)]
@@ -152,35 +152,36 @@ def _bind_if_exists(self, host_path: str, guest_path: str | None = None, *, read
         bind_flag = "--ro-bind" if read_only else "--bind"
         return [bind_flag, str(host), target]
 
-    def _ensure_workspace_venv(self, work_path: Path) -> None:
-        venv_python = work_path / ".venv" / "bin" / "python"
+    def _ensure_workspace_venv(self, venv_path: Path) -> None:
+        venv_python = venv_path / "bin" / "python"
         if not venv_python.exists():
             import subprocess
 
+            # Use uv to create the virtual environment for extreme speed
+            # --seed ensures pip is still present in the venv
             subprocess.run(
-                ["python3", "-m", "venv", str(work_path / ".venv")],
+                ["uv", "venv", "--seed", str(venv_path)],
                 check=True,
-                cwd=str(work_path),
+                cwd=str(venv_path.parent),
             )
 
         # Fix shebang lines in pip scripts to use bwrap-visible path
         # venv creates scripts with absolute paths to the host Python,
         # but bwrap only mounts /workspace, so those paths don't exist inside the sandbox
-        self._fix_pip_shebangs(work_path)
+        self._fix_pip_shebangs(venv_path)
 
-    def _fix_pip_shebangs(self, work_path: Path) -> None:
-        """Fix pip script shebangs to point to /workspace/.venv/bin/python for bwrap compatibility."""
-        venv_bin = work_path / ".venv" / "bin"
+    def _fix_pip_shebangs(self, venv_path: Path) -> None:
+        """Replace pip with a bash wrapper that delegates to uv pip for extreme performance."""
+        venv_bin = venv_path / "bin"
         sandbox_python = "/workspace/.venv/bin/python"
-        for script_name in ("pip", "pip3", "pip3.X"):
-            script_path = venv_bin / script_name
-            if script_path.exists():
-                content = script_path.read_text(encoding="utf-8")
-                if content.startswith("#!"):
-                    first_line, rest = content.split("\n", 1)
-                    # Only rewrite if shebang doesn't already point to sandbox python
-                    if sandbox_python not in first_line:
-                        script_path.write_text(f"#!{sandbox_python}\n{rest}", encoding="utf-8")
+        
+        wrapper_script = f"#!/bin/bash\nexec uv pip \"$@\"\n"
+        
+        for pip_cmd in ["pip", "pip3", "pip3.12"]:
+            pip_path = venv_bin / pip_cmd
+            if pip_path.parent.exists():
+                pip_path.write_text(wrapper_script, encoding="utf-8")
+                pip_path.chmod(0o755)
 
     def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool = False) -> dict:
         kwargs = {
@@ -233,7 +234,7 @@ def _preexec():
 
         return _preexec
 
-    def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str] | None:
+    def _build_bwrap_command(self, command: list[str], work_path: Path, venv_path: Path) -> list[str] | None:
         bwrap = shutil.which("bwrap")
         if not bwrap:
             if not SubprocessBackend._bwrap_missing_warned:
@@ -263,7 +264,9 @@ def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str]
             "--unshare-uts",
             "--unshare-cgroup",
             *base_binds,
+            "--bind", "/data/agents/.uv-cache", "/uv-cache",
             "--bind", str(work_path), "/workspace",
+            "--bind", str(venv_path), "/workspace/.venv",
             "--dev", "/dev",
             "--proc", "/proc",
             "--dir", "/tmp",
@@ -278,6 +281,7 @@ def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str]
             "--setenv", "VIRTUAL_ENV", "/workspace/.venv",
             "--setenv", "PIP_CACHE_DIR", "/workspace/.tmp/pip-cache",
             "--setenv", "PIP_DISABLE_PIP_VERSION_CHECK", "1",
+            "--setenv", "UV_CACHE_DIR", "/uv-cache",
             "--chdir", "/workspace",
         ]
         if not self.config.allow_network:
@@ -317,6 +321,7 @@ async def execute(
     ) -> ExecutionResult:
         """Execute code in a subprocess."""
         on_output = kwargs.get("on_output")
+        agent_id = kwargs.get("agent_id")
         start_time = time.time()
 
         # Validate language
@@ -361,6 +366,18 @@ async def execute(
         work_path.mkdir(parents=True, exist_ok=True)
         (work_path / ".tmp").mkdir(parents=True, exist_ok=True)
         (work_path / ".tmp" / "pip-cache").mkdir(parents=True, exist_ok=True)
+        
+        # Determine persistent venv path if possible
+        if agent_id:
+            # We place the virtual environment in a persistent location
+            venv_path = Path("/data/agents").resolve() / str(agent_id) / ".venv"
+            venv_path.parent.mkdir(parents=True, exist_ok=True)
+            
+            # Ensure global uv cache exists
+            uv_cache = Path("/data/agents/.uv-cache")
+            uv_cache.mkdir(parents=True, exist_ok=True)
+        else:
+            venv_path = work_path / ".venv"
 
         # Determine command and file extension
         if language == "python":
@@ -374,11 +391,11 @@ async def execute(
         script_path = work_path / f"_exec_tmp{ext}"
 
         try:
-            self._ensure_workspace_venv(work_path)
+            self._ensure_workspace_venv(venv_path)
             script_path.write_text(code, encoding="utf-8")
 
-            sandbox_command = self._build_command(language, f"/workspace/{script_path.name}", work_path)
-            bwrap_command = self._build_bwrap_command(sandbox_command, work_path)
+            sandbox_command = self._build_command(language, f"/workspace/{script_path.name}")
+            bwrap_command = self._build_bwrap_command(sandbox_command, work_path, venv_path)
             if not bwrap_command:
                 if not self.config.allow_unsafe_fallback_when_bwrap_missing:
                     duration_ms = int((time.time() - start_time) * 1000)
diff --git a/backend/app/services/tool_seeder.py b/backend/app/services/tool_seeder.py
index 6c26af134..c1ddbd92d 100644
--- a/backend/app/services/tool_seeder.py
+++ b/backend/app/services/tool_seeder.py
@@ -57,8 +57,8 @@
 
 def _global_builtin_config(tool_data: dict) -> dict:
     """Return config safe to store on the global builtin Tool row."""
-    if (tool_data.get("config_schema") or {}).get("fields"):
-        return {}
+    # Builtin tools specify defaults (like 'allow_network': True) in their 'config' dict.
+    # The actual sensitive data defaults are empty strings ("") so this is safe to store globally.
     return tool_data.get("config", {})
 
 # Builtin tool definitions — these map to the hardcoded AGENT_TOOLS
@@ -3680,7 +3680,6 @@ async def seed_builtin_tools():
                     continue
                 legacy_config = meaningful_config(tool.config or {})
                 if not legacy_config:
-                    tool.config = {}
                     continue
                 setting_key = tenant_tool_config_key(tool.name)
                 existing_setting_r = await db.execute(
@@ -3696,7 +3695,15 @@ async def seed_builtin_tools():
                         value={"config": legacy_config},
                     ))
                     migrated += 1
-                tool.config = {}
+                
+                # Remove sensitive fields from global config instead of wiping it
+                clean_config = {}
+                schema_fields = (tool.config_schema or {}).get("fields", [])
+                sensitive_keys = {f["key"] for f in schema_fields if f.get("type") == "password"}
+                for k, v in (tool.config or {}).items():
+                    if k not in sensitive_keys:
+                        clean_config[k] = v
+                tool.config = clean_config
             if migrated:
                 logger.info(
                     f"[ToolSeeder] Migrated {migrated} legacy builtin tool config(s) "
diff --git a/backend/test_sandbox_config.py b/backend/test_sandbox_config.py
new file mode 100644
index 000000000..17a4581c1
--- /dev/null
+++ b/backend/test_sandbox_config.py
@@ -0,0 +1,19 @@
+import asyncio
+from app.services.sandbox.config import SandboxConfig
+
+config = {"allow_network": True}
+fallback = SandboxConfig(allow_network=False)
+result = SandboxConfig.from_dict(config, fallback)
+print(f"Result 1: {result.allow_network}")
+
+config2 = {"allow_network": "true"}
+result2 = SandboxConfig.from_dict(config2, fallback)
+print(f"Result 2: {result2.allow_network}")
+
+config3 = {"allow_network": "1"}
+result3 = SandboxConfig.from_dict(config3, fallback)
+print(f"Result 3: {result3.allow_network}")
+
+config4 = {}
+result4 = SandboxConfig.from_dict(config4, fallback)
+print(f"Result 4: {result4.allow_network}")
diff --git a/backend/tests/test_finish_protocol.py b/backend/tests/test_finish_protocol.py
index e36b6dd0d..59dbd2bb9 100644
--- a/backend/tests/test_finish_protocol.py
+++ b/backend/tests/test_finish_protocol.py
@@ -286,6 +286,67 @@ def test_finish_is_in_always_available_core_tools():
     assert "finish" in _ALWAYS_INCLUDE_CORE
 
 
+@pytest.mark.asyncio
+async def test_mid_loop_token_limit_checking(monkeypatch):
+    from app.services.llm import caller
+    from app.services.llm.client import LLMResponse
+
+    # Setup FakeStreamClient with several rounds of dummy tool calls
+    responses = [
+        LLMResponse(
+            content="",
+            tool_calls=[{"id": f"call_{i}", "type": "function", "function": {"name": "dummy_tool", "arguments": "{}"}}],
+            usage={"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}
+        )
+        for i in range(4)
+    ]
+    fake_client = FakeStreamClient(responses)
+
+    configs_called = 0
+    async def mock_get_agent_config(agent_id):
+        nonlocal configs_called
+        configs_called += 1
+        if configs_called > 1:
+            return 50, "⚠️ Daily token usage limit exceeded"
+        return 50, None
+
+    monkeypatch.setattr(caller, "_get_agent_config", mock_get_agent_config)
+    monkeypatch.setattr(caller, "_get_user_name", lambda _user_id: _async_return("Ray"))
+    monkeypatch.setattr(
+        "app.services.agent_context.build_agent_context",
+        lambda *_args, **_kwargs: _async_return(("static", "dynamic")),
+    )
+    monkeypatch.setattr(caller, "get_agent_tools_for_llm", lambda _agent_id: _async_return([
+        {"type": "function", "function": {"name": "dummy_tool", "description": "dummy"}}
+    ]))
+    monkeypatch.setattr(caller, "execute_tool", lambda *_args, **_kwargs: _async_return("Success"))
+    monkeypatch.setattr(caller, "create_llm_client", lambda **_kwargs: fake_client)
+
+    token_records = []
+    async def mock_record_token_usage(agent_id, usage, **_kwargs):
+        token_records.append(usage.total_tokens)
+
+    monkeypatch.setattr(caller, "record_token_usage", mock_record_token_usage)
+
+    result = await caller.call_llm(
+        _model(),
+        [{"role": "user", "content": "hello"}],
+        "Agent",
+        "",
+        agent_id=uuid.uuid4(),
+        user_id=uuid.uuid4(),
+    )
+
+    # In round_i = 3 (the 4th round), it should trigger the mod-3 check,
+    # find the limit is exceeded, break the loop and return the limit message.
+    assert result == "⚠️ Daily token usage limit exceeded"
+    # Should have called record_token_usage once in the mid-loop check after 3 rounds
+    # round 0, 1, 2 usage is 150*3 = 450 tokens
+    assert len(token_records) == 1
+    assert token_records[0] == 450
+    assert fake_client.closed is True
+
+
 async def _async_return(value):
     return value
 
diff --git a/deploy/docker-compose-multi.yml b/deploy/docker-compose-multi.yml
index 3c5c7ea21..0e51cb249 100644
--- a/deploy/docker-compose-multi.yml
+++ b/deploy/docker-compose-multi.yml
@@ -83,10 +83,12 @@ services:
       - ./backend/agent_data:/data/agents
       - /var/run/docker.sock:/var/run/docker.sock
       - ./ss-nodes.json:/data/ss-nodes.json:ro
+    privileged: true
     cap_add:
       - SYS_ADMIN
     security_opt:
       - seccomp=unconfined
+      - apparmor=unconfined
     networks:
       default:
         aliases:
@@ -141,10 +143,12 @@ services:
       - ./backend/agent_data:/data/agents
       - /var/run/docker.sock:/var/run/docker.sock
       - ./ss-nodes.json:/data/ss-nodes.json:ro
+    privileged: true
     cap_add:
       - SYS_ADMIN
     security_opt:
       - seccomp=unconfined
+      - apparmor=unconfined
     networks:
       - default
     depends_on:
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml
index 92d4eaeb9..d6b730398 100644
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -63,10 +63,12 @@ services:
       - ./backend/agent_data:/data/agents
       - /var/run/docker.sock:/var/run/docker.sock
       - ./ss-nodes.json:/data/ss-nodes.json:ro
+    privileged: true
     cap_add:
       - SYS_ADMIN
     security_opt:
       - seccomp=unconfined
+      - apparmor=unconfined
     networks:
       - default
     depends_on:
@@ -89,7 +91,7 @@ services:
       API_UPSTREAM: ${API_UPSTREAM:-backend:8000}
       MINIO_UPSTREAM: ${MINIO_UPSTREAM:-minio:9000}
     volumes:
-      - ./nginx/nginx.conf:/etc/nginx/templates/default.conf.template:ro
+      - ./deploy/nginx/nginx.conf:/etc/nginx/templates/default.conf.template:ro
     networks:
       - default
     depends_on:
diff --git a/frontend/src/components/WorkspaceOperationPanel.tsx b/frontend/src/components/WorkspaceOperationPanel.tsx
index 32b9c2da3..8b71d92e2 100644
--- a/frontend/src/components/WorkspaceOperationPanel.tsx
+++ b/frontend/src/components/WorkspaceOperationPanel.tsx
@@ -1,5 +1,5 @@
 import type { MouseEvent as ReactMouseEvent } from 'react';
-import { useEffect, useMemo, useRef, useState } from 'react';
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
 import { createPortal } from 'react-dom';
 import { useTranslation } from 'react-i18next';
 import MarkdownRenderer from './MarkdownRenderer';
@@ -410,7 +410,7 @@ export default function WorkspaceOperationPanel({
     const [previewState, setPreviewState] = useState<'idle' | 'loading' | 'ready' | 'deleted'>('idle');
     const [editing, setEditing] = useState(false);
     const [saveState, setSaveState] = useState<'idle' | 'saving' | 'saved' | 'error'>('idle');
-    const [revisions, setRevisions] = useState<any[]>([]);
+        const [revisions, setRevisions] = useState<any[]>([]);
     const [fileTree, setFileTree] = useState<WorkspaceFileNode[]>([]);
     const [activityOpenLocal, setActivityOpenLocal] = useState(false);
     const activityOpen = activityOpenProp ?? activityOpenLocal;
@@ -490,12 +490,20 @@ export default function WorkspaceOperationPanel({
 
     const loadFileTree = async () => {
         const loadDir = async (path: string, depth: number): Promise<WorkspaceFileNode[]> => {
-            if (depth > 4) return [];
-        const items = await fileApi.list(agentId, path).catch(() => []);
-        return Promise.all(items.map(async (item: WorkspaceFileNode) => {
-            if (!item.is_dir) return item;
-            return { ...item, children: await loadDir(item.path, depth + 1) };
-        }));
+            if (depth > 8) return [];
+            const isRoot = path === (treeScope === 'workspace' ? WORKSPACE_ROOT : '');
+            const isExpanded = expandedDirs.has(path);
+            if (!isRoot && !isExpanded) {
+                return [];
+            }
+            const items = await fileApi.list(agentId, path).catch(() => []);
+            return Promise.all(items.map(async (item: WorkspaceFileNode) => {
+                if (!item.is_dir) return item;
+                const children = expandedDirs.has(item.path)
+                    ? await loadDir(item.path, depth + 1)
+                    : [];
+                return { ...item, children };
+            }));
         };
         const roots = await loadDir(treeScope === 'workspace' ? WORKSPACE_ROOT : '', 0);
         setFileTree(roots);
@@ -599,7 +607,7 @@ export default function WorkspaceOperationPanel({
 
     useEffect(() => {
         loadFileTree();
-    }, [agentId, activityKey, liveDraft?.path, treeScope]);
+    }, [agentId, activityKey, liveDraft?.path, treeScope, expandedDirs]);
 
     useEffect(() => {
         if (!activePath || treeScope !== 'workspace') return;
diff --git a/frontend/src/pages/agent-detail/AgentDetailPage.tsx b/frontend/src/pages/agent-detail/AgentDetailPage.tsx
index dd2848080..3fa7b8e7f 100644
--- a/frontend/src/pages/agent-detail/AgentDetailPage.tsx
+++ b/frontend/src/pages/agent-detail/AgentDetailPage.tsx
@@ -2102,36 +2102,7 @@ export default function AgentDetailPage() {
         }
     };
 
-    const { data: soulContent } = useQuery({
-        queryKey: ['file', id, 'soul.md'],
-        queryFn: () => fileApi.read(id!, 'soul.md'),
-        enabled: !!id && activeTab === 'mind',
-    });
-
-    const { data: memoryFiles = [] } = useQuery({
-        queryKey: ['files', id, 'memory'],
-        queryFn: () => fileApi.list(id!, 'memory'),
-        enabled: !!id && activeTab === 'mind',
-    });
-    const [expandedMemory, setExpandedMemory] = useState<string | null>(null);
-    const { data: memoryFileContent } = useQuery({
-        queryKey: ['file', id, expandedMemory],
-        queryFn: () => fileApi.read(id!, expandedMemory!),
-        enabled: !!id && !!expandedMemory,
-    });
-
-    const { data: skillFiles = [] } = useQuery({
-        queryKey: ['files', id, 'skills'],
-        queryFn: () => fileApi.list(id!, 'skills'),
-        enabled: !!id && activeTab === 'skills',
-    });
-
     const [workspacePath, setWorkspacePath] = useState('workspace');
-    const { data: workspaceFiles = [] } = useQuery({
-        queryKey: ['files', id, workspacePath],
-        queryFn: () => fileApi.list(id!, workspacePath),
-        enabled: !!id && activeTab === 'workspace',
-    });
 
     const { data: activityLogs = [] } = useQuery({
         queryKey: ['activity', id],
@@ -3144,7 +3115,6 @@ export default function AgentDetailPage() {
                         setLivePanelVisible(true);
                         collapseSidebarsForLivePanel();
                     }
-                    queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] });
                 }
                 upsertToolCallMessage({
                     role: 'tool_call',
@@ -4073,17 +4043,7 @@ export default function AgentDetailPage() {
         retry: false,
     });
 
-    const { data: channelConfig } = useQuery({
-        queryKey: ['channel', id],
-        queryFn: () => channelApi.get(id!),
-        enabled: !!id && activeTab === 'settings',
-    });
 
-    const { data: webhookData } = useQuery({
-        queryKey: ['webhook-url', id],
-        queryFn: () => channelApi.webhookUrl(id!),
-        enabled: !!id && activeTab === 'settings',
-    });
 
     const { data: llmModels = [], isLoading: llmModelsLoading } = useQuery({
         queryKey: ['llm-models'],
@@ -4142,17 +4102,7 @@ export default function AgentDetailPage() {
         enabled: !!id && activeTab === 'settings',
     });
 
-    // ─── Soul editor ─────────────────────────────────────
-    const [soulEditing, setSoulEditing] = useState(false);
-    const [soulDraft, setSoulDraft] = useState('');
 
-    const saveSoul = useMutation({
-        mutationFn: () => fileApi.write(id!, 'soul.md', soulDraft),
-        onSuccess: () => {
-            queryClient.invalidateQueries({ queryKey: ['file', id, 'soul.md'] });
-            setSoulEditing(false);
-        },
-    });
 
 
     const CopyBtn = ({ url }: { url: string }) => (
@@ -4165,9 +4115,6 @@ export default function AgentDetailPage() {
     );
 
     // ─── File viewer ─────────────────────────────────────
-    const [viewingFile, setViewingFile] = useState<string | null>(null);
-    const [fileEditing, setFileEditing] = useState(false);
-    const [fileDraft, setFileDraft] = useState('');
     const [promptModal, setPromptModal] = useState<{ title: string; placeholder: string; action: string } | null>(null);
     const [deleteConfirm, setDeleteConfirm] = useState<{ path: string; name: string; isDir: boolean } | null>(null);
     const [uploadToast, setUploadToast] = useState<{ message: string; type: 'success' | 'error' } | null>(null);
@@ -4183,31 +4130,13 @@ export default function AgentDetailPage() {
         setUploadToast({ message, type });
         setTimeout(() => setUploadToast(null), 3000);
     };
-    const { data: fileContent } = useQuery({
-        queryKey: ['file-content', id, viewingFile],
-        queryFn: () => fileApi.read(id!, viewingFile!),
-        enabled: !!viewingFile,
-    });
 
     // ─── Task creation & detail ───────────────────────────────────
     const [showTaskForm, setShowTaskForm] = useState(false);
     const [showDeleteConfirm, setShowDeleteConfirm] = useState(false);
     const [taskForm, setTaskForm] = useState({ title: '', description: '', priority: 'medium', type: 'todo' as 'todo' | 'supervision', supervision_target_name: '', remind_schedule: '', due_date: '' });
     const [selectedTaskId, setSelectedTaskId] = useState<string | null>(null);
-    const { data: taskLogs = [] } = useQuery({
-        queryKey: ['task-logs', id, selectedTaskId],
-        queryFn: () => taskApi.getLogs(id!, selectedTaskId!),
-        enabled: !!id && !!selectedTaskId,
-        refetchInterval: selectedTaskId ? 3000 : false,
-    });
 
-    // Schedule execution history (selectedTaskId format: 'sched-{uuid}')
-    const expandedScheduleId = selectedTaskId?.startsWith('sched-') ? selectedTaskId.slice(6) : null;
-    const { data: scheduleHistoryData } = useQuery({
-        queryKey: ['schedule-history', id, expandedScheduleId],
-        queryFn: () => scheduleApi.history(id!, expandedScheduleId!),
-        enabled: !!id && !!expandedScheduleId,
-    });
     const createTask = useMutation({
         mutationFn: (data: any) => {
             const cleaned = { ...data };
@@ -6171,6 +6100,17 @@ export default function AgentDetailPage() {
                                                 onTouchMoveCapture={handleChatTouchMoveCapture}
                                                 style={{ flex: 1, overflowY: 'auto', padding: '12px 16px' }}
                                             >
+                                                {chatHistoryLoadingMore && (
+                                                    <div style={{ textAlign: 'center', padding: '12px 0', color: 'var(--text-tertiary)', fontSize: '13px', display: 'flex', alignItems: 'center', justifyContent: 'center', gap: '8px' }}>
+                                                        <div className="cw-spinner" style={{ width: '14px', height: '14px', borderWidth: '2px' }}></div>
+                                                        {i18n.language?.startsWith('zh') ? '正在加载历史消息...' : 'Loading history...'}
+                                                    </div>
+                                                )}
+                                                {!chatHistoryHasMore && chatMessages.length > 0 && (
+                                                    <div style={{ textAlign: 'center', padding: '12px 0', color: 'var(--text-tertiary)', fontSize: '12px' }}>
+                                                        {i18n.language?.startsWith('zh') ? '已加载全部历史消息' : 'All history loaded'}
+                                                    </div>
+                                                )}
                                                 {chatMessages.length === 0 && !showNoModelState && (
                                                     <div className="chat-empty-state">
                                                         <div className="chat-empty-state__title">{activeSession?.title || t('agent.chat.startChat')}</div>
@@ -6780,20 +6720,11 @@ export default function AgentDetailPage() {
                     setPromptModal(null);
                     if (action === 'newFolder') {
                         await fileApi.write(id!, `${workspacePath}/${value}/.gitkeep`, '');
-                        queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] });
                     } else if (action === 'newFile') {
                         await fileApi.write(id!, `${workspacePath}/${value}`, '');
-                        queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] });
-                        setViewingFile(`${workspacePath}/${value}`);
-                        setFileEditing(true);
-                        setFileDraft('');
                     } else if (action === 'newSkill') {
                         const template = `---\nname: ${value}\ndescription: Describe what this skill does\n---\n\n# ${value}\n\n## Overview\nDescribe the purpose and when to use this skill.\n\n## Process\n1. Step one\n2. Step two\n\n## Output Format\nDescribe the expected output format.\n`;
                         await fileApi.write(id!, `skills/${value}/SKILL.md`, template);
-                        queryClient.invalidateQueries({ queryKey: ['files', id, 'skills'] });
-                        setViewingFile(`skills/${value}/SKILL.md`);
-                        setFileEditing(true);
-                        setFileDraft(template);
                     }
                 }}
             />
@@ -6811,9 +6742,6 @@ export default function AgentDetailPage() {
                     if (path) {
                         try {
                             await fileApi.delete(id!, path);
-                            setViewingFile(null);
-                            setFileEditing(false);
-                            queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] });
                             showToast(t('common.delete'));
                         } catch (err: any) {
                             showToast(t('agent.upload.failed'), 'error');

From ac86b669641ea961e667e39ac34f46843e6b3774 Mon Sep 17 00:00:00 2001
From: yaojin3616 <yaoj.alex@gmail.com>
Date: Wed, 10 Jun 2026 15:41:21 +0800
Subject: [PATCH 2/2] Fix #636: optimize bwrap sandbox process tree cleanup on
 timeout

Also fixes an issue where Gemini 2.0/3.0 models would fail with 'Function call is missing a thought_signature in functionCall parts' by preserving _gemini_extra in canonical tool calls.
---
 backend/app/services/llm/caller.py            |  7 +++++--
 backend/app/services/llm/client.py            | 21 +++++++++++++++----
 .../sandbox/local/subprocess_backend.py       |  8 +++++--
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/backend/app/services/llm/caller.py b/backend/app/services/llm/caller.py
index 440e9f7d0..867b153e5 100644
--- a/backend/app/services/llm/caller.py
+++ b/backend/app/services/llm/caller.py
@@ -93,14 +93,17 @@ def _sanitize_tool_calls_for_context(tool_calls: list[dict]) -> tuple[list[dict]
                 "Retry the tool call with `function.arguments` as one valid JSON object string."
             )
 
-        sanitized.append({
+        new_tc = {
             "id": tc.get("id", ""),
             "type": tc.get("type") or "function",
             "function": {
                 "name": tool_name,
                 "arguments": args_str,
             },
-        })
+        }
+        if "_gemini_extra" in tc:
+            new_tc["_gemini_extra"] = tc["_gemini_extra"]
+        sanitized.append(new_tc)
 
     return sanitized, None
 
diff --git a/backend/app/services/llm/client.py b/backend/app/services/llm/client.py
index 1f9af784b..6d263a83d 100644
--- a/backend/app/services/llm/client.py
+++ b/backend/app/services/llm/client.py
@@ -1169,11 +1169,16 @@ def _build_payload(
                             parsed_args = args
                         else:
                             parsed_args = {}
+
+                        func_call_dict: dict[str, Any] = {
+                            "name": fn.get("name", ""),
+                            "args": parsed_args,
+                        }
+                        if "_gemini_extra" in tc:
+                            func_call_dict.update(tc["_gemini_extra"])
+
                         parts.append({
-                            "functionCall": {
-                                "name": fn.get("name", ""),
-                                "args": parsed_args,
-                            }
+                            "functionCall": func_call_dict
                         })
                 if parts:
                     contents.append({"role": "model", "parts": parts})
@@ -1282,6 +1287,9 @@ def _parse_response_data(self, data: dict[str, Any]) -> LLMResponse:
                     if dedup_key in seen_tool_calls:
                         continue
                     seen_tool_calls.add(dedup_key)
+                    
+                    extra = {k: v for k, v in function_call.items() if k not in ["name", "args"]}
+                    
                     tool_calls.append({
                         "id": f"call_{len(tool_calls) + 1}",
                         "type": "function",
@@ -1289,6 +1297,7 @@ def _parse_response_data(self, data: dict[str, Any]) -> LLMResponse:
                             "name": name,
                             "arguments": args_str,
                         },
+                        "_gemini_extra": extra,
                     })
 
         usage = self._normalize_usage(data.get("usageMetadata"))
@@ -1429,6 +1438,9 @@ async def stream(
                             if dedup_key in seen_tool_calls:
                                 continue
                             seen_tool_calls.add(dedup_key)
+                            
+                            extra = {k: v for k, v in function_call.items() if k not in ["name", "args"]}
+                            
                             tool_calls.append({
                                 "id": f"call_{len(tool_calls) + 1}",
                                 "type": "function",
@@ -1436,6 +1448,7 @@ async def stream(
                                     "name": name,
                                     "arguments": args_str,
                                 },
+                                "_gemini_extra": extra,
                             })
 
         except (httpx.ConnectError, httpx.ReadError, httpx.ConnectTimeout) as e:
diff --git a/backend/app/services/sandbox/local/subprocess_backend.py b/backend/app/services/sandbox/local/subprocess_backend.py
index b3724e211..a3bf9d6ce 100644
--- a/backend/app/services/sandbox/local/subprocess_backend.py
+++ b/backend/app/services/sandbox/local/subprocess_backend.py
@@ -3,6 +3,7 @@
 import asyncio
 import os
 import shutil
+import signal
 import time
 from pathlib import Path
 
@@ -188,6 +189,7 @@ def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool =
             "stdout": asyncio.subprocess.PIPE,
             "stderr": asyncio.subprocess.PIPE,
             "env": self._build_safe_env(work_path),
+            "start_new_session": True,
         }
         if use_preexec:
             kwargs["preexec_fn"] = self._build_preexec_fn(work_path, timeout)
@@ -196,7 +198,6 @@ def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool =
     def _build_preexec_fn(self, work_path: Path, timeout: int):
         def _preexec():
             os.chdir(work_path)
-            os.setsid()
             os.umask(0o077)
 
             try:
@@ -455,7 +456,10 @@ async def read_stream(stream, out, label="stdout"):
             try:
                 await asyncio.wait_for(proc.wait(), timeout=timeout)
             except asyncio.TimeoutError:
-                proc.kill()
+                try:
+                    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
+                except Exception:
+                    proc.kill()
                 is_timeout = True
 
             await asyncio.gather(task1, task2)