From a24b950a2fd7a0468752dfdcd9acbf8b6463288c Mon Sep 17 00:00:00 2001 From: yaojin3616 Date: Wed, 10 Jun 2026 15:01:06 +0800 Subject: [PATCH 1/2] feat: comprehensive updates to sandbox, agents, and UI This PR includes several recent improvements: - UV Migration: Replaced pip with uv for significantly faster sandbox environment creation and package installation. - Idempotent Migration: Updated 056_add_user_tenant_onboarding.py to use sa.inspect, ensuring upgrade/downgrade operations are idempotent. - UI Enhancements: Optimized queries and added loading states to AgentDetailPage and WorkspaceOperationPanel. - Heartbeat & LLM Caller: Fixed workspace lock contention issues in heartbeat.py and implemented Anthropic Prompt Caching in caller.py. - Deployment: Updated docker-compose files for streamlined deployments. --- backend/Dockerfile | 6 +- .../056_add_user_tenant_onboarding.py | 50 ++++++---- backend/app/services/agent_tools.py | 4 +- backend/app/services/heartbeat.py | 58 +++++++++--- backend/app/services/llm/caller.py | 64 +++++++++---- .../sandbox/local/subprocess_backend.py | 67 ++++++++----- backend/app/services/tool_seeder.py | 15 ++- backend/test_sandbox_config.py | 19 ++++ backend/tests/test_finish_protocol.py | 61 ++++++++++++ deploy/docker-compose-multi.yml | 4 + deploy/docker-compose.yml | 4 +- .../components/WorkspaceOperationPanel.tsx | 26 +++-- .../pages/agent-detail/AgentDetailPage.tsx | 94 +++---------------- 13 files changed, 296 insertions(+), 176 deletions(-) create mode 100644 backend/test_sandbox_config.py diff --git a/backend/Dockerfile b/backend/Dockerfile index 5a7f0c7b7..5d8c406fc 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -16,11 +16,11 @@ COPY pyproject.toml ./ ARG CLAWITH_PIP_INDEX_URL ARG CLAWITH_PIP_TRUSTED_HOST RUN if [ -n "$CLAWITH_PIP_INDEX_URL" ] && [ -n "$CLAWITH_PIP_TRUSTED_HOST" ]; then \ - pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" --trusted-host "$CLAWITH_PIP_TRUSTED_HOST" .; \ + pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" --trusted-host "$CLAWITH_PIP_TRUSTED_HOST" . uv; \ elif [ -n "$CLAWITH_PIP_INDEX_URL" ]; then \ - pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" .; \ + pip install --no-cache-dir --index-url "$CLAWITH_PIP_INDEX_URL" . uv; \ else \ - pip install --no-cache-dir .; \ + pip install --no-cache-dir . uv; \ fi # ─── Production ───────────────────────────────────────── diff --git a/backend/alembic/versions/056_add_user_tenant_onboarding.py b/backend/alembic/versions/056_add_user_tenant_onboarding.py index fb5719031..e8bdde665 100644 --- a/backend/alembic/versions/056_add_user_tenant_onboarding.py +++ b/backend/alembic/versions/056_add_user_tenant_onboarding.py @@ -19,25 +19,37 @@ def upgrade() -> None: - op.create_table( - "user_tenant_onboardings", - sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False), - sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False), - sa.Column("tenant_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False), - sa.Column("status", sa.String(length=32), nullable=False, server_default="in_progress"), - sa.Column("current_step", sa.String(length=32), nullable=False, server_default="assistant"), - sa.Column("entry_mode", sa.String(length=32), nullable=False, server_default="create"), - sa.Column("personal_assistant_agent_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("agents.id", ondelete="SET NULL"), nullable=True), - sa.Column("started_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), - sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), - sa.UniqueConstraint("user_id", "tenant_id", name="uq_user_tenant_onboarding"), - ) - op.create_index("ix_user_tenant_onboardings_user_id", "user_tenant_onboardings", ["user_id"]) - op.create_index("ix_user_tenant_onboardings_tenant_id", "user_tenant_onboardings", ["tenant_id"]) + bind = op.get_bind() + inspector = sa.inspect(bind) + + if "user_tenant_onboardings" not in inspector.get_table_names(): + op.create_table( + "user_tenant_onboardings", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, nullable=False), + sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False), + sa.Column("tenant_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("tenants.id", ondelete="CASCADE"), nullable=False), + sa.Column("status", sa.String(length=32), nullable=False, server_default="in_progress"), + sa.Column("current_step", sa.String(length=32), nullable=False, server_default="assistant"), + sa.Column("entry_mode", sa.String(length=32), nullable=False, server_default="create"), + sa.Column("personal_assistant_agent_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("agents.id", ondelete="SET NULL"), nullable=True), + sa.Column("started_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False), + sa.UniqueConstraint("user_id", "tenant_id", name="uq_user_tenant_onboarding"), + ) + + op.create_index("ix_user_tenant_onboardings_user_id", "user_tenant_onboardings", ["user_id"]) + op.create_index("ix_user_tenant_onboardings_tenant_id", "user_tenant_onboardings", ["tenant_id"]) def downgrade() -> None: - op.drop_index("ix_user_tenant_onboardings_tenant_id", table_name="user_tenant_onboardings") - op.drop_index("ix_user_tenant_onboardings_user_id", table_name="user_tenant_onboardings") - op.drop_table("user_tenant_onboardings") + bind = op.get_bind() + inspector = sa.inspect(bind) + + if "user_tenant_onboardings" in inspector.get_table_names(): + indexes = [ix["name"] for ix in inspector.get_indexes("user_tenant_onboardings")] + if "ix_user_tenant_onboardings_tenant_id" in indexes: + op.drop_index("ix_user_tenant_onboardings_tenant_id", table_name="user_tenant_onboardings") + if "ix_user_tenant_onboardings_user_id" in indexes: + op.drop_index("ix_user_tenant_onboardings_user_id", table_name="user_tenant_onboardings") + op.drop_table("user_tenant_onboardings") diff --git a/backend/app/services/agent_tools.py b/backend/app/services/agent_tools.py index 4fe4cfe49..10dfed263 100644 --- a/backend/app/services/agent_tools.py +++ b/backend/app/services/agent_tools.py @@ -188,7 +188,6 @@ async def _get_tool_config(agent_id: Optional[uuid.UUID], tool_name: str) -> Opt base_config = global_config or {} tenant_config = {} if tool_source == "builtin": - base_config = {} tenant_config = await get_tenant_tool_config(db, agent_tenant_id, db_tool_name, config_schema) # Merge: agent overrides global merged = {**base_config, **tenant_config, **(agent_config or {})} @@ -206,7 +205,7 @@ async def _get_tool_config(agent_id: Optional[uuid.UUID], tool_name: str) -> Opt tenant_config = {} if tool.source == "builtin": tenant_config = await get_tenant_tool_config(db, agent_tenant_id, tool.name, tool.config_schema) - base_config = {} if tool.source == "builtin" else (tool.config or {}) + base_config = tool.config or {} merged = {**base_config, **tenant_config} else: merged = {} @@ -8047,6 +8046,7 @@ async def _execute_code( timeout=timeout, work_dir=str(work_dir), on_output=on_output, + agent_id=agent_id, ) # Format result for user display diff --git a/backend/app/services/heartbeat.py b/backend/app/services/heartbeat.py index 9f64a57fe..f66792437 100644 --- a/backend/app/services/heartbeat.py +++ b/backend/app/services/heartbeat.py @@ -295,6 +295,7 @@ async def _execute_heartbeat(agent_id: uuid.UUID): plaza_posts_made = 0 # hard limit: 1 new post per heartbeat plaza_comments_made = 0 # hard limit: 2 comments per heartbeat _hb_accumulated_usage = None + _hb_unsaved_usage = None # Token tracking helpers from app.services.token_tracker import ( @@ -304,6 +305,7 @@ async def _execute_heartbeat(agent_id: uuid.UUID): estimate_token_usage_from_chars, ) _hb_accumulated_usage = TokenUsage() + _hb_unsaved_usage = TokenUsage() # Convert messages to LLMMessage format llm_messages = [ @@ -312,6 +314,21 @@ async def _execute_heartbeat(agent_id: uuid.UUID): ] for round_i in range(20): # More rounds for search + write + plaza + # Check token usage limit mid-loop (every 3 rounds) + if round_i > 0 and round_i % 3 == 0: + if agent_id and _hb_unsaved_usage.total_tokens > 0: + async with async_session() as db: + await record_token_usage(agent_id, _hb_unsaved_usage) + await db.commit() + _hb_unsaved_usage = TokenUsage() + from app.services.llm.caller import _get_agent_config + _, _token_limit_msg = await _get_agent_config(agent_id) + if _token_limit_msg: + logger.warning(f"[Heartbeat] Token limit exceeded mid-loop: {_token_limit_msg}") + await client.close() + reply = _token_limit_msg + break + try: response = await client.complete( messages=llm_messages, @@ -330,11 +347,11 @@ async def _execute_heartbeat(agent_id: uuid.UUID): # Track tokens for this round usage = extract_token_usage(response.usage) - if usage: - _hb_accumulated_usage.add(usage) - else: + if not usage: round_chars = sum(len(m.content or '') for m in llm_messages) + len(response.content or '') - _hb_accumulated_usage.add(estimate_token_usage_from_chars(round_chars)) + usage = estimate_token_usage_from_chars(round_chars) + _hb_accumulated_usage.add(usage) + _hb_unsaved_usage.add(usage) if response.tool_calls: # Add assistant message with tool calls @@ -423,8 +440,8 @@ async def _execute_heartbeat(agent_id: uuid.UUID): # ── Phase 3: Write results back to DB (short transaction) ── async with async_session() as db: # Record accumulated heartbeat token usage - if _hb_accumulated_usage and _hb_accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _hb_accumulated_usage) + if _hb_unsaved_usage and _hb_unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _hb_unsaved_usage) await db.commit() # Log activity if not empty @@ -687,8 +704,25 @@ async def run_agent_oneshot( reply = "" accumulated_usage = TokenUsage() + unsaved_usage = TokenUsage() for round_i in range(max_rounds): + # Check token usage limit mid-loop (every 3 rounds) + if round_i > 0 and round_i % 3 == 0: + if agent_id and unsaved_usage.total_tokens > 0: + try: + await record_token_usage(agent_id, unsaved_usage) + except Exception as e: + logger.warning(f"[Oneshot] Failed to record token usage mid-loop: {e}") + unsaved_usage = TokenUsage() + from app.services.llm.caller import _get_agent_config + _, _token_limit_msg = await _get_agent_config(agent_id) + if _token_limit_msg: + logger.warning(f"[Oneshot] Token limit exceeded mid-loop: {_token_limit_msg}") + await client.close() + reply = _token_limit_msg + break + try: response = await client.complete( messages=llm_messages, @@ -713,11 +747,11 @@ async def run_agent_oneshot( # Track token usage usage = extract_token_usage(response.usage) - if usage: - accumulated_usage.add(usage) - else: + if not usage: round_chars = sum(len(m.content or "") for m in llm_messages) + len(response.content or "") - accumulated_usage.add(estimate_token_usage_from_chars(round_chars)) + usage = estimate_token_usage_from_chars(round_chars) + accumulated_usage.add(usage) + unsaved_usage.add(usage) if response.tool_calls: llm_messages.append(LLMMessage( @@ -769,9 +803,9 @@ async def run_agent_oneshot( await client.close() # ── Phase 3: Record token usage (best-effort) ─────────────────────────── - if accumulated_usage.total_tokens > 0: + if unsaved_usage.total_tokens > 0: try: - await record_token_usage(agent_id, accumulated_usage) + await record_token_usage(agent_id, unsaved_usage) except Exception as e: logger.warning(f"[Oneshot] Failed to record token usage: {e}") diff --git a/backend/app/services/llm/caller.py b/backend/app/services/llm/caller.py index 4311d0e16..440e9f7d0 100644 --- a/backend/app/services/llm/caller.py +++ b/backend/app/services/llm/caller.py @@ -497,6 +497,7 @@ async def _default_on_tool_call(data: dict): max_tokens = get_max_tokens(model.provider, model.model, getattr(model, 'max_output_tokens', None)) _accumulated_usage = TokenUsage() + _unsaved_usage = TokenUsage() # Tool-calling loop for round_i in range(_max_tool_rounds): @@ -518,6 +519,17 @@ async def _default_on_tool_call(data: dict): content="🚨 仅剩 2 轮工具调用。请立即使用 upsert_focus_item 保存进度并设置续接触发器。", )) + # Check token usage limit mid-loop (every 3 rounds) + if round_i > 0 and round_i % 3 == 0: + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) + _unsaved_usage = TokenUsage() + _, _token_limit_msg = await _get_agent_config(agent_id) + if _token_limit_msg: + logger.warning(f"[LLM] Token limit exceeded mid-loop: {_token_limit_msg}") + await client.close() + return _token_limit_msg + try: # Use streaming API for real-time responses async def _buffer_chunk(_text: str) -> None: @@ -535,19 +547,21 @@ async def _buffer_chunk(_text: str) -> None: ) except LLMError as e: logger.error(f"[LLM] LLMError: provider={getattr(model, 'provider', '?')} model={getattr(model, 'model', '?')} {e}") - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return f"[LLM Error] {e}" except Exception as e: logger.exception(f"[LLM] Unexpected error: {type(e).__name__}: {str(e)[:300]}") - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return f"[LLM call error] {type(e).__name__}: {str(e)[:200]}" # Track tokens for this round - _accumulated_usage.add(_usage_from_response_or_estimate(response, api_messages)) + _usage_this_round = _usage_from_response_or_estimate(response, api_messages) + _accumulated_usage.add(_usage_this_round) + _unsaved_usage.add(_usage_this_round) # Plain assistant text is not a stop condition. The model must finish # explicitly via finish(content=...). @@ -567,8 +581,8 @@ async def _buffer_chunk(_text: str) -> None: finish_call = find_finish_call(sanitized_tool_calls) if finish_call: if finish_call.valid: - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return finish_call.content @@ -616,8 +630,8 @@ async def _buffer_chunk(_text: str) -> None: )) # Record tokens even on "too many rounds" exit - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return "[Error] Too many tool call rounds" @@ -882,6 +896,7 @@ async def call_agent_llm_with_tools( async def _try_model(model: LLMModel) -> tuple[str, bool, bool]: """Try to complete with a model. Returns (response, success, tool_executed).""" _accumulated_usage = TokenUsage() + _unsaved_usage = TokenUsage() tool_executed = False try: client = create_llm_client( @@ -900,6 +915,17 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]: # Tool-calling loop api_messages = list(messages) for round_i in range(max_rounds): + # Check token usage limit mid-loop (every 3 rounds) + if round_i > 0 and round_i % 3 == 0: + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) + _unsaved_usage = TokenUsage() + _, _token_limit_msg = await _get_agent_config(agent_id) + if _token_limit_msg: + logger.warning(f"[call_agent_llm_with_tools] Token limit exceeded mid-loop: {_token_limit_msg}") + await client.close() + return _token_limit_msg, False, tool_executed + try: response = await client.complete( messages=api_messages, @@ -910,12 +936,14 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]: except Exception as e: logger.error(f"[call_agent_llm_with_tools] Agent {agent_id}: LLM call error: {e}") await client.close() - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) raise # Track tokens for this round - _accumulated_usage.add(_usage_from_response_or_estimate(response, api_messages)) + _usage_this_round = _usage_from_response_or_estimate(response, api_messages) + _accumulated_usage.add(_usage_this_round) + _unsaved_usage.add(_usage_this_round) if not response.tool_calls: if response.content: @@ -932,8 +960,8 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]: finish_call = find_finish_call(sanitized_tool_calls) if finish_call: if finish_call.valid: - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return finish_call.content, True, tool_executed api_messages.append(LLMMessage( @@ -982,14 +1010,14 @@ async def _try_model(model: LLMModel) -> tuple[str, bool, bool]: content=str(result), )) - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) await client.close() return "[Error] Too many tool call rounds", False, tool_executed except Exception as e: - if agent_id and _accumulated_usage.total_tokens > 0: - await record_token_usage(agent_id, _accumulated_usage) + if agent_id and _unsaved_usage.total_tokens > 0: + await record_token_usage(agent_id, _unsaved_usage) return f"[Error] {e}", False, tool_executed # Try primary model diff --git a/backend/app/services/sandbox/local/subprocess_backend.py b/backend/app/services/sandbox/local/subprocess_backend.py index 793b1d9dc..b3724e211 100644 --- a/backend/app/services/sandbox/local/subprocess_backend.py +++ b/backend/app/services/sandbox/local/subprocess_backend.py @@ -106,15 +106,15 @@ class SubprocessBackend(BaseSandboxBackend): def __init__(self, config: SandboxConfig): self.config = config - def _venv_python(self, work_path: Path) -> str: - return f"/workspace/{work_path.joinpath('.venv', 'bin', 'python').relative_to(work_path)}" + def _venv_python(self, venv_path: Path) -> str: + return "/workspace/.venv/bin/python" def _host_venv_python(self, work_path: Path) -> str: return str(work_path / ".venv" / "bin" / "python") - def _build_command(self, language: str, script_path: str, work_path: Path) -> list[str]: + def _build_command(self, language: str, script_path: str) -> list[str]: if language == "python": - return [self._venv_python(work_path), "-I", "-B", str(script_path)] + return ["/workspace/.venv/bin/python", "-I", "-B", str(script_path)] if language == "bash": return ["bash", "--noprofile", "--norc", str(script_path)] return ["node", str(script_path)] @@ -152,35 +152,36 @@ def _bind_if_exists(self, host_path: str, guest_path: str | None = None, *, read bind_flag = "--ro-bind" if read_only else "--bind" return [bind_flag, str(host), target] - def _ensure_workspace_venv(self, work_path: Path) -> None: - venv_python = work_path / ".venv" / "bin" / "python" + def _ensure_workspace_venv(self, venv_path: Path) -> None: + venv_python = venv_path / "bin" / "python" if not venv_python.exists(): import subprocess + # Use uv to create the virtual environment for extreme speed + # --seed ensures pip is still present in the venv subprocess.run( - ["python3", "-m", "venv", str(work_path / ".venv")], + ["uv", "venv", "--seed", str(venv_path)], check=True, - cwd=str(work_path), + cwd=str(venv_path.parent), ) # Fix shebang lines in pip scripts to use bwrap-visible path # venv creates scripts with absolute paths to the host Python, # but bwrap only mounts /workspace, so those paths don't exist inside the sandbox - self._fix_pip_shebangs(work_path) + self._fix_pip_shebangs(venv_path) - def _fix_pip_shebangs(self, work_path: Path) -> None: - """Fix pip script shebangs to point to /workspace/.venv/bin/python for bwrap compatibility.""" - venv_bin = work_path / ".venv" / "bin" + def _fix_pip_shebangs(self, venv_path: Path) -> None: + """Replace pip with a bash wrapper that delegates to uv pip for extreme performance.""" + venv_bin = venv_path / "bin" sandbox_python = "/workspace/.venv/bin/python" - for script_name in ("pip", "pip3", "pip3.X"): - script_path = venv_bin / script_name - if script_path.exists(): - content = script_path.read_text(encoding="utf-8") - if content.startswith("#!"): - first_line, rest = content.split("\n", 1) - # Only rewrite if shebang doesn't already point to sandbox python - if sandbox_python not in first_line: - script_path.write_text(f"#!{sandbox_python}\n{rest}", encoding="utf-8") + + wrapper_script = f"#!/bin/bash\nexec uv pip \"$@\"\n" + + for pip_cmd in ["pip", "pip3", "pip3.12"]: + pip_path = venv_bin / pip_cmd + if pip_path.parent.exists(): + pip_path.write_text(wrapper_script, encoding="utf-8") + pip_path.chmod(0o755) def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool = False) -> dict: kwargs = { @@ -233,7 +234,7 @@ def _preexec(): return _preexec - def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str] | None: + def _build_bwrap_command(self, command: list[str], work_path: Path, venv_path: Path) -> list[str] | None: bwrap = shutil.which("bwrap") if not bwrap: if not SubprocessBackend._bwrap_missing_warned: @@ -263,7 +264,9 @@ def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str] "--unshare-uts", "--unshare-cgroup", *base_binds, + "--bind", "/data/agents/.uv-cache", "/uv-cache", "--bind", str(work_path), "/workspace", + "--bind", str(venv_path), "/workspace/.venv", "--dev", "/dev", "--proc", "/proc", "--dir", "/tmp", @@ -278,6 +281,7 @@ def _build_bwrap_command(self, command: list[str], work_path: Path) -> list[str] "--setenv", "VIRTUAL_ENV", "/workspace/.venv", "--setenv", "PIP_CACHE_DIR", "/workspace/.tmp/pip-cache", "--setenv", "PIP_DISABLE_PIP_VERSION_CHECK", "1", + "--setenv", "UV_CACHE_DIR", "/uv-cache", "--chdir", "/workspace", ] if not self.config.allow_network: @@ -317,6 +321,7 @@ async def execute( ) -> ExecutionResult: """Execute code in a subprocess.""" on_output = kwargs.get("on_output") + agent_id = kwargs.get("agent_id") start_time = time.time() # Validate language @@ -361,6 +366,18 @@ async def execute( work_path.mkdir(parents=True, exist_ok=True) (work_path / ".tmp").mkdir(parents=True, exist_ok=True) (work_path / ".tmp" / "pip-cache").mkdir(parents=True, exist_ok=True) + + # Determine persistent venv path if possible + if agent_id: + # We place the virtual environment in a persistent location + venv_path = Path("/data/agents").resolve() / str(agent_id) / ".venv" + venv_path.parent.mkdir(parents=True, exist_ok=True) + + # Ensure global uv cache exists + uv_cache = Path("/data/agents/.uv-cache") + uv_cache.mkdir(parents=True, exist_ok=True) + else: + venv_path = work_path / ".venv" # Determine command and file extension if language == "python": @@ -374,11 +391,11 @@ async def execute( script_path = work_path / f"_exec_tmp{ext}" try: - self._ensure_workspace_venv(work_path) + self._ensure_workspace_venv(venv_path) script_path.write_text(code, encoding="utf-8") - sandbox_command = self._build_command(language, f"/workspace/{script_path.name}", work_path) - bwrap_command = self._build_bwrap_command(sandbox_command, work_path) + sandbox_command = self._build_command(language, f"/workspace/{script_path.name}") + bwrap_command = self._build_bwrap_command(sandbox_command, work_path, venv_path) if not bwrap_command: if not self.config.allow_unsafe_fallback_when_bwrap_missing: duration_ms = int((time.time() - start_time) * 1000) diff --git a/backend/app/services/tool_seeder.py b/backend/app/services/tool_seeder.py index 6c26af134..c1ddbd92d 100644 --- a/backend/app/services/tool_seeder.py +++ b/backend/app/services/tool_seeder.py @@ -57,8 +57,8 @@ def _global_builtin_config(tool_data: dict) -> dict: """Return config safe to store on the global builtin Tool row.""" - if (tool_data.get("config_schema") or {}).get("fields"): - return {} + # Builtin tools specify defaults (like 'allow_network': True) in their 'config' dict. + # The actual sensitive data defaults are empty strings ("") so this is safe to store globally. return tool_data.get("config", {}) # Builtin tool definitions — these map to the hardcoded AGENT_TOOLS @@ -3680,7 +3680,6 @@ async def seed_builtin_tools(): continue legacy_config = meaningful_config(tool.config or {}) if not legacy_config: - tool.config = {} continue setting_key = tenant_tool_config_key(tool.name) existing_setting_r = await db.execute( @@ -3696,7 +3695,15 @@ async def seed_builtin_tools(): value={"config": legacy_config}, )) migrated += 1 - tool.config = {} + + # Remove sensitive fields from global config instead of wiping it + clean_config = {} + schema_fields = (tool.config_schema or {}).get("fields", []) + sensitive_keys = {f["key"] for f in schema_fields if f.get("type") == "password"} + for k, v in (tool.config or {}).items(): + if k not in sensitive_keys: + clean_config[k] = v + tool.config = clean_config if migrated: logger.info( f"[ToolSeeder] Migrated {migrated} legacy builtin tool config(s) " diff --git a/backend/test_sandbox_config.py b/backend/test_sandbox_config.py new file mode 100644 index 000000000..17a4581c1 --- /dev/null +++ b/backend/test_sandbox_config.py @@ -0,0 +1,19 @@ +import asyncio +from app.services.sandbox.config import SandboxConfig + +config = {"allow_network": True} +fallback = SandboxConfig(allow_network=False) +result = SandboxConfig.from_dict(config, fallback) +print(f"Result 1: {result.allow_network}") + +config2 = {"allow_network": "true"} +result2 = SandboxConfig.from_dict(config2, fallback) +print(f"Result 2: {result2.allow_network}") + +config3 = {"allow_network": "1"} +result3 = SandboxConfig.from_dict(config3, fallback) +print(f"Result 3: {result3.allow_network}") + +config4 = {} +result4 = SandboxConfig.from_dict(config4, fallback) +print(f"Result 4: {result4.allow_network}") diff --git a/backend/tests/test_finish_protocol.py b/backend/tests/test_finish_protocol.py index e36b6dd0d..59dbd2bb9 100644 --- a/backend/tests/test_finish_protocol.py +++ b/backend/tests/test_finish_protocol.py @@ -286,6 +286,67 @@ def test_finish_is_in_always_available_core_tools(): assert "finish" in _ALWAYS_INCLUDE_CORE +@pytest.mark.asyncio +async def test_mid_loop_token_limit_checking(monkeypatch): + from app.services.llm import caller + from app.services.llm.client import LLMResponse + + # Setup FakeStreamClient with several rounds of dummy tool calls + responses = [ + LLMResponse( + content="", + tool_calls=[{"id": f"call_{i}", "type": "function", "function": {"name": "dummy_tool", "arguments": "{}"}}], + usage={"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150} + ) + for i in range(4) + ] + fake_client = FakeStreamClient(responses) + + configs_called = 0 + async def mock_get_agent_config(agent_id): + nonlocal configs_called + configs_called += 1 + if configs_called > 1: + return 50, "⚠️ Daily token usage limit exceeded" + return 50, None + + monkeypatch.setattr(caller, "_get_agent_config", mock_get_agent_config) + monkeypatch.setattr(caller, "_get_user_name", lambda _user_id: _async_return("Ray")) + monkeypatch.setattr( + "app.services.agent_context.build_agent_context", + lambda *_args, **_kwargs: _async_return(("static", "dynamic")), + ) + monkeypatch.setattr(caller, "get_agent_tools_for_llm", lambda _agent_id: _async_return([ + {"type": "function", "function": {"name": "dummy_tool", "description": "dummy"}} + ])) + monkeypatch.setattr(caller, "execute_tool", lambda *_args, **_kwargs: _async_return("Success")) + monkeypatch.setattr(caller, "create_llm_client", lambda **_kwargs: fake_client) + + token_records = [] + async def mock_record_token_usage(agent_id, usage, **_kwargs): + token_records.append(usage.total_tokens) + + monkeypatch.setattr(caller, "record_token_usage", mock_record_token_usage) + + result = await caller.call_llm( + _model(), + [{"role": "user", "content": "hello"}], + "Agent", + "", + agent_id=uuid.uuid4(), + user_id=uuid.uuid4(), + ) + + # In round_i = 3 (the 4th round), it should trigger the mod-3 check, + # find the limit is exceeded, break the loop and return the limit message. + assert result == "⚠️ Daily token usage limit exceeded" + # Should have called record_token_usage once in the mid-loop check after 3 rounds + # round 0, 1, 2 usage is 150*3 = 450 tokens + assert len(token_records) == 1 + assert token_records[0] == 450 + assert fake_client.closed is True + + async def _async_return(value): return value diff --git a/deploy/docker-compose-multi.yml b/deploy/docker-compose-multi.yml index 3c5c7ea21..0e51cb249 100644 --- a/deploy/docker-compose-multi.yml +++ b/deploy/docker-compose-multi.yml @@ -83,10 +83,12 @@ services: - ./backend/agent_data:/data/agents - /var/run/docker.sock:/var/run/docker.sock - ./ss-nodes.json:/data/ss-nodes.json:ro + privileged: true cap_add: - SYS_ADMIN security_opt: - seccomp=unconfined + - apparmor=unconfined networks: default: aliases: @@ -141,10 +143,12 @@ services: - ./backend/agent_data:/data/agents - /var/run/docker.sock:/var/run/docker.sock - ./ss-nodes.json:/data/ss-nodes.json:ro + privileged: true cap_add: - SYS_ADMIN security_opt: - seccomp=unconfined + - apparmor=unconfined networks: - default depends_on: diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 92d4eaeb9..d6b730398 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -63,10 +63,12 @@ services: - ./backend/agent_data:/data/agents - /var/run/docker.sock:/var/run/docker.sock - ./ss-nodes.json:/data/ss-nodes.json:ro + privileged: true cap_add: - SYS_ADMIN security_opt: - seccomp=unconfined + - apparmor=unconfined networks: - default depends_on: @@ -89,7 +91,7 @@ services: API_UPSTREAM: ${API_UPSTREAM:-backend:8000} MINIO_UPSTREAM: ${MINIO_UPSTREAM:-minio:9000} volumes: - - ./nginx/nginx.conf:/etc/nginx/templates/default.conf.template:ro + - ./deploy/nginx/nginx.conf:/etc/nginx/templates/default.conf.template:ro networks: - default depends_on: diff --git a/frontend/src/components/WorkspaceOperationPanel.tsx b/frontend/src/components/WorkspaceOperationPanel.tsx index 32b9c2da3..8b71d92e2 100644 --- a/frontend/src/components/WorkspaceOperationPanel.tsx +++ b/frontend/src/components/WorkspaceOperationPanel.tsx @@ -1,5 +1,5 @@ import type { MouseEvent as ReactMouseEvent } from 'react'; -import { useEffect, useMemo, useRef, useState } from 'react'; +import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { createPortal } from 'react-dom'; import { useTranslation } from 'react-i18next'; import MarkdownRenderer from './MarkdownRenderer'; @@ -410,7 +410,7 @@ export default function WorkspaceOperationPanel({ const [previewState, setPreviewState] = useState<'idle' | 'loading' | 'ready' | 'deleted'>('idle'); const [editing, setEditing] = useState(false); const [saveState, setSaveState] = useState<'idle' | 'saving' | 'saved' | 'error'>('idle'); - const [revisions, setRevisions] = useState([]); + const [revisions, setRevisions] = useState([]); const [fileTree, setFileTree] = useState([]); const [activityOpenLocal, setActivityOpenLocal] = useState(false); const activityOpen = activityOpenProp ?? activityOpenLocal; @@ -490,12 +490,20 @@ export default function WorkspaceOperationPanel({ const loadFileTree = async () => { const loadDir = async (path: string, depth: number): Promise => { - if (depth > 4) return []; - const items = await fileApi.list(agentId, path).catch(() => []); - return Promise.all(items.map(async (item: WorkspaceFileNode) => { - if (!item.is_dir) return item; - return { ...item, children: await loadDir(item.path, depth + 1) }; - })); + if (depth > 8) return []; + const isRoot = path === (treeScope === 'workspace' ? WORKSPACE_ROOT : ''); + const isExpanded = expandedDirs.has(path); + if (!isRoot && !isExpanded) { + return []; + } + const items = await fileApi.list(agentId, path).catch(() => []); + return Promise.all(items.map(async (item: WorkspaceFileNode) => { + if (!item.is_dir) return item; + const children = expandedDirs.has(item.path) + ? await loadDir(item.path, depth + 1) + : []; + return { ...item, children }; + })); }; const roots = await loadDir(treeScope === 'workspace' ? WORKSPACE_ROOT : '', 0); setFileTree(roots); @@ -599,7 +607,7 @@ export default function WorkspaceOperationPanel({ useEffect(() => { loadFileTree(); - }, [agentId, activityKey, liveDraft?.path, treeScope]); + }, [agentId, activityKey, liveDraft?.path, treeScope, expandedDirs]); useEffect(() => { if (!activePath || treeScope !== 'workspace') return; diff --git a/frontend/src/pages/agent-detail/AgentDetailPage.tsx b/frontend/src/pages/agent-detail/AgentDetailPage.tsx index dd2848080..3fa7b8e7f 100644 --- a/frontend/src/pages/agent-detail/AgentDetailPage.tsx +++ b/frontend/src/pages/agent-detail/AgentDetailPage.tsx @@ -2102,36 +2102,7 @@ export default function AgentDetailPage() { } }; - const { data: soulContent } = useQuery({ - queryKey: ['file', id, 'soul.md'], - queryFn: () => fileApi.read(id!, 'soul.md'), - enabled: !!id && activeTab === 'mind', - }); - - const { data: memoryFiles = [] } = useQuery({ - queryKey: ['files', id, 'memory'], - queryFn: () => fileApi.list(id!, 'memory'), - enabled: !!id && activeTab === 'mind', - }); - const [expandedMemory, setExpandedMemory] = useState(null); - const { data: memoryFileContent } = useQuery({ - queryKey: ['file', id, expandedMemory], - queryFn: () => fileApi.read(id!, expandedMemory!), - enabled: !!id && !!expandedMemory, - }); - - const { data: skillFiles = [] } = useQuery({ - queryKey: ['files', id, 'skills'], - queryFn: () => fileApi.list(id!, 'skills'), - enabled: !!id && activeTab === 'skills', - }); - const [workspacePath, setWorkspacePath] = useState('workspace'); - const { data: workspaceFiles = [] } = useQuery({ - queryKey: ['files', id, workspacePath], - queryFn: () => fileApi.list(id!, workspacePath), - enabled: !!id && activeTab === 'workspace', - }); const { data: activityLogs = [] } = useQuery({ queryKey: ['activity', id], @@ -3144,7 +3115,6 @@ export default function AgentDetailPage() { setLivePanelVisible(true); collapseSidebarsForLivePanel(); } - queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] }); } upsertToolCallMessage({ role: 'tool_call', @@ -4073,17 +4043,7 @@ export default function AgentDetailPage() { retry: false, }); - const { data: channelConfig } = useQuery({ - queryKey: ['channel', id], - queryFn: () => channelApi.get(id!), - enabled: !!id && activeTab === 'settings', - }); - const { data: webhookData } = useQuery({ - queryKey: ['webhook-url', id], - queryFn: () => channelApi.webhookUrl(id!), - enabled: !!id && activeTab === 'settings', - }); const { data: llmModels = [], isLoading: llmModelsLoading } = useQuery({ queryKey: ['llm-models'], @@ -4142,17 +4102,7 @@ export default function AgentDetailPage() { enabled: !!id && activeTab === 'settings', }); - // ─── Soul editor ───────────────────────────────────── - const [soulEditing, setSoulEditing] = useState(false); - const [soulDraft, setSoulDraft] = useState(''); - const saveSoul = useMutation({ - mutationFn: () => fileApi.write(id!, 'soul.md', soulDraft), - onSuccess: () => { - queryClient.invalidateQueries({ queryKey: ['file', id, 'soul.md'] }); - setSoulEditing(false); - }, - }); const CopyBtn = ({ url }: { url: string }) => ( @@ -4165,9 +4115,6 @@ export default function AgentDetailPage() { ); // ─── File viewer ───────────────────────────────────── - const [viewingFile, setViewingFile] = useState(null); - const [fileEditing, setFileEditing] = useState(false); - const [fileDraft, setFileDraft] = useState(''); const [promptModal, setPromptModal] = useState<{ title: string; placeholder: string; action: string } | null>(null); const [deleteConfirm, setDeleteConfirm] = useState<{ path: string; name: string; isDir: boolean } | null>(null); const [uploadToast, setUploadToast] = useState<{ message: string; type: 'success' | 'error' } | null>(null); @@ -4183,31 +4130,13 @@ export default function AgentDetailPage() { setUploadToast({ message, type }); setTimeout(() => setUploadToast(null), 3000); }; - const { data: fileContent } = useQuery({ - queryKey: ['file-content', id, viewingFile], - queryFn: () => fileApi.read(id!, viewingFile!), - enabled: !!viewingFile, - }); // ─── Task creation & detail ─────────────────────────────────── const [showTaskForm, setShowTaskForm] = useState(false); const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); const [taskForm, setTaskForm] = useState({ title: '', description: '', priority: 'medium', type: 'todo' as 'todo' | 'supervision', supervision_target_name: '', remind_schedule: '', due_date: '' }); const [selectedTaskId, setSelectedTaskId] = useState(null); - const { data: taskLogs = [] } = useQuery({ - queryKey: ['task-logs', id, selectedTaskId], - queryFn: () => taskApi.getLogs(id!, selectedTaskId!), - enabled: !!id && !!selectedTaskId, - refetchInterval: selectedTaskId ? 3000 : false, - }); - // Schedule execution history (selectedTaskId format: 'sched-{uuid}') - const expandedScheduleId = selectedTaskId?.startsWith('sched-') ? selectedTaskId.slice(6) : null; - const { data: scheduleHistoryData } = useQuery({ - queryKey: ['schedule-history', id, expandedScheduleId], - queryFn: () => scheduleApi.history(id!, expandedScheduleId!), - enabled: !!id && !!expandedScheduleId, - }); const createTask = useMutation({ mutationFn: (data: any) => { const cleaned = { ...data }; @@ -6171,6 +6100,17 @@ export default function AgentDetailPage() { onTouchMoveCapture={handleChatTouchMoveCapture} style={{ flex: 1, overflowY: 'auto', padding: '12px 16px' }} > + {chatHistoryLoadingMore && ( +
+
+ {i18n.language?.startsWith('zh') ? '正在加载历史消息...' : 'Loading history...'} +
+ )} + {!chatHistoryHasMore && chatMessages.length > 0 && ( +
+ {i18n.language?.startsWith('zh') ? '已加载全部历史消息' : 'All history loaded'} +
+ )} {chatMessages.length === 0 && !showNoModelState && (
{activeSession?.title || t('agent.chat.startChat')}
@@ -6780,20 +6720,11 @@ export default function AgentDetailPage() { setPromptModal(null); if (action === 'newFolder') { await fileApi.write(id!, `${workspacePath}/${value}/.gitkeep`, ''); - queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] }); } else if (action === 'newFile') { await fileApi.write(id!, `${workspacePath}/${value}`, ''); - queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] }); - setViewingFile(`${workspacePath}/${value}`); - setFileEditing(true); - setFileDraft(''); } else if (action === 'newSkill') { const template = `---\nname: ${value}\ndescription: Describe what this skill does\n---\n\n# ${value}\n\n## Overview\nDescribe the purpose and when to use this skill.\n\n## Process\n1. Step one\n2. Step two\n\n## Output Format\nDescribe the expected output format.\n`; await fileApi.write(id!, `skills/${value}/SKILL.md`, template); - queryClient.invalidateQueries({ queryKey: ['files', id, 'skills'] }); - setViewingFile(`skills/${value}/SKILL.md`); - setFileEditing(true); - setFileDraft(template); } }} /> @@ -6811,9 +6742,6 @@ export default function AgentDetailPage() { if (path) { try { await fileApi.delete(id!, path); - setViewingFile(null); - setFileEditing(false); - queryClient.invalidateQueries({ queryKey: ['files', id, workspacePath] }); showToast(t('common.delete')); } catch (err: any) { showToast(t('agent.upload.failed'), 'error'); From ac86b669641ea961e667e39ac34f46843e6b3774 Mon Sep 17 00:00:00 2001 From: yaojin3616 Date: Wed, 10 Jun 2026 15:41:21 +0800 Subject: [PATCH 2/2] Fix #636: optimize bwrap sandbox process tree cleanup on timeout Also fixes an issue where Gemini 2.0/3.0 models would fail with 'Function call is missing a thought_signature in functionCall parts' by preserving _gemini_extra in canonical tool calls. --- backend/app/services/llm/caller.py | 7 +++++-- backend/app/services/llm/client.py | 21 +++++++++++++++---- .../sandbox/local/subprocess_backend.py | 8 +++++-- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/backend/app/services/llm/caller.py b/backend/app/services/llm/caller.py index 440e9f7d0..867b153e5 100644 --- a/backend/app/services/llm/caller.py +++ b/backend/app/services/llm/caller.py @@ -93,14 +93,17 @@ def _sanitize_tool_calls_for_context(tool_calls: list[dict]) -> tuple[list[dict] "Retry the tool call with `function.arguments` as one valid JSON object string." ) - sanitized.append({ + new_tc = { "id": tc.get("id", ""), "type": tc.get("type") or "function", "function": { "name": tool_name, "arguments": args_str, }, - }) + } + if "_gemini_extra" in tc: + new_tc["_gemini_extra"] = tc["_gemini_extra"] + sanitized.append(new_tc) return sanitized, None diff --git a/backend/app/services/llm/client.py b/backend/app/services/llm/client.py index 1f9af784b..6d263a83d 100644 --- a/backend/app/services/llm/client.py +++ b/backend/app/services/llm/client.py @@ -1169,11 +1169,16 @@ def _build_payload( parsed_args = args else: parsed_args = {} + + func_call_dict: dict[str, Any] = { + "name": fn.get("name", ""), + "args": parsed_args, + } + if "_gemini_extra" in tc: + func_call_dict.update(tc["_gemini_extra"]) + parts.append({ - "functionCall": { - "name": fn.get("name", ""), - "args": parsed_args, - } + "functionCall": func_call_dict }) if parts: contents.append({"role": "model", "parts": parts}) @@ -1282,6 +1287,9 @@ def _parse_response_data(self, data: dict[str, Any]) -> LLMResponse: if dedup_key in seen_tool_calls: continue seen_tool_calls.add(dedup_key) + + extra = {k: v for k, v in function_call.items() if k not in ["name", "args"]} + tool_calls.append({ "id": f"call_{len(tool_calls) + 1}", "type": "function", @@ -1289,6 +1297,7 @@ def _parse_response_data(self, data: dict[str, Any]) -> LLMResponse: "name": name, "arguments": args_str, }, + "_gemini_extra": extra, }) usage = self._normalize_usage(data.get("usageMetadata")) @@ -1429,6 +1438,9 @@ async def stream( if dedup_key in seen_tool_calls: continue seen_tool_calls.add(dedup_key) + + extra = {k: v for k, v in function_call.items() if k not in ["name", "args"]} + tool_calls.append({ "id": f"call_{len(tool_calls) + 1}", "type": "function", @@ -1436,6 +1448,7 @@ async def stream( "name": name, "arguments": args_str, }, + "_gemini_extra": extra, }) except (httpx.ConnectError, httpx.ReadError, httpx.ConnectTimeout) as e: diff --git a/backend/app/services/sandbox/local/subprocess_backend.py b/backend/app/services/sandbox/local/subprocess_backend.py index b3724e211..a3bf9d6ce 100644 --- a/backend/app/services/sandbox/local/subprocess_backend.py +++ b/backend/app/services/sandbox/local/subprocess_backend.py @@ -3,6 +3,7 @@ import asyncio import os import shutil +import signal import time from pathlib import Path @@ -188,6 +189,7 @@ def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool = "stdout": asyncio.subprocess.PIPE, "stderr": asyncio.subprocess.PIPE, "env": self._build_safe_env(work_path), + "start_new_session": True, } if use_preexec: kwargs["preexec_fn"] = self._build_preexec_fn(work_path, timeout) @@ -196,7 +198,6 @@ def _build_exec_kwargs(self, work_path: Path, timeout: int, use_preexec: bool = def _build_preexec_fn(self, work_path: Path, timeout: int): def _preexec(): os.chdir(work_path) - os.setsid() os.umask(0o077) try: @@ -455,7 +456,10 @@ async def read_stream(stream, out, label="stdout"): try: await asyncio.wait_for(proc.wait(), timeout=timeout) except asyncio.TimeoutError: - proc.kill() + try: + os.killpg(os.getpgid(proc.pid), signal.SIGTERM) + except Exception: + proc.kill() is_timeout = True await asyncio.gather(task1, task2)