fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)

gsquared94 · web-flow · commit 782bb4e4bd02 · 2026-03-31T23:00:16.000Z
diff --git a/integration-tests/browser-policy.test.ts b/integration-tests/browser-policy.test.ts
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { execSync } from 'node:child_process';
 import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
+import { env } from 'node:process';
 import stripAnsi from 'strip-ansi';
 
+// The browser agent's Chrome DevTools MCP connection is flaky in the Docker sandbox, so affected tests are skipped there.
+// See: https://github.com/google-gemini/gemini-cli/issues/24382
+const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
+
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = dirname(__filename);
 
@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
     await rig.cleanup();
   });
 
-  it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
-    rig.setup('browser-policy-skip-confirmation', {
-      fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
-      settings: {
-        agents: {
-          overrides: {
-            browser_agent: {
-              enabled: true,
+  it.skipIf(isDockerSandbox)(
+    'should skip confirmation when "Allow all server tools for this session" is chosen',
+    async () => {
+      rig.setup('browser-policy-skip-confirmation', {
+        fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
+        settings: {
+          agents: {
+            overrides: {
+              browser_agent: {
+                enabled: true,
+              },
+            },
+            browser: {
+              headless: true,
+              sessionMode: 'isolated',
+              allowedDomains: ['example.com'],
             },
-          },
-          browser: {
-            headless: true,
-            sessionMode: 'isolated',
-            allowedDomains: ['example.com'],
           },
         },
-      },
-    });
+      });
+
+      // Manually trust the folder to avoid the trust dialog and enable option 3 ("Allow all server tools for this session")
+      const geminiDir = join(rig.homeDir!, '.gemini');
+      mkdirSync(geminiDir, { recursive: true });
+
+      // Write to trustedFolders.json
+      const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
+      const trustedFolders = {
+        [rig.testDir!]: 'TRUST_FOLDER',
+      };
+      writeFileSync(
+        trustedFoldersPath,
+        JSON.stringify(trustedFolders, null, 2),
+      );
 
-    // Manually trust the folder to avoid the dialog and enable option 3
-    const geminiDir = join(rig.homeDir!, '.gemini');
-    mkdirSync(geminiDir, { recursive: true });
-
-    // Write to trustedFolders.json
-    const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
-    const trustedFolders = {
-      [rig.testDir!]: 'TRUST_FOLDER',
-    };
-    writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
-
-    // Force confirmation for browser agent.
-    // NOTE: We don't force confirm browser tools here because "Allow all server tools"
-    // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
-    // a rule in the user tier (4.x) like the one from this TOML.
-    // By removing the explicit mcp rule, the first MCP tool will still prompt
-    // due to default approvalMode = 'default', and then "Allow all" will correctly
-    // bypass subsequent tools.
-    const policyFile = join(rig.testDir!, 'force-confirm.toml');
-    writeFileSync(
-      policyFile,
-      `
+      // Force confirmation for browser agent.
+      // NOTE: We don't force confirm browser tools here because "Allow all server tools"
+      // adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
+      // a rule in the user tier (4.x) like the one from this TOML.
+      // By removing the explicit mcp rule, the first MCP tool will still prompt
+      // due to default approvalMode = 'default', and then "Allow all" will correctly
+      // bypass subsequent tools.
+      const policyFile = join(rig.testDir!, 'force-confirm.toml');
+      writeFileSync(
+        policyFile,
+        `
 [[rule]]
 name = "Force confirm browser_agent"
 toolName = "browser_agent"
 decision = "ask_user"
 priority = 200
 `,
-    );
-
-    // Update settings.json in both project and home directories to point to the policy file
-    for (const baseDir of [rig.testDir!, rig.homeDir!]) {
-      const settingsPath = join(baseDir, '.gemini', 'settings.json');
-      if (existsSync(settingsPath)) {
-        const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
-        settings.policyPaths = [policyFile];
-        // Ensure folder trust is enabled
-        settings.security = settings.security || {};
-        settings.security.folderTrust = settings.security.folderTrust || {};
-        settings.security.folderTrust.enabled = true;
-        writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+      );
+
+      // Update settings.json in both project and home directories to point to the policy file
+      for (const baseDir of [rig.testDir!, rig.homeDir!]) {
+        const settingsPath = join(baseDir, '.gemini', 'settings.json');
+        if (existsSync(settingsPath)) {
+          const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
+          settings.policyPaths = [policyFile];
+          // Ensure folder trust is enabled
+          settings.security = settings.security || {};
+          settings.security.folderTrust = settings.security.folderTrust || {};
+          settings.security.folderTrust.enabled = true;
+          writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+        }
       }
-    }
 
-    const run = await rig.runInteractive({
-      approvalMode: 'default',
-      env: {
-        GEMINI_CLI_INTEGRATION_TEST: 'true',
-      },
-    });
+      const run = await rig.runInteractive({
+        approvalMode: 'default',
+        env: {
+          GEMINI_CLI_INTEGRATION_TEST: 'true',
+        },
+      });
 
-    await run.sendKeys(
-      'Open https://example.com and check if there is a heading\r',
-    );
-    await run.sendKeys('\r');
-
-    // Handle confirmations.
-    // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('action required'),
-      60000,
-      1000,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 2000));
-
-    // Handle privacy notice
-    await poll(
-      () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
-      5000,
-      100,
-    );
-    await run.sendKeys('1\r');
-    await new Promise((r) => setTimeout(r, 5000));
-
-    // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
-    await poll(
-      () => {
-        const stripped = stripAnsi(run.output).toLowerCase();
-        return (
-          stripped.includes('new_page') &&
-          stripped.includes('allow all server tools for this session')
-        );
-      },
-      60000,
-      1000,
-    );
+      await run.sendKeys(
+        'Open https://example.com and check if there is a heading\r',
+      );
+      await run.sendKeys('\r');
+
+      // Handle confirmations.
+      // 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('action required'),
+        60000,
+        1000,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 2000));
+
+      // Handle privacy notice
+      await poll(
+        () => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
+        5000,
+        100,
+      );
+      await run.sendKeys('1\r');
+      await new Promise((r) => setTimeout(r, 5000));
+
+      // new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
+      await poll(
+        () => {
+          const stripped = stripAnsi(run.output).toLowerCase();
+          return (
+            stripped.includes('new_page') &&
+            stripped.includes('allow all server tools for this session')
+          );
+        },
+        60000,
+        1000,
+      );
 
-    // Select "Allow all server tools for this session" (option 3)
-    await run.sendKeys('3\r');
-    await new Promise((r) => setTimeout(r, 30000));
+      // Select "Allow all server tools for this session" (option 3)
+      await run.sendKeys('3\r');
+      await new Promise((r) => setTimeout(r, 30000));
 
-    const output = stripAnsi(run.output).toLowerCase();
+      const output = stripAnsi(run.output).toLowerCase();
 
-    expect(output).toContain('browser_agent');
-    expect(output).toContain('completed successfully');
-  });
+      expect(output).toContain('browser_agent');
+      expect(output).toContain('completed successfully');
+    },
+  );
 
   it('should show the visible warning when browser agent starts in existing session mode', async () => {
     rig.setup('browser-session-warning', {
diff --git a/integration-tests/file-system.test.ts b/integration-tests/file-system.test.ts
@@ -121,6 +121,7 @@ describe('file-system', () => {
 
     const result = await rig.run({
       args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
+      timeout: 600000, // 10 min — the real LLM can be slow in the Docker sandbox
     });
 
     const foundToolCall = await rig.waitForToolCall('write_file');
diff --git a/packages/core/src/agents/browser/browserAgentFactory.test.ts b/packages/core/src/agents/browser/browserAgentFactory.test.ts
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
       expect(browserManager).toBeDefined();
     });
 
-    it('should call printOutput when provided', async () => {
+    it('should not call printOutput for internal setup messages', async () => {
       const printOutput = vi.fn();
 
       await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
         printOutput,
       );
 
-      expect(printOutput).toHaveBeenCalled();
+      expect(printOutput).not.toHaveBeenCalled();
     });
 
     it('should create definition with correct structure', async () => {
diff --git a/packages/core/src/agents/browser/browserAgentFactory.ts b/packages/core/src/agents/browser/browserAgentFactory.ts
@@ -53,7 +53,7 @@ import {
 export async function createBrowserAgentDefinition(
   config: Config,
   messageBus: MessageBus,
-  printOutput?: (msg: string) => void,
+  _printOutput?: (msg: string) => void,
 ): Promise<{
   definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
   browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
   const browserManager = BrowserManager.getInstance(config);
   await browserManager.ensureConnection();
 
-  if (printOutput) {
-    printOutput('Browser connected with isolated MCP client.');
-  }
+  debugLogger.log('Browser connected with isolated MCP client.');
 
   // Determine if input blocker should be active (non-headless + enabled)
   const shouldDisableInput = config.shouldDisableBrowserUserInput();
   // Inject automation overlay and input blocker if not in headless mode
   const browserConfig = config.getBrowserAgentConfig();
   if (!browserConfig?.customConfig?.headless) {
-    if (printOutput) {
-      printOutput('Injecting automation overlay...');
-    }
+    debugLogger.log('Injecting automation overlay...');
     await injectAutomationOverlay(browserManager);
     if (shouldDisableInput) {
-      if (printOutput) {
-        printOutput('Injecting input blocker...');
-      }
+      debugLogger.log('Injecting input blocker...');
       await injectInputBlocker(browserManager);
     }
   }
diff --git a/packages/core/src/agents/browser/browserAgentInvocation.ts b/packages/core/src/agents/browser/browserAgentInvocation.ts