Skip to content

Commit 782bb4e

Browse files
authored
fix(core): fix browser agent UX issues and improve E2E test reliability (#24312)
1 parent 94f9480 commit 782bb4e

5 files changed

Lines changed: 150 additions & 134 deletions

File tree

integration-tests/browser-policy.test.ts

Lines changed: 111 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ import { dirname, join } from 'node:path';
1010
import { fileURLToPath } from 'node:url';
1111
import { execSync } from 'node:child_process';
1212
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'node:fs';
13+
import { env } from 'node:process';
1314
import stripAnsi from 'strip-ansi';
1415

16+
// Browser agent Chrome DevTools MCP connection is flaky in Docker sandbox.
17+
// See: https://github.com/google-gemini/gemini-cli/issues/24382
18+
const isDockerSandbox = env['GEMINI_SANDBOX'] === 'docker';
19+
1520
const __filename = fileURLToPath(import.meta.url);
1621
const __dirname = dirname(__filename);
1722

@@ -59,122 +64,128 @@ describe.skipIf(!chromeAvailable)('browser-policy', () => {
5964
await rig.cleanup();
6065
});
6166

62-
it('should skip confirmation when "Allow all server tools for this session" is chosen', async () => {
63-
rig.setup('browser-policy-skip-confirmation', {
64-
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
65-
settings: {
66-
agents: {
67-
overrides: {
68-
browser_agent: {
69-
enabled: true,
67+
it.skipIf(isDockerSandbox)(
68+
'should skip confirmation when "Allow all server tools for this session" is chosen',
69+
async () => {
70+
rig.setup('browser-policy-skip-confirmation', {
71+
fakeResponsesPath: join(__dirname, 'browser-policy.responses'),
72+
settings: {
73+
agents: {
74+
overrides: {
75+
browser_agent: {
76+
enabled: true,
77+
},
78+
},
79+
browser: {
80+
headless: true,
81+
sessionMode: 'isolated',
82+
allowedDomains: ['example.com'],
7083
},
71-
},
72-
browser: {
73-
headless: true,
74-
sessionMode: 'isolated',
75-
allowedDomains: ['example.com'],
7684
},
7785
},
78-
},
79-
});
86+
});
87+
88+
// Manually trust the folder to avoid the dialog and enable option 3
89+
const geminiDir = join(rig.homeDir!, '.gemini');
90+
mkdirSync(geminiDir, { recursive: true });
91+
92+
// Write to trustedFolders.json
93+
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
94+
const trustedFolders = {
95+
[rig.testDir!]: 'TRUST_FOLDER',
96+
};
97+
writeFileSync(
98+
trustedFoldersPath,
99+
JSON.stringify(trustedFolders, null, 2),
100+
);
80101

81-
// Manually trust the folder to avoid the dialog and enable option 3
82-
const geminiDir = join(rig.homeDir!, '.gemini');
83-
mkdirSync(geminiDir, { recursive: true });
84-
85-
// Write to trustedFolders.json
86-
const trustedFoldersPath = join(geminiDir, 'trustedFolders.json');
87-
const trustedFolders = {
88-
[rig.testDir!]: 'TRUST_FOLDER',
89-
};
90-
writeFileSync(trustedFoldersPath, JSON.stringify(trustedFolders, null, 2));
91-
92-
// Force confirmation for browser agent.
93-
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
94-
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
95-
// a rule in the user tier (4.x) like the one from this TOML.
96-
// By removing the explicit mcp rule, the first MCP tool will still prompt
97-
// due to default approvalMode = 'default', and then "Allow all" will correctly
98-
// bypass subsequent tools.
99-
const policyFile = join(rig.testDir!, 'force-confirm.toml');
100-
writeFileSync(
101-
policyFile,
102-
`
102+
// Force confirmation for browser agent.
103+
// NOTE: We don't force confirm browser tools here because "Allow all server tools"
104+
// adds a rule with ALWAYS_ALLOW_PRIORITY (3.9x) which would be overshadowed by
105+
// a rule in the user tier (4.x) like the one from this TOML.
106+
// By removing the explicit mcp rule, the first MCP tool will still prompt
107+
// due to default approvalMode = 'default', and then "Allow all" will correctly
108+
// bypass subsequent tools.
109+
const policyFile = join(rig.testDir!, 'force-confirm.toml');
110+
writeFileSync(
111+
policyFile,
112+
`
103113
[[rule]]
104114
name = "Force confirm browser_agent"
105115
toolName = "browser_agent"
106116
decision = "ask_user"
107117
priority = 200
108118
`,
109-
);
110-
111-
// Update settings.json in both project and home directories to point to the policy file
112-
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
113-
const settingsPath = join(baseDir, '.gemini', 'settings.json');
114-
if (existsSync(settingsPath)) {
115-
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
116-
settings.policyPaths = [policyFile];
117-
// Ensure folder trust is enabled
118-
settings.security = settings.security || {};
119-
settings.security.folderTrust = settings.security.folderTrust || {};
120-
settings.security.folderTrust.enabled = true;
121-
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
119+
);
120+
121+
// Update settings.json in both project and home directories to point to the policy file
122+
for (const baseDir of [rig.testDir!, rig.homeDir!]) {
123+
const settingsPath = join(baseDir, '.gemini', 'settings.json');
124+
if (existsSync(settingsPath)) {
125+
const settings = JSON.parse(readFileSync(settingsPath, 'utf-8'));
126+
settings.policyPaths = [policyFile];
127+
// Ensure folder trust is enabled
128+
settings.security = settings.security || {};
129+
settings.security.folderTrust = settings.security.folderTrust || {};
130+
settings.security.folderTrust.enabled = true;
131+
writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
132+
}
122133
}
123-
}
124134

125-
const run = await rig.runInteractive({
126-
approvalMode: 'default',
127-
env: {
128-
GEMINI_CLI_INTEGRATION_TEST: 'true',
129-
},
130-
});
135+
const run = await rig.runInteractive({
136+
approvalMode: 'default',
137+
env: {
138+
GEMINI_CLI_INTEGRATION_TEST: 'true',
139+
},
140+
});
131141

132-
await run.sendKeys(
133-
'Open https://example.com and check if there is a heading\r',
134-
);
135-
await run.sendKeys('\r');
136-
137-
// Handle confirmations.
138-
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
139-
await poll(
140-
() => stripAnsi(run.output).toLowerCase().includes('action required'),
141-
60000,
142-
1000,
143-
);
144-
await run.sendKeys('1\r');
145-
await new Promise((r) => setTimeout(r, 2000));
146-
147-
// Handle privacy notice
148-
await poll(
149-
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
150-
5000,
151-
100,
152-
);
153-
await run.sendKeys('1\r');
154-
await new Promise((r) => setTimeout(r, 5000));
155-
156-
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
157-
await poll(
158-
() => {
159-
const stripped = stripAnsi(run.output).toLowerCase();
160-
return (
161-
stripped.includes('new_page') &&
162-
stripped.includes('allow all server tools for this session')
163-
);
164-
},
165-
60000,
166-
1000,
167-
);
142+
await run.sendKeys(
143+
'Open https://example.com and check if there is a heading\r',
144+
);
145+
await run.sendKeys('\r');
146+
147+
// Handle confirmations.
148+
// 1. Initial browser_agent delegation (likely only 3 options, so use option 1: Allow once)
149+
await poll(
150+
() => stripAnsi(run.output).toLowerCase().includes('action required'),
151+
60000,
152+
1000,
153+
);
154+
await run.sendKeys('1\r');
155+
await new Promise((r) => setTimeout(r, 2000));
156+
157+
// Handle privacy notice
158+
await poll(
159+
() => stripAnsi(run.output).toLowerCase().includes('privacy notice'),
160+
5000,
161+
100,
162+
);
163+
await run.sendKeys('1\r');
164+
await new Promise((r) => setTimeout(r, 5000));
165+
166+
// new_page (MCP tool, should have 4 options, use option 3: Allow all server tools)
167+
await poll(
168+
() => {
169+
const stripped = stripAnsi(run.output).toLowerCase();
170+
return (
171+
stripped.includes('new_page') &&
172+
stripped.includes('allow all server tools for this session')
173+
);
174+
},
175+
60000,
176+
1000,
177+
);
168178

169-
// Select "Allow all server tools for this session" (option 3)
170-
await run.sendKeys('3\r');
171-
await new Promise((r) => setTimeout(r, 30000));
179+
// Select "Allow all server tools for this session" (option 3)
180+
await run.sendKeys('3\r');
181+
await new Promise((r) => setTimeout(r, 30000));
172182

173-
const output = stripAnsi(run.output).toLowerCase();
183+
const output = stripAnsi(run.output).toLowerCase();
174184

175-
expect(output).toContain('browser_agent');
176-
expect(output).toContain('completed successfully');
177-
});
185+
expect(output).toContain('browser_agent');
186+
expect(output).toContain('completed successfully');
187+
},
188+
);
178189

179190
it('should show the visible warning when browser agent starts in existing session mode', async () => {
180191
rig.setup('browser-session-warning', {

integration-tests/file-system.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ describe('file-system', () => {
121121

122122
const result = await rig.run({
123123
args: `write "hello" to "${fileName}" and then stop. Do not perform any other actions.`,
124+
timeout: 600000, // 10 min — real LLM can be slow in Docker sandbox
124125
});
125126

126127
const foundToolCall = await rig.waitForToolCall('write_file');

packages/core/src/agents/browser/browserAgentFactory.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ describe('browserAgentFactory', () => {
166166
expect(browserManager).toBeDefined();
167167
});
168168

169-
it('should call printOutput when provided', async () => {
169+
it('should not call printOutput for internal setup messages', async () => {
170170
const printOutput = vi.fn();
171171

172172
await createBrowserAgentDefinition(
@@ -175,7 +175,7 @@ describe('browserAgentFactory', () => {
175175
printOutput,
176176
);
177177

178-
expect(printOutput).toHaveBeenCalled();
178+
expect(printOutput).not.toHaveBeenCalled();
179179
});
180180

181181
it('should create definition with correct structure', async () => {

packages/core/src/agents/browser/browserAgentFactory.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ import {
5353
export async function createBrowserAgentDefinition(
5454
config: Config,
5555
messageBus: MessageBus,
56-
printOutput?: (msg: string) => void,
56+
_printOutput?: (msg: string) => void,
5757
): Promise<{
5858
definition: LocalAgentDefinition<typeof BrowserTaskResultSchema>;
5959
browserManager: BrowserManager;
@@ -66,23 +66,17 @@ export async function createBrowserAgentDefinition(
6666
const browserManager = BrowserManager.getInstance(config);
6767
await browserManager.ensureConnection();
6868

69-
if (printOutput) {
70-
printOutput('Browser connected with isolated MCP client.');
71-
}
69+
debugLogger.log('Browser connected with isolated MCP client.');
7270

7371
// Determine if input blocker should be active (non-headless + enabled)
7472
const shouldDisableInput = config.shouldDisableBrowserUserInput();
7573
// Inject automation overlay and input blocker if not in headless mode
7674
const browserConfig = config.getBrowserAgentConfig();
7775
if (!browserConfig?.customConfig?.headless) {
78-
if (printOutput) {
79-
printOutput('Injecting automation overlay...');
80-
}
76+
debugLogger.log('Injecting automation overlay...');
8177
await injectAutomationOverlay(browserManager);
8278
if (shouldDisableInput) {
83-
if (printOutput) {
84-
printOutput('Injecting input blocker...');
85-
}
79+
debugLogger.log('Injecting input blocker...');
8680
await injectInputBlocker(browserManager);
8781
}
8882
}

0 commit comments

Comments
 (0)