Skip to content

Commit dbf206f

Browse files
feat(e2e): Enable E2E tests - 39 passing tests (#10720)
Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
1 parent b04597f commit dbf206f

10 files changed

Lines changed: 1097 additions & 2042 deletions

apps/vscode-e2e/README.md

Lines changed: 405 additions & 0 deletions
Large diffs are not rendered by default.

apps/vscode-e2e/src/suite/index.ts

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ import type { RooCodeAPI } from "@roo-code/types"
77

88
import { waitFor } from "./utils"
99

10+
/**
11+
* Models to test against - high-performing models from different providers
12+
*/
13+
const MODELS_TO_TEST = ["openai/gpt-5.2", "anthropic/claude-sonnet-4.5", "google/gemini-3-pro-preview"]
14+
15+
interface ModelTestResult {
16+
model: string
17+
failures: number
18+
passes: number
19+
duration: number
20+
}
21+
1022
export async function run() {
1123
const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")
1224

@@ -16,28 +28,18 @@ export async function run() {
1628

1729
const api = extension.isActive ? extension.exports : await extension.activate()
1830

31+
// Initial configuration with first model (will be reconfigured per model)
1932
await api.setConfiguration({
2033
apiProvider: "openrouter" as const,
2134
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
22-
openRouterModelId: "openai/gpt-4.1",
35+
openRouterModelId: MODELS_TO_TEST[0],
2336
})
2437

2538
await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
2639
await waitFor(() => api.isReady())
2740

2841
globalThis.api = api
2942

30-
const mochaOptions: Mocha.MochaOptions = {
31-
ui: "tdd",
32-
timeout: 20 * 60 * 1_000, // 20m
33-
}
34-
35-
if (process.env.TEST_GREP) {
36-
mochaOptions.grep = process.env.TEST_GREP
37-
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
38-
}
39-
40-
const mocha = new Mocha(mochaOptions)
4143
const cwd = path.resolve(__dirname, "..")
4244

4345
let testFiles: string[]
@@ -57,9 +59,91 @@ export async function run() {
5759
throw new Error(`No test files found matching criteria: ${process.env.TEST_FILE || "all tests"}`)
5860
}
5961

60-
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))
62+
const results: ModelTestResult[] = []
63+
let totalFailures = 0
64+
65+
// Run tests for each model sequentially
66+
for (const model of MODELS_TO_TEST) {
67+
console.log(`\n${"=".repeat(60)}`)
68+
console.log(` TESTING WITH MODEL: ${model}`)
69+
console.log(`${"=".repeat(60)}\n`)
70+
71+
// Reconfigure API for this model
72+
await api.setConfiguration({
73+
apiProvider: "openrouter" as const,
74+
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
75+
openRouterModelId: model,
76+
})
77+
78+
// Wait for API to be ready with new configuration
79+
await waitFor(() => api.isReady())
80+
81+
const startTime = Date.now()
82+
83+
const mochaOptions: Mocha.MochaOptions = {
84+
ui: "tdd",
85+
timeout: 20 * 60 * 1_000, // 20m
86+
}
87+
88+
if (process.env.TEST_GREP) {
89+
mochaOptions.grep = process.env.TEST_GREP
90+
console.log(`Running tests matching pattern: ${process.env.TEST_GREP}`)
91+
}
92+
93+
const mocha = new Mocha(mochaOptions)
94+
95+
// Add test files fresh for each model run
96+
testFiles.forEach((testFile) => mocha.addFile(path.resolve(cwd, testFile)))
97+
98+
// Run tests for this model
99+
const modelResult = await new Promise<{ failures: number; passes: number }>((resolve) => {
100+
const runner = mocha.run((failures) => {
101+
resolve({
102+
failures,
103+
passes: runner.stats?.passes ?? 0,
104+
})
105+
})
106+
})
107+
108+
const duration = Date.now() - startTime
109+
110+
results.push({
111+
model,
112+
failures: modelResult.failures,
113+
passes: modelResult.passes,
114+
duration,
115+
})
116+
117+
totalFailures += modelResult.failures
118+
119+
console.log(
120+
`\n[${model}] Completed: ${modelResult.passes} passed, ${modelResult.failures} failed (${(duration / 1000).toFixed(1)}s)\n`,
121+
)
122+
123+
// Clear mocha's require cache to allow re-running tests
124+
mocha.dispose()
125+
testFiles.forEach((testFile) => {
126+
const fullPath = path.resolve(cwd, testFile)
127+
delete require.cache[require.resolve(fullPath)]
128+
})
129+
}
130+
131+
// Print summary
132+
console.log(`\n${"=".repeat(60)}`)
133+
console.log(` MULTI-MODEL TEST SUMMARY`)
134+
console.log(`${"=".repeat(60)}`)
135+
136+
for (const result of results) {
137+
const status = result.failures === 0 ? "✓ PASS" : "✗ FAIL"
138+
console.log(` ${status} ${result.model}`)
139+
console.log(
140+
` ${result.passes} passed, ${result.failures} failed (${(result.duration / 1000).toFixed(1)}s)`,
141+
)
142+
}
61143

62-
return new Promise<void>((resolve, reject) =>
63-
mocha.run((failures) => (failures === 0 ? resolve() : reject(new Error(`${failures} tests failed.`)))),
64-
)
144+
console.log(`${"=".repeat(60)}\n`)
145+
146+
if (totalFailures > 0) {
147+
throw new Error(`${totalFailures} total test failures across all models.`)
148+
}
65149
}

apps/vscode-e2e/src/suite/subtasks.test.ts

Lines changed: 66 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2,73 +2,92 @@ import * as assert from "assert"
22

33
import { RooCodeEventName, type ClineMessage } from "@roo-code/types"
44

5-
import { sleep, waitFor, waitUntilCompleted } from "./utils"
5+
import { waitFor } from "./utils"
66

7-
suite.skip("Roo Code Subtasks", () => {
8-
test("Should handle subtask cancellation and resumption correctly", async () => {
7+
suite("Roo Code Subtasks", () => {
8+
test("Should create and complete a subtask successfully", async function () {
9+
this.timeout(180_000) // 3 minutes for complex orchestration
910
const api = globalThis.api
1011

11-
const messages: Record<string, ClineMessage[]> = {}
12+
const messages: ClineMessage[] = []
13+
let childTaskCompleted = false
14+
let parentCompleted = false
1215

13-
api.on(RooCodeEventName.Message, ({ taskId, message }) => {
14-
if (message.type === "say" && message.partial === false) {
15-
messages[taskId] = messages[taskId] || []
16-
messages[taskId].push(message)
16+
// Listen for messages to detect subtask result
17+
const messageHandler = ({ message }: { message: ClineMessage }) => {
18+
messages.push(message)
19+
20+
// Log completion messages
21+
if (message.type === "say" && message.say === "completion_result") {
22+
console.log("Completion result:", message.text?.substring(0, 100))
1723
}
18-
})
24+
}
25+
api.on(RooCodeEventName.Message, messageHandler)
26+
27+
// Listen for task completion
28+
const completionHandler = (taskId: string) => {
29+
if (taskId === parentTaskId) {
30+
parentCompleted = true
31+
console.log("✓ Parent task completed")
32+
} else {
33+
childTaskCompleted = true
34+
console.log("✓ Child task completed:", taskId)
35+
}
36+
}
37+
api.on(RooCodeEventName.TaskCompleted, completionHandler)
1938

20-
const childPrompt = "You are a calculator. Respond only with numbers. What is the square root of 9?"
39+
const childPrompt = "What is 2 + 2? Respond with just the number."
2140

22-
// Start a parent task that will create a subtask.
41+
// Start a parent task that will create a subtask
42+
console.log("Starting parent task that will spawn subtask...")
2343
const parentTaskId = await api.startNewTask({
2444
configuration: {
25-
mode: "ask",
45+
mode: "code",
2646
alwaysAllowModeSwitch: true,
2747
alwaysAllowSubtasks: true,
2848
autoApprovalEnabled: true,
2949
enableCheckpoints: false,
3050
},
31-
text:
32-
"You are the parent task. " +
33-
`Create a subtask by using the new_task tool with the message '${childPrompt}'.` +
34-
"After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.",
51+
text: `Create a subtask using the new_task tool with this message: "${childPrompt}". Wait for the subtask to complete, then tell me the result.`,
3552
})
3653

37-
let spawnedTaskId: string | undefined = undefined
54+
try {
55+
// Wait for child task to complete
56+
console.log("Waiting for child task to complete...")
57+
await waitFor(() => childTaskCompleted, { timeout: 90_000 })
58+
console.log("✓ Child task completed")
3859

39-
// Wait for the subtask to be spawned and then cancel it.
40-
api.on(RooCodeEventName.TaskSpawned, (_, childTaskId) => (spawnedTaskId = childTaskId))
41-
await waitFor(() => !!spawnedTaskId)
42-
await sleep(1_000) // Give the task a chance to start and populate the history.
43-
await api.cancelCurrentTask()
60+
// Wait for parent to complete
61+
console.log("Waiting for parent task to complete...")
62+
await waitFor(() => parentCompleted, { timeout: 90_000 })
63+
console.log("✓ Parent task completed")
4464

45-
// Wait a bit to ensure any task resumption would have happened.
46-
await sleep(2_000)
65+
// Verify the parent task mentions the subtask result (should contain "4")
66+
const hasSubtaskResult = messages.some(
67+
(m) =>
68+
m.type === "say" &&
69+
m.say === "completion_result" &&
70+
m.text?.includes("4") &&
71+
m.text?.toLowerCase().includes("subtask"),
72+
)
4773

48-
// The parent task should not have resumed yet, so we shouldn't see
49-
// "Parent task resumed".
50-
assert.ok(
51-
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
52-
undefined,
53-
"Parent task should not have resumed after subtask cancellation",
54-
)
74+
// Verify all events occurred
75+
assert.ok(childTaskCompleted, "Child task should have completed")
76+
assert.ok(parentCompleted, "Parent task should have completed")
77+
assert.ok(hasSubtaskResult, "Parent task should mention the subtask result")
5578

56-
// Start a new task with the same message as the subtask.
57-
const anotherTaskId = await api.startNewTask({ text: childPrompt })
58-
await waitUntilCompleted({ api, taskId: anotherTaskId })
79+
console.log("Test passed! Subtask orchestration working correctly")
80+
} finally {
81+
// Clean up
82+
api.off(RooCodeEventName.Message, messageHandler)
83+
api.off(RooCodeEventName.TaskCompleted, completionHandler)
5984

60-
// Wait a bit to ensure any task resumption would have happened.
61-
await sleep(2_000)
62-
63-
// The parent task should still not have resumed.
64-
assert.ok(
65-
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
66-
undefined,
67-
"Parent task should not have resumed after subtask cancellation",
68-
)
69-
70-
// Clean up - cancel all tasks.
71-
await api.clearCurrentTask()
72-
await waitUntilCompleted({ api, taskId: parentTaskId })
85+
// Cancel any remaining tasks
86+
try {
87+
await api.cancelCurrentTask()
88+
} catch {
89+
// Task might already be complete
90+
}
91+
}
7392
})
7493
})

0 commit comments

Comments
 (0)