Skip to content

Commit 13c3943

Browse files
authored
Add out-of-memory session recovery (#491)
* fix: verify published cli with okcode bin
* fix: make npm publish rerun-safe
* fix: publish cli from cjs bin
* Add out-of-memory session recovery
1 parent c0d1ed4 commit 13c3943

6 files changed

Lines changed: 188 additions & 57 deletions

File tree

apps/web/src/components/ChatView.browser.tsx

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,34 @@ function buildFixture(snapshot: OrchestrationReadModel): TestFixture {
291291
};
292292
}
293293

294+
/**
 * Returns a copy of `snapshot` in which the thread whose id equals `THREAD_ID`
 * (when it already has a session) carries a session in the given error state.
 *
 * The replacement session keeps only `threadId`, `providerName`, `runtimeMode`
 * and `activeTurnId` from the existing session; `status` and `lastError` come
 * from `input`, and `updatedAt` is set to `NOW_ISO`. Every other thread, and a
 * matching thread without a session, is passed through untouched.
 *
 * @param snapshot - Read model to derive the fixture from; it is not mutated.
 * @param input - Session status and error message to apply.
 * @returns A new read model with the rewritten thread list.
 */
function withThreadSessionError(
  snapshot: OrchestrationReadModel,
  input: {
    status: OrchestrationSessionStatus;
    lastError: string;
  },
): OrchestrationReadModel {
  return {
    ...snapshot,
    threads: snapshot.threads.map((candidate) => {
      const existingSession = candidate.session;
      // Only the fixture thread is rewritten, and only if it has a session to rebuild.
      if (candidate.id !== THREAD_ID || !existingSession) {
        return candidate;
      }

      const { threadId, providerName, runtimeMode, activeTurnId } = existingSession;
      return {
        ...candidate,
        session: {
          threadId,
          providerName,
          runtimeMode,
          activeTurnId,
          status: input.status,
          lastError: input.lastError,
          updatedAt: NOW_ISO,
        },
      };
    }),
  };
}
321+
294322
function addThreadToSnapshot(
295323
snapshot: OrchestrationReadModel,
296324
threadId: ThreadId,
@@ -1634,6 +1662,50 @@ describe("ChatView timeline estimator parity (full app)", () => {
16341662
}
16351663
});
16361664

1665+
// Mounts the chat view with a thread session in an out-of-memory error state,
// clicks the banner's reset action, and requires a `thread.session.stop`
// command for the fixture thread to be dispatched.
it("resets the provider session from the error banner after an out-of-memory failure", async () => {
  // Empty the shared request log so only dispatches made during this test are inspected.
  wsRequests.length = 0;

  const mounted = await mountChatView({
    viewport: DEFAULT_VIEWPORT,
    snapshot: withThreadSessionError(
      createSnapshotForTargetUser({
        targetMessageId: "msg-user-oom-reset" as MessageId,
        targetText: "oom reset target",
      }),
      {
        status: "error",
        lastError:
          "FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory",
      },
    ),
  });

  try {
    const recoverButton = await waitForElement(
      () =>
        document.querySelector<HTMLButtonElement>(
          'button[aria-label="Reset session after out-of-memory failure"]',
        ),
      "Unable to find out-of-memory recovery button.",
    );

    recoverButton.click();

    // `vi.waitFor` retries only while its callback throws, so a predicate that
    // just returns `false` would resolve immediately and assert nothing.
    // `vi.waitUntil` keeps polling until the predicate is truthy and rejects
    // on timeout, which makes a missing dispatch fail the test.
    await vi.waitUntil(
      () =>
        wsRequests.some(
          (request) =>
            request._tag === ORCHESTRATION_WS_METHODS.dispatchCommand &&
            request.type === "thread.session.stop" &&
            request.threadId === THREAD_ID,
        ),
      { timeout: 8_000, interval: 16 },
    );
  } finally {
    // Always unmount, even when the wait above times out.
    await mounted.cleanup();
  }
});
1708+
16371709
it("keeps the new thread selected after clicking the new-thread button", async () => {
16381710
const mounted = await mountChatView({
16391711
viewport: DEFAULT_VIEWPORT,

apps/web/src/components/ChatView.tsx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3880,6 +3880,17 @@ export default function ChatView({
38803880
});
38813881
};
38823882

3883+
/**
 * Dispatches a `thread.session.stop` command for the active thread; wired to
 * the error banner's out-of-memory recovery action.
 *
 * Does nothing when the native API is unavailable, there is no active thread,
 * or remote actions are blocked. The caller invokes this fire-and-forget
 * (`void onRecoverFromOutOfMemory()`), so a failed dispatch is caught here and
 * reported through `setThreadError` instead of becoming an unhandled rejection.
 */
const onRecoverFromOutOfMemory = async () => {
  const api = readNativeApi();
  if (!api || !activeThread || isRemoteActionBlocked) return;

  // Capture the id before awaiting so the error is attributed to the thread
  // the command was issued for.
  const threadId = activeThread.id;
  try {
    await api.orchestration.dispatchCommand({
      type: "thread.session.stop",
      commandId: newCommandId(),
      threadId,
      createdAt: new Date().toISOString(),
    });
  } catch (error: unknown) {
    // NOTE(review): assumes setThreadError accepts a message string as its
    // second argument (it is called with `null` elsewhere in this component).
    setThreadError(
      threadId,
      error instanceof Error ? error.message : "Failed to reset the provider session.",
    );
  }
};
3893+
38833894
const onClearQueue = useCallback(() => {
38843895
setOptimisticUserMessages((existing) => {
38853896
for (const msg of existing) {
@@ -4955,6 +4966,7 @@ export default function ChatView({
49554966
showNotificationDetails={settings.showNotificationDetails}
49564967
includeDiagnosticsTipsInCopy={settings.includeDiagnosticsTipsInCopy}
49574968
onDismissThreadError={() => setThreadError(activeThread.id, null)}
4969+
onRecoverFromOutOfMemory={() => void onRecoverFromOutOfMemory()}
49584970
providerStatus={activeProviderStatus}
49594971
transportState={transportState}
49604972
isMobileCompanion={isMobileCompanion}

apps/web/src/components/chat/ErrorNotificationBar.test.tsx

Lines changed: 15 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ const THREAD_ERROR =
2424
function renderBar(
2525
overrides: Partial<ComponentProps<typeof ErrorNotificationBar>> = {},
2626
): ReactElement {
27-
const { onDismissThreadError, transportState, ...restOverrides } = overrides;
27+
const { onDismissThreadError, onRecoverFromOutOfMemory, transportState, ...restOverrides } =
28+
overrides;
2829
return (
2930
<ErrorNotificationBar
3031
threadError={THREAD_ERROR}
@@ -35,6 +36,7 @@ function renderBar(
3536
isMobileCompanion={false}
3637
{...restOverrides}
3738
{...(onDismissThreadError ? { onDismissThreadError } : {})}
39+
{...(onRecoverFromOutOfMemory ? { onRecoverFromOutOfMemory } : {})}
3840
{...(transportState ? { transportState } : {})}
3941
/>
4042
);
@@ -86,72 +88,28 @@ describe("ErrorNotificationBar", () => {
8688
expect(markup).toContain("Base branch &#x27;main&#x27; does not resolve to a commit yet.");
8789
});
8890

89-
it("re-shows thread errors when the message changes after dismissal", async () => {
90-
const onDismissThreadError = vi.fn();
91+
it("shows an out-of-memory recovery action when the thread error is recoverable", async () => {
92+
const onRecoverFromOutOfMemory = vi.fn();
9193
let renderer: ReactTestRenderer | null = null;
92-
9394
await act(async () => {
9495
renderer = create(
95-
<ErrorNotificationBar
96-
threadError={THREAD_ERROR}
97-
showAuthFailuresAsErrors
98-
showNotificationDetails={false}
99-
includeDiagnosticsTipsInCopy={false}
100-
providerStatus={null}
101-
isMobileCompanion={false}
102-
onDismissThreadError={onDismissThreadError}
103-
/>,
104-
);
105-
});
106-
107-
const dismissAll = renderer!.root.findByProps({ "aria-label": "Dismiss notifications" });
108-
await act(async () => {
109-
dismissAll.props.onClick();
110-
});
111-
112-
expect(renderer!.toJSON()).toBeNull();
113-
114-
await act(async () => {
115-
renderer!.update(
116-
<ErrorNotificationBar
117-
threadError="Codex CLI is not authenticated. Run `codex login` and try again."
118-
showAuthFailuresAsErrors
119-
showNotificationDetails={false}
120-
includeDiagnosticsTipsInCopy={false}
121-
providerStatus={null}
122-
isMobileCompanion={false}
123-
onDismissThreadError={onDismissThreadError}
124-
/>,
96+
renderBar({
97+
threadError:
98+
"FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory",
99+
onRecoverFromOutOfMemory,
100+
}),
125101
);
126102
});
127103

128-
expect(renderer!.toJSON()).not.toBeNull();
129-
expect(renderer!.root.findByProps({ "aria-label": "Show 1 notification" })).toBeTruthy();
130-
});
131-
132-
it("does not hide non-dismissible provider notifications via dismiss all", async () => {
133-
let renderer: ReactTestRenderer | null = null;
134-
135-
await act(async () => {
136-
renderer = create(
137-
<ErrorNotificationBar
138-
threadError={null}
139-
showAuthFailuresAsErrors
140-
showNotificationDetails={false}
141-
includeDiagnosticsTipsInCopy={false}
142-
providerStatus={makeProviderStatus()}
143-
isMobileCompanion={false}
144-
/>,
145-
);
104+
const root = renderer!.root;
105+
const recoverButton = root.findByProps({
106+
"aria-label": "Reset session after out-of-memory failure",
146107
});
147108

148-
const dismissAll = renderer!.root.findByProps({ "aria-label": "Dismiss notifications" });
149109
await act(async () => {
150-
dismissAll.props.onClick();
110+
recoverButton.props.onClick();
151111
});
152112

153-
expect(renderer!.toJSON()).not.toBeNull();
154-
expect(renderer!.root.findByProps({ "aria-label": "Show 1 notification" })).toBeTruthy();
155-
expect(JSON.stringify(renderer!.toJSON())).toContain("OpenAI (Codex CLI) needs verification");
113+
expect(onRecoverFromOutOfMemory).toHaveBeenCalledTimes(1);
156114
});
157115
});

apps/web/src/components/chat/ErrorNotificationBar.tsx

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
buildThreadErrorDiagnosticsCopy,
1414
humanizeThreadError,
1515
isAuthenticationThreadError,
16+
isOutOfMemoryThreadError,
1617
} from "./threadError";
1718
import {
1819
getProviderStatusHeading,
@@ -33,6 +34,8 @@ interface ErrorNotificationBarProps {
3334
includeDiagnosticsTipsInCopy?: boolean;
3435
/** Dismiss the thread error */
3536
onDismissThreadError?: () => void;
37+
/** Reset a provider session after an OOM failure */
38+
onRecoverFromOutOfMemory?: () => void;
3639
/** Provider health status */
3740
providerStatus: ServerProviderStatus | null;
3841
/** Companion transport state (only relevant for mobile companion) */
@@ -49,6 +52,9 @@ interface NotificationItem {
4952
description: string;
5053
detailsText?: string | null;
5154
diagnosticsCopyText?: string | null;
55+
actionLabel?: string;
56+
actionAriaLabel?: string;
57+
onAction?: () => void;
5258
severity: "error" | "warning" | "info";
5359
dismissible: boolean;
5460
onDismiss?: () => void;
@@ -64,6 +70,7 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
6470
showNotificationDetails = false,
6571
includeDiagnosticsTipsInCopy = false,
6672
onDismissThreadError,
73+
onRecoverFromOutOfMemory,
6774
providerStatus,
6875
transportState,
6976
isMobileCompanion,
@@ -133,6 +140,8 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
133140
if (threadError) {
134141
if (showAuthFailuresAsErrors || !isAuthenticationThreadError(threadError)) {
135142
const presentation = humanizeThreadError(threadError);
143+
const showOutOfMemoryRecovery =
144+
isOutOfMemoryThreadError(threadError) && onRecoverFromOutOfMemory !== undefined;
136145
items.push({
137146
id: buildThreadErrorNotificationId(threadError),
138147
kind: "thread-error",
@@ -143,6 +152,13 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
143152
diagnosticsCopyText: buildThreadErrorDiagnosticsCopy(threadError, {
144153
includeTips: includeDiagnosticsTipsInCopy,
145154
}),
155+
...(showOutOfMemoryRecovery
156+
? {
157+
actionLabel: "Reset session",
158+
actionAriaLabel: "Reset session after out-of-memory failure",
159+
onAction: onRecoverFromOutOfMemory,
160+
}
161+
: {}),
146162
severity: "error",
147163
dismissible: !!onDismissThreadError,
148164
...(onDismissThreadError ? { onDismiss: onDismissThreadError } : {}),
@@ -156,6 +172,7 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
156172
showAuthFailuresAsErrors,
157173
includeDiagnosticsTipsInCopy,
158174
onDismissThreadError,
175+
onRecoverFromOutOfMemory,
159176
providerStatus,
160177
transportState,
161178
isMobileCompanion,
@@ -221,6 +238,9 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
221238
if (visibleNotifications.length === 0) return null;
222239

223240
const primary = visibleNotifications[0]!;
241+
const actionNotification = visibleNotifications.find(
242+
(notification) => notification.onAction && notification.actionLabel,
243+
);
224244
const PrimaryIcon = primary.icon;
225245
const count = visibleNotifications.length;
226246
const countLabel = count === 1 ? "1 notification" : `${count} notifications`;
@@ -261,6 +281,18 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
261281
</span>
262282

263283
<div className="flex shrink-0 items-center gap-1">
284+
{actionNotification?.onAction && actionNotification.actionLabel ? (
285+
<Button
286+
type="button"
287+
variant="outline"
288+
size="xs"
289+
aria-label={actionNotification.actionAriaLabel ?? actionNotification.actionLabel}
290+
className="min-w-0 px-2 text-[10px] font-medium"
291+
onClick={actionNotification.onAction}
292+
>
293+
{actionNotification.actionLabel}
294+
</Button>
295+
) : null}
264296
<Button
265297
type="button"
266298
variant="outline"
@@ -313,6 +345,18 @@ export const ErrorNotificationBar = memo(function ErrorNotificationBar({
313345
) : null}
314346
</div>
315347
<div className="mt-0.5 flex shrink-0 items-center gap-1">
348+
{notif.onAction && notif.actionLabel ? (
349+
<Button
350+
type="button"
351+
variant="outline"
352+
size="xs"
353+
aria-label={notif.actionAriaLabel ?? notif.actionLabel}
354+
className="h-6 px-2 text-[10px]"
355+
onClick={notif.onAction}
356+
>
357+
{notif.actionLabel}
358+
</Button>
359+
) : null}
316360
{notif.kind === "thread-error" && notif.diagnosticsCopyText ? (
317361
<MessageCopyButton
318362
text={notif.diagnosticsCopyText}

apps/web/src/components/chat/threadError.test.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
33
import {
44
buildThreadErrorDiagnosticsCopy,
55
humanizeThreadError,
6+
isOutOfMemoryThreadError,
67
isAuthenticationThreadError,
78
} from "./threadError";
89

@@ -57,6 +58,18 @@ describe("humanizeThreadError", () => {
5758
expect(isAuthenticationThreadError("Provider crashed while starting.")).toBe(false);
5859
});
5960

61+
// Checks that isOutOfMemoryThreadError returns true for the two memory-failure
// messages below and false for an unrelated provider crash message.
it("detects out-of-memory failures", () => {
  const outOfMemoryMessages = [
    "Provider crashed: FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory",
    "Process exited: memory limit exceeded while streaming turn",
  ];

  for (const message of outOfMemoryMessages) {
    expect(isOutOfMemoryThreadError(message)).toBe(true);
  }

  const unrelatedFailure = "Provider crashed while starting.";
  expect(isOutOfMemoryThreadError(unrelatedFailure)).toBe(false);
});
72+
6073
it("builds redacted diagnostics copy without optional tips by default", () => {
6174
expect(
6275
buildThreadErrorDiagnosticsCopy(

0 commit comments

Comments
 (0)