From 19d7450f33f9f9fa443838975a97023180a0abb8 Mon Sep 17 00:00:00 2001 From: Hubert Zub Date: Tue, 5 May 2026 14:35:04 +0200 Subject: [PATCH 1/3] feat(appkit): supervisor api adapter --- apps/dev-playground/server/index.ts | 36 +- docs/docs/plugins/agents.md | 75 ++ packages/appkit/src/agents/supervisor-api.ts | 577 +++++++++++++++ .../src/agents/tests/supervisor-api.test.ts | 662 ++++++++++++++++++ packages/appkit/src/beta.ts | 10 + .../appkit/src/connectors/serving/client.ts | 63 +- packages/appkit/src/stream/index.ts | 1 + packages/appkit/src/stream/sse-reader.ts | 114 +++ .../src/stream/tests/sse-reader.test.ts | 182 +++++ 9 files changed, 1704 insertions(+), 16 deletions(-) create mode 100644 packages/appkit/src/agents/supervisor-api.ts create mode 100644 packages/appkit/src/agents/tests/supervisor-api.test.ts create mode 100644 packages/appkit/src/stream/sse-reader.ts create mode 100644 packages/appkit/src/stream/tests/sse-reader.test.ts diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts index ecbd18e78..67187dcbe 100644 --- a/apps/dev-playground/server/index.ts +++ b/apps/dev-playground/server/index.ts @@ -11,7 +11,12 @@ import { serving, WRITE_ACTIONS, } from "@databricks/appkit"; -import { agents, createAgent, tool } from "@databricks/appkit/beta"; +import { + agents, + createAgent, + fromSupervisorApi, + tool, +} from "@databricks/appkit/beta"; import { WorkspaceClient } from "@databricks/sdk-experimental"; import { z } from "zod"; import { lakebaseExamples } from "./lakebase-examples-plugin"; @@ -68,6 +73,33 @@ const helper = createAgent({ }, }); +// Supervisor API demo agent. Tools are configured on the adapter (the SA +// endpoint executes them server-side), not on the createAgent definition. +// Uncomment a `supervisorTools.*` entry (and import 'supervisorTools' from +// '@databricks/appkit/beta') to give the model real powers. 
+// +// `fromSupervisorApi` is async; its promise is passed straight to `model:` +// (createAgent accepts adapter promises), so a misconfigured workspace +// (missing host, bad credentials) surfaces when that promise settles, not +// at module init. Prefix the call with `await` (ESM) to fail fast here. +const supervisor = createAgent({ + instructions: + "You are an assistant powered by the Databricks Supervisor API.", + model: fromSupervisorApi({ + model: "databricks-claude-sonnet-4-5", + tools: [ + // supervisorTools.genieSpace( + // "01ABCDEF12345678", + // "NYC taxi trip records and zones", + // ), + // supervisorTools.ucFunction( + // "main.default.add", + // "Adds two integers and returns the sum.", + // ), + ], + }), +}); + /* * Smart-Dashboard agents. * @@ -385,7 +417,7 @@ createApp({ }), serving(), agents({ - agents: { helper, sql_analyst, dashboard_pilot }, + agents: { helper, sql_analyst, dashboard_pilot, supervisor }, // `query` (markdown dispatcher) + `sql_analyst` + `dashboard_pilot` // wire the /smart-dashboard route. `insights` and `anomaly` are // ephemeral markdown agents auto-fired by the route's AgentSidebar. diff --git a/docs/docs/plugins/agents.md b/docs/docs/plugins/agents.md index 0ba2ab301..c228551e2 100644 --- a/docs/docs/plugins/agents.md +++ b/docs/docs/plugins/agents.md @@ -16,6 +16,8 @@ This page covers the full lifecycle. For the hand-written primitives (`tool()`, The agents plugin drives the LLM over Server-Sent Events. Foundation Model APIs (Claude, Llama, GPT, etc.) and other chat-style endpoints support streaming and work out of the box. Custom model endpoints that return a single JSON response (e.g. typical `sklearn` or MLflow `pyfunc` deployments) do **not** stream — pointing an agent at one will fail with "Response body is null — streaming not supported" on the first turn. 
If you list a serving endpoint in `apps init`, pick one whose model implements the chat-completions streaming protocol; the agents plugin reads its name from `DATABRICKS_SERVING_ENDPOINT_NAME` whenever an agent doesn't pin `model:` itself. For the non-streaming path against a custom endpoint, use the `serving` plugin's `/invoke` route with `useServingInvoke` instead. + +Or skip serving-endpoint setup entirely with the managed [Supervisor API adapter](#managed-agents-the-supervisor-api-adapter) (beta). ::: ## Install @@ -217,6 +219,79 @@ const result = await runAgent(classifier, { Hosted tools (MCP) are still `agents()`-only since they require the live MCP client. Plugin tool dispatch in standalone mode runs as the service principal (no OBO) and **bypasses the agents-plugin approval gate** — treat standalone runAgent as a trusted-prompt environment (CI, batch eval, internal scripts), not as an exposed user-facing surface. +## Managed agents: the Supervisor API adapter + +`fromSupervisorApi` (beta) is the zero-config way to run an agent: instead of provisioning and pointing at a model-serving endpoint, you run the agentic loop in the Databricks workspace by targeting the AI Gateway Responses API (`/ai-gateway/mlflow/v1/responses`), which runs the LLM — and any hosted tools — as a managed service on Databricks. No `DATABRICKS_SERVING_ENDPOINT_NAME`, no stream-capability check, no JS tool plumbing for the common cases. 
+ +The minimal agent is one extra line versus a markdown agent: + +```ts +import { createApp, createAgent } from "@databricks/appkit"; +import { agents, fromSupervisorApi } from "@databricks/appkit/beta"; + +await createApp({ + plugins: [ + agents({ + agents: { + assistant: createAgent({ + instructions: "You are a helpful assistant.", + model: fromSupervisorApi({ model: "databricks-claude-sonnet-4-5" }), + }), + }, + }), + ], +}); +``` + +`createAgent({ model })` already accepts adapters and adapter promises in addition to the model-name string used in earlier examples, so you can drop the factory result straight in. `fromSupervisorApi` resolves credentials through the SDK chain (`DATABRICKS_HOST`, OAuth, PAT, …); pass `workspaceClient` to reuse an existing client. + +### Hosted tools + +Expose Genie spaces, Unity Catalog functions/connections, Knowledge Assistants, or other AppKit apps to the model by listing them on the adapter — execution stays server-side, you write no tool code: + +```ts +import { fromSupervisorApi, supervisorTools } from "@databricks/appkit/beta"; + +const model = fromSupervisorApi({ + model: "databricks-claude-sonnet-4-5", + tools: [ + supervisorTools.genieSpace( + "01ABCDEF12345678", + "NYC taxi trip records and zones", + ), + supervisorTools.ucFunction( + "main.default.add", + "Adds two integers and returns the sum.", + ), + ], +}); +``` + +`description` is **required and non-empty** — the LLM uses it to route between tools, so two Genie spaces both labelled "Genie space" will be indistinguishable. 
+ +| Factory | Tool kind | Identifier | +|---|---|---| +| `supervisorTools.genieSpace(id, description)` | Genie space | space id | +| `supervisorTools.ucFunction(name, description)` | Unity Catalog function | three-part name | +| `supervisorTools.knowledgeAssistant(id, description)` | Knowledge Assistant | assistant id | +| `supervisorTools.app(name, description)` | Databricks App | app name | +| `supervisorTools.ucConnection(name, description)` | UC connection | connection name | + +### What does *not* apply to Supervisor-API agents + +The managed runtime owns its own tool execution, so the adapter intentionally **ignores the agents-plugin tool index**. For any agent whose `model:` is a Supervisor adapter: + +- Tools wired via markdown `tools:` or the `tools(plugins)` function form are not exposed to the model — declare hosted tools via `fromSupervisorApi({ tools: […] })` instead. +- The **human-in-the-loop approval gate** does not fire (tool calls never enter the Node process; `effect: "destructive"` annotations on plugin tools are irrelevant here). +- `limits.maxToolCalls` is not enforced (the managed runtime accounts for its own calls). +- Per-call **OBO** does not apply to hosted tools; they run with the credentials the managed runtime uses for the target resource. + +Standard-adapter agents and Supervisor-API agents can coexist in the same `agents({ agents: { … } })` map and can be composed as sub-agents (Level 4) — only the agent whose `model:` points at a Supervisor adapter is exempt from the items above. + +:::note Recovery path for non-streaming tool turns +Some hosted tool kinds return their final assistant text without incremental `output_text.delta` events. The adapter recovers by emitting the text from the message's `response.output_item.done` event or, failing that, from the `response.output[]` array carried by the `response.completed` event, so the turn is not silently empty. Set `DEBUG=appkit:agents:supervisor-api` to log when the `response.completed` recovery engages; if a turn still ends with no text, the adapter logs a warning (no `DEBUG` needed) that includes the per-turn event-type histogram. 
+::: + ## Configuration reference ```ts diff --git a/packages/appkit/src/agents/supervisor-api.ts b/packages/appkit/src/agents/supervisor-api.ts new file mode 100644 index 000000000..228eb8be9 --- /dev/null +++ b/packages/appkit/src/agents/supervisor-api.ts @@ -0,0 +1,577 @@ +import type { + AgentAdapter, + AgentEvent, + AgentInput, + AgentRunContext, + Message, + ResponseStreamEvent, +} from "shared"; +import { type ApiClientLike, streamPath } from "../connectors/serving/client"; +import { createLogger } from "../logging/logger"; +import { readSseEvents } from "../stream"; + +const logger = createLogger("agents:supervisor-api"); + +/** + * Transport shim: given a request body, returns the raw SSE byte stream from + * the Supervisor API endpoint. Injected at construction time so callers can + * swap in the workspace SDK (the {@link fromSupervisorApi} factory), a bare + * `fetch` (a reverse proxy / mock), or a test fake. Mirrors `StreamBody` in + * `agents/databricks.ts` so both adapters share one transport surface. + */ +type StreamBody = ( + body: Record, + signal?: AbortSignal, +) => Promise>; + +/** + * Structural shape of a Databricks SDK client used by {@link fromSupervisorApi}. + * Only what we need: `apiClient.request` for streaming and + * `config.ensureResolved` to materialise the host/credentials. + */ +interface WorkspaceClientLike extends ApiClientLike { + config: { ensureResolved(): Promise }; +} + +// --------------------------------------------------------------------------- +// Supervisor API tool surface (wire format) +// --------------------------------------------------------------------------- + +/** + * Tools supported by the Databricks AI Gateway Responses API. The shapes match + * the wire format the endpoint expects, so the adapter passes the array + * straight into the request body. + * + * Prefer the {@link supervisorTools} factories — they fill in the + * SA-validation-bug workaround for `description` (must be non-empty). 
+ */ +export type SupervisorTool = + | { type: "genie_space"; genie_space: { id: string; description: string } } + | { type: "uc_function"; uc_function: { name: string; description: string } } + | { + type: "knowledge_assistant"; + knowledge_assistant: { + knowledge_assistant_id: string; + description: string; + }; + } + | { type: "app"; app: { name: string; description: string } } + | { + type: "uc_connection"; + uc_connection: { name: string; description: string }; + }; + +/** + * Concise factories for declaring Supervisor API tools. + * + * `description` is required: SA's protobuf validation rejects `null`/`""`, + * AND the LLM running on SA reads this string to decide when to route to + * the tool. Two genie spaces both labelled "Genie space" give the model + * nothing to discriminate on, so callers always own the routing hint. + * + * @example + * ```ts + * fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * tools: [ + * supervisorTools.genieSpace( + * "01ABCDEF12345678", + * "NYC taxi trip records and zones", + * ), + * supervisorTools.ucFunction( + * "main.default.add", + * "Adds two integers and returns the sum.", + * ), + * ], + * }); + * ``` + */ +export const supervisorTools = { + genieSpace: (id: string, description: string): SupervisorTool => ({ + type: "genie_space", + genie_space: { id, description }, + }), + ucFunction: (name: string, description: string): SupervisorTool => ({ + type: "uc_function", + uc_function: { name, description }, + }), + knowledgeAssistant: ( + knowledgeAssistantId: string, + description: string, + ): SupervisorTool => ({ + type: "knowledge_assistant", + knowledge_assistant: { + knowledge_assistant_id: knowledgeAssistantId, + description, + }, + }), + app: (name: string, description: string): SupervisorTool => ({ + type: "app", + app: { name, description }, + }), + ucConnection: (name: string, description: string): SupervisorTool => ({ + type: "uc_connection", + uc_connection: { name, description }, + }), +}; + 
+// --------------------------------------------------------------------------- +// Adapter +// --------------------------------------------------------------------------- + +export interface SupervisorApiAdapterOptions { + /** + * Model identifier to pass in the request body + * (e.g. "databricks-claude-sonnet-4"). + */ + model: string; + /** + * Hosted tools the SA endpoint should expose to the model. Use the + * {@link supervisorTools} factories for the most common shapes. + */ + tools?: SupervisorTool[]; + /** + * A WorkspaceClient (or structural equivalent) used for host resolution + * and per-request authentication. When omitted, a `WorkspaceClient({})` + * is created internally using the default SDK credential chain + * (`DATABRICKS_HOST`, OAuth, PAT, etc.). + */ + workspaceClient?: WorkspaceClientLike; +} + +export interface SupervisorApiAdapterCtorOptions { + streamBody: StreamBody; + model: string; + tools?: SupervisorTool[]; +} + +/** + * Adapter that calls the Databricks AI Gateway Responses API + * (`/ai-gateway/mlflow/v1/responses`). + * + * Streams SSE events in the OpenAI Responses API wire format and maps them + * to the AppKit `AgentEvent` protocol. Tool execution is handled + * server-side, so the adapter ignores the agents-plugin tool index. + * + * Authentication is handled via the Databricks SDK credential chain — the + * same mechanism used by `DatabricksAdapter.fromModelServing`. The transport + * is injected via {@link SupervisorApiAdapterCtorOptions.streamBody}; the + * {@link fromSupervisorApi} factory wires it through the SDK's + * `apiClient.request({ raw: true })`. + * + * Set `DEBUG=appkit:agents:supervisor-api` to log the outbound request + * shape (model, instructions length, input shape, tool count) and to be + * notified when the recovery path engages (no incremental deltas, text + * pulled from `response.completed.output[]`). 
The no-delta warning includes + * a per-turn event-type histogram and the SA-reported status/error/ + * incomplete_details, so it's already actionable without DEBUG. + * + * @example + * ```ts + * import { createApp, createAgent, agents } from "@databricks/appkit"; + * import { + * fromSupervisorApi, + * supervisorTools, + * } from "@databricks/appkit/agents/supervisor-api"; + * + * const adapter = await fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * tools: [ + * supervisorTools.genieSpace( + * "01ABCDEF12345678", + * "NYC taxi trip records and zones", + * ), + * ], + * }); + * + * await createApp({ + * plugins: [ + * agents({ + * agents: { + * assistant: createAgent({ + * instructions: "You are a helpful assistant.", + * model: adapter, + * }), + * }, + * }), + * ], + * }); + * ``` + */ +export class SupervisorApiAdapter implements AgentAdapter { + private streamBody: StreamBody; + private model: string; + private tools: SupervisorTool[]; + + constructor(options: SupervisorApiAdapterCtorOptions) { + this.streamBody = options.streamBody; + this.model = options.model; + this.tools = options.tools ?? []; + } + + async *run( + input: AgentInput, + context: AgentRunContext, + ): AsyncGenerator { + if (context.signal?.aborted) return; + + yield { type: "status", status: "running" }; + + const { instructions, input: payloadInput } = this.buildInput( + input.messages, + ); + yield* this.streamResponse(instructions, payloadInput, context.signal); + } + + private async *streamResponse( + instructions: string | undefined, + input: ResponseInput, + signal?: AbortSignal, + ): AsyncGenerator { + const body: Record = { + model: this.model, + input, + stream: true, + }; + if (instructions) { + body.instructions = instructions; + } + // SA's protobuf validation rejects `tools: []` and `tools: null`. Only + // include the field when at least one tool is configured. 
+ if (this.tools.length > 0) { + body.tools = this.tools; + } + + logger.debug( + "model=%s instructionsLen=%d inputType=%s tools=%d", + this.model, + instructions?.length ?? 0, + typeof input === "string" ? "string" : `array[${input.length}]`, + this.tools.length, + ); + + let stream: ReadableStream; + try { + stream = await this.streamBody(body, signal); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + logger.warn("Supervisor API request failed: %s", message); + yield { + type: "status", + status: "error", + error: `Supervisor API error: ${message}`, + }; + return; + } + + let receivedAnyDelta = false; + // Tracks `item_id`s we've already streamed text deltas for. Used by + // `mapEvent` to fall back to the final item text on `output_item.done` + // only when no incremental deltas streamed for that item — avoids + // double-emitting text when SA does both delta and done. + const streamedItemIds = new Set(); + // Histogram of received event types — surfaced in the no-delta warning + // so it's actionable without re-running with DEBUG. + const eventCounts = new Map(); + // Set to true once we've yielded a terminal `{status:"error"}` event so + // the recovery / completion / no-delta-warning blocks below all bail + // out — the consumer's already seen the terminal status, anything + // further would contradict the protocol's terminal-event semantics. + let terminated = false; + // Diagnostic snapshot of the last `response.completed` event. SA stuffs + // the final assistant message into `response.output[]` even when it + // didn't emit any deltas (e.g. when a tool failed or the model produced + // nothing). Keeping it lets us recover the text and surface useful + // errors instead of a silent empty turn. 
+ let lastCompleted: + | { + status?: string; + output?: Array<{ + type?: string; + content?: Array<{ type?: string; text?: string }>; + }>; + error?: unknown; + incomplete_details?: unknown; + } + | undefined; + + for await (const { event, data } of readSseEvents(stream, signal)) { + if (data === "[DONE]") continue; + + let parsed: Record; + try { + parsed = JSON.parse(data); + } catch (err) { + logger.debug( + "Failed to parse SSE data line: %s (%O)", + data.slice(0, 200), + err, + ); + continue; + } + + const eventType = event || (parsed.type as string) || ""; + eventCounts.set(eventType, (eventCounts.get(eventType) ?? 0) + 1); + + // `response.completed` is held back until after the loop so we can + // synthesise a `message_delta` from `response.output[]` when the + // stream produced no incremental deltas (intermittent SA behaviour). + // Emitting `complete` first would let UIs finalise the turn before the + // recovered text arrives. + if (eventType === "response.completed") { + lastCompleted = parsed.response as typeof lastCompleted; + continue; + } + + const out = mapEvent(eventType, parsed, streamedItemIds); + if (out) { + if (out.type === "message_delta") receivedAnyDelta = true; + yield out; + if (out.type === "status" && out.status === "error") { + terminated = true; + break; + } + } + } + + if (signal?.aborted) return; + + if (eventCounts.size === 0) { + logger.warn( + "Supervisor API stream closed without emitting any SSE events.", + ); + return; + } + + if (terminated) return; + + // Recovery path: no deltas streamed but SA finished — pull the assistant + // text out of `response.completed.response.output[]`. 
+ if (!receivedAnyDelta) { + const recovered = extractTextFromCompletedResponse(lastCompleted); + if (recovered) { + logger.debug( + "Recovered %d chars from response.completed.output[]", + recovered.length, + ); + yield { type: "message_delta", content: recovered }; + receivedAnyDelta = true; + } + } + + if (eventCounts.has("response.completed")) { + yield { type: "status", status: "complete" }; + } + + if (!receivedAnyDelta) { + const histogram = [...eventCounts.entries()] + .map(([t, n]) => `${t}=${n}`) + .join(", "); + const completedError = lastCompleted?.error + ? JSON.stringify(lastCompleted.error) + : undefined; + const completedIncomplete = lastCompleted?.incomplete_details + ? JSON.stringify(lastCompleted.incomplete_details) + : undefined; + logger.warn( + "Supervisor API stream completed without any output_text deltas. " + + "events={%s} completed.status=%s completed.error=%s completed.incomplete=%s", + histogram, + lastCompleted?.status ?? "", + completedError ?? "", + completedIncomplete ?? "", + ); + } + } + + /** + * Splits the agent's message list into a Responses-API payload. System + * messages are concatenated (in order) into the top-level `instructions` + * field; user/assistant turns become `input` (as a plain string for the + * common single-user-turn case, otherwise as `{role,content}[]`). Tool-role + * messages are skipped — SA owns its own tool history server-side, so + * re-feeding our tool-result records would only confuse it. + */ + private buildInput(messages: Message[]): { + instructions: string | undefined; + input: ResponseInput; + } { + const instructionsParts: string[] = []; + const turns: Array<{ + role: "user" | "assistant" | "system"; + content: string; + }> = []; + + for (const m of messages) { + if (m.role === "system") instructionsParts.push(m.content); + else if (m.role !== "tool") + turns.push({ role: m.role, content: m.content }); + } + + const instructions = instructionsParts.length + ? 
instructionsParts.join("\n\n") + : undefined; + + if (turns.length === 1 && turns[0].role === "user") { + return { instructions, input: turns[0].content }; + } + return { instructions, input: turns }; + } +} + +type ResponseInput = + | string + | Array<{ role: "user" | "assistant" | "system"; content: string }>; + +/** + * Pulls the final assistant text out of the `response` payload attached to a + * `response.completed` event. SA always materialises the full response there, + * so this is our last-resort recovery path when the stream produced neither + * `output_text.delta` nor an actionable `output_item.done` (observed + * intermittently with tool-enabled SA agents). + */ +function extractTextFromCompletedResponse( + response: + | { + output?: Array<{ + type?: string; + content?: Array<{ type?: string; text?: string }>; + }>; + } + | undefined, +): string { + if (!response?.output) return ""; + let text = ""; + for (const item of response.output) { + if (item?.type !== "message" || !Array.isArray(item.content)) continue; + for (const part of item.content) { + if (part?.type === "output_text" && typeof part.text === "string") { + text += part.text; + } + } + } + return text; +} + +function mapEvent( + eventType: string, + data: Record, + streamedItemIds: Set, +): AgentEvent | null { + // The cast restricts the switch domain to the closed wire-event union + // exported by `shared`, so typos in case clauses (e.g. `response.faled`) + // become compile errors instead of silent string mismatches. Unknown + // event names still fall through to `default` at runtime — we don't + // require exhaustive matching since SA emits more lifecycle events + // than we care to map. + switch (eventType as ResponseStreamEvent["type"]) { + case "response.output_text.delta": { + const itemId = data.item_id as string | undefined; + if (itemId) streamedItemIds.add(itemId); + return { type: "message_delta", content: (data.delta as string) ?? 
"" }; + } + + // `response.completed` is intentionally absent: `streamResponse` holds + // it back so it can synthesise a delta from `response.output[]` when + // the stream produced none, then emits `{status:"complete"}` itself. + + case "response.failed": + return { type: "status", status: "error", error: "Response failed" }; + + case "error": { + const errMsg = + typeof data.error === "string" + ? data.error + : JSON.stringify(data.error ?? "Unknown error"); + return { type: "status", status: "error", error: errMsg }; + } + + case "response.output_item.done": { + const item = data.item as + | { + id?: string; + type?: string; + content?: Array<{ text?: string; type?: string }>; + } + | undefined; + + if (item?.id === "error") { + const errText = item.content?.[0]?.text ?? "Unknown tool error from SA"; + return { type: "status", status: "error", error: errText }; + } + + // Fallback: when SA produces a tool-driven response (e.g. Genie space), + // it often omits `response.output_text.delta` events and only emits the + // final assistant message via `output_item.done`. Surface that text as + // a single delta so the UI sees the answer. + if ( + item?.type === "message" && + item.id && + !streamedItemIds.has(item.id) + ) { + const text = (item.content ?? []) + .map((c) => (c.type === "output_text" ? (c.text ?? "") : "")) + .join(""); + if (text.length > 0) { + streamedItemIds.add(item.id); + return { type: "message_delta", content: text }; + } + } + return null; + } + + // All other event types are intentionally ignored. Notable lifecycle + // events we drop on the floor: `response.created`, `response.in_progress`, + // `response.output_text.done`, `response.output_item.added`, + // `response.content_part.added`, `response.content_part.done`. + default: + return null; + } +} + +/** + * Creates an {@link AgentAdapter} backed by the Databricks AI Gateway + * Responses API (`/ai-gateway/mlflow/v1/responses`). 
+ * + * Uses the SDK's default credential chain for auth (reads DATABRICKS_HOST, + * DATABRICKS_TOKEN, OAuth config, etc.). + * + * @example + * ```ts + * import { + * fromSupervisorApi, + * supervisorTools, + * } from "@databricks/appkit/agents/supervisor-api"; + * + * const adapter = await fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * tools: [ + * supervisorTools.genieSpace( + * "01ABCDEF12345678", + * "NYC taxi trip records and zones", + * ), + * ], + * }); + * ``` + */ +export async function fromSupervisorApi( + options: SupervisorApiAdapterOptions, +): Promise { + let client = options.workspaceClient; + if (!client) { + const sdk = await import("@databricks/sdk-experimental"); + client = new sdk.WorkspaceClient({}) as unknown as WorkspaceClientLike; + } + + await client.config.ensureResolved(); + + // Capture the resolved client so the closure doesn't depend on the outer + // `let` binding being reassigned later. + const resolved = client; + return new SupervisorApiAdapter({ + streamBody: (body, signal) => + streamPath(resolved, "/ai-gateway/mlflow/v1/responses", body, signal), + model: options.model, + tools: options.tools ?? 
[], + }); +} diff --git a/packages/appkit/src/agents/tests/supervisor-api.test.ts b/packages/appkit/src/agents/tests/supervisor-api.test.ts new file mode 100644 index 000000000..9606b1c6a --- /dev/null +++ b/packages/appkit/src/agents/tests/supervisor-api.test.ts @@ -0,0 +1,662 @@ +import type { AgentEvent, AgentInput } from "shared"; +import { afterEach, describe, expect, test, vi } from "vitest"; +import { + fromSupervisorApi, + SupervisorApiAdapter, + type SupervisorTool, + supervisorTools, +} from "../supervisor-api"; + +function createReadableStream(chunks: string[]): ReadableStream { + const encoder = new TextEncoder(); + let i = 0; + return new ReadableStream({ + pull(controller) { + if (i < chunks.length) { + controller.enqueue(encoder.encode(chunks[i])); + i++; + } else { + controller.close(); + } + }, + }); +} + +function sseEvent(eventName: string, data: Record): string { + return `event: ${eventName}\ndata: ${JSON.stringify(data)}\n\n`; +} + +/** + * Captures the body the adapter posts and returns a fake stream of SSE + * chunks. Mirrors the `streamBody` test pattern used by `DatabricksAdapter`. 
+ */ +function makeStreamBody(chunks: string[]): { + streamBody: ReturnType; + lastBody: () => Record | undefined; +} { + let captured: Record | undefined; + const streamBody = vi.fn(async (body: Record) => { + captured = body; + return createReadableStream(chunks); + }); + return { streamBody, lastBody: () => captured }; +} + +function createInput(): AgentInput { + return { + messages: [ + { id: "1", role: "user", content: "Hello", createdAt: new Date() }, + ], + tools: [], + threadId: "thread-1", + }; +} + +async function collect( + gen: AsyncGenerator, +): Promise { + const out: AgentEvent[] = []; + for await (const e of gen) out.push(e); + return out; +} + +describe("supervisorTools factories", () => { + test("genieSpace produces correct wire shape", () => { + expect(supervisorTools.genieSpace("space123", "NYC taxi data")).toEqual({ + type: "genie_space", + genie_space: { id: "space123", description: "NYC taxi data" }, + }); + }); + + test("ucFunction produces correct wire shape", () => { + expect( + supervisorTools.ucFunction("main.default.add", "Adds two integers."), + ).toEqual({ + type: "uc_function", + uc_function: { + name: "main.default.add", + description: "Adds two integers.", + }, + }); + }); + + test("knowledgeAssistant maps id into knowledge_assistant_id", () => { + expect( + supervisorTools.knowledgeAssistant("ka-1", "Internal docs Q&A"), + ).toEqual({ + type: "knowledge_assistant", + knowledge_assistant: { + knowledge_assistant_id: "ka-1", + description: "Internal docs Q&A", + }, + }); + }); + + test("app produces correct wire shape", () => { + expect(supervisorTools.app("my-app", "Demo Databricks app.")).toEqual({ + type: "app", + app: { name: "my-app", description: "Demo Databricks app." 
}, + }); + }); + + test("ucConnection produces correct wire shape", () => { + expect( + supervisorTools.ucConnection("my-conn", "Connection to external DB."), + ).toEqual({ + type: "uc_connection", + uc_connection: { + name: "my-conn", + description: "Connection to external DB.", + }, + }); + }); +}); + +describe("SupervisorApiAdapter", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + test("posts model, input, tools, and stream:true through streamBody", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.output_text.delta", { delta: "Hi" }), + sseEvent("response.completed", {}), + ]); + + const tools: SupervisorTool[] = [ + supervisorTools.genieSpace("g1", "Test genie space"), + ]; + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + tools, + }); + + await collect(adapter.run(createInput(), { executeTool: vi.fn() })); + + expect(streamBody).toHaveBeenCalledTimes(1); + expect(lastBody()).toMatchObject({ + model: "databricks-claude-sonnet-4", + input: "Hello", + stream: true, + tools, + }); + }); + + test("omits the tools field entirely when no tools are configured", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + await collect(adapter.run(createInput(), { executeTool: vi.fn() })); + expect(lastBody()).not.toHaveProperty("tools"); + }); + + test("hoists system messages into the top-level instructions field", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + await collect( + adapter.run( + { + messages: [ + { + id: "s", + role: "system", + content: "Be terse.", + createdAt: new Date(), + }, + { id: "u", role: "user", content: "Hi", createdAt: new 
Date() }, + ], + tools: [], + threadId: "t", + }, + { executeTool: vi.fn() }, + ), + ); + const body = lastBody(); + expect(body?.instructions).toBe("Be terse."); + expect(body?.input).toBe("Hi"); + }); + + test("omits instructions when there is no system message", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + await collect(adapter.run(createInput(), { executeTool: vi.fn() })); + expect(lastBody()).not.toHaveProperty("instructions"); + }); + + test("emits message_delta and complete on the happy path", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("response.output_text.delta", { delta: "Hello" }), + sseEvent("response.output_text.delta", { delta: " world" }), + sseEvent("response.completed", {}), + ]); + + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "Hello" }, + { type: "message_delta", content: " world" }, + { type: "status", status: "complete" }, + ]); + }); + + test("maps response.failed to a status:error event", async () => { + const { streamBody } = makeStreamBody([sseEvent("response.failed", {})]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "status", + status: "error", + error: "Response failed", + }); + }); + + test("maps top-level error events", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("error", { error: "rate limited" }), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: 
"databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "status", + status: "error", + error: "rate limited", + }); + }); + + test("maps response.output_item.done with id:'error' to status:error", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("response.output_item.done", { + item: { + id: "error", + content: [{ text: "Tool execution failed" }], + }, + }), + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "status", + status: "error", + error: "Tool execution failed", + }); + }); + + test("falls back to output_item.done text when no deltas streamed (tool-driven SA response)", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("response.output_item.added", { + item: { type: "message", id: "msg-1", role: "assistant", content: [] }, + }), + sseEvent("response.output_item.done", { + item: { + type: "message", + id: "msg-1", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: "Genie says hi." }], + }, + }), + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "Genie says hi." 
}, + { type: "status", status: "complete" }, + ]); + }); + + test("does not double-emit when both deltas and output_item.done arrive for the same item", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("response.output_text.delta", { + item_id: "msg-1", + delta: "Hello", + }), + sseEvent("response.output_text.delta", { + item_id: "msg-1", + delta: " world", + }), + sseEvent("response.output_item.done", { + item: { + type: "message", + id: "msg-1", + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: "Hello world" }], + }, + }), + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "Hello" }, + { type: "message_delta", content: " world" }, + { type: "status", status: "complete" }, + ]); + }); + + test("emits status:error when the underlying streamBody throws", async () => { + const streamBody = vi + .fn() + .mockRejectedValue(new Error("Supervisor API error (500): boom")); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "status", + status: "error", + error: "Supervisor API error: Supervisor API error (500): boom", + }); + }); + + test("short-circuits when the signal is already aborted", async () => { + const streamBody = vi.fn(); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + + const controller = new AbortController(); + controller.abort(); + + const events = await collect( + adapter.run(createInput(), { + executeTool: vi.fn(), + signal: controller.signal, + }), + ); + + 
expect(events).toEqual([]); + expect(streamBody).not.toHaveBeenCalled(); + }); + + test("multi-turn input (user + assistant + user) is sent as a structured array", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + + await collect( + adapter.run( + { + messages: [ + { id: "u1", role: "user", content: "Hi", createdAt: new Date() }, + { + id: "a", + role: "assistant", + content: "Hello!", + createdAt: new Date(), + }, + { + id: "u2", + role: "user", + content: "Tell me more", + createdAt: new Date(), + }, + ], + tools: [], + threadId: "t", + }, + { executeTool: vi.fn() }, + ), + ); + + expect(lastBody()?.input).toEqual([ + { role: "user", content: "Hi" }, + { role: "assistant", content: "Hello!" }, + { role: "user", content: "Tell me more" }, + ]); + }); + + test("drops tool-role messages from the request payload", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + await collect( + adapter.run( + { + messages: [ + { id: "u", role: "user", content: "Hi", createdAt: new Date() }, + { + id: "t1", + role: "tool", + content: "(genie result)", + createdAt: new Date(), + }, + ], + tools: [], + threadId: "t", + }, + { executeTool: vi.fn() }, + ), + ); + expect(lastBody()?.input).toBe("Hi"); + }); + + test("recovers final assistant text from response.completed.output when no deltas streamed", async () => { + // Real-world flake: SA occasionally finishes a turn with zero + // `output_text.delta` events and no `output_item.done` for the message, + // but still mirrors the full assistant text in `response.completed`. + // Without recovery the UI sees a silent empty turn. 
+ const { streamBody } = makeStreamBody([ + sseEvent("response.created", {}), + sseEvent("response.in_progress", {}), + sseEvent("response.completed", { + response: { + status: "completed", + output: [ + { + type: "message", + id: "msg-x", + role: "assistant", + content: [ + { type: "output_text", text: "Recovered " }, + { type: "output_text", text: "answer." }, + ], + }, + ], + }, + }), + ]); + + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "Recovered answer." }, + { type: "status", status: "complete" }, + ]); + }); + + test("does not recover from response.completed when deltas already streamed", async () => { + const { streamBody } = makeStreamBody([ + sseEvent("response.output_text.delta", { + item_id: "msg-x", + delta: "Hi", + }), + sseEvent("response.completed", { + response: { + status: "completed", + output: [ + { + type: "message", + id: "msg-x", + role: "assistant", + content: [{ type: "output_text", text: "Hi" }], + }, + ], + }, + }), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + const deltas = events.filter((e) => e.type === "message_delta"); + expect(deltas).toHaveLength(1); + expect(deltas[0]).toEqual({ type: "message_delta", content: "Hi" }); + }); + + test("treats response.failed as terminal: no events follow the error", async () => { + // SA may keep sending events after `response.failed` (and even a stray + // `response.completed`). The adapter must stop yielding once it has + // surfaced a terminal `status: error` so consumers don't see contradictory + // `message_delta`/`complete` events after the failure. 
+ const { streamBody } = makeStreamBody([ + sseEvent("response.failed", {}), + sseEvent("response.output_text.delta", { delta: "ignored" }), + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "status", status: "error", error: "Response failed" }, + ]); + }); + + test("does not yield complete when the consumer aborts mid-stream", async () => { + // Stream that yields one delta, then waits forever — the consumer aborts + // after the first event arrives. The adapter must NOT subsequently yield + // a synthesised `complete` from a buffered `response.completed`. + const controller = new AbortController(); + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(c) { + c.enqueue( + encoder.encode( + sseEvent("response.output_text.delta", { delta: "Hi" }), + ), + ); + }, + pull() { + return new Promise(() => { + /* never resolves until cancel() */ + }); + }, + }); + + const adapter = new SupervisorApiAdapter({ + streamBody: async () => stream, + model: "databricks-claude-sonnet-4", + }); + + const events: AgentEvent[] = []; + for await (const e of adapter.run(createInput(), { + executeTool: vi.fn(), + signal: controller.signal, + })) { + events.push(e); + if (e.type === "message_delta") controller.abort(); + } + + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "Hi" }, + ]); + }); + + test("recovers when event: and data: lines arrive in separate chunks", async () => { + const { streamBody } = makeStreamBody([ + "event: response.output_text.delta\n", + `data: ${JSON.stringify({ delta: "split" })}\n\n`, + "event: response.completed\ndata: {}\n\n", + ]); + + const adapter = new SupervisorApiAdapter({ + streamBody, + model: 
"databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "message_delta", + content: "split", + }); + expect(events).toContainEqual({ type: "status", status: "complete" }); + }); +}); + +describe("fromSupervisorApi", () => { + test("calls ensureResolved on the supplied workspace client", async () => { + const ensureResolved = vi.fn(async () => {}); + const adapter = await fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + workspaceClient: { + config: { ensureResolved }, + apiClient: { request: vi.fn() }, + }, + }); + expect(ensureResolved).toHaveBeenCalledTimes(1); + expect(adapter).toBeInstanceOf(SupervisorApiAdapter); + }); + + test("routes streaming through apiClient.request with the SA path", async () => { + const encoder = new TextEncoder(); + const contents = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sseEvent("response.completed", {}))); + controller.close(); + }, + }); + const request = vi.fn().mockResolvedValue({ contents }); + + const adapter = await fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + workspaceClient: { + config: { ensureResolved: vi.fn(async () => {}) }, + apiClient: { request }, + }, + }); + + await collect(adapter.run(createInput(), { executeTool: vi.fn() })); + + expect(request).toHaveBeenCalledTimes(1); + const [requestArgs] = request.mock.calls[0]; + expect(requestArgs.path).toBe("/ai-gateway/mlflow/v1/responses"); + expect(requestArgs.method).toBe("POST"); + expect(requestArgs.raw).toBe(true); + expect(requestArgs.payload).toMatchObject({ + model: "databricks-claude-sonnet-4", + input: "Hello", + stream: true, + }); + expect(requestArgs.payload).not.toHaveProperty("tools"); + }); +}); diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts index 3f5bba80c..7ccc77c5b 100644 --- a/packages/appkit/src/beta.ts +++ b/packages/appkit/src/beta.ts @@ -19,6 
+19,16 @@ export type { ToolProvider, } from "shared"; export { DatabricksAdapter, parseTextToolCalls } from "./agents/databricks"; +export type { + SupervisorApiAdapterCtorOptions, + SupervisorApiAdapterOptions, + SupervisorTool, +} from "./agents/supervisor-api"; +export { + fromSupervisorApi, + SupervisorApiAdapter, + supervisorTools, +} from "./agents/supervisor-api"; // Agent runtime export { createAgent } from "./core/agent/create-agent"; diff --git a/packages/appkit/src/connectors/serving/client.ts b/packages/appkit/src/connectors/serving/client.ts index 83f065e69..f75993a39 100644 --- a/packages/appkit/src/connectors/serving/client.ts +++ b/packages/appkit/src/connectors/serving/client.ts @@ -41,6 +41,20 @@ function cancellationTokenFromAbortSignal( }; } +/** + * Structural shape of a Databricks SDK client we need for the low-level + * `apiClient.request` call. Lets `streamPath` be reused by adapters that + * don't want a hard dependency on the concrete `WorkspaceClient` type. + */ +export interface ApiClientLike { + apiClient: { + request( + options: Record, + context?: unknown, + ): Promise; + }; +} + /** * Invokes a serving endpoint using the SDK's high-level query API. * Returns a typed QueryEndpointResponse. @@ -62,22 +76,23 @@ export async function invoke( } /** - * Returns the raw SSE byte stream from a serving endpoint. - * No parsing is performed — bytes are passed through as-is. + * POSTs `body` as JSON to an arbitrary workspace API path and returns the raw + * SSE byte stream. No parsing is performed — bytes are passed through as-is. + * + * Uses the SDK's low-level `apiClient.request({ raw: true })` so callers + * inherit URL resolution, the SDK credential chain (PAT/OAuth/OIDC), and + * any future retries/telemetry baked into the SDK transport. * - * Uses the SDK's low-level `apiClient.request({ raw: true })` because - * the high-level `servingEndpoints.query()` returns `Promise` - * and does not support SSE streaming. 
+ * When `signal` is provided it is bridged to the SDK's `Context` / + * `CancellationToken` so aborts cancel the outbound HTTP request. */ -export async function stream( - client: WorkspaceClient, - endpointName: string, +export async function streamPath( + client: ApiClientLike, + path: string, body: Record, signal?: AbortSignal, ): Promise> { - const { stream: _stream, ...cleanBody } = body; - - logger.debug("Streaming from endpoint %s", endpointName); + logger.debug("Streaming from path %s", path); const context = signal ? new Context({ @@ -87,17 +102,17 @@ export async function stream( const response = (await client.apiClient.request( { - path: `/serving-endpoints/${encodeURIComponent(endpointName)}/invocations`, + path, method: "POST", headers: new Headers({ "Content-Type": "application/json", Accept: "text/event-stream", }), - payload: { ...cleanBody, stream: true }, + payload: body, raw: true, }, context, - )) as { contents: ReadableStream }; + )) as { contents: ReadableStream | null }; if (!response.contents) { throw new Error("Response body is null — streaming not supported"); @@ -105,3 +120,23 @@ export async function stream( return response.contents; } + +/** + * Returns the raw SSE byte stream from a serving endpoint. Thin wrapper over + * {@link streamPath} that handles serving-specific URL encoding and forces + * `stream: true` in the payload. 
+ */ +export async function stream( + client: WorkspaceClient, + endpointName: string, + body: Record, + signal?: AbortSignal, +): Promise> { + const { stream: _stream, ...cleanBody } = body; + return streamPath( + client as unknown as ApiClientLike, + `/serving-endpoints/${encodeURIComponent(endpointName)}/invocations`, + { ...cleanBody, stream: true }, + signal, + ); +} diff --git a/packages/appkit/src/stream/index.ts b/packages/appkit/src/stream/index.ts index cc756130a..75ad8b5c4 100644 --- a/packages/appkit/src/stream/index.ts +++ b/packages/appkit/src/stream/index.ts @@ -1 +1,2 @@ +export { readSseEvents } from "./sse-reader"; export { StreamManager } from "./stream-manager"; diff --git a/packages/appkit/src/stream/sse-reader.ts b/packages/appkit/src/stream/sse-reader.ts new file mode 100644 index 000000000..091f132dc --- /dev/null +++ b/packages/appkit/src/stream/sse-reader.ts @@ -0,0 +1,114 @@ +/** + * One parsed Server-Sent Event. Field names follow the spec: + * https://html.spec.whatwg.org/multipage/server-sent-events.html + * + * The reader does not interpret `data` (no JSON parsing), so callers control + * the wire shape they expect. + */ +export interface SseEvent { + /** Value of the most recent `event:` field, or `""` for an unnamed event. */ + event: string; + /** Joined `data:` lines for the event (empty string when no data was set). */ + data: string; + /** Value of the most recent `id:` field, or `undefined` if none. */ + id?: string; +} + +/** + * Async-iterates Server-Sent Events from a UTF-8 byte stream. + * + * Block-oriented parser: events are delimited by blank lines (`\n\n` after + * CRLF normalization), so an `event:` line in chunk N pairs correctly with a + * `data:` line in chunk N+1 — no hoisted state needed. + * + * The reader passes through the sentinel string `[DONE]` as `event=""`, + * `data="[DONE]"`. Callers that care about it should match `data === "[DONE]"` + * after destructuring. 
+ * + * Terminates when the stream closes or `signal` aborts; releases the reader + * lock in either case. + */ +export async function* readSseEvents( + stream: ReadableStream, + signal?: AbortSignal, +): AsyncGenerator { + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + // Cancel the reader on abort so an in-flight `reader.read()` returns + // immediately instead of waiting for the next chunk. Without this, an + // aborted consumer would only notice between reads — fine for chatty + // streams, but unbounded for an idle/heartbeat-less upstream. + const onAbort = () => { + reader.cancel().catch(() => { + // `cancel()` rejects if the stream is already errored/closed; ignore. + }); + }; + if (signal) { + if (signal.aborted) onAbort(); + else signal.addEventListener("abort", onAbort, { once: true }); + } + + try { + while (true) { + if (signal?.aborted) break; + const { done, value } = await reader.read(); + if (done) { + const tail = parseSseBlock(buffer); + if (tail) yield tail; + break; + } + + buffer += decoder.decode(value, { stream: true }); + + const normalized = buffer.replace(/\r\n/g, "\n"); + const blocks = normalized.split("\n\n"); + // Last entry is either an incomplete block or "" (when the chunk ended + // exactly on a boundary). Either way, keep it for the next iteration. + buffer = blocks.pop() ?? 
""; + + for (const block of blocks) { + const event = parseSseBlock(block); + if (event) yield event; + } + } + } finally { + if (signal) signal.removeEventListener("abort", onAbort); + reader.releaseLock(); + } +} + +function parseSseBlock(block: string): SseEvent | null { + if (block.length === 0) return null; + const lines = block.split("\n"); + + let eventName = ""; + let id: string | undefined; + const dataLines: string[] = []; + + for (const rawLine of lines) { + const line = rawLine.replace(/\r$/, ""); + if (line === "" || line.startsWith(":")) continue; + + if (line.startsWith("event:")) { + eventName = line.slice(6).trimStart(); + } else if (line.startsWith("data:")) { + dataLines.push(line.slice(5).trimStart()); + } else if (line.startsWith("id:")) { + id = line.slice(3).trimStart(); + } + // Other fields (`retry:`, custom) are ignored by design. + } + + // Per the SSE spec, a block is only dispatched when the data buffer is + // non-empty. Blocks containing only `event:`/`id:` (or comments) do not + // surface as events. 
+ if (dataLines.length === 0) return null; + + return { + event: eventName, + data: dataLines.join("\n"), + id, + }; +} diff --git a/packages/appkit/src/stream/tests/sse-reader.test.ts b/packages/appkit/src/stream/tests/sse-reader.test.ts new file mode 100644 index 000000000..6f7176b62 --- /dev/null +++ b/packages/appkit/src/stream/tests/sse-reader.test.ts @@ -0,0 +1,182 @@ +import { describe, expect, test } from "vitest"; +import { readSseEvents, type SseEvent } from "../sse-reader"; + +function streamOf(chunks: string[]): ReadableStream { + const encoder = new TextEncoder(); + let i = 0; + return new ReadableStream({ + pull(controller) { + if (i < chunks.length) { + controller.enqueue(encoder.encode(chunks[i])); + i++; + } else { + controller.close(); + } + }, + }); +} + +async function collect( + gen: AsyncGenerator, +): Promise { + const out: SseEvent[] = []; + for await (const e of gen) out.push(e); + return out; +} + +describe("readSseEvents", () => { + test("parses a single named event with JSON data", async () => { + const events = await collect( + readSseEvents( + streamOf(['event: response.completed\ndata: {"ok":true}\n\n']), + ), + ); + expect(events).toEqual([ + { event: "response.completed", data: '{"ok":true}', id: undefined }, + ]); + }); + + test("pairs event: and data: across chunk boundaries", async () => { + const events = await collect( + readSseEvents( + streamOf([ + "event: response.output_text.delta\n", + 'data: {"delta":"split"}\n\n', + ]), + ), + ); + expect(events).toEqual([ + { + event: "response.output_text.delta", + data: '{"delta":"split"}', + id: undefined, + }, + ]); + }); + + test("ignores blank lines, comment lines, and unknown fields", async () => { + const events = await collect( + readSseEvents( + streamOf([": heartbeat\n\nretry: 1000\nevent: ping\ndata: hi\n\n"]), + ), + ); + expect(events).toEqual([{ event: "ping", data: "hi", id: undefined }]); + }); + + test("captures id: when present", async () => { + const events = await 
collect( + readSseEvents(streamOf(["id: abc-123\nevent: ping\ndata: hi\n\n"])), + ); + expect(events).toEqual([{ event: "ping", data: "hi", id: "abc-123" }]); + }); + + test("falls back to empty event name when only data: is present", async () => { + const events = await collect(readSseEvents(streamOf(["data: 1\n\n"]))); + expect(events).toEqual([{ event: "", data: "1", id: undefined }]); + }); + + test("joins multi-line data: payloads with \\n", async () => { + const events = await collect( + readSseEvents(streamOf(["data: line1\ndata: line2\n\n"])), + ); + expect(events).toEqual([ + { event: "", data: "line1\nline2", id: undefined }, + ]); + }); + + test("normalises CRLF line endings", async () => { + const events = await collect( + readSseEvents(streamOf(["event: x\r\ndata: y\r\n\r\n"])), + ); + expect(events).toEqual([{ event: "x", data: "y", id: undefined }]); + }); + + test("emits a trailing event when the stream closes without a final blank line", async () => { + const events = await collect( + readSseEvents(streamOf(["event: ping\ndata: hi"])), + ); + expect(events).toEqual([{ event: "ping", data: "hi", id: undefined }]); + }); + + test("passes through [DONE] sentinels as data", async () => { + const events = await collect(readSseEvents(streamOf(["data: [DONE]\n\n"]))); + expect(events).toEqual([{ event: "", data: "[DONE]", id: undefined }]); + }); + + test("aborts when the signal fires before the next read", async () => { + const controller = new AbortController(); + let pulls = 0; + const stream = new ReadableStream({ + pull(c) { + pulls++; + if (pulls === 1) { + c.enqueue(new TextEncoder().encode("event: a\ndata: 1\n\n")); + } else { + controller.abort(); + c.enqueue(new TextEncoder().encode("event: b\ndata: 2\n\n")); + } + }, + }); + + const out: SseEvent[] = []; + for await (const e of readSseEvents(stream, controller.signal)) { + out.push(e); + if (out.length === 1) controller.abort(); + } + expect(out.map((e) => e.event)).toEqual(["a"]); + }); + + 
test("aborts an idle reader immediately via reader.cancel()", async () => { + // Stream that sends one event then never resolves further reads — models + // an upstream that has stopped sending data. Without `reader.cancel()` + // the consumer would block forever after aborting. + const controller = new AbortController(); + let cancelled = false; + const stream = new ReadableStream({ + start(c) { + c.enqueue(new TextEncoder().encode("event: a\ndata: 1\n\n")); + }, + pull() { + return new Promise(() => { + /* never resolves */ + }); + }, + cancel() { + cancelled = true; + }, + }); + + const out: SseEvent[] = []; + const iterator = readSseEvents(stream, controller.signal); + const first = await iterator.next(); + if (!first.done) out.push(first.value); + controller.abort(); + const second = await iterator.next(); + expect(second.done).toBe(true); + expect(out.map((e) => e.event)).toEqual(["a"]); + expect(cancelled).toBe(true); + }); + + test("does not dispatch a block whose only field is id: (spec compliance)", async () => { + const events = await collect( + readSseEvents(streamOf(["id: only\n\nevent: ping\ndata: hi\n\n"])), + ); + expect(events).toEqual([{ event: "ping", data: "hi", id: undefined }]); + }); + + test("decodes a multi-byte UTF-8 character split across chunks", async () => { + const checkBytes = new TextEncoder().encode("✓"); + expect(checkBytes.length).toBe(3); + const stream = new ReadableStream({ + start(c) { + c.enqueue(new TextEncoder().encode("data: ")); + c.enqueue(checkBytes.subarray(0, 1)); + c.enqueue(checkBytes.subarray(1)); + c.enqueue(new TextEncoder().encode("\n\n")); + c.close(); + }, + }); + const events = await collect(readSseEvents(stream)); + expect(events).toEqual([{ event: "", data: "✓", id: undefined }]); + }); +}); From 6514dc03ddc368a631ada0a01911a8135ad510ac Mon Sep 17 00:00:00 2001 From: Hubert Zub Date: Fri, 22 May 2026 09:45:15 +0200 Subject: [PATCH 2/3] fix(appkit): address PR #345 review findings (section 9) MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Mario's defensive/correctness fixes from the supervisor API adapter review without touching the public API shape (sections 1-8 will land in a stacked branch). Highlights: High - Route the three SSE error-leak sites in supervisor-api.ts (streamBody catch, mapEvent "error", output_item.done with id="error") through a single emitError helper that returns a stable client-facing code (`Supervisor API error (transport|upstream_failed|upstream_tool| upstream_unknown)`) and logs the verbose detail server-side only. Addresses CWE-209 verbatim-upstream-error-text leak. Medium - Gate the terminal {status:"complete"} emission on lastCompleted.status / .error / .incomplete_details so a `response.completed` with a nested failed status no longer silently succeeds; surface as upstream_failed instead. Regression tests added. - Skip the terminal error in the streamBody catch when signal.aborted — consumer-initiated aborts now end with a clean stop, not a contradictory terminal error event. Regression test added. - Tighten the output_item.done error match: require item.type === "error" (or pair the reserved id="error" with a non-message type) so a stray assistant message with id="error" is not mis-classified. - Add maxLineChars / maxBufferChars caps to readSseEvents with 1 MiB / 8 MiB defaults; throw on overflow. Addresses CWE-770. Tests added. - Docs: add a CWE-1427 callout warning that hosted-tool `description` is a prompt-injection sink — do not derive it from untrusted input. - Redact the no-delta warning log: summariseErrorPayload extracts a short `type: message` line; full payload only via DEBUG. Addresses CWE-532. - Gate the buffer-level CRLF normalize in sse-reader on `\r` presence to skip the regex on LF-only steady state. Low - mapEvent("error") fallback no longer wraps "Unknown error" with literal JSON quotes (uses string branch). 
- Drop the misleading "we await the factory at module init" comment in dev-playground; the code never awaits. - Fix @example imports in supervisor-api.ts JSDoc to use @databricks/appkit/beta (the actual public re-export). - Replace trimStart() with single-U+0020 strip in sse-reader per the SSE spec; remove the now-dead per-line `\r$` strip after the buffer-level CRLF normalise. - Flag streamPath as @internal in connectors/serving/client.ts noting the CWE-918 SSRF risk if it ever leaks to user-controlled input. - Add JSDoc warning to workspaceClient on SupervisorApiAdapterOptions: passing a per-request OBO client would leak identity across requests (CWE-664). Signed-off-by: Hubert Zub --- apps/dev-playground/server/index.ts | 8 +- docs/docs/plugins/agents.md | 6 + packages/appkit/src/agents/supervisor-api.ts | 141 ++++++++++--- .../src/agents/tests/supervisor-api.test.ts | 185 ++++++++++++++++-- .../appkit/src/connectors/serving/client.ts | 9 + packages/appkit/src/stream/sse-reader.ts | 77 +++++++- .../src/stream/tests/sse-reader.test.ts | 53 +++++ 7 files changed, 430 insertions(+), 49 deletions(-) diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts index 67187dcbe..53f6cade8 100644 --- a/apps/dev-playground/server/index.ts +++ b/apps/dev-playground/server/index.ts @@ -78,10 +78,10 @@ const helper = createAgent({ // Uncomment a `supervisorTools.*` entry (and import 'supervisorTools' from // '@databricks/appkit/beta') to give the model real powers. // -// We `await` the factory at module init so a misconfigured workspace -// (missing host, bad credentials) fails fast with a clear error here -// instead of as an unhandled rejection. Top-level await is fine in this -// ESM module. +// `createAgent({ model })` accepts an adapter promise, so the factory's +// host/credential resolution is awaited lazily on first dispatch (via +// `resolveAdapter` in the agents plugin). 
A misconfigured workspace will +// surface at first chat request, not at module init. const supervisor = createAgent({ instructions: "You are an assistant powered by the Databricks Supervisor API.", diff --git a/docs/docs/plugins/agents.md b/docs/docs/plugins/agents.md index c228551e2..f7a9548a8 100644 --- a/docs/docs/plugins/agents.md +++ b/docs/docs/plugins/agents.md @@ -269,6 +269,12 @@ const model = fromSupervisorApi({ `description` is **required and non-empty** — the LLM uses it to route between tools, so two Genie spaces both labelled "Genie space" will be indistinguishable. +:::warning Hosted-tool descriptions are trusted application configuration (CWE-1427) +A hosted tool's `description` is read by the LLM to decide when to route to that tool. **Do not derive it from untrusted input** — user messages, request bodies, freeform fields from external systems, or any value an attacker could influence. Treat `description` (and `id`/`name`) as application-controlled, alongside the agent's `instructions`. Allowing a user-controlled string here is a prompt-injection sink: a hostile description can convince the model to route to (or away from) a tool for any future request handled by the agent. + +The same caution applies to MCP `description`s and to any other field the model reads at routing time. +::: + | Factory | Tool kind | Identifier | |---|---|---| | `supervisorTools.genieSpace(id, description)` | Genie space | space id | diff --git a/packages/appkit/src/agents/supervisor-api.ts b/packages/appkit/src/agents/supervisor-api.ts index 228eb8be9..4e7edbb83 100644 --- a/packages/appkit/src/agents/supervisor-api.ts +++ b/packages/appkit/src/agents/supervisor-api.ts @@ -12,6 +12,59 @@ import { readSseEvents } from "../stream"; const logger = createLogger("agents:supervisor-api"); +/** + * Stable client-facing error codes. 
We never surface raw upstream error + * strings to the client (CWE-209) — the helper logs the verbose detail + * server-side and returns one of these codes in the {@link AgentEvent}. + */ +type SupervisorErrorCode = + | "transport" + | "upstream_failed" + | "upstream_tool" + | "upstream_unknown"; + +/** + * Single sink for all error events emitted by the adapter. Logs the verbose + * detail (stack, upstream payload, etc.) at `warn` level and returns a + * sanitised {@link AgentEvent} carrying only a stable code so the client + * never sees raw upstream text. + */ +function emitError(code: SupervisorErrorCode, detail: unknown): AgentEvent { + logger.warn("supervisor-api error code=%s detail=%O", code, detail); + return { + type: "status", + status: "error", + error: `Supervisor API error (${code})`, + }; +} + +/** + * Renders an upstream error / incomplete_details payload as a short + * single-line string for log lines. Avoids dumping the full JSON tree + * (CWE-532): we keep the discriminator (`type`/`code`) plus a trimmed + * message, and that's it. Full payloads are still available via + * `DEBUG=appkit:agents:supervisor-api`. + */ +function summariseErrorPayload(payload: unknown): string { + if (payload == null) return ""; + if (typeof payload === "string") { + return payload.length > 80 ? `${payload.slice(0, 80)}…` : payload; + } + if (typeof payload !== "object") return String(payload); + const obj = payload as Record; + const kind = + (typeof obj.type === "string" && obj.type) || + (typeof obj.code === "string" && obj.code) || + (typeof obj.reason === "string" && obj.reason) || + "object"; + const message = + (typeof obj.message === "string" && obj.message) || + (typeof obj.detail === "string" && obj.detail) || + ""; + const trimmed = message.length > 80 ? `${message.slice(0, 80)}…` : message; + return trimmed ? `${kind}: ${trimmed}` : kind; +} + /** * Transport shim: given a request body, returns the raw SSE byte stream from * the Supervisor API endpoint. 
Injected at construction time so callers can @@ -135,6 +188,12 @@ export interface SupervisorApiAdapterOptions { * and per-request authentication. When omitted, a `WorkspaceClient({})` * is created internally using the default SDK credential chain * (`DATABRICKS_HOST`, OAuth, PAT, etc.). + * + * ⚠ The `workspaceClient` is captured at construction and reused across + * every request. Passing a per-request OBO (On-Behalf-Of) client here + * would silently leak the first request's identity into all subsequent + * requests served by this adapter instance. Use the default credential + * chain or pass a service-principal client. (CWE-664) */ workspaceClient?: WorkspaceClientLike; } @@ -168,11 +227,12 @@ export interface SupervisorApiAdapterCtorOptions { * * @example * ```ts - * import { createApp, createAgent, agents } from "@databricks/appkit"; + * import { createApp, createAgent } from "@databricks/appkit"; * import { + * agents, * fromSupervisorApi, * supervisorTools, - * } from "@databricks/appkit/agents/supervisor-api"; + * } from "@databricks/appkit/beta"; * * const adapter = await fromSupervisorApi({ * model: "databricks-claude-sonnet-4", @@ -254,13 +314,11 @@ export class SupervisorApiAdapter implements AgentAdapter { try { stream = await this.streamBody(body, signal); } catch (err) { - const message = err instanceof Error ? err.message : String(err); - logger.warn("Supervisor API request failed: %s", message); - yield { - type: "status", - status: "error", - error: `Supervisor API error: ${message}`, - }; + // Aborts surface as exceptions thrown by `fetch`/SDK transports when + // the consumer cancels mid-request. Treat as a clean stop so consumers + // don't see a contradictory terminal `error` after their own abort. 
+ if (signal?.aborted) return; + yield emitError("transport", err); return; } @@ -360,6 +418,22 @@ export class SupervisorApiAdapter implements AgentAdapter { } if (eventCounts.has("response.completed")) { + // SA sometimes signals a failed turn via `response.completed` with a + // nested `status: "failed"` (or a populated `error`/`incomplete_details`) + // rather than emitting `response.failed`. Without this gate the + // adapter would silently yield `complete` on a server-side failure. + if ( + lastCompleted?.status === "failed" || + lastCompleted?.error != null || + lastCompleted?.incomplete_details != null + ) { + yield emitError("upstream_failed", { + status: lastCompleted?.status, + error: lastCompleted?.error, + incomplete_details: lastCompleted?.incomplete_details, + }); + return; + } yield { type: "status", status: "complete" }; } @@ -367,19 +441,18 @@ export class SupervisorApiAdapter implements AgentAdapter { const histogram = [...eventCounts.entries()] .map(([t, n]) => `${t}=${n}`) .join(", "); - const completedError = lastCompleted?.error - ? JSON.stringify(lastCompleted.error) - : undefined; - const completedIncomplete = lastCompleted?.incomplete_details - ? JSON.stringify(lastCompleted.incomplete_details) - : undefined; logger.warn( "Supervisor API stream completed without any output_text deltas. " + "events={%s} completed.status=%s completed.error=%s completed.incomplete=%s", histogram, lastCompleted?.status ?? "", - completedError ?? "", - completedIncomplete ?? "", + summariseErrorPayload(lastCompleted?.error), + summariseErrorPayload(lastCompleted?.incomplete_details), + ); + logger.debug( + "Supervisor API no-delta full payload: error=%O incomplete=%O", + lastCompleted?.error, + lastCompleted?.incomplete_details, ); } } @@ -476,14 +549,20 @@ function mapEvent( // the stream produced none, then emits `{status:"complete"}` itself. 
case "response.failed": - return { type: "status", status: "error", error: "Response failed" }; + return emitError("upstream_failed", data); case "error": { - const errMsg = + // Branch detail extraction so a missing `error` field doesn't surface + // the JSON-stringified literal `'"Unknown error"'` (with quotes) in + // server logs. The client never sees this string — `emitError` + // sanitises it to a stable code. + const detail = typeof data.error === "string" ? data.error - : JSON.stringify(data.error ?? "Unknown error"); - return { type: "status", status: "error", error: errMsg }; + : data.error == null + ? "Unknown error" + : data.error; + return emitError("upstream_unknown", detail); } case "response.output_item.done": { @@ -495,9 +574,15 @@ function mapEvent( } | undefined; - if (item?.id === "error") { - const errText = item.content?.[0]?.text ?? "Unknown tool error from SA"; - return { type: "status", status: "error", error: errText }; + // SA's contract reserves `item.id === "error"` for tool failures, but + // a 5-char identifier collision is too small a margin. Require either + // an explicit `type === "error"` or pair the reserved id with a + // non-message type (a normal assistant message uses `type: "message"`). + if ( + item?.type === "error" || + (item?.id === "error" && item?.type !== "message") + ) { + return emitError("upstream_tool", item); } // Fallback: when SA produces a tool-driven response (e.g. 
Genie space), @@ -541,7 +626,7 @@ function mapEvent( * import { * fromSupervisorApi, * supervisorTools, - * } from "@databricks/appkit/agents/supervisor-api"; + * } from "@databricks/appkit/beta"; * * const adapter = await fromSupervisorApi({ * model: "databricks-claude-sonnet-4", @@ -553,6 +638,12 @@ function mapEvent( * ], * }); * ``` + * + * @remarks + * ⚠ When passing your own `workspaceClient`, see the warning on + * {@link SupervisorApiAdapterOptions.workspaceClient} — the client is + * captured once and reused, so per-request OBO clients would leak + * identity across requests. */ export async function fromSupervisorApi( options: SupervisorApiAdapterOptions, diff --git a/packages/appkit/src/agents/tests/supervisor-api.test.ts b/packages/appkit/src/agents/tests/supervisor-api.test.ts index 9606b1c6a..9877808e4 100644 --- a/packages/appkit/src/agents/tests/supervisor-api.test.ts +++ b/packages/appkit/src/agents/tests/supervisor-api.test.ts @@ -221,8 +221,15 @@ describe("SupervisorApiAdapter", () => { ]); }); - test("maps response.failed to a status:error event", async () => { - const { streamBody } = makeStreamBody([sseEvent("response.failed", {})]); + test("maps response.failed to a sanitised status:error event", async () => { + // The verbose upstream payload must NOT reach the client (CWE-209) — + // only the stable `upstream_failed` code does. Server logs still keep + // the full detail via logger.warn. + const { streamBody } = makeStreamBody([ + sseEvent("response.failed", { + response: { error: { message: "secret-internal-stack-trace" } }, + }), + ]); const adapter = new SupervisorApiAdapter({ streamBody, model: "databricks-claude-sonnet-4", @@ -233,13 +240,19 @@ describe("SupervisorApiAdapter", () => { expect(events).toContainEqual({ type: "status", status: "error", - error: "Response failed", + error: "Supervisor API error (upstream_failed)", }); + // Belt-and-braces: the leaky upstream string is never in the event. 
+ for (const e of events) { + if (e.type === "status" && "error" in e) { + expect(e.error).not.toContain("secret-internal-stack-trace"); + } + } }); - test("maps top-level error events", async () => { + test("maps top-level error events to sanitised upstream_unknown code", async () => { const { streamBody } = makeStreamBody([ - sseEvent("error", { error: "rate limited" }), + sseEvent("error", { error: "rate limited (workspace abc-123)" }), ]); const adapter = new SupervisorApiAdapter({ streamBody, @@ -251,16 +264,22 @@ describe("SupervisorApiAdapter", () => { expect(events).toContainEqual({ type: "status", status: "error", - error: "rate limited", + error: "Supervisor API error (upstream_unknown)", }); + for (const e of events) { + if (e.type === "status" && "error" in e) { + expect(e.error).not.toContain("workspace abc-123"); + } + } }); - test("maps response.output_item.done with id:'error' to status:error", async () => { + test("maps response.output_item.done error item to sanitised upstream_tool code", async () => { const { streamBody } = makeStreamBody([ sseEvent("response.output_item.done", { item: { id: "error", - content: [{ text: "Tool execution failed" }], + type: "error", + content: [{ text: "Tool stack trace with /home/user paths" }], }, }), sseEvent("response.completed", {}), @@ -275,8 +294,43 @@ describe("SupervisorApiAdapter", () => { expect(events).toContainEqual({ type: "status", status: "error", - error: "Tool execution failed", + error: "Supervisor API error (upstream_tool)", + }); + for (const e of events) { + if (e.type === "status" && "error" in e) { + expect(e.error).not.toContain("/home/user"); + } + } + }); + + test("does NOT treat output_item.done id:'error' as error when type:'message' (collision guard)", async () => { + // SA reserves `id === "error"` for tool failures, but the 5-char id + // could collide with a legitimately-id'd assistant message. 
The guard + // requires `type === "error"` (or a non-message type alongside the + // reserved id) so a stray message with id="error" is not mis-classified. + const { streamBody } = makeStreamBody([ + sseEvent("response.output_item.done", { + item: { + id: "error", + type: "message", + role: "assistant", + content: [{ type: "output_text", text: "hello from error-id msg" }], + }, + }), + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toEqual([ + { type: "status", status: "running" }, + { type: "message_delta", content: "hello from error-id msg" }, + { type: "status", status: "complete" }, + ]); }); test("falls back to output_item.done text when no deltas streamed (tool-driven SA response)", async () => { @@ -345,10 +399,14 @@ describe("SupervisorApiAdapter", () => { ]); }); - test("emits status:error when the underlying streamBody throws", async () => { + test("emits sanitised transport error when the underlying streamBody throws", async () => { const streamBody = vi .fn() - .mockRejectedValue(new Error("Supervisor API error (500): boom")); + .mockRejectedValue( + new Error( + "HTTP 500 from https://workspace-internal.foo: stack trace ...", + ), + ); const adapter = new SupervisorApiAdapter({ streamBody, model: "databricks-claude-sonnet-4", @@ -359,8 +417,45 @@ describe("SupervisorApiAdapter", () => { expect(events).toContainEqual({ type: "status", status: "error", - error: "Supervisor API error: Supervisor API error (500): boom", + error: "Supervisor API error (transport)", }); + for (const e of events) { + if (e.type === "status" && "error" in e) { + expect(e.error).not.toContain("workspace-internal.foo"); + expect(e.error).not.toContain("stack trace"); + } + } + }); + + test("does NOT emit a terminal error when the consumer aborts before streamBody resolves", 
async () => { + // Regression: previously the streamBody catch yielded a sanitised + // `{status:"error"}` even when the underlying rejection was an abort + // triggered by the consumer. Consumers that issued the abort must see + // a clean stop (zero further events after their abort), not a + // contradictory terminal error. + const controller = new AbortController(); + const streamBody = vi.fn(async (_body, signal?: AbortSignal) => { + controller.abort(); + // Simulate the SDK transport rejecting because the signal aborted. + // The catch path must observe `signal.aborted` and return silently. + throw new DOMException( + signal?.aborted ? "aborted" : "fetch failed", + "AbortError", + ); + }); + + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { + executeTool: vi.fn(), + signal: controller.signal, + }), + ); + + expect(events).toEqual([{ type: "status", status: "running" }]); }); test("short-circuits when the signal is already aborted", async () => { @@ -546,8 +641,72 @@ describe("SupervisorApiAdapter", () => { ); expect(events).toEqual([ { type: "status", status: "running" }, - { type: "status", status: "error", error: "Response failed" }, + { + type: "status", + status: "error", + error: "Supervisor API error (upstream_failed)", + }, + ]); + }); + + test("does NOT yield complete when response.completed carries status:'failed'", async () => { + // Regression for the silent-success-on-failure bug: SA occasionally + // reports a failed turn via `response.completed.status = "failed"` + // (with optional `error`/`incomplete_details`) rather than emitting + // `response.failed`. The adapter must surface this as a terminal + // error and NOT yield `{status:"complete"}`. 
+ const { streamBody } = makeStreamBody([ + sseEvent("response.completed", { + response: { + status: "failed", + error: { message: "tool timeout" }, + }, + }), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toEqual([ + { type: "status", status: "running" }, + { + type: "status", + status: "error", + error: "Supervisor API error (upstream_failed)", + }, + ]); + }); + + test("does NOT yield complete when response.completed carries a populated error", async () => { + // Variant: status reported as "completed" but `error` is non-null. + // Treat as a terminal failure rather than silently completing. + const { streamBody } = makeStreamBody([ + sseEvent("response.completed", { + response: { + status: "completed", + error: { code: "internal" }, + }, + }), ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + const events = await collect( + adapter.run(createInput(), { executeTool: vi.fn() }), + ); + expect(events).toContainEqual({ + type: "status", + status: "error", + error: "Supervisor API error (upstream_failed)", + }); + expect(events).not.toContainEqual({ + type: "status", + status: "complete", + }); }); test("does not yield complete when the consumer aborts mid-stream", async () => { diff --git a/packages/appkit/src/connectors/serving/client.ts b/packages/appkit/src/connectors/serving/client.ts index f75993a39..3c556da79 100644 --- a/packages/appkit/src/connectors/serving/client.ts +++ b/packages/appkit/src/connectors/serving/client.ts @@ -85,6 +85,15 @@ export async function invoke( * * When `signal` is provided it is bridged to the SDK's `Context` / * `CancellationToken` so aborts cancel the outbound HTTP request. + * + * @internal + * + * Not part of the public AppKit surface. 
`path` is passed through to the + * SDK without any allowlist — exposing this to user-controlled input would + * turn it into workspace-credentialled SSRF (CWE-918). Internal callers + * must hard-code the path (or build it from a closed enum). New callers + * inside the package: keep this constraint, and do not re-export from + * `beta.ts` or any other entry point. */ export async function streamPath( client: ApiClientLike, diff --git a/packages/appkit/src/stream/sse-reader.ts b/packages/appkit/src/stream/sse-reader.ts index 091f132dc..f80f0738e 100644 --- a/packages/appkit/src/stream/sse-reader.ts +++ b/packages/appkit/src/stream/sse-reader.ts @@ -14,6 +14,33 @@ export interface SseEvent { id?: string; } +/** + * Configuration for {@link readSseEvents}. All limits are in UTF-16 code + * units (JS string `.length`) and exist as a DoS guard (CWE-770) for + * untrusted upstreams that might stream arbitrarily large lines or never + * emit a block terminator. + */ +interface ReadSseEventsOptions { + /** + * Maximum length of any single SSE event block (i.e. the text between + * two `\n\n` separators). Exceeding this throws. + * + * @default 1 MiB (1_048_576) + */ + maxLineChars?: number; + /** + * Maximum length of the rolling input buffer when no block terminator + * has been seen yet. Exceeding this throws — protects against an + * upstream that streams indefinitely without ever sending `\n\n`. + * + * @default 8 MiB (8_388_608) + */ + maxBufferChars?: number; +} + +const DEFAULT_MAX_SSE_LINE_CHARS = 1024 * 1024; +const DEFAULT_MAX_SSE_BUFFER_CHARS = 8 * 1024 * 1024; + /** * Async-iterates Server-Sent Events from a UTF-8 byte stream. * @@ -26,12 +53,18 @@ export interface SseEvent { * after destructuring. * * Terminates when the stream closes or `signal` aborts; releases the reader - * lock in either case. + * lock in either case. Throws when {@link ReadSseEventsOptions.maxLineChars} + * or {@link ReadSseEventsOptions.maxBufferChars} are exceeded. 
*/ export async function* readSseEvents( stream: ReadableStream, signal?: AbortSignal, + options?: ReadSseEventsOptions, ): AsyncGenerator { + const maxLineChars = options?.maxLineChars ?? DEFAULT_MAX_SSE_LINE_CHARS; + const maxBufferChars = + options?.maxBufferChars ?? DEFAULT_MAX_SSE_BUFFER_CHARS; + const reader = stream.getReader(); const decoder = new TextDecoder(); let buffer = ""; @@ -55,6 +88,11 @@ export async function* readSseEvents( if (signal?.aborted) break; const { done, value } = await reader.read(); if (done) { + if (buffer.length > maxLineChars) { + throw new Error( + `readSseEvents: trailing SSE block exceeds maxLineChars (${maxLineChars} UTF-16 code units)`, + ); + } const tail = parseSseBlock(buffer); if (tail) yield tail; break; @@ -62,13 +100,27 @@ export async function* readSseEvents( buffer += decoder.decode(value, { stream: true }); - const normalized = buffer.replace(/\r\n/g, "\n"); + // Gate the CRLF normalize on `\r` presence — saves a full-buffer + // regex scan on every chunk for the common LF-only steady state. + const normalized = + buffer.indexOf("\r") !== -1 ? buffer.replace(/\r\n/g, "\n") : buffer; const blocks = normalized.split("\n\n"); // Last entry is either an incomplete block or "" (when the chunk ended // exactly on a boundary). Either way, keep it for the next iteration. buffer = blocks.pop() ?? ""; + if (buffer.length > maxBufferChars) { + throw new Error( + `readSseEvents: incomplete SSE block exceeds maxBufferChars (${maxBufferChars} UTF-16 code units) without a terminator`, + ); + } + for (const block of blocks) { + if (block.length > maxLineChars) { + throw new Error( + `readSseEvents: SSE block exceeds maxLineChars (${maxLineChars} UTF-16 code units)`, + ); + } const event = parseSseBlock(block); if (event) yield event; } @@ -79,24 +131,35 @@ export async function* readSseEvents( } } +/** + * Per the SSE spec, only a single leading `U+0020` is stripped from a field + * value — not arbitrary whitespace. 
`trimStart()` would also strip tabs, + * NBSP, etc.; for callers that feed binary or whitespace-prefixed payloads + * this is a footgun. + */ +function stripOneLeadingSpace(s: string): string { + return s.startsWith(" ") ? s.slice(1) : s; +} + function parseSseBlock(block: string): SseEvent | null { if (block.length === 0) return null; + // CRLF was already normalised at the buffer level, so each `line` here is + // already free of trailing `\r` — no per-line strip needed. const lines = block.split("\n"); let eventName = ""; let id: string | undefined; const dataLines: string[] = []; - for (const rawLine of lines) { - const line = rawLine.replace(/\r$/, ""); + for (const line of lines) { if (line === "" || line.startsWith(":")) continue; if (line.startsWith("event:")) { - eventName = line.slice(6).trimStart(); + eventName = stripOneLeadingSpace(line.slice(6)); } else if (line.startsWith("data:")) { - dataLines.push(line.slice(5).trimStart()); + dataLines.push(stripOneLeadingSpace(line.slice(5))); } else if (line.startsWith("id:")) { - id = line.slice(3).trimStart(); + id = stripOneLeadingSpace(line.slice(3)); } // Other fields (`retry:`, custom) are ignored by design. } diff --git a/packages/appkit/src/stream/tests/sse-reader.test.ts b/packages/appkit/src/stream/tests/sse-reader.test.ts index 6f7176b62..d83ba26f2 100644 --- a/packages/appkit/src/stream/tests/sse-reader.test.ts +++ b/packages/appkit/src/stream/tests/sse-reader.test.ts @@ -179,4 +179,57 @@ describe("readSseEvents", () => { const events = await collect(readSseEvents(stream)); expect(events).toEqual([{ event: "", data: "✓", id: undefined }]); }); + + test("throws when a single block exceeds maxLineChars (DoS guard)", async () => { + // A complete block whose total length exceeds the cap must throw rather + // than silently propagate to the consumer — protects callers from + // upstreams that stream arbitrarily large payloads (CWE-770). 
+ const huge = `data: ${"x".repeat(200)}\n\n`; + await expect(async () => { + for await (const _ of readSseEvents(streamOf([huge]), undefined, { + maxLineChars: 100, + })) { + /* iterate */ + } + }).rejects.toThrow(/exceeds maxLineChars/); + }); + + test("throws when the rolling buffer exceeds maxBufferChars without a terminator", async () => { + // An upstream that streams forever without ever sending the `\n\n` + // block separator must not grow the buffer unboundedly — throw once + // the cap is exceeded. + const stream = new ReadableStream({ + pull(c) { + c.enqueue(new TextEncoder().encode("x".repeat(50))); + // No close() — keep feeding until the cap fires. + }, + }); + await expect(async () => { + for await (const _ of readSseEvents(stream, undefined, { + maxBufferChars: 200, + maxLineChars: 10_000, + })) { + /* iterate */ + } + }).rejects.toThrow(/exceeds maxBufferChars/); + }); + + test("strips only a single leading U+0020 from field values (spec compliance)", async () => { + // `trimStart()` would strip tabs / NBSP / multi-space prefixes, which + // is wrong per the SSE spec — only one leading U+0020 may be removed. + const events = await collect( + readSseEvents(streamOf(["data: with-leading-spaces\n\n"])), + ); + // First space is stripped; second is preserved. 
+ expect(events).toEqual([ + { event: "", data: " with-leading-spaces", id: undefined }, + ]); + }); + + test("preserves tab-prefixed data values (trimStart would have stripped)", async () => { + const events = await collect( + readSseEvents(streamOf(["data:\t\tvalue\n\n"])), + ); + expect(events).toEqual([{ event: "", data: "\t\tvalue", id: undefined }]); + }); }); From abe6d7627f3dbe77f6edecd7365ea3fffe8ec486 Mon Sep 17 00:00:00 2001 From: Hubert Zub Date: Fri, 22 May 2026 09:59:46 +0200 Subject: [PATCH 3/3] refactor(appkit): restructure supervisor-api adapter per PR #345 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address structural feedback from Mario Cadenas' review of PR #345 (sections 1-8). Stacked on top of the §9 defensive fixes commit. API changes (BETA surface only): - Add `DatabricksAdapter.fromSupervisorApi` static factory for discoverability alongside `.fromChatCompletions`. - Shrink `SupervisorApiAdapterOptions` to `{ model, workspaceClient? }`; tools no longer live on the adapter. - Hosted tools (`supervisorTools.*`) now return tagged `HostedSupervisorTool` records and accept named options instead of positional args. - Declare hosted tools on the agent's `tools` map (same place as function tools / sub-agents); the agents plugin and `runAgent` route them to the adapter via the new `AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. - Add capability-negotiation fields to `AgentAdapter`: `acceptsExtensions?` + `consumesInputTools?`. The agents plugin and `runAgent` warn at registration when adapter capabilities don't match declared tools. Internals: - Extend `ResolvedToolEntry` / `StandaloneEntry` with a `hosted-supervisor` branch; `classifyTool` matches it before MCP hosted-tool rejection so standalone `runAgent` supports supervisor tools. - Defense-in-depth: both indexers throw if a `hosted-supervisor` entry is ever dispatched as a callable function. 
- `DatabricksAdapter.fromSupervisorApi` uses a dynamic import to avoid load-time cycles. Docs: - Rewrite supervisor-API section in docs/plugins/agents.md for the new shape. - Add cross-adapter sub-agent composition note (one-directional: chat parents can call supervisor children, not vice-versa, until SA's function-call events are routed back through `context.executeTool`). Playground: - Update dev-playground supervisor agent to the new shape (`DatabricksAdapter.fromSupervisorApi(...)` + tools on `createAgent`). Tests: - Rewrite supervisor-api.test.ts factory + adapter tests for the new shape. - Add `isSupervisorTool` and `DatabricksAdapter.fromSupervisorApi` tests. - New regression tests in `run-agent.test.ts` covering the hosted-supervisor extension-routing path and both capability-mismatch warnings. - New agents-plugin tests covering the same warning paths and the new `hosted-supervisor` tool-index branch. Signed-off-by: Hubert Zub --- apps/dev-playground/server/index.ts | 34 +- .../api/appkit/Class.DatabricksAdapter.md | 37 +++ .../api/appkit/Class.SupervisorApiAdapter.md | 141 +++++++++ .../api/appkit/Function.fromSupervisorApi.md | 73 +++++ .../api/appkit/Function.isSupervisorTool.md | 20 ++ .../docs/api/appkit/Interface.AgentAdapter.md | 29 ++ docs/docs/api/appkit/Interface.AgentInput.md | 17 + .../appkit/Interface.HostedSupervisorTool.md | 24 ++ ...terface.SupervisorApiAdapterCtorOptions.md | 17 + .../Interface.SupervisorApiAdapterOptions.md | 31 ++ .../appkit/Interface.SupervisorExtension.md | 13 + docs/docs/api/appkit/TypeAlias.AgentTool.md | 8 +- .../api/appkit/TypeAlias.ResolvedToolEntry.md | 153 +++++++++ .../api/appkit/TypeAlias.SupervisorTool.md | 49 +++ .../Variable.SUPERVISOR_EXTENSION_KEY.md | 10 + .../api/appkit/Variable.supervisorTools.md | 178 +++++++++++ docs/docs/api/appkit/index.md | 12 +- docs/docs/api/appkit/typedoc-sidebar.ts | 50 +++ docs/docs/plugins/agents.md | 103 ++++-- packages/appkit/src/agents/databricks.ts | 26 ++ 
packages/appkit/src/agents/supervisor-api.ts | 292 ++++++++++++++---- .../src/agents/tests/supervisor-api.test.ts | 251 ++++++++++++--- packages/appkit/src/beta.ts | 4 + packages/appkit/src/core/agent/run-agent.ts | 129 +++++++- .../src/core/agent/tests/run-agent.test.ts | 115 +++++++ packages/appkit/src/core/agent/types.ts | 26 +- packages/appkit/src/plugins/agents/agents.ts | 140 ++++++++- .../agents/tests/agents-plugin.test.ts | 124 ++++++++ packages/shared/src/agent.ts | 30 ++ 29 files changed, 1971 insertions(+), 165 deletions(-) create mode 100644 docs/docs/api/appkit/Class.SupervisorApiAdapter.md create mode 100644 docs/docs/api/appkit/Function.fromSupervisorApi.md create mode 100644 docs/docs/api/appkit/Function.isSupervisorTool.md create mode 100644 docs/docs/api/appkit/Interface.HostedSupervisorTool.md create mode 100644 docs/docs/api/appkit/Interface.SupervisorApiAdapterCtorOptions.md create mode 100644 docs/docs/api/appkit/Interface.SupervisorApiAdapterOptions.md create mode 100644 docs/docs/api/appkit/Interface.SupervisorExtension.md create mode 100644 docs/docs/api/appkit/TypeAlias.SupervisorTool.md create mode 100644 docs/docs/api/appkit/Variable.SUPERVISOR_EXTENSION_KEY.md create mode 100644 docs/docs/api/appkit/Variable.supervisorTools.md diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts index 53f6cade8..358b35b4d 100644 --- a/apps/dev-playground/server/index.ts +++ b/apps/dev-playground/server/index.ts @@ -14,7 +14,7 @@ import { import { agents, createAgent, - fromSupervisorApi, + DatabricksAdapter, tool, } from "@databricks/appkit/beta"; import { WorkspaceClient } from "@databricks/sdk-experimental"; @@ -73,10 +73,12 @@ const helper = createAgent({ }, }); -// Supervisor API demo agent. Tools are configured on the adapter (the SA -// endpoint executes them server-side), not on the createAgent definition. 
-// Uncomment a `supervisorTools.*` entry (and import 'supervisorTools' from -// '@databricks/appkit/beta') to give the model real powers. +// Supervisor API demo agent. The Databricks AI Gateway executes hosted +// tools server-side; declare them via `createAgent({ tools })` like any +// other agent tool — the agents plugin classifies the tagged record and +// routes it to the adapter via AgentInput.extensions. Import +// `supervisorTools` from '@databricks/appkit/beta' and uncomment an +// entry below to give the model real powers. // // `createAgent({ model })` accepts an adapter promise, so the factory's // host/credential resolution is awaited lazily on first dispatch (via @@ -85,18 +87,18 @@ const helper = createAgent({ const supervisor = createAgent({ instructions: "You are an assistant powered by the Databricks Supervisor API.", - model: fromSupervisorApi({ + model: DatabricksAdapter.fromSupervisorApi({ model: "databricks-claude-sonnet-4-5", - tools: [ - // supervisorTools.genieSpace( - // "01ABCDEF12345678", - // "NYC taxi trip records and zones", - // ), - // supervisorTools.ucFunction( - // "main.default.add", - // "Adds two integers and returns the sum.", - // ), - ], + }), + tools: () => ({ + // nyc: supervisorTools.genieSpace({ + // id: "01ABCDEF12345678", + // description: "NYC taxi trip records and zones", + // }), + // add: supervisorTools.ucFunction({ + // name: "main.default.add", + // description: "Adds two integers and returns the sum.", + // }), }), }); diff --git a/docs/docs/api/appkit/Class.DatabricksAdapter.md b/docs/docs/api/appkit/Class.DatabricksAdapter.md index ba4a8a187..8492aa875 100644 --- a/docs/docs/api/appkit/Class.DatabricksAdapter.md +++ b/docs/docs/api/appkit/Class.DatabricksAdapter.md @@ -150,3 +150,40 @@ serving surface — no bespoke `fetch()` + `authenticate()` plumbing. 
#### Returns `Promise`\<`DatabricksAdapter`\> + +*** + +### fromSupervisorApi() + +```ts +static fromSupervisorApi(options: SupervisorApiAdapterOptions): Promise<SupervisorApiAdapter>; +``` + +Discoverability shim for the Supervisor API adapter. Returns a +[SupervisorApiAdapter](Class.SupervisorApiAdapter.md), NOT a +DatabricksAdapter — the two are separate classes (different +wire formats, different lifecycle). Surfaced here so application +developers see a single `DatabricksAdapter.from*` autocomplete root. + +Dynamic-imports `./supervisor-api` to avoid forming a load-time cycle: +both files share `connectors/serving/client.ts`. + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `options` | [`SupervisorApiAdapterOptions`](Interface.SupervisorApiAdapterOptions.md) | + +#### Returns + +`Promise`\<[`SupervisorApiAdapter`](Class.SupervisorApiAdapter.md)\> + +#### Example + +```ts +import { DatabricksAdapter } from "@databricks/appkit/beta"; + +const model = await DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4-5", +}); +``` diff --git a/docs/docs/api/appkit/Class.SupervisorApiAdapter.md b/docs/docs/api/appkit/Class.SupervisorApiAdapter.md new file mode 100644 index 000000000..e1721b51b --- /dev/null +++ b/docs/docs/api/appkit/Class.SupervisorApiAdapter.md @@ -0,0 +1,141 @@ +# Class: SupervisorApiAdapter + +Adapter that calls the Databricks AI Gateway Responses API +(`/ai-gateway/mlflow/v1/responses`). + +Streams SSE events in the OpenAI Responses API wire format and maps them +to the AppKit `AgentEvent` protocol. Tool execution is handled +server-side, so the adapter ignores the agents-plugin tool index. + +Authentication is handled via the Databricks SDK credential chain — the +same mechanism used by `DatabricksAdapter.fromModelServing`.
The transport +is injected via [SupervisorApiAdapterCtorOptions.streamBody](Interface.SupervisorApiAdapterCtorOptions.md#streambody); the +[fromSupervisorApi](Function.fromSupervisorApi.md) factory wires it through the SDK's +`apiClient.request({ raw: true })`. + +Set `DEBUG=appkit:agents:supervisor-api` to log the outbound request +shape (model, instructions length, input shape, tool count) and to be +notified when the recovery path engages (no incremental deltas, text +pulled from `response.completed.output[]`). The no-delta warning includes +a per-turn event-type histogram and the SA-reported status/error/ +incomplete_details, so it's already actionable without DEBUG. + +Tools are not configured on the adapter. Declare them via +`createAgent({ tools: () => ({ key: supervisorTools.genieSpace({...}) }) })` +(or markdown frontmatter referencing an ambient `supervisorTools.*` entry); +the agents plugin / standalone `runAgent` aggregates hosted-supervisor +entries and routes them to the adapter via +`AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. Advanced callers +invoking `adapter.run(...)` directly populate that key themselves. 
+ +## Example + +```ts +import { createApp, createAgent } from "@databricks/appkit"; +import { + agents, + DatabricksAdapter, + supervisorTools, +} from "@databricks/appkit/beta"; + +await createApp({ + plugins: [ + agents({ + agents: { + assistant: createAgent({ + instructions: "You are a helpful assistant.", + model: DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + }), + tools: () => ({ + nyc: supervisorTools.genieSpace({ + id: "01ABCDEF12345678", + description: "NYC taxi trip records and zones", + }), + }), + }), + }, + }), + ], +}); +``` + +## Implements + +- [`AgentAdapter`](Interface.AgentAdapter.md) + +## Constructors + +### Constructor + +```ts +new SupervisorApiAdapter(options: SupervisorApiAdapterCtorOptions): SupervisorApiAdapter; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `options` | [`SupervisorApiAdapterCtorOptions`](Interface.SupervisorApiAdapterCtorOptions.md) | + +#### Returns + +`SupervisorApiAdapter` + +## Properties + +### acceptsExtensions + +```ts +readonly acceptsExtensions: readonly ["databricks.supervisor"]; +``` + +Capability negotiation: the adapter reads its hosted-tool payload +from [AgentInput.extensions](Interface.AgentInput.md#extensions) under [SUPERVISOR\_EXTENSION\_KEY](Variable.SUPERVISOR_EXTENSION_KEY.md). +The agents plugin uses this list to warn at registration when the tool +index produces extensions the adapter wouldn't consume. + +#### Implementation of + +[`AgentAdapter`](Interface.AgentAdapter.md).[`acceptsExtensions`](Interface.AgentAdapter.md#acceptsextensions) + +*** + +### consumesInputTools + +```ts +readonly consumesInputTools: false = false; +``` + +Capability negotiation: the adapter does not consume `input.tools`. +Tool execution is owned by the Databricks AI Gateway server-side, so +any function tools or local sub-agents declared on this agent would +be silently dropped — the agents plugin warns at registration when +that combination is detected. 
+ +#### Implementation of + +[`AgentAdapter`](Interface.AgentAdapter.md).[`consumesInputTools`](Interface.AgentAdapter.md#consumesinputtools) + +## Methods + +### run() + +```ts +run(input: AgentInput, context: AgentRunContext): AsyncGenerator<AgentEvent, void, unknown>; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `input` | [`AgentInput`](Interface.AgentInput.md) | +| `context` | [`AgentRunContext`](Interface.AgentRunContext.md) | + +#### Returns + +`AsyncGenerator`\<[`AgentEvent`](TypeAlias.AgentEvent.md), `void`, `unknown`\> + +#### Implementation of + +[`AgentAdapter`](Interface.AgentAdapter.md).[`run`](Interface.AgentAdapter.md#run) diff --git a/docs/docs/api/appkit/Function.fromSupervisorApi.md b/docs/docs/api/appkit/Function.fromSupervisorApi.md new file mode 100644 index 000000000..cb54e7b60 --- /dev/null +++ b/docs/docs/api/appkit/Function.fromSupervisorApi.md @@ -0,0 +1,73 @@ +# Function: fromSupervisorApi() + +```ts +function fromSupervisorApi(options: SupervisorApiAdapterOptions): Promise<SupervisorApiAdapter>; +``` + +Creates an [AgentAdapter](Interface.AgentAdapter.md) backed by the Databricks AI Gateway +Responses API (`/ai-gateway/mlflow/v1/responses`). + +Uses the SDK's default credential chain for auth (reads DATABRICKS_HOST, +DATABRICKS_TOKEN, OAuth config, etc.). Tools are declared on the agent +(via `createAgent({ tools })`), not on this factory. + +Application code should prefer the +[DatabricksAdapter.fromSupervisorApi](Class.DatabricksAdapter.md#fromsupervisorapi) static — it delegates here +and keeps a single `DatabricksAdapter.from*` autocomplete root for all +Databricks-backed adapters. This free function is the implementation +behind the static and remains exported for callers that want to import +it directly without pulling in [DatabricksAdapter](Class.DatabricksAdapter.md).
+ +## Parameters + +| Parameter | Type | +| ------ | ------ | +| `options` | [`SupervisorApiAdapterOptions`](Interface.SupervisorApiAdapterOptions.md) | + +## Returns + +`Promise`\<[`SupervisorApiAdapter`](Class.SupervisorApiAdapter.md)\> + +## Example + +```ts +import { createApp, createAgent } from "@databricks/appkit"; +import { + agents, + DatabricksAdapter, + supervisorTools, +} from "@databricks/appkit/beta"; + +await createApp({ + plugins: [ + agents({ + agents: { + assistant: createAgent({ + instructions: "You are a helpful assistant.", + model: DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + }), + tools: () => ({ + nyc: supervisorTools.genieSpace({ + id: "01ABCDEF12345678", + description: "NYC taxi trip records and zones", + }), + }), + }), + }, + }), + ], +}); +``` + +## Remarks + +⚠ When passing your own `workspaceClient`, see the warning on +[SupervisorApiAdapterOptions.workspaceClient](Interface.SupervisorApiAdapterOptions.md#workspaceclient) — the client is +captured once and reused, so per-request OBO clients would leak +identity across requests. + +## See + +[DatabricksAdapter.fromSupervisorApi](Class.DatabricksAdapter.md#fromsupervisorapi) — the recommended +application-facing entry point. diff --git a/docs/docs/api/appkit/Function.isSupervisorTool.md b/docs/docs/api/appkit/Function.isSupervisorTool.md new file mode 100644 index 000000000..e34696a71 --- /dev/null +++ b/docs/docs/api/appkit/Function.isSupervisorTool.md @@ -0,0 +1,20 @@ +# Function: isSupervisorTool() + +```ts +function isSupervisorTool(value: unknown): value is HostedSupervisorTool; +``` + +Type guard for [HostedSupervisorTool](Interface.HostedSupervisorTool.md). Used by the agents plugin +(`buildToolIndex`) and standalone `runAgent` (`classifyTool`) to route +supervisor-hosted tools to the extensions payload rather than the +adapter's `tools` array. 
+ +## Parameters + +| Parameter | Type | +| ------ | ------ | +| `value` | `unknown` | + +## Returns + +`value is HostedSupervisorTool` diff --git a/docs/docs/api/appkit/Interface.AgentAdapter.md b/docs/docs/api/appkit/Interface.AgentAdapter.md index 52083157e..7e9f6ef22 100644 --- a/docs/docs/api/appkit/Interface.AgentAdapter.md +++ b/docs/docs/api/appkit/Interface.AgentAdapter.md @@ -1,5 +1,34 @@ # Interface: AgentAdapter +## Properties + +### acceptsExtensions? + +```ts +readonly optional acceptsExtensions: readonly string[]; +``` + +Extension keys this adapter consumes from [AgentInput.extensions](Interface.AgentInput.md#extensions). +The agents plugin (and standalone `runAgent`) warns at registration +if the tool index produces extensions whose keys aren't listed here. + +Adapters that don't read extensions can omit this field. + +*** + +### consumesInputTools? + +```ts +readonly optional consumesInputTools: boolean; +``` + +Whether the adapter consumes tools from `input.tools`. Defaults to +true. Adapters whose tool execution happens elsewhere (e.g. the +Supervisor API, where SA owns the tool loop server-side) declare +false; the agents plugin warns at registration if the agent declares +function tools or local sub-agents alongside such an adapter, since +those tools would never reach the model. + ## Methods ### run() diff --git a/docs/docs/api/appkit/Interface.AgentInput.md b/docs/docs/api/appkit/Interface.AgentInput.md index 6d2eff8b0..17102bb87 100644 --- a/docs/docs/api/appkit/Interface.AgentInput.md +++ b/docs/docs/api/appkit/Interface.AgentInput.md @@ -2,6 +2,23 @@ ## Properties +### extensions? + +```ts +optional extensions: Readonly<Record<string, unknown>>; +``` + +Adapter-specific opaque payloads, keyed by adapter namespace. The +shared contract intentionally does not enumerate keys — see each +adapter's docs for which keys it reads and the shape of each value.
+ +The agents plugin and standalone `runAgent` populate this from the +agent's tool index when entries declare an adapter-side spec (e.g. +Supervisor API hosted tools). Adapters that don't read extensions +should leave it untouched. + +*** + ### messages ```ts diff --git a/docs/docs/api/appkit/Interface.HostedSupervisorTool.md b/docs/docs/api/appkit/Interface.HostedSupervisorTool.md new file mode 100644 index 000000000..2f1b45d25 --- /dev/null +++ b/docs/docs/api/appkit/Interface.HostedSupervisorTool.md @@ -0,0 +1,24 @@ +# Interface: HostedSupervisorTool + +Tagged record returned by every [supervisorTools](Variable.supervisorTools.md) factory. The +`__kind` discriminator lets the agents plugin (and standalone +`runAgent`) classify these tools without a structural match against the +wire format — keeps the SA wire shape free to evolve and avoids +namespace collisions with MCP hosted tools (which use `type: "genie-space"` +hyphenated, vs SA's `type: "genie_space"` underscored). + +## Properties + +### \_\_kind + +```ts +readonly __kind: "hosted-supervisor"; +``` + +*** + +### spec + +```ts +readonly spec: SupervisorTool; +``` diff --git a/docs/docs/api/appkit/Interface.SupervisorApiAdapterCtorOptions.md b/docs/docs/api/appkit/Interface.SupervisorApiAdapterCtorOptions.md new file mode 100644 index 000000000..6e332d67c --- /dev/null +++ b/docs/docs/api/appkit/Interface.SupervisorApiAdapterCtorOptions.md @@ -0,0 +1,17 @@ +# Interface: SupervisorApiAdapterCtorOptions + +## Properties + +### model + +```ts +model: string; +``` + +*** + +### streamBody + +```ts +streamBody: StreamBody; +``` diff --git a/docs/docs/api/appkit/Interface.SupervisorApiAdapterOptions.md b/docs/docs/api/appkit/Interface.SupervisorApiAdapterOptions.md new file mode 100644 index 000000000..d5884d872 --- /dev/null +++ b/docs/docs/api/appkit/Interface.SupervisorApiAdapterOptions.md @@ -0,0 +1,31 @@ +# Interface: SupervisorApiAdapterOptions + +## Properties + +### model + +```ts +model: string; +``` + 
+Model identifier to pass in the request body +(e.g. "databricks-claude-sonnet-4"). + +*** + +### workspaceClient? + +```ts +optional workspaceClient: WorkspaceClientLike; +``` + +A WorkspaceClient (or structural equivalent) used for host resolution +and per-request authentication. When omitted, a `WorkspaceClient({})` +is created internally using the default SDK credential chain +(`DATABRICKS_HOST`, OAuth, PAT, etc.). + +⚠ The `workspaceClient` is captured at construction and reused across +every request. Passing a per-request OBO (On-Behalf-Of) client here +would silently leak the first request's identity into all subsequent +requests served by this adapter instance. Use the default credential +chain or pass a service-principal client. (CWE-664) diff --git a/docs/docs/api/appkit/Interface.SupervisorExtension.md b/docs/docs/api/appkit/Interface.SupervisorExtension.md new file mode 100644 index 000000000..28cc6c7cc --- /dev/null +++ b/docs/docs/api/appkit/Interface.SupervisorExtension.md @@ -0,0 +1,13 @@ +# Interface: SupervisorExtension + +Shape of the value at `AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. +The agents plugin / `runAgent` build this from the tool index; advanced +callers invoking `adapter.run(...)` directly populate it themselves. + +## Properties + +### hostedTools? + +```ts +optional hostedTools: SupervisorTool[]; +``` diff --git a/docs/docs/api/appkit/TypeAlias.AgentTool.md b/docs/docs/api/appkit/TypeAlias.AgentTool.md index e165cec66..060c457ec 100644 --- a/docs/docs/api/appkit/TypeAlias.AgentTool.md +++ b/docs/docs/api/appkit/TypeAlias.AgentTool.md @@ -4,9 +4,11 @@ type AgentTool = | FunctionTool | HostedTool - | ToolkitEntry; + | ToolkitEntry + | HostedSupervisorTool; ``` Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP -tools (`mcpServer()` / raw hosted), or toolkit references from plugins -(`analytics().toolkit()`). 
+tools (`mcpServer()` / raw hosted), toolkit references from plugins +(`analytics().toolkit()`), or adapter-hosted Supervisor-API tools +(`supervisorTools.*`). diff --git a/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md b/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md index e97b3ef97..d03c0afda 100644 --- a/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md +++ b/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md @@ -22,7 +22,160 @@ type ResolvedToolEntry = agentName: string; def: AgentToolDefinition; source: "subagent"; +} + | { + def: AgentToolDefinition; + source: "hosted-supervisor"; + spec: SupervisorTool; }; ``` Internal tool-index entry after a tool record has been resolved to a dispatchable form. + +## Type Declaration + +```ts +{ + def: AgentToolDefinition; + localName: string; + pluginName: string; + source: "toolkit"; +} +``` + +### def + +```ts +def: AgentToolDefinition; +``` + +### localName + +```ts +localName: string; +``` + +### pluginName + +```ts +pluginName: string; +``` + +### source + +```ts +source: "toolkit"; +``` + +```ts +{ + def: AgentToolDefinition; + functionTool: FunctionTool; + source: "function"; +} +``` + +### def + +```ts +def: AgentToolDefinition; +``` + +### functionTool + +```ts +functionTool: FunctionTool; +``` + +### source + +```ts +source: "function"; +``` + +```ts +{ + def: AgentToolDefinition; + mcpToolName: string; + source: "mcp"; +} +``` + +### def + +```ts +def: AgentToolDefinition; +``` + +### mcpToolName + +```ts +mcpToolName: string; +``` + +### source + +```ts +source: "mcp"; +``` + +```ts +{ + agentName: string; + def: AgentToolDefinition; + source: "subagent"; +} +``` + +### agentName + +```ts +agentName: string; +``` + +### def + +```ts +def: AgentToolDefinition; +``` + +### source + +```ts +source: "subagent"; +``` + +```ts +{ + def: AgentToolDefinition; + source: "hosted-supervisor"; + spec: SupervisorTool; +} +``` + +### def + +```ts +def: AgentToolDefinition; +``` + +### source + +```ts +source: 
"hosted-supervisor"; +``` + +Adapter-side hosted tool (executed by the model-host, not by the +Node process). Today: Supervisor API hosted tools (Genie spaces, +UC functions, etc.). The `spec` is opaque to the agents plugin — +it routes the entry into `AgentInput.extensions` for the adapter +that declared the matching `acceptsExtensions` key. `def` is a +synthetic placeholder kept so the index has a uniform shape; it +is intentionally NOT included in the `tools` array passed to +`adapter.run()` (those entries are not callable functions). + +### spec + +```ts +spec: SupervisorTool; +``` diff --git a/docs/docs/api/appkit/TypeAlias.SupervisorTool.md b/docs/docs/api/appkit/TypeAlias.SupervisorTool.md new file mode 100644 index 000000000..b4fe497b1 --- /dev/null +++ b/docs/docs/api/appkit/TypeAlias.SupervisorTool.md @@ -0,0 +1,49 @@ +# Type Alias: SupervisorTool + +```ts +type SupervisorTool = + | { + genie_space: { + description: string; + id: string; + }; + type: "genie_space"; +} + | { + type: "uc_function"; + uc_function: { + description: string; + name: string; + }; +} + | { + knowledge_assistant: { + description: string; + knowledge_assistant_id: string; + }; + type: "knowledge_assistant"; +} + | { + app: { + description: string; + name: string; + }; + type: "app"; +} + | { + type: "uc_connection"; + uc_connection: { + description: string; + name: string; + }; +}; +``` + +Tools supported by the Databricks AI Gateway Responses API. The shapes match +the wire format the endpoint expects, so the adapter passes the array +straight into the request body. + +This is an adapter-internal wire type. Application code authors tools via +the [supervisorTools](Variable.supervisorTools.md) factories, which return tagged +[HostedSupervisorTool](Interface.HostedSupervisorTool.md) records — the agents plugin then unwraps +the `.spec` when routing through [AgentInput.extensions](Interface.AgentInput.md#extensions). 
diff --git a/docs/docs/api/appkit/Variable.SUPERVISOR_EXTENSION_KEY.md b/docs/docs/api/appkit/Variable.SUPERVISOR_EXTENSION_KEY.md new file mode 100644 index 000000000..40d610b56 --- /dev/null +++ b/docs/docs/api/appkit/Variable.SUPERVISOR_EXTENSION_KEY.md @@ -0,0 +1,10 @@ +# Variable: SUPERVISOR\_EXTENSION\_KEY + +```ts +const SUPERVISOR_EXTENSION_KEY: "databricks.supervisor"; +``` + +Namespace key under which the adapter reads its hosted-tool payload +from [AgentInput.extensions](Interface.AgentInput.md#extensions). Exported so the agents plugin and +standalone `runAgent` (the producers) can write under the same key the +adapter reads. diff --git a/docs/docs/api/appkit/Variable.supervisorTools.md b/docs/docs/api/appkit/Variable.supervisorTools.md new file mode 100644 index 000000000..666dbe025 --- /dev/null +++ b/docs/docs/api/appkit/Variable.supervisorTools.md @@ -0,0 +1,178 @@ +# Variable: supervisorTools + +```ts +const supervisorTools: { + app: (__namedParameters: { + description: string; + name: string; + }) => HostedSupervisorTool; + genieSpace: (__namedParameters: { + description: string; + id: string; + }) => HostedSupervisorTool; + knowledgeAssistant: (__namedParameters: { + description: string; + knowledgeAssistantId: string; + }) => HostedSupervisorTool; + ucConnection: (__namedParameters: { + description: string; + name: string; + }) => HostedSupervisorTool; + ucFunction: (__namedParameters: { + description: string; + name: string; + }) => HostedSupervisorTool; +}; +``` + +Concise factories for declaring Supervisor API tools. + +Each factory accepts a single named-options object: routing-critical +strings (`id`, `name`, `description`) get labels at the call site so +"we swapped the args and didn't notice for two weeks" bugs are +impossible. + +`description` is required: SA's protobuf validation rejects `null`/`""`, +AND the LLM running on SA reads this string to decide when to route to +the tool. 
Two genie spaces both labelled "Genie space" give the model +nothing to discriminate on, so callers always own the routing hint. + +⚠ The `description` is read by the LLM at routing time — it is a +prompt-injection sink. Do **not** derive it from untrusted input (user +messages, request bodies, external systems). Treat it as application +configuration. (CWE-1427) + +## Type Declaration + +### app() + +```ts +app: (__namedParameters: { + description: string; + name: string; +}) => HostedSupervisorTool; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `__namedParameters` | \{ `description`: `string`; `name`: `string`; \} | +| `__namedParameters.description` | `string` | +| `__namedParameters.name` | `string` | + +#### Returns + +[`HostedSupervisorTool`](Interface.HostedSupervisorTool.md) + +### genieSpace() + +```ts +genieSpace: (__namedParameters: { + description: string; + id: string; +}) => HostedSupervisorTool; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `__namedParameters` | \{ `description`: `string`; `id`: `string`; \} | +| `__namedParameters.description` | `string` | +| `__namedParameters.id` | `string` | + +#### Returns + +[`HostedSupervisorTool`](Interface.HostedSupervisorTool.md) + +### knowledgeAssistant() + +```ts +knowledgeAssistant: (__namedParameters: { + description: string; + knowledgeAssistantId: string; +}) => HostedSupervisorTool; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `__namedParameters` | \{ `description`: `string`; `knowledgeAssistantId`: `string`; \} | +| `__namedParameters.description` | `string` | +| `__namedParameters.knowledgeAssistantId` | `string` | + +#### Returns + +[`HostedSupervisorTool`](Interface.HostedSupervisorTool.md) + +### ucConnection() + +```ts +ucConnection: (__namedParameters: { + description: string; + name: string; +}) => HostedSupervisorTool; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `__namedParameters` | 
\{ `description`: `string`; `name`: `string`; \} | +| `__namedParameters.description` | `string` | +| `__namedParameters.name` | `string` | + +#### Returns + +[`HostedSupervisorTool`](Interface.HostedSupervisorTool.md) + +### ucFunction() + +```ts +ucFunction: (__namedParameters: { + description: string; + name: string; +}) => HostedSupervisorTool; +``` + +#### Parameters + +| Parameter | Type | +| ------ | ------ | +| `__namedParameters` | \{ `description`: `string`; `name`: `string`; \} | +| `__namedParameters.description` | `string` | +| `__namedParameters.name` | `string` | + +#### Returns + +[`HostedSupervisorTool`](Interface.HostedSupervisorTool.md) + +## Example + +```ts +import { createAgent } from "@databricks/appkit"; +import { + agents, + DatabricksAdapter, + supervisorTools, +} from "@databricks/appkit/beta"; + +const assistant = createAgent({ + instructions: "You are a helpful assistant.", + model: DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + }), + tools: () => ({ + nyc: supervisorTools.genieSpace({ + id: "01ABCDEF12345678", + description: "NYC taxi trip records and zones", + }), + add: supervisorTools.ucFunction({ + name: "main.default.add", + description: "Adds two integers and returns the sum.", + }), + }), +}); +``` diff --git a/docs/docs/api/appkit/index.md b/docs/docs/api/appkit/index.md index 6ac54fa47..e246d4c83 100644 --- a/docs/docs/api/appkit/index.md +++ b/docs/docs/api/appkit/index.md @@ -26,6 +26,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [PolicyDeniedError](Class.PolicyDeniedError.md) | Thrown when a policy denies an action. | | [ResourceRegistry](Class.ResourceRegistry.md) | Central registry for tracking plugin resource requirements. Deduplication uses type + resourceKey (machine-stable); alias is for display only. | | [ServerError](Class.ServerError.md) | Error thrown when server lifecycle operations fail. 
Use for server start/stop issues, configuration conflicts, etc. | +| [SupervisorApiAdapter](Class.SupervisorApiAdapter.md) | Adapter that calls the Databricks AI Gateway Responses API (`/ai-gateway/mlflow/v1/responses`). | | [TunnelError](Class.TunnelError.md) | Error thrown when remote tunnel operations fail. Use for tunnel connection issues, message parsing failures, etc. | | [ValidationError](Class.ValidationError.md) | Error thrown when input validation fails. Use for invalid parameters, missing required fields, or type mismatches. | @@ -48,6 +49,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [FileResource](Interface.FileResource.md) | Describes the file or directory being acted upon. | | [FunctionTool](Interface.FunctionTool.md) | - | | [GenerateDatabaseCredentialRequest](Interface.GenerateDatabaseCredentialRequest.md) | Request parameters for generating database OAuth credentials | +| [HostedSupervisorTool](Interface.HostedSupervisorTool.md) | Tagged record returned by every [supervisorTools](Variable.supervisorTools.md) factory. The `__kind` discriminator lets the agents plugin (and standalone `runAgent`) classify these tools without a structural match against the wire format — keeps the SA wire shape free to evolve and avoids namespace collisions with MCP hosted tools (which use `type: "genie-space"` hyphenated, vs SA's `type: "genie_space"` underscored). | | [IJobsConfig](Interface.IJobsConfig.md) | Configuration for the Jobs plugin. | | [ITelemetry](Interface.ITelemetry.md) | Plugin-facing interface for OpenTelemetry instrumentation. Provides a thin abstraction over OpenTelemetry APIs for plugins. | | [JobAPI](Interface.JobAPI.md) | User-facing API for a single configured job. | @@ -72,6 +74,9 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [ServingEndpointEntry](Interface.ServingEndpointEntry.md) | Shape of a single registry entry. 
| | [ServingEndpointRegistry](Interface.ServingEndpointRegistry.md) | Registry interface for serving endpoint type generation. Empty by default — augmented by the Vite type generator's `.d.ts` output via module augmentation. When populated, provides autocomplete for alias names and typed request/response/chunk per endpoint. | | [StreamExecutionSettings](Interface.StreamExecutionSettings.md) | Execution settings for streaming endpoints. Extends PluginExecutionSettings with SSE stream configuration. | +| [SupervisorApiAdapterCtorOptions](Interface.SupervisorApiAdapterCtorOptions.md) | - | +| [SupervisorApiAdapterOptions](Interface.SupervisorApiAdapterOptions.md) | - | +| [SupervisorExtension](Interface.SupervisorExtension.md) | Shape of the value at `AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. The agents plugin / `runAgent` build this from the tool index; advanced callers invoking `adapter.run(...)` directly populate it themselves. | | [TelemetryConfig](Interface.TelemetryConfig.md) | OpenTelemetry configuration for AppKit applications | | [Thread](Interface.Thread.md) | - | | [ThreadStore](Interface.ThreadStore.md) | - | @@ -88,7 +93,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | Type Alias | Description | | ------ | ------ | | [AgentEvent](TypeAlias.AgentEvent.md) | - | -| [AgentTool](TypeAlias.AgentTool.md) | Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP tools (`mcpServer()` / raw hosted), or toolkit references from plugins (`analytics().toolkit()`). | +| [AgentTool](TypeAlias.AgentTool.md) | Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP tools (`mcpServer()` / raw hosted), toolkit references from plugins (`analytics().toolkit()`), or adapter-hosted Supervisor-API tools (`supervisorTools.*`). | | [AgentTools](TypeAlias.AgentTools.md) | Per-agent tool record. String keys map to inline tools, toolkit entries, hosted tools, etc. 
| | [AgentToolsFn](TypeAlias.AgentToolsFn.md) | Function form of `AgentDefinition.tools`. Receives the typed [Plugins](TypeAlias.Plugins.md) map and returns a tool record. Invoked exactly once at setup (or once per `runAgent` call in standalone mode); the result is cached as the agent's resolved tool record. | | [BaseSystemPromptOption](TypeAlias.BaseSystemPromptOption.md) | - | @@ -105,6 +110,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [ResolvedToolEntry](TypeAlias.ResolvedToolEntry.md) | Internal tool-index entry after a tool record has been resolved to a dispatchable form. | | [ResourcePermission](TypeAlias.ResourcePermission.md) | Union of all possible permission levels across all resource types. | | [ServingFactory](TypeAlias.ServingFactory.md) | Factory function returned by `AppKit.serving`. | +| [SupervisorTool](TypeAlias.SupervisorTool.md) | Tools supported by the Databricks AI Gateway Responses API. The shapes match the wire format the endpoint expects, so the adapter passes the array straight into the request body. | | [ToolRegistry](TypeAlias.ToolRegistry.md) | - | | [ToPlugin](TypeAlias.ToPlugin.md) | Factory function type returned by `toPlugin()`. Accepts optional config and returns a PluginData tuple. | @@ -115,6 +121,8 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [agents](Variable.agents.md) | Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` runtime API and mounts `/invocations`. | | [READ\_ACTIONS](Variable.READ_ACTIONS.md) | Actions that only read data. | | [sql](Variable.sql.md) | SQL helper namespace | +| [SUPERVISOR\_EXTENSION\_KEY](Variable.SUPERVISOR_EXTENSION_KEY.md) | Namespace key under which the adapter reads its hosted-tool payload from [AgentInput.extensions](Interface.AgentInput.md#extensions). 
Exported so the agents plugin and standalone `runAgent` (the producers) can write under the same key the adapter reads. | +| [supervisorTools](Variable.supervisorTools.md) | Concise factories for declaring Supervisor API tools. | | [WRITE\_ACTIONS](Variable.WRITE_ACTIONS.md) | Actions that mutate data. | ## Functions @@ -132,6 +140,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. | [executeFromRegistry](Function.executeFromRegistry.md) | Validates tool-call arguments against the entry's schema and invokes its handler. On validation failure, returns an LLM-friendly error string (matching the behavior of `tool()`) rather than throwing, so the model can self-correct on its next turn. | | [extractServingEndpoints](Function.extractServingEndpoints.md) | Extract serving endpoint config from a server file by AST-parsing it. Looks for `serving({ endpoints: { alias: { env: "..." }, ... } })` calls and extracts the endpoint alias names and their environment variable mappings. | | [findServerFile](Function.findServerFile.md) | Find the server entry file by checking candidate paths in order. | +| [fromSupervisorApi](Function.fromSupervisorApi.md) | Creates an [AgentAdapter](Interface.AgentAdapter.md) backed by the Databricks AI Gateway Responses API (`/ai-gateway/mlflow/v1/responses`). | | [functionToolToDefinition](Function.functionToolToDefinition.md) | - | | [generateDatabaseCredential](Function.generateDatabaseCredential.md) | Generate OAuth credentials for Postgres database connection using the proper Postgres API. | | [getExecutionContext](Function.getExecutionContext.md) | Get the current execution context. | @@ -144,6 +153,7 @@ surface with `@databricks/appkit/beta`. Not meant for application imports. 
| [isFunctionTool](Function.isFunctionTool.md) | - | | [isHostedTool](Function.isHostedTool.md) | - | | [isSQLTypeMarker](Function.isSQLTypeMarker.md) | Type guard to check if a value is a SQL type marker | +| [isSupervisorTool](Function.isSupervisorTool.md) | Type guard for [HostedSupervisorTool](Interface.HostedSupervisorTool.md). Used by the agents plugin (`buildToolIndex`) and standalone `runAgent` (`classifyTool`) to route supervisor-hosted tools to the extensions payload rather than the adapter's `tools` array. | | [isToolkitEntry](Function.isToolkitEntry.md) | Type guard for `ToolkitEntry` — used by the agents plugin to differentiate toolkit references from inline tools in a mixed `tools` record. | | [loadAgentFromFile](Function.loadAgentFromFile.md) | Loads a single markdown agent file and resolves its frontmatter against registered plugin toolkits + ambient tool library. | | [loadAgentsFromDir](Function.loadAgentsFromDir.md) | Scans a directory for one subdirectory per agent, each containing `agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed by agent id (folder name). Throws on frontmatter errors or unresolved references. Returns an empty map if the directory does not exist. 
| diff --git a/docs/docs/api/appkit/typedoc-sidebar.ts b/docs/docs/api/appkit/typedoc-sidebar.ts index e7c06eefc..c1cbd88be 100644 --- a/docs/docs/api/appkit/typedoc-sidebar.ts +++ b/docs/docs/api/appkit/typedoc-sidebar.ts @@ -81,6 +81,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Class.ServerError", label: "ServerError" }, + { + type: "doc", + id: "api/appkit/Class.SupervisorApiAdapter", + label: "SupervisorApiAdapter" + }, { type: "doc", id: "api/appkit/Class.TunnelError", @@ -172,6 +177,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Interface.GenerateDatabaseCredentialRequest", label: "GenerateDatabaseCredentialRequest" }, + { + type: "doc", + id: "api/appkit/Interface.HostedSupervisorTool", + label: "HostedSupervisorTool" + }, { type: "doc", id: "api/appkit/Interface.IJobsConfig", @@ -292,6 +302,21 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Interface.StreamExecutionSettings", label: "StreamExecutionSettings" }, + { + type: "doc", + id: "api/appkit/Interface.SupervisorApiAdapterCtorOptions", + label: "SupervisorApiAdapterCtorOptions" + }, + { + type: "doc", + id: "api/appkit/Interface.SupervisorApiAdapterOptions", + label: "SupervisorApiAdapterOptions" + }, + { + type: "doc", + id: "api/appkit/Interface.SupervisorExtension", + label: "SupervisorExtension" + }, { type: "doc", id: "api/appkit/Interface.TelemetryConfig", @@ -438,6 +463,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/TypeAlias.ServingFactory", label: "ServingFactory" }, + { + type: "doc", + id: "api/appkit/TypeAlias.SupervisorTool", + label: "SupervisorTool" + }, { type: "doc", id: "api/appkit/TypeAlias.ToolRegistry", @@ -469,6 +499,16 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Variable.sql", label: "sql" }, + { + type: "doc", + id: "api/appkit/Variable.SUPERVISOR_EXTENSION_KEY", + label: "SUPERVISOR_EXTENSION_KEY" + }, + { + type: "doc", + id: "api/appkit/Variable.supervisorTools", + label: "supervisorTools" + }, { 
type: "doc", id: "api/appkit/Variable.WRITE_ACTIONS", @@ -535,6 +575,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Function.findServerFile", label: "findServerFile" }, + { + type: "doc", + id: "api/appkit/Function.fromSupervisorApi", + label: "fromSupervisorApi" + }, { type: "doc", id: "api/appkit/Function.functionToolToDefinition", @@ -595,6 +640,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Function.isSQLTypeMarker", label: "isSQLTypeMarker" }, + { + type: "doc", + id: "api/appkit/Function.isSupervisorTool", + label: "isSupervisorTool" + }, { type: "doc", id: "api/appkit/Function.isToolkitEntry", diff --git a/docs/docs/plugins/agents.md b/docs/docs/plugins/agents.md index f7a9548a8..cf2123366 100644 --- a/docs/docs/plugins/agents.md +++ b/docs/docs/plugins/agents.md @@ -217,17 +217,17 @@ const result = await runAgent(classifier, { `runAgent` eagerly constructs each plugin in `RunAgentInput.plugins`, runs the standard `attachContext({})` + `await setup()` lifecycle, and shares the instances across the top-level run and every sub-agent dispatch. Plugins whose `setup()` requires `createApp`-only runtime (e.g. `WorkspaceClient`, `ServiceContext`) throw at standalone-init with a clear "use createApp instead" message rather than mid-stream. -Hosted tools (MCP) are still `agents()`-only since they require the live MCP client. Plugin tool dispatch in standalone mode runs as the service principal (no OBO) and **bypasses the agents-plugin approval gate** — treat standalone runAgent as a trusted-prompt environment (CI, batch eval, internal scripts), not as an exposed user-facing surface. +MCP hosted tools (`mcpServer(...)`) still require `agents()` (they need a live MCP client). Supervisor-API hosted tools (`supervisorTools.*`), by contrast, **work in standalone `runAgent`** — the adapter has everything it needs to execute them server-side. This makes batch-eval / CI use of supervisor agents possible without `createApp`. 
Plugin tool dispatch in standalone mode runs as the service principal (no OBO) and **bypasses the agents-plugin approval gate** — treat standalone runAgent as a trusted-prompt environment (CI, batch eval, internal scripts), not as an exposed user-facing surface. ## Managed agents: the Supervisor API adapter -`fromSupervisorApi` (beta) is the zero-config way to run an agent: instead of provisioning and pointing at a model-serving endpoint, you run the agentic loop in the Databricks workspace by targeting the AI Gateway Responses API (`/ai-gateway/mlflow/v1/responses`), which runs the LLM — and any hosted tools — as a managed service on Databricks. No `DATABRICKS_SERVING_ENDPOINT_NAME`, no stream-capability check, no JS tool plumbing for the common cases. +`DatabricksAdapter.fromSupervisorApi` (beta) is the zero-config way to run an agent: instead of provisioning and pointing at a model-serving endpoint, you run the agentic loop in the Databricks workspace by targeting the AI Gateway Responses API (`/ai-gateway/mlflow/v1/responses`), which runs the LLM — and any hosted tools — as a managed service on Databricks. No `DATABRICKS_SERVING_ENDPOINT_NAME`, no stream-capability check, no JS tool plumbing for the common cases. 
The minimal agent is one extra line versus a markdown agent: ```ts import { createApp, createAgent } from "@databricks/appkit"; -import { agents, fromSupervisorApi } from "@databricks/appkit/beta"; +import { agents, DatabricksAdapter } from "@databricks/appkit/beta"; await createApp({ plugins: [ @@ -235,7 +235,9 @@ await createApp({ agents: { assistant: createAgent({ instructions: "You are a helpful assistant.", - model: fromSupervisorApi({ model: "databricks-claude-sonnet-4-5" }), + model: DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4-5", + }), }), }, }), @@ -243,30 +245,39 @@ await createApp({ }); ``` -`createAgent({ model })` already accepts adapters and adapter promises in addition to the model-name string used in earlier examples, so you can drop the factory result straight in. `fromSupervisorApi` resolves credentials through the SDK chain (`DATABRICKS_HOST`, OAuth, PAT, …); pass `workspaceClient` to reuse an existing client. +`createAgent({ model })` already accepts adapters and adapter promises in addition to the model-name string used in earlier examples, so you can drop the factory result straight in. The factory resolves credentials through the SDK chain (`DATABRICKS_HOST`, OAuth, PAT, …); pass `workspaceClient` to reuse an existing client. ### Hosted tools -Expose Genie spaces, Unity Catalog functions/connections, Knowledge Assistants, or other AppKit apps to the model by listing them on the adapter — execution stays server-side, you write no tool code: +Expose Genie spaces, Unity Catalog functions/connections, Knowledge Assistants, or other AppKit apps to the model by declaring them as agent tools — same place every other tool is declared. 
Execution stays server-side; you write no tool code: ```ts -import { fromSupervisorApi, supervisorTools } from "@databricks/appkit/beta"; - -const model = fromSupervisorApi({ - model: "databricks-claude-sonnet-4-5", - tools: [ - supervisorTools.genieSpace( - "01ABCDEF12345678", - "NYC taxi trip records and zones", - ), - supervisorTools.ucFunction( - "main.default.add", - "Adds two integers and returns the sum.", - ), - ], +import { createAgent } from "@databricks/appkit"; +import { + DatabricksAdapter, + supervisorTools, +} from "@databricks/appkit/beta"; + +const assistant = createAgent({ + instructions: "You are a helpful data assistant.", + model: DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4-5", + }), + tools: () => ({ + nyc: supervisorTools.genieSpace({ + id: "01ABCDEF12345678", + description: "NYC taxi trip records and zones", + }), + add: supervisorTools.ucFunction({ + name: "main.default.add", + description: "Adds two integers and returns the sum.", + }), + }), }); ``` +Each `supervisorTools.*` factory takes a single named-options object — routing-critical strings get labels at the call site, so positional-argument swap bugs are impossible. + `description` is **required and non-empty** — the LLM uses it to route between tools, so two Genie spaces both labelled "Genie space" will be indistinguishable. 
:::warning Hosted-tool descriptions are trusted application configuration (CWE-1427) @@ -277,22 +288,56 @@ The same caution applies to MCP `description`s and to any other field the model | Factory | Tool kind | Identifier | |---|---|---| -| `supervisorTools.genieSpace(id, description)` | Genie space | space id | -| `supervisorTools.ucFunction(name, description)` | Unity Catalog function | three-part name | -| `supervisorTools.knowledgeAssistant(id, description)` | Knowledge Assistant | assistant id | -| `supervisorTools.app(name, description)` | Databricks App | app name | -| `supervisorTools.ucConnection(name, description)` | UC connection | connection name | +| `supervisorTools.genieSpace({ id, description })` | Genie space | space id | +| `supervisorTools.ucFunction({ name, description })` | Unity Catalog function | three-part name | +| `supervisorTools.knowledgeAssistant({ knowledgeAssistantId, description })` | Knowledge Assistant | assistant id | +| `supervisorTools.app({ name, description })` | Databricks App | app name | +| `supervisorTools.ucConnection({ name, description })` | UC connection | connection name | + +### Declaring hosted tools in markdown agents + +Hosted-supervisor tools also work in markdown-driven agents: declare the tool in code (under `agents({ tools: { ... } })`) and reference its key in frontmatter: + +```ts +// server.ts +agents({ + agents: { /* ... */ }, + tools: { + nyc_taxi: supervisorTools.genieSpace({ + id: "01ABCDEF12345678", + description: "NYC taxi trip records and zones", + }), + }, +}); +``` + +```md +--- +endpoint: databricks-claude-sonnet-4-5 +tools: + - nyc_taxi +--- + +You answer questions about NYC taxi data using the Genie space. +``` + +No new frontmatter syntax — the ambient-tool lookup in `tools:` already resolves bare keys against `agents({ tools })`, and the tagged-record shape of `supervisorTools.*` lets the plugin classify them automatically. 
### What does *not* apply to Supervisor-API agents -The managed runtime owns its own tool execution, so the adapter intentionally **ignores the agents-plugin tool index**. For any agent whose `model:` is a Supervisor adapter: +The managed runtime owns its own tool execution, so the adapter intentionally **ignores function tools and sub-agents from the agents-plugin tool index**. For any agent whose `model:` is a Supervisor adapter: -- Tools wired via markdown `tools:` or the `tools(plugins)` function form are not exposed to the model — declare hosted tools via `fromSupervisorApi({ tools: […] })` instead. -- The **human-in-the-loop approval gate** does not fire (tool calls never enter the Node process; `effect: "destructive"` annotations on plugin tools are irrelevant here). +- Only `supervisorTools.*` entries reach the model. Function tools (`tool({...})`), MCP hosted tools (`mcpServer(...)`), and local sub-agents (`agents: { ... }`) declared alongside a supervisor adapter will trigger a registration-time warning and **will not be exposed to the model**. The capability check fires from `consumesInputTools: false` on the adapter. +- The **human-in-the-loop approval gate** does not fire (tool calls never enter the Node process; `effect: "destructive"` annotations are irrelevant for hosted tools). - `limits.maxToolCalls` is not enforced (the managed runtime accounts for its own calls). - Per-call **OBO** does not apply to hosted tools; they run with the credentials the managed runtime uses for the target resource. -Standard-adapter agents and Supervisor-API agents can coexist in the same `agents({ agents: { … } })` map and can be composed as sub-agents (Level 4) — only the agent whose `model:` points at a Supervisor adapter is exempt from the items above. +### Cross-adapter sub-agent composition + +Supervisor and chat-completions adapters can both appear in the same `agents({ agents: { ... 
} })` map, but composition only goes one direction: + +- **Chat-completions parent → supervisor sub-agent** works natively. The parent dispatches via `agent-{key}` as a regular function tool; the child's adapter runs entirely on the AI Gateway. +- **Supervisor parent → function-tool / local sub-agent children** is not yet wired. The capability check warns at registration; those tools will not reach the supervisor model. Future work will lift this restriction by routing SA's `response.function_call` events through `context.executeTool`. :::note Recovery path for non-streaming tool turns Some hosted tool kinds return their final assistant text without incremental `output_text.delta` events. The adapter has a recovery path that pulls the text out of `response.completed.output[]` so the turn is not silently empty. Set `DEBUG=appkit:agents:supervisor-api` to log the per-turn event-type histogram if you want to verify which path a turn took. diff --git a/packages/appkit/src/agents/databricks.ts b/packages/appkit/src/agents/databricks.ts index 6e2e78d60..b2f50663e 100644 --- a/packages/appkit/src/agents/databricks.ts +++ b/packages/appkit/src/agents/databricks.ts @@ -373,6 +373,32 @@ export class DatabricksAdapter implements AgentAdapter { }); } + /** + * Discoverability shim for the Supervisor API adapter. Returns a + * {@link import("./supervisor-api").SupervisorApiAdapter}, NOT a + * {@link DatabricksAdapter} — the two are separate classes (different + * wire formats, different lifecycle). Surfaced here so application + * developers see a single `DatabricksAdapter.from*` autocomplete root. + * + * Dynamic-imports `./supervisor-api` to avoid forming a load-time cycle: + * both files share `connectors/serving/client.ts`. 
+ * + * @example + * ```ts + * import { DatabricksAdapter } from "@databricks/appkit/beta"; + * + * const model = await DatabricksAdapter.fromSupervisorApi({ + * model: "databricks-claude-sonnet-4-5", + * }); + * ``` + */ + static async fromSupervisorApi( + options: import("./supervisor-api").SupervisorApiAdapterOptions, + ): Promise<import("./supervisor-api").SupervisorApiAdapter> { + const { fromSupervisorApi } = await import("./supervisor-api"); + return fromSupervisorApi(options); + } + async *run( input: AgentInput, context: AgentRunContext, diff --git a/packages/appkit/src/agents/supervisor-api.ts b/packages/appkit/src/agents/supervisor-api.ts index 4e7edbb83..b59d2dd10 100644 --- a/packages/appkit/src/agents/supervisor-api.ts +++ b/packages/appkit/src/agents/supervisor-api.ts @@ -95,8 +95,10 @@ interface WorkspaceClientLike extends ApiClientLike { * the wire format the endpoint expects, so the adapter passes the array * straight into the request body. * - * Prefer the {@link supervisorTools} factories — they fill in the - * SA-validation-bug workaround for `description` (must be non-empty). + * This is an adapter-internal wire type. Application code authors tools via + * the {@link supervisorTools} factories, which return tagged + * {@link HostedSupervisorTool} records — the agents plugin then unwraps + * the `.spec` when routing through {@link AgentInput.extensions}. */ export type SupervisorTool = | { type: "genie_space"; genie_space: { id: string; description: string } } @@ -114,60 +116,169 @@ export type SupervisorTool = uc_connection: { name: string; description: string }; }; +/** + * Tagged record returned by every {@link supervisorTools} factory. The + * `__kind` discriminator lets the agents plugin (and standalone + * `runAgent`) classify these tools without a structural match against the + * wire format — keeps the SA wire shape free to evolve and avoids + * namespace collisions with MCP hosted tools (which use `type: "genie-space"` + * hyphenated, vs SA's `type: "genie_space"` underscored).
+ */ +export interface HostedSupervisorTool { + readonly __kind: "hosted-supervisor"; + readonly spec: SupervisorTool; +} + +/** + * Type guard for {@link HostedSupervisorTool}. Used by the agents plugin + * (`buildToolIndex`) and standalone `runAgent` (`classifyTool`) to route + * supervisor-hosted tools to the extensions payload rather than the + * adapter's `tools` array. + */ +export function isSupervisorTool( + value: unknown, +): value is HostedSupervisorTool { + return ( + typeof value === "object" && + value !== null && + (value as Record<string, unknown>).__kind === "hosted-supervisor" + ); +} + /** * Concise factories for declaring Supervisor API tools. * + * Each factory accepts a single named-options object: routing-critical + * strings (`id`, `name`, `description`) get labels at the call site so + * "we swapped the args and didn't notice for two weeks" bugs are + * impossible. + * * `description` is required: SA's protobuf validation rejects `null`/`""`, * AND the LLM running on SA reads this string to decide when to route to * the tool. Two genie spaces both labelled "Genie space" give the model * nothing to discriminate on, so callers always own the routing hint. * + * ⚠ The `description` is read by the LLM at routing time — it is a + * prompt-injection sink. Do **not** derive it from untrusted input (user + * messages, request bodies, external systems). Treat it as application + * configuration.
(CWE-1427) + * * @example * ```ts - * fromSupervisorApi({ - * model: "databricks-claude-sonnet-4", - * tools: [ - * supervisorTools.genieSpace( - * "01ABCDEF12345678", - * "NYC taxi trip records and zones", - * ), - * supervisorTools.ucFunction( - * "main.default.add", - * "Adds two integers and returns the sum.", - * ), - * ], + * import { createAgent } from "@databricks/appkit"; + * import { + * agents, + * DatabricksAdapter, + * supervisorTools, + * } from "@databricks/appkit/beta"; + * + * const assistant = createAgent({ + * instructions: "You are a helpful assistant.", + * model: DatabricksAdapter.fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * }), + * tools: () => ({ + * nyc: supervisorTools.genieSpace({ + * id: "01ABCDEF12345678", + * description: "NYC taxi trip records and zones", + * }), + * add: supervisorTools.ucFunction({ + * name: "main.default.add", + * description: "Adds two integers and returns the sum.", + * }), + * }), * }); * ``` */ export const supervisorTools = { - genieSpace: (id: string, description: string): SupervisorTool => ({ - type: "genie_space", - genie_space: { id, description }, + genieSpace: ({ + id, + description, + }: { + id: string; + description: string; + }): HostedSupervisorTool => ({ + __kind: "hosted-supervisor", + spec: { type: "genie_space", genie_space: { id, description } }, }), - ucFunction: (name: string, description: string): SupervisorTool => ({ - type: "uc_function", - uc_function: { name, description }, + ucFunction: ({ + name, + description, + }: { + name: string; + description: string; + }): HostedSupervisorTool => ({ + __kind: "hosted-supervisor", + spec: { type: "uc_function", uc_function: { name, description } }, }), - knowledgeAssistant: ( - knowledgeAssistantId: string, - description: string, - ): SupervisorTool => ({ - type: "knowledge_assistant", - knowledge_assistant: { - knowledge_assistant_id: knowledgeAssistantId, - description, + knowledgeAssistant: ({ + knowledgeAssistantId, + 
description, + }: { + knowledgeAssistantId: string; + description: string; + }): HostedSupervisorTool => ({ + __kind: "hosted-supervisor", + spec: { + type: "knowledge_assistant", + knowledge_assistant: { + knowledge_assistant_id: knowledgeAssistantId, + description, + }, }, }), - app: (name: string, description: string): SupervisorTool => ({ - type: "app", - app: { name, description }, + app: ({ + name, + description, + }: { + name: string; + description: string; + }): HostedSupervisorTool => ({ + __kind: "hosted-supervisor", + spec: { type: "app", app: { name, description } }, }), - ucConnection: (name: string, description: string): SupervisorTool => ({ - type: "uc_connection", - uc_connection: { name, description }, + ucConnection: ({ + name, + description, + }: { + name: string; + description: string; + }): HostedSupervisorTool => ({ + __kind: "hosted-supervisor", + spec: { type: "uc_connection", uc_connection: { name, description } }, }), }; +// --------------------------------------------------------------------------- +// AgentInput.extensions integration +// --------------------------------------------------------------------------- + +/** + * Namespace key under which the adapter reads its hosted-tool payload + * from {@link AgentInput.extensions}. Exported so the agents plugin and + * standalone `runAgent` (the producers) can write under the same key the + * adapter reads. + */ +export const SUPERVISOR_EXTENSION_KEY = "databricks.supervisor" as const; + +/** + * Shape of the value at `AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. + * The agents plugin / `runAgent` build this from the tool index; advanced + * callers invoking `adapter.run(...)` directly populate it themselves. + */ +export interface SupervisorExtension { + hostedTools?: SupervisorTool[]; +} + +function readSupervisorExtension(input: AgentInput): SupervisorExtension { + const raw = input.extensions?.[SUPERVISOR_EXTENSION_KEY]; + // Single cast at the boundary. 
The contract on `extensions` is opaque; + // we trust the producer (agents plugin / runAgent / caller) to use the + // shape declared here. + if (!raw || typeof raw !== "object") return {}; + return raw as SupervisorExtension; +} + // --------------------------------------------------------------------------- // Adapter // --------------------------------------------------------------------------- @@ -178,11 +289,6 @@ export interface SupervisorApiAdapterOptions { * (e.g. "databricks-claude-sonnet-4"). */ model: string; - /** - * Hosted tools the SA endpoint should expose to the model. Use the - * {@link supervisorTools} factories for the most common shapes. - */ - tools?: SupervisorTool[]; /** * A WorkspaceClient (or structural equivalent) used for host resolution * and per-request authentication. When omitted, a `WorkspaceClient({})` @@ -201,7 +307,6 @@ export interface SupervisorApiAdapterOptions { export interface SupervisorApiAdapterCtorOptions { streamBody: StreamBody; model: string; - tools?: SupervisorTool[]; } /** @@ -225,32 +330,38 @@ export interface SupervisorApiAdapterCtorOptions { * a per-turn event-type histogram and the SA-reported status/error/ * incomplete_details, so it's already actionable without DEBUG. * + * Tools are not configured on the adapter. Declare them via + * `createAgent({ tools: () => ({ key: supervisorTools.genieSpace({...}) }) })` + * (or markdown frontmatter referencing an ambient `supervisorTools.*` entry); + * the agents plugin / standalone `runAgent` aggregates hosted-supervisor + * entries and routes them to the adapter via + * `AgentInput.extensions[SUPERVISOR_EXTENSION_KEY]`. Advanced callers + * invoking `adapter.run(...)` directly populate that key themselves. 
+ * * @example * ```ts * import { createApp, createAgent } from "@databricks/appkit"; * import { * agents, - * fromSupervisorApi, + * DatabricksAdapter, * supervisorTools, * } from "@databricks/appkit/beta"; * - * const adapter = await fromSupervisorApi({ - * model: "databricks-claude-sonnet-4", - * tools: [ - * supervisorTools.genieSpace( - * "01ABCDEF12345678", - * "NYC taxi trip records and zones", - * ), - * ], - * }); - * * await createApp({ * plugins: [ * agents({ * agents: { * assistant: createAgent({ * instructions: "You are a helpful assistant.", - * model: adapter, + * model: DatabricksAdapter.fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * }), + * tools: () => ({ + * nyc: supervisorTools.genieSpace({ + * id: "01ABCDEF12345678", + * description: "NYC taxi trip records and zones", + * }), + * }), * }), * }, * }), @@ -261,12 +372,27 @@ export interface SupervisorApiAdapterCtorOptions { export class SupervisorApiAdapter implements AgentAdapter { private streamBody: StreamBody; private model: string; - private tools: SupervisorTool[]; + + /** + * Capability negotiation: the adapter reads its hosted-tool payload + * from {@link AgentInput.extensions} under {@link SUPERVISOR_EXTENSION_KEY}. + * The agents plugin uses this list to warn at registration when the tool + * index produces extensions the adapter wouldn't consume. + */ + readonly acceptsExtensions = [SUPERVISOR_EXTENSION_KEY] as const; + + /** + * Capability negotiation: the adapter does not consume `input.tools`. + * Tool execution is owned by the Databricks AI Gateway server-side, so + * any function tools or local sub-agents declared on this agent would + * be silently dropped — the agents plugin warns at registration when + * that combination is detected. + */ + readonly consumesInputTools = false; constructor(options: SupervisorApiAdapterCtorOptions) { this.streamBody = options.streamBody; this.model = options.model; - this.tools = options.tools ?? 
[]; } async *run( @@ -280,12 +406,19 @@ export class SupervisorApiAdapter implements AgentAdapter { const { instructions, input: payloadInput } = this.buildInput( input.messages, ); - yield* this.streamResponse(instructions, payloadInput, context.signal); + const hostedTools = readSupervisorExtension(input).hostedTools ?? []; + yield* this.streamResponse( + instructions, + payloadInput, + hostedTools, + context.signal, + ); } private async *streamResponse( instructions: string | undefined, input: ResponseInput, + hostedTools: SupervisorTool[], signal?: AbortSignal, ): AsyncGenerator { const body: Record = { @@ -298,8 +431,8 @@ export class SupervisorApiAdapter implements AgentAdapter { } // SA's protobuf validation rejects `tools: []` and `tools: null`. Only // include the field when at least one tool is configured. - if (this.tools.length > 0) { - body.tools = this.tools; + if (hostedTools.length > 0) { + body.tools = hostedTools; } logger.debug( @@ -307,7 +440,7 @@ export class SupervisorApiAdapter implements AgentAdapter { this.model, instructions?.length ?? 0, typeof input === "string" ? "string" : `array[${input.length}]`, - this.tools.length, + hostedTools.length, ); let stream: ReadableStream; @@ -619,22 +752,43 @@ function mapEvent( * Responses API (`/ai-gateway/mlflow/v1/responses`). * * Uses the SDK's default credential chain for auth (reads DATABRICKS_HOST, - * DATABRICKS_TOKEN, OAuth config, etc.). + * DATABRICKS_TOKEN, OAuth config, etc.). Tools are declared on the agent + * (via `createAgent({ tools })`), not on this factory. + * + * Application code should prefer the + * {@link DatabricksAdapter.fromSupervisorApi} static — it delegates here + * and keeps a single `DatabricksAdapter.from*` autocomplete root for all + * Databricks-backed adapters. This free function is the implementation + * behind the static and remains exported for callers that want to import + * it directly without pulling in {@link DatabricksAdapter}. 
* * @example * ```ts + * import { createApp, createAgent } from "@databricks/appkit"; * import { - * fromSupervisorApi, + * agents, + * DatabricksAdapter, * supervisorTools, * } from "@databricks/appkit/beta"; * - * const adapter = await fromSupervisorApi({ - * model: "databricks-claude-sonnet-4", - * tools: [ - * supervisorTools.genieSpace( - * "01ABCDEF12345678", - * "NYC taxi trip records and zones", - * ), + * await createApp({ + * plugins: [ + * agents({ + * agents: { + * assistant: createAgent({ + * instructions: "You are a helpful assistant.", + * model: DatabricksAdapter.fromSupervisorApi({ + * model: "databricks-claude-sonnet-4", + * }), + * tools: () => ({ + * nyc: supervisorTools.genieSpace({ + * id: "01ABCDEF12345678", + * description: "NYC taxi trip records and zones", + * }), + * }), + * }), + * }, + * }), * ], * }); * ``` @@ -644,6 +798,9 @@ function mapEvent( * {@link SupervisorApiAdapterOptions.workspaceClient} — the client is * captured once and reused, so per-request OBO clients would leak * identity across requests. + * + * @see {@link DatabricksAdapter.fromSupervisorApi} — the recommended + * application-facing entry point. */ export async function fromSupervisorApi( options: SupervisorApiAdapterOptions, @@ -663,6 +820,5 @@ export async function fromSupervisorApi( streamBody: (body, signal) => streamPath(resolved, "/ai-gateway/mlflow/v1/responses", body, signal), model: options.model, - tools: options.tools ?? 
[], }); } diff --git a/packages/appkit/src/agents/tests/supervisor-api.test.ts b/packages/appkit/src/agents/tests/supervisor-api.test.ts index 9877808e4..f7c69ade8 100644 --- a/packages/appkit/src/agents/tests/supervisor-api.test.ts +++ b/packages/appkit/src/agents/tests/supervisor-api.test.ts @@ -2,7 +2,10 @@ import type { AgentEvent, AgentInput } from "shared"; import { afterEach, describe, expect, test, vi } from "vitest"; import { fromSupervisorApi, + isSupervisorTool, + SUPERVISOR_EXTENSION_KEY, SupervisorApiAdapter, + type SupervisorExtension, type SupervisorTool, supervisorTools, } from "../supervisor-api"; @@ -42,16 +45,28 @@ function makeStreamBody(chunks: string[]): { return { streamBody, lastBody: () => captured }; } -function createInput(): AgentInput { +function createInput(overrides: Partial<AgentInput> = {}): AgentInput { return { messages: [ { id: "1", role: "user", content: "Hello", createdAt: new Date() }, ], tools: [], threadId: "thread-1", + ...overrides, }; } +/** + * Convenience to build the `extensions` payload the agents plugin / runAgent + * produce, so tests don't have to repeat the key/shape boilerplate.
+ */ +function withSupervisorTools( + hostedTools: SupervisorTool[], +): Pick<AgentInput, "extensions"> { + const ext: SupervisorExtension = { hostedTools }; + return { extensions: { [SUPERVISOR_EXTENSION_KEY]: ext } }; +} + async function collect( gen: AsyncGenerator, ): Promise { @@ -61,75 +76,158 @@ async function collect( } describe("supervisorTools factories", () => { - test("genieSpace produces correct wire shape", () => { - expect(supervisorTools.genieSpace("space123", "NYC taxi data")).toEqual({ - type: "genie_space", - genie_space: { id: "space123", description: "NYC taxi data" }, + test("genieSpace returns a tagged record wrapping the wire spec", () => { + const tool = supervisorTools.genieSpace({ + id: "space123", + description: "NYC taxi data", + }); + expect(tool).toEqual({ + __kind: "hosted-supervisor", + spec: { + type: "genie_space", + genie_space: { id: "space123", description: "NYC taxi data" }, + }, }); }); - test("ucFunction produces correct wire shape", () => { - expect( - supervisorTools.ucFunction("main.default.add", "Adds two integers."), - ).toEqual({ - type: "uc_function", - uc_function: { - name: "main.default.add", - description: "Adds two integers.", + test("ucFunction returns a tagged record wrapping the wire spec", () => { + const tool = supervisorTools.ucFunction({ + name: "main.default.add", + description: "Adds two integers.", + }); + expect(tool).toEqual({ + __kind: "hosted-supervisor", + spec: { + type: "uc_function", + uc_function: { + name: "main.default.add", + description: "Adds two integers.", + }, }, }); }); - test("knowledgeAssistant maps id into knowledge_assistant_id", () => { - expect( - supervisorTools.knowledgeAssistant("ka-1", "Internal docs Q&A"), - ).toEqual({ - type: "knowledge_assistant", - knowledge_assistant: { - knowledge_assistant_id: "ka-1", - description: "Internal docs Q&A", + test("knowledgeAssistant maps knowledgeAssistantId into the wire field", () => { + const tool = supervisorTools.knowledgeAssistant({ + knowledgeAssistantId:
"ka-1", + description: "Internal docs Q&A", + }); + expect(tool).toEqual({ + __kind: "hosted-supervisor", + spec: { + type: "knowledge_assistant", + knowledge_assistant: { + knowledge_assistant_id: "ka-1", + description: "Internal docs Q&A", + }, }, }); }); - test("app produces correct wire shape", () => { - expect(supervisorTools.app("my-app", "Demo Databricks app.")).toEqual({ - type: "app", - app: { name: "my-app", description: "Demo Databricks app." }, + test("app returns a tagged record wrapping the wire spec", () => { + const tool = supervisorTools.app({ + name: "my-app", + description: "Demo Databricks app.", + }); + expect(tool).toEqual({ + __kind: "hosted-supervisor", + spec: { + type: "app", + app: { name: "my-app", description: "Demo Databricks app." }, + }, }); }); - test("ucConnection produces correct wire shape", () => { - expect( - supervisorTools.ucConnection("my-conn", "Connection to external DB."), - ).toEqual({ - type: "uc_connection", - uc_connection: { - name: "my-conn", - description: "Connection to external DB.", + test("ucConnection returns a tagged record wrapping the wire spec", () => { + const tool = supervisorTools.ucConnection({ + name: "my-conn", + description: "Connection to external DB.", + }); + expect(tool).toEqual({ + __kind: "hosted-supervisor", + spec: { + type: "uc_connection", + uc_connection: { + name: "my-conn", + description: "Connection to external DB.", + }, }, }); }); }); +describe("isSupervisorTool", () => { + test("accepts every supervisorTools.* factory output", () => { + expect( + isSupervisorTool( + supervisorTools.genieSpace({ id: "g", description: "d" }), + ), + ).toBe(true); + expect( + isSupervisorTool( + supervisorTools.ucFunction({ name: "main.x.y", description: "d" }), + ), + ).toBe(true); + expect( + isSupervisorTool( + supervisorTools.knowledgeAssistant({ + knowledgeAssistantId: "ka", + description: "d", + }), + ), + ).toBe(true); + expect( + isSupervisorTool(supervisorTools.app({ name: "a", description: 
"d" })), + ).toBe(true); + expect( + isSupervisorTool( + supervisorTools.ucConnection({ name: "c", description: "d" }), + ), + ).toBe(true); + }); + + test("rejects plain wire-format objects (no __kind tag)", () => { + const wireOnly: SupervisorTool = { + type: "genie_space", + genie_space: { id: "g", description: "d" }, + }; + expect(isSupervisorTool(wireOnly)).toBe(false); + }); + + test("rejects MCP hosted tools and other shapes", () => { + expect(isSupervisorTool({ type: "genie-space", genie_space: {} })).toBe( + false, + ); + expect(isSupervisorTool(null)).toBe(false); + expect(isSupervisorTool(undefined)).toBe(false); + expect(isSupervisorTool("hosted-supervisor")).toBe(false); + expect(isSupervisorTool({})).toBe(false); + expect(isSupervisorTool({ __kind: "function" })).toBe(false); + }); +}); + describe("SupervisorApiAdapter", () => { afterEach(() => { vi.restoreAllMocks(); }); - test("posts model, input, tools, and stream:true through streamBody", async () => { + test("declares capability negotiation fields (acceptsExtensions, consumesInputTools)", () => { + const adapter = new SupervisorApiAdapter({ + streamBody: vi.fn(), + model: "databricks-claude-sonnet-4", + }); + expect(adapter.acceptsExtensions).toEqual([SUPERVISOR_EXTENSION_KEY]); + expect(adapter.consumesInputTools).toBe(false); + }); + + test("posts model, input, and stream:true through streamBody", async () => { const { streamBody, lastBody } = makeStreamBody([ sseEvent("response.output_text.delta", { delta: "Hi" }), sseEvent("response.completed", {}), ]); - - const tools: SupervisorTool[] = [ - supervisorTools.genieSpace("g1", "Test genie space"), - ]; const adapter = new SupervisorApiAdapter({ streamBody, model: "databricks-claude-sonnet-4", - tools, }); await collect(adapter.run(createInput(), { executeTool: vi.fn() })); @@ -139,11 +237,12 @@ describe("SupervisorApiAdapter", () => { model: "databricks-claude-sonnet-4", input: "Hello", stream: true, - tools, }); + // No tools wired via 
extensions -> no `tools` field on the wire. + expect(lastBody()).not.toHaveProperty("tools"); }); - test("omits the tools field entirely when no tools are configured", async () => { + test("reads hosted tools from AgentInput.extensions and posts them in the request body", async () => { const { streamBody, lastBody } = makeStreamBody([ sseEvent("response.completed", {}), ]); @@ -151,7 +250,61 @@ describe("SupervisorApiAdapter", () => { streamBody, model: "databricks-claude-sonnet-4", }); - await collect(adapter.run(createInput(), { executeTool: vi.fn() })); + + const genie = supervisorTools.genieSpace({ + id: "g1", + description: "Test genie space", + }); + const uc = supervisorTools.ucFunction({ + name: "main.x.add", + description: "Adds two integers.", + }); + + await collect( + adapter.run(createInput(withSupervisorTools([genie.spec, uc.spec])), { + executeTool: vi.fn(), + }), + ); + + expect(lastBody()?.tools).toEqual([genie.spec, uc.spec]); + }); + + test("ignores extensions written under a different key (key namespacing)", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + + await collect( + adapter.run( + createInput({ + extensions: { + "other.namespace": { hostedTools: [{ type: "ignored" }] }, + }, + }), + { executeTool: vi.fn() }, + ), + ); + + expect(lastBody()).not.toHaveProperty("tools"); + }); + + test("omits the tools field entirely when extensions carry an empty hostedTools array", async () => { + const { streamBody, lastBody } = makeStreamBody([ + sseEvent("response.completed", {}), + ]); + const adapter = new SupervisorApiAdapter({ + streamBody, + model: "databricks-claude-sonnet-4", + }); + await collect( + adapter.run(createInput(withSupervisorTools([])), { + executeTool: vi.fn(), + }), + ); expect(lastBody()).not.toHaveProperty("tools"); }); @@ -819,3 +972,17 @@ 
describe("fromSupervisorApi", () => { expect(requestArgs.payload).not.toHaveProperty("tools"); }); }); + +describe("DatabricksAdapter.fromSupervisorApi", () => { + test("returns a SupervisorApiAdapter instance", async () => { + const { DatabricksAdapter } = await import("../databricks"); + const adapter = await DatabricksAdapter.fromSupervisorApi({ + model: "databricks-claude-sonnet-4", + workspaceClient: { + config: { ensureResolved: vi.fn(async () => {}) }, + apiClient: { request: vi.fn() }, + }, + }); + expect(adapter).toBeInstanceOf(SupervisorApiAdapter); + }); +}); diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts index 7ccc77c5b..74f7a5d21 100644 --- a/packages/appkit/src/beta.ts +++ b/packages/appkit/src/beta.ts @@ -20,12 +20,16 @@ export type { } from "shared"; export { DatabricksAdapter, parseTextToolCalls } from "./agents/databricks"; export type { + HostedSupervisorTool, SupervisorApiAdapterCtorOptions, SupervisorApiAdapterOptions, + SupervisorExtension, SupervisorTool, } from "./agents/supervisor-api"; export { fromSupervisorApi, + isSupervisorTool, + SUPERVISOR_EXTENSION_KEY, SupervisorApiAdapter, supervisorTools, } from "./agents/supervisor-api"; diff --git a/packages/appkit/src/core/agent/run-agent.ts b/packages/appkit/src/core/agent/run-agent.ts index 4dd4401ac..5675edd36 100644 --- a/packages/appkit/src/core/agent/run-agent.ts +++ b/packages/appkit/src/core/agent/run-agent.ts @@ -8,6 +8,12 @@ import type { PluginData, ToolProvider, } from "shared"; +import { + isSupervisorTool, + SUPERVISOR_EXTENSION_KEY, + type SupervisorTool, +} from "../../agents/supervisor-api"; +import { createLogger } from "../../logging/logger"; import { consumeAdapterStream } from "./consume-adapter-stream"; import { createPluginsProxy } from "./plugins-map"; import { resolveToolkitFromProvider } from "./toolkit-resolver"; @@ -26,6 +32,8 @@ import type { } from "./types"; import { isToolkitEntry } from "./types"; +const logger = 
createLogger("agent:run-agent"); + export interface RunAgentInput { /** Seed messages for the run. Either a single user string or a full message list. */ messages: string | Message[]; @@ -102,7 +110,13 @@ async function runAgentInternal( input.plugins ?? [], providerCache, ); - const tools = Array.from(toolIndex.values()).map((e) => e.def); + // Hosted-supervisor entries are routed via `extensions`, not as callable + // tools — exclude their placeholder `def` from the wire `tools` array. + const tools = Array.from(toolIndex.values()) + .filter((e) => e.kind !== "hosted-supervisor") + .map((e) => e.def); + + warnOnCapabilityMismatch(def.name ?? "", adapter, toolIndex); const signal = input.signal; @@ -139,6 +153,15 @@ async function runAgentInternal( ); return res.text; } + if (entry.kind === "hosted-supervisor") { + // Defense-in-depth: should never fire. The placeholder def is + // filtered out of `tools` above, so the model never sees a callable + // schema for hosted-supervisor entries. If we ever reach here, the + // model was somehow handed the def and tried to invoke it directly. + throw new Error( + `runAgent: tool "${name}" is a hosted-supervisor tool, executed server-side by the Databricks AI Gateway. It must not be invoked from the Node process.`, + ); + } throw new Error( `runAgent: tool "${name}" is a ${entry.kind} tool. ` + "Hosted/MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", @@ -153,6 +176,7 @@ async function runAgentInternal( tools, threadId: randomUUID(), signal, + extensions: buildStandaloneExtensions(toolIndex), }, { executeTool, signal }, ); @@ -295,6 +319,19 @@ type StandaloneEntry = | { kind: "hosted"; def: AgentToolDefinition; + } + | { + /** + * Adapter-side hosted tool. 
Standalone `runAgent` accepts these + * (unlike MCP hosted tools, which need a live MCP client) because + * the adapter has everything it needs to execute them server-side: + * the spec travels via `AgentInput.extensions` and the SA endpoint + * runs the tool loop. Enables batch-eval / CI use of supervisor + * agents without `createApp`. + */ + kind: "hosted-supervisor"; + def: AgentToolDefinition; + spec: SupervisorTool; }; /** @@ -420,6 +457,23 @@ function classifyTool( def: { ...functionToolToDefinition(tool), name: key }, }; } + // Supervisor-API hosted tools work in standalone mode: the adapter + // executes them server-side via `AgentInput.extensions`, no MCP client + // required. Must come BEFORE the `isHostedTool` MCP rejection — the two + // predicates classify disjoint values (`isSupervisorTool` matches the + // `__kind` tag; `isHostedTool` matches the wire-format `type` field), + // but the placement makes the intent explicit. + if (isSupervisorTool(tool)) { + return { + kind: "hosted-supervisor", + spec: tool.spec, + def: { + name: key, + description: supervisorToolDescription(tool.spec), + parameters: { type: "object", properties: {} }, + }, + }; + } if (isHostedTool(tool)) { // Hosted tools (e.g. MCP `mcpServer(...)`) need a live MCP client that // only exists inside the agents plugin's lifecycle. In standalone @@ -433,6 +487,79 @@ function classifyTool( throw new Error(`runAgent: unrecognized tool shape at key "${key}"`); } +/** Mirrors `agents.ts`'s `supervisorToolDescription`. */ +function supervisorToolDescription(spec: SupervisorTool): string { + switch (spec.type) { + case "genie_space": + return spec.genie_space.description; + case "uc_function": + return spec.uc_function.description; + case "knowledge_assistant": + return spec.knowledge_assistant.description; + case "app": + return spec.app.description; + case "uc_connection": + return spec.uc_connection.description; + } +} + +/** Mirrors `agents.ts`'s `buildAdapterExtensions`. 
*/ +function buildStandaloneExtensions( + toolIndex: Map, +): Readonly> | undefined { + const supervisorSpecs: SupervisorTool[] = []; + for (const entry of toolIndex.values()) { + if (entry.kind === "hosted-supervisor") { + supervisorSpecs.push(entry.spec); + } + } + if (supervisorSpecs.length === 0) return undefined; + return { + [SUPERVISOR_EXTENSION_KEY]: { hostedTools: supervisorSpecs }, + }; +} + +/** + * Mirrors the agents-plugin capability warning so standalone `runAgent` + * produces the same diagnostic when adapter capabilities don't match the + * tool index. Warn-not-throw: doesn't abort batch evals. + */ +function warnOnCapabilityMismatch( + agentName: string, + adapter: AgentAdapter, + toolIndex: Map, +): void { + const accepted = new Set(adapter.acceptsExtensions ?? []); + + const hostedSupervisorKeys: string[] = []; + const inputToolKeys: string[] = []; + for (const [key, entry] of toolIndex) { + if (entry.kind === "hosted-supervisor") { + hostedSupervisorKeys.push(key); + } else { + inputToolKeys.push(key); + } + } + + if ( + hostedSupervisorKeys.length > 0 && + !accepted.has(SUPERVISOR_EXTENSION_KEY) + ) { + logger.warn( + `Agent '${agentName}' declares hosted-supervisor tools (${hostedSupervisorKeys.join(", ")}) ` + + "but its model adapter does not accept the 'databricks.supervisor' extension. " + + "Pair them with `DatabricksAdapter.fromSupervisorApi(...)`, or remove them.", + ); + } + + if (adapter.consumesInputTools === false && inputToolKeys.length > 0) { + logger.warn( + `Agent '${agentName}' declares function tools / sub-agents (${inputToolKeys.join(", ")}) ` + + "but its model adapter does not consume input.tools. 
These tools will not be exposed to the model.", + ); + } +} + function providerCacheLookup( pluginName: string, cache: Map, diff --git a/packages/appkit/src/core/agent/tests/run-agent.test.ts b/packages/appkit/src/core/agent/tests/run-agent.test.ts index 4d60a8c96..efd3e4202 100644 --- a/packages/appkit/src/core/agent/tests/run-agent.test.ts +++ b/packages/appkit/src/core/agent/tests/run-agent.test.ts @@ -434,4 +434,119 @@ describe("runAgent", () => { // Both parent and child reported the same instance id. expect(result.text).toBe("parent-id=1;child-id=1"); }); + + test("hosted-supervisor tools are routed via AgentInput.extensions and filtered out of input.tools", async () => { + // Standalone runAgent must accept Supervisor-API hosted tools — unlike + // MCP hosted tools (which need a live MCP client). The tagged record + // gets classified as `hosted-supervisor`; its placeholder def is kept + // out of `input.tools` (the spec doesn't expose a callable function) + // and the spec is routed via `input.extensions[SUPERVISOR_EXTENSION_KEY]`. 
+ const { supervisorTools, SUPERVISOR_EXTENSION_KEY } = await import( + "../../../agents/supervisor-api" + ); + + let captured: AgentInput | null = null; + const adapter: AgentAdapter = { + acceptsExtensions: [SUPERVISOR_EXTENSION_KEY], + consumesInputTools: false, + async *run(input, _context) { + captured = input; + yield { type: "message_delta", content: "ok" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { + nyc: supervisorTools.genieSpace({ + id: "01ABC", + description: "NYC taxi", + }), + }, + }); + + const result = await runAgent(def, { messages: "hi" }); + expect(result.text).toBe("ok"); + + expect(captured).not.toBeNull(); + // biome-ignore lint/style/noNonNullAssertion: asserted above + const inp = captured!; + expect(inp.tools).toEqual([]); + expect(inp.extensions?.[SUPERVISOR_EXTENSION_KEY]).toEqual({ + hostedTools: [ + { + type: "genie_space", + genie_space: { id: "01ABC", description: "NYC taxi" }, + }, + ], + }); + }); + + test("warns when hosted-supervisor tools are paired with an adapter that does not accept the extension", async () => { + const { supervisorTools } = await import("../../../agents/supervisor-api"); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + + const adapter: AgentAdapter = { + // No `acceptsExtensions` declared. 
+ async *run(_input, _context) { + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + name: "mismatched", + instructions: "x", + model: adapter, + tools: { + nyc: supervisorTools.genieSpace({ + id: "01ABC", + description: "NYC taxi", + }), + }, + }); + + await runAgent(def, { messages: "hi" }); + + const warning = warnSpy.mock.calls + .map((args) => args.join(" ")) + .find((s) => s.includes("hosted-supervisor")); + expect(warning).toBeTruthy(); + expect(warning).toContain("'mismatched'"); + warnSpy.mockRestore(); + }); + + test("warns when function tools are paired with an adapter that opts out of input.tools", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + + const adapter: AgentAdapter = { + consumesInputTools: false, + async *run(_input, _context) { + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + name: "leaky", + instructions: "x", + model: adapter, + tools: { + get_weather: tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: async () => "sunny", + }), + }, + }); + + await runAgent(def, { messages: "hi" }); + + const warning = warnSpy.mock.calls + .map((args) => args.join(" ")) + .find((s) => s.includes("does not consume input.tools")); + expect(warning).toBeTruthy(); + expect(warning).toContain("'leaky'"); + warnSpy.mockRestore(); + }); }); diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts index cf47845f7..eccac82b4 100644 --- a/packages/appkit/src/core/agent/types.ts +++ b/packages/appkit/src/core/agent/types.ts @@ -32,10 +32,15 @@ export interface ToolkitEntry { /** * Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP - * tools (`mcpServer()` / raw hosted), or toolkit references from plugins - * (`analytics().toolkit()`). 
+ * tools (`mcpServer()` / raw hosted), toolkit references from plugins + * (`analytics().toolkit()`), or adapter-hosted Supervisor-API tools + * (`supervisorTools.*`). */ -export type AgentTool = FunctionTool | HostedTool | ToolkitEntry; +export type AgentTool = + | FunctionTool + | HostedTool + | ToolkitEntry + | import("../../agents/supervisor-api").HostedSupervisorTool; export interface ToolkitOptions { /** Key prefix to prepend to each tool's local name. Defaults to `${pluginName}.`. */ @@ -299,6 +304,21 @@ export type ResolvedToolEntry = source: "subagent"; agentName: string; def: AgentToolDefinition; + } + | { + /** + * Adapter-side hosted tool (executed by the model-host, not by the + * Node process). Today: Supervisor API hosted tools (Genie spaces, + * UC functions, etc.). The `spec` is opaque to the agents plugin — + * it routes the entry into `AgentInput.extensions` for the adapter + * that declared the matching `acceptsExtensions` key. `def` is a + * synthetic placeholder kept so the index has a uniform shape; it + * is intentionally NOT included in the `tools` array passed to + * `adapter.run()` (those entries are not callable functions). 
+ */ + source: "hosted-supervisor"; + spec: import("../../agents/supervisor-api").SupervisorTool; + def: AgentToolDefinition; }; export interface RegisteredAgent { diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 40217e54e..a690ece05 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -15,6 +15,11 @@ import type { ToolAnnotations, ToolProvider, } from "shared"; +import { + isSupervisorTool, + SUPERVISOR_EXTENSION_KEY, + type SupervisorTool, +} from "../../agents/supervisor-api"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; import { getWorkspaceClient } from "../../context"; import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; @@ -437,6 +442,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { const adapter = await this.resolveAdapter(def, name); const toolIndex = await this.buildToolIndex(name, def, src); + warnOnCapabilityMismatch(name, adapter, toolIndex); + return { name, instructions: def.instructions, @@ -556,6 +563,22 @@ export class AgentsPlugin extends Plugin implements ToolProvider { }); continue; } + if (isSupervisorTool(tool)) { + index.set(key, { + source: "hosted-supervisor", + spec: tool.spec, + def: { + // `def` is a placeholder so the index has a uniform shape; it + // is intentionally not passed to the adapter's `tools` array + // (the SA endpoint owns its own tool execution and would + // reject our synthetic schema). 
+ name: key, + description: supervisorToolDescription(tool.spec), + parameters: { type: "object", properties: {} }, + }, + }); + continue; + } if (isHostedTool(tool)) { hostedToCollect.push(tool); continue; @@ -977,7 +1000,14 @@ export class AgentsPlugin extends Plugin implements ToolProvider { const requestId = randomUUID(); this.trackStream(requestId, userId, abortController); - const tools = Array.from(registered.toolIndex.values()).map((e) => e.def); + // `hosted-supervisor` entries are not callable from the Node process + // (the SA endpoint executes them server-side). Their `def` is a + // placeholder; including it in the adapter's `tools` array would + // make the SA endpoint reject the request with a schema mismatch. + // The hosted-tool specs are routed via `AgentInput.extensions` below. + const tools = Array.from(registered.toolIndex.values()) + .filter((e) => e.source !== "hosted-supervisor") + .map((e) => e.def); const approvalPolicy = this.resolvedApprovalPolicy; const limits = this.resolvedLimits; const outboundEvents = new EventChannel(); @@ -1047,6 +1077,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { tools, threadId: thread.id, signal, + extensions: buildAdapterExtensions(registered.toolIndex), }, { executeTool, signal }, ); @@ -1224,6 +1255,18 @@ export class AgentsPlugin extends Plugin implements ToolProvider { if (!childAgent) throw new Error(`Sub-agent not found: ${entry.agentName}`); result = await this.runSubAgent(runState, childAgent, args, depth + 1); + } else if (entry.source === "hosted-supervisor") { + // Defense-in-depth: should never fire. Hosted-supervisor entries are + // routed via `AgentInput.extensions` and the SA endpoint executes + // them server-side; their `def` is filtered out of the adapter's + // `tools` array, so the model never sees a callable schema for them. 
+ // If we reach here, the agent is paired with a non-SA adapter that + // somehow surfaced the placeholder def to the model — surface a + // clear error rather than crash later in `normalizeToolResult`. + throw new Error( + `Tool '${name}' is a hosted-supervisor tool and cannot be invoked from the Node process. ` + + "It is executed server-side by the Databricks AI Gateway and is only reachable when the agent's model is a Supervisor API adapter.", + ); } return normalizeToolResult(result); @@ -1262,7 +1305,12 @@ export class AgentsPlugin extends Plugin implements ToolProvider { typeof (args as { input?: unknown }).input === "string" ? (args as { input: string }).input : JSON.stringify(args); - const childTools = Array.from(child.toolIndex.values()).map((e) => e.def); + // Same filter as the top-level path: hosted-supervisor `def` is a + // placeholder, not a callable function — exclude from the adapter's + // `tools` array. The specs are routed via `extensions` instead. + const childTools = Array.from(child.toolIndex.values()) + .filter((e) => e.source !== "hosted-supervisor") + .map((e) => e.def); const childExecute = (name: string, childArgs: unknown): Promise => this.dispatchToolCall(runState, child.toolIndex, name, childArgs, depth); @@ -1309,6 +1357,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { tools: childTools, threadId: randomUUID(), signal: runState.signal, + extensions: buildAdapterExtensions(child.toolIndex), }, runContext, ), @@ -1525,6 +1574,93 @@ function composePromptForAgent( return composeSystemPrompt(base, registered.instructions); } +/** + * Pulls the LLM-readable description off any {@link SupervisorTool} kind. + * Used to populate the synthetic placeholder `def.description` on + * hosted-supervisor tool-index entries. 
+ */ +function supervisorToolDescription(spec: SupervisorTool): string { + switch (spec.type) { + case "genie_space": + return spec.genie_space.description; + case "uc_function": + return spec.uc_function.description; + case "knowledge_assistant": + return spec.knowledge_assistant.description; + case "app": + return spec.app.description; + case "uc_connection": + return spec.uc_connection.description; + } +} + +/** + * Builds the `AgentInput.extensions` payload from a tool index, aggregating + * the hosted-supervisor specs under {@link SUPERVISOR_EXTENSION_KEY}. Returns + * `undefined` when there are no adapter-side hosted tools so the field stays + * absent on the wire — adapters that don't read extensions never see it. + */ +function buildAdapterExtensions( + toolIndex: Map, +): Readonly> | undefined { + const supervisorSpecs: SupervisorTool[] = []; + for (const entry of toolIndex.values()) { + if (entry.source === "hosted-supervisor") { + supervisorSpecs.push(entry.spec); + } + } + if (supervisorSpecs.length === 0) return undefined; + return { + [SUPERVISOR_EXTENSION_KEY]: { hostedTools: supervisorSpecs }, + }; +} + +/** + * Compares the adapter's declared capabilities against the tool index and + * logs a warning when the agent's tool declarations would be silently + * dropped at runtime. Warn-not-throw: misconfiguration is loud enough to + * notice without taking the whole app down. + */ +function warnOnCapabilityMismatch( + agentName: string, + adapter: AgentAdapter, + toolIndex: Map, +): void { + const accepted = new Set(adapter.acceptsExtensions ?? 
[]); + + const hostedSupervisorKeys: string[] = []; + const inputToolKeys: string[] = []; + for (const [key, entry] of toolIndex) { + if (entry.source === "hosted-supervisor") { + hostedSupervisorKeys.push(key); + } else { + inputToolKeys.push(key); + } + } + + if ( + hostedSupervisorKeys.length > 0 && + !accepted.has(SUPERVISOR_EXTENSION_KEY) + ) { + logger.warn( + `Agent '${agentName}' declares hosted-supervisor tools (${hostedSupervisorKeys.join(", ")}) ` + + "but its model adapter does not accept the 'databricks.supervisor' extension. " + + "These tools will not reach the model. Pair them with `DatabricksAdapter.fromSupervisorApi(...)`, or remove them.", + ); + } + + // `consumesInputTools` defaults to true. Only warn when an adapter + // explicitly opts out (`false`) and an input tool would be silently + // ignored. + if (adapter.consumesInputTools === false && inputToolKeys.length > 0) { + logger.warn( + `Agent '${agentName}' declares function tools / sub-agents / MCP tools (${inputToolKeys.join(", ")}) ` + + "but its model adapter does not consume input.tools (Supervisor API owns its own tool loop). " + + "These tools will not be exposed to the model. See docs/plugins/agents.md.", + ); + } +} + /** * Plugin factory for the agents plugin. 
Reads `config/agents/*.md` by default, * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts index c654e477f..7f6b453b4 100644 --- a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts +++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts @@ -665,4 +665,128 @@ describe("AgentsPlugin", () => { expect(toolsFn).toHaveBeenCalledTimes(1); }); }); + + describe("hosted-supervisor tools and capability negotiation", () => { + test("indexes supervisorTools.* entries with source 'hosted-supervisor'", async () => { + const { supervisorTools, SUPERVISOR_EXTENSION_KEY } = await import( + "../../../agents/supervisor-api" + ); + const ctx = fakeContext([]); + + const saAdapter: AgentAdapter = { + acceptsExtensions: [SUPERVISOR_EXTENSION_KEY], + consumesInputTools: false, + async *run(_input, _ctx) { + yield { type: "message_delta", content: "" }; + }, + }; + + const plugin = instantiate( + { + dir: false, + agents: { + assistant: { + instructions: "x", + model: saAdapter, + tools: { + nyc: supervisorTools.genieSpace({ + id: "01ABC", + description: "NYC taxi", + }), + }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + // biome-ignore lint/suspicious/noExplicitAny: structural test access + get: (name: string) => any; + }; + const entry = api.get("assistant").toolIndex.get("nyc"); + expect(entry.source).toBe("hosted-supervisor"); + expect(entry.spec).toEqual({ + type: "genie_space", + genie_space: { id: "01ABC", description: "NYC taxi" }, + }); + }); + + test("warns at setup when hosted-supervisor tools paired with non-supervisor adapter", async () => { + const { supervisorTools } = await import( + "../../../agents/supervisor-api" + ); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const ctx = fakeContext([]); + + const plugin 
= instantiate( + { + dir: false, + agents: { + mismatched: { + instructions: "x", + model: stubAdapter(), // does NOT declare acceptsExtensions + tools: { + nyc: supervisorTools.genieSpace({ + id: "01ABC", + description: "NYC taxi", + }), + }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const warning = warnSpy.mock.calls + .map((args) => args.join(" ")) + .find((s) => s.includes("hosted-supervisor")); + expect(warning).toBeTruthy(); + expect(warning).toContain("'mismatched'"); + warnSpy.mockRestore(); + }); + + test("warns at setup when function tools paired with consumesInputTools:false adapter", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const ctx = fakeContext([]); + + const saLikeAdapter: AgentAdapter = { + consumesInputTools: false, + async *run(_input, _ctx) { + yield { type: "message_delta", content: "" }; + }, + }; + + const plugin = instantiate( + { + dir: false, + agents: { + leaky: { + instructions: "x", + model: saLikeAdapter, + tools: { + weather: tool({ + name: "weather", + description: "w", + schema: z.object({ city: z.string() }), + execute: async () => "sunny", + }), + }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const warning = warnSpy.mock.calls + .map((args) => args.join(" ")) + .find((s) => s.includes("does not consume input.tools")); + expect(warning).toBeTruthy(); + expect(warning).toContain("'leaky'"); + warnSpy.mockRestore(); + }); + }); }); diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts index 6486b1b29..5ec2caf35 100644 --- a/packages/shared/src/agent.ts +++ b/packages/shared/src/agent.ts @@ -275,6 +275,17 @@ export interface AgentInput { tools: AgentToolDefinition[]; threadId: string; signal?: AbortSignal; + /** + * Adapter-specific opaque payloads, keyed by adapter namespace. The + * shared contract intentionally does not enumerate keys — see each + * adapter's docs for which keys it reads and the shape of each value. 
+ * + * The agents plugin and standalone `runAgent` populate this from the + * agent's tool index when entries declare an adapter-side spec (e.g. + * Supervisor API hosted tools). Adapters that don't read extensions + * should leave it untouched. + */ + extensions?: Readonly>; } export interface AgentRunContext { @@ -288,4 +299,23 @@ export interface AgentAdapter { input: AgentInput, context: AgentRunContext, ): AsyncGenerator; + + /** + * Extension keys this adapter consumes from {@link AgentInput.extensions}. + * The agents plugin (and standalone `runAgent`) warns at registration + * if the tool index produces extensions whose keys aren't listed here. + * + * Adapters that don't read extensions can omit this field. + */ + readonly acceptsExtensions?: readonly string[]; + + /** + * Whether the adapter consumes tools from `input.tools`. Defaults to + * true. Adapters whose tool execution happens elsewhere (e.g. the + * Supervisor API, where SA owns the tool loop server-side) declare + * false; the agents plugin warns at registration if the agent declares + * function tools or local sub-agents alongside such an adapter, since + * those tools would never reach the model. + */ + readonly consumesInputTools?: boolean; }