fixed tool calling in playground and added playwright test (tensorzero#3032)

virajmehta · github-actions[bot] · web-flow · commit 9e3b117c7c93 · 2025-08-08T14:08:38.000Z
* fixed tool calling in playground and added playwright test

* Regenerate ModelInferenceCache fixtures

* added extra inference options code

* removed console log

* Regenerate ModelInferenceCache fixtures

---------

Co-authored-by: TensorZero Bot <github-actions[bot]@users.noreply.github.com>
diff --git a/ui/app/routes/api/tensorzero/inference.ts b/ui/app/routes/api/tensorzero/inference.ts
@@ -25,7 +25,6 @@ export async function action({ request }: Route.ActionArgs): Promise<Response> {
         { status: 400 },
       );
     }
-
     if (isTensorZeroServerError(error)) {
       return Response.json({ error: error.message }, { status: error.status });
     }
diff --git a/ui/app/routes/api/tensorzero/inference.utils.tsx b/ui/app/routes/api/tensorzero/inference.utils.tsx
@@ -7,7 +7,9 @@ import type {
   ClientInput,
   ClientInputMessage,
   ClientInputMessageContent,
+  FunctionConfig,
   JsonValue,
+  Tool,
 } from "tensorzero-node";
 import type {
   InputMessageContent as TensorZeroContent,
@@ -292,6 +294,7 @@ interface ClickHouseDatapointActionArgs {
   variant: string;
   cache_options: CacheParamsOptions;
   dryrun: boolean;
+  functionConfig: FunctionConfig;
 }
 
 export function prepareInferenceActionRequest(
@@ -351,7 +354,12 @@ export function prepareInferenceActionRequest(
     // Extract tool parameters from the ClickHouse datapoint args
     const tool_choice = args.tool_params?.tool_choice;
     const parallel_tool_calls = args.tool_params?.parallel_tool_calls;
-    const tools_available = args.tool_params?.tools_available;
+    const additional_tools = args.tool_params?.tools_available
+      ? subtractStaticToolsFromInferenceInput(
+          args.tool_params?.tools_available,
+          args.functionConfig,
+        )
+      : null;
 
     return {
       ...baseParams,
@@ -362,7 +370,7 @@ export function prepareInferenceActionRequest(
       tool_choice: tool_choice || null,
       dryrun: true,
       parallel_tool_calls: parallel_tool_calls || null,
-      additional_tools: tools_available || null,
+      additional_tools,
       cache_options: args.cache_options,
     };
   } else {
@@ -589,3 +597,26 @@ function resolvedFileContentToClientFile(
     data: data,
   };
 }
+
+/**
+ * Drops tools the function already configures from a stored tool list.
+ *
+ * Inferences and datapoints store the full tool config the model saw or could
+ * have seen. TensorZero automatically uses the currently configured tools and
+ * errors on duplicated tool names, so only the remainder is sent dynamically.
+ *
+ * @param datapointTools - Tools stored with the inference or datapoint.
+ * @param functionConfig - Config whose `tools` count as already configured.
+ * @returns `datapointTools` itself for JSON functions; otherwise a new array
+ *   of the tools whose name is absent from `functionConfig.tools`.
+ */
+function subtractStaticToolsFromInferenceInput(
+  datapointTools: Tool[],
+  functionConfig: FunctionConfig,
+): Tool[] {
+  if (functionConfig.type === "json") {
+    return datapointTools;
+  }
+  const staticToolNames = functionConfig.tools;
+  return datapointTools.filter(({ name }) => !staticToolNames.includes(name));
+}
diff --git a/ui/app/routes/playground/DatapointPlaygroundOutput.tsx b/ui/app/routes/playground/DatapointPlaygroundOutput.tsx
@@ -7,7 +7,11 @@ import { Button } from "~/components/ui/button";
 import { CodeEditor } from "~/components/ui/code-editor";
 import { refreshClientInference } from "./utils";
 import type { DisplayInput } from "~/utils/clickhouse/common";
-import type { Datapoint, InferenceResponse } from "tensorzero-node";
+import type {
+  Datapoint,
+  FunctionConfig,
+  InferenceResponse,
+} from "tensorzero-node";
 
 interface DatapointPlaygroundOutputProps {
   datapoint: Datapoint;
@@ -21,6 +25,7 @@ interface DatapointPlaygroundOutputProps {
   ) => void;
   input: DisplayInput;
   functionName: string;
+  functionConfig: FunctionConfig;
 }
 const DatapointPlaygroundOutput = memo(
   function DatapointPlaygroundOutput({
@@ -31,6 +36,7 @@ const DatapointPlaygroundOutput = memo(
     input,
     functionName,
     isLoading,
+    functionConfig,
   }: DatapointPlaygroundOutputProps) {
     const loadingIndicator = (
       <div className="flex min-h-[8rem] items-center justify-center">
@@ -49,6 +55,7 @@ const DatapointPlaygroundOutput = memo(
             datapoint,
             variantName,
             functionName,
+            functionConfig,
           );
         }}
       >
diff --git a/ui/app/routes/playground/route.tsx b/ui/app/routes/playground/route.tsx
@@ -21,7 +21,10 @@ import {
 } from "~/routes/api/tensorzero/inference.utils";
 import { resolveInput } from "~/utils/resolve.server";
 import { X } from "lucide-react";
-import type { Datapoint as TensorZeroDatapoint } from "tensorzero-node";
+import type {
+  FunctionConfig,
+  Datapoint as TensorZeroDatapoint,
+} from "tensorzero-node";
 import type { DisplayInput } from "~/utils/clickhouse/common";
 import { useCallback, useEffect, useMemo, useState } from "react";
 import { Button } from "~/components/ui/button";
@@ -148,26 +151,30 @@ export async function loader({ request }: Route.LoaderArgs) {
     datapoint: TensorZeroDatapoint,
     functionName: string,
     variantName: string,
+    functionConfig: FunctionConfig,
   ) => {
-    const request = prepareInferenceActionRequest({
-      source: "clickhouse_datapoint",
-      input,
-      functionName,
-      variant: variantName,
-      tool_params:
-        datapoint?.type === "chat"
-          ? (datapoint.tool_params ?? undefined)
-          : undefined,
-      output_schema:
-        datapoint?.type === "json" ? datapoint.output_schema : null,
-      // The default is write_only but we do off in the playground
-      cache_options: {
-        max_age_s: null,
-        enabled: "off",
-      },
-      dryrun: true,
+    const request = {
+      ...prepareInferenceActionRequest({
+        source: "clickhouse_datapoint",
+        input,
+        functionName,
+        variant: variantName,
+        tool_params:
+          datapoint?.type === "chat"
+            ? (datapoint.tool_params ?? undefined)
+            : undefined,
+        output_schema:
+          datapoint?.type === "json" ? datapoint.output_schema : null,
+        // The default is write_only but we do off in the playground
+        cache_options: {
+          max_age_s: null,
+          enabled: "off",
+        },
+        dryrun: true,
+        functionConfig,
+      }),
       ...getExtraInferenceOptions(),
-    });
+    };
     const nativeClient = await getNativeTensorZeroClient();
     const inferenceResponse = await nativeClient.inference(request);
     return inferenceResponse;
@@ -183,7 +190,7 @@ export async function loader({ request }: Route.LoaderArgs) {
   for (const variant of selectedVariants) {
     serverInferences.set(variant, new Map());
   }
-  if (datapoints && inputs && functionName) {
+  if (datapoints && inputs && functionName && functionConfig) {
     for (let index = 0; index < datapoints.length; index++) {
       const datapoint = datapoints[index];
       const input = inputs[index];
@@ -192,7 +199,13 @@ export async function loader({ request }: Route.LoaderArgs) {
           .get(variant)
           ?.set(
             datapoint.id,
-            serverInference(input, datapoint, functionName, variant),
+            serverInference(
+              input,
+              datapoint,
+              functionName,
+              variant,
+              functionConfig,
+            ),
           );
       }
     }
@@ -248,20 +261,21 @@ export default function PlaygroundPage({ loaderData }: Route.ComponentProps) {
     offset,
     limit,
   } = loaderData;
+  const functionConfig = useFunctionConfig(functionName);
+  if (functionName && !functionConfig) {
+    throw data(`Function config not found for function ${functionName}`, {
+      status: 404,
+    });
+  }
   const { map, setPromise } = useClientInferences(
     functionName,
     datapoints,
     inputs,
     selectedVariants,
     serverInferences,
+    functionConfig,
   );
 
-  const functionConfig = useFunctionConfig(functionName);
-  if (functionName && !functionConfig) {
-    throw data(`Function config not found for function ${functionName}`, {
-      status: 404,
-    });
-  }
   const variants = functionConfig?.variants ?? undefined;
   const variantData = variants
     ? Object.entries(variants).map(([variantName]) => ({
@@ -334,7 +348,8 @@ export default function PlaygroundPage({ loaderData }: Route.ComponentProps) {
         datapoints.length > 0 &&
         datasetName &&
         inputs &&
-        functionName && (
+        functionName &&
+        functionConfig && (
           <>
             <div className="overflow-x-auto rounded border">
               <div className="min-w-fit">
@@ -427,6 +442,7 @@ export default function PlaygroundPage({ loaderData }: Route.ComponentProps) {
                                 setPromise={setPromise}
                                 input={inputs[index]}
                                 functionName={functionName}
+                                functionConfig={functionConfig}
                               />
                             </div>
                           );
@@ -483,13 +499,14 @@ function useClientInferences(
   inputs: DisplayInput[] | undefined,
   selectedVariants: string[],
   serverInferences: NestedPromiseMap<InferenceResponse>,
+  functionConfig: FunctionConfig | null,
 ) {
   const { map, setPromise, setMap } =
     useNestedPromiseMap<InferenceResponse>(serverInferences);
 
   // Single combined effect to handle both server inferences and client inferences
   useEffect(() => {
-    if (!functionName || !datapoints || !inputs) return;
+    if (!functionName || !datapoints || !inputs || !functionConfig) return;
 
     // First check if we need any updates
     let needsUpdate = false;
@@ -529,24 +546,28 @@ function useClientInferences(
           newMap.set(variant, variantMap);
         }
 
-        const request = prepareInferenceActionRequest({
-          source: "clickhouse_datapoint",
-          input,
-          functionName,
-          variant: variant,
-          tool_params:
-            datapoint?.type === "chat"
-              ? (datapoint.tool_params ?? undefined)
-              : undefined,
-          output_schema:
-            datapoint?.type === "json" ? datapoint.output_schema : null,
-          // The default is write_only but we do off in the playground
-          cache_options: {
-            max_age_s: null,
-            enabled: "off",
-          },
-          dryrun: true,
-        });
+        const request = {
+          ...prepareInferenceActionRequest({
+            source: "clickhouse_datapoint",
+            input,
+            functionName,
+            variant: variant,
+            tool_params:
+              datapoint?.type === "chat"
+                ? (datapoint.tool_params ?? undefined)
+                : undefined,
+            output_schema:
+              datapoint?.type === "json" ? datapoint.output_schema : null,
+            // The default is write_only but we do off in the playground
+            cache_options: {
+              max_age_s: null,
+              enabled: "off",
+            },
+            dryrun: true,
+            functionConfig,
+          }),
+          ...getExtraInferenceOptions(),
+        };
         const formData = new FormData();
         formData.append("data", JSON.stringify(request));
         const responsePromise = fetch("/api/tensorzero/inference", {
@@ -564,7 +585,14 @@ function useClientInferences(
 
       return newMap;
     });
-  }, [functionName, datapoints, inputs, selectedVariants, setMap]);
+  }, [
+    functionName,
+    datapoints,
+    inputs,
+    selectedVariants,
+    setMap,
+    functionConfig,
+  ]);
 
   return { map, setPromise, setMap };
 }
diff --git a/ui/app/routes/playground/utils.ts b/ui/app/routes/playground/utils.ts
@@ -2,8 +2,10 @@ import type { DisplayInput } from "~/utils/clickhouse/common";
 import type {
   Datapoint as TensorZeroDatapoint,
   InferenceResponse,
+  FunctionConfig,
 } from "tensorzero-node";
 import { prepareInferenceActionRequest } from "../api/tensorzero/inference.utils";
+import { getExtraInferenceOptions } from "~/utils/feature_flags";
 
 export function refreshClientInference(
   setPromise: (
@@ -15,23 +17,29 @@ export function refreshClientInference(
   datapoint: TensorZeroDatapoint,
   variantName: string,
   functionName: string,
+  functionConfig: FunctionConfig,
 ) {
-  const request = prepareInferenceActionRequest({
-    source: "clickhouse_datapoint",
-    input,
-    functionName,
-    variant: variantName,
-    tool_params:
-      datapoint?.type === "chat"
-        ? (datapoint.tool_params ?? undefined)
-        : undefined,
-    output_schema: datapoint?.type === "json" ? datapoint.output_schema : null,
-    cache_options: {
-      max_age_s: null,
-      enabled: "off",
-    },
-    dryrun: true,
-  });
+  const request = {
+    ...prepareInferenceActionRequest({
+      source: "clickhouse_datapoint",
+      input,
+      functionName,
+      variant: variantName,
+      tool_params:
+        datapoint?.type === "chat"
+          ? (datapoint.tool_params ?? undefined)
+          : undefined,
+      output_schema:
+        datapoint?.type === "json" ? datapoint.output_schema : null,
+      cache_options: {
+        max_age_s: null,
+        enabled: "off",
+      },
+      dryrun: true,
+      functionConfig,
+    }),
+    ...getExtraInferenceOptions(),
+  };
   // The API endpoint takes form data so we need to stringify it and send as data
   const formData = new FormData();
   formData.append("data", JSON.stringify(request));
diff --git a/ui/app/utils/clickhouse/inference.ts b/ui/app/utils/clickhouse/inference.ts
@@ -11,6 +11,7 @@ import {
 import type {
   JsonInferenceOutput,
   ContentBlockChatOutput,
+  Tool,
 } from "tensorzero-node";
 
 // Zod schemas for ToolCallConfigDatabaseInsert
@@ -19,7 +20,7 @@ export const toolSchema = z.object({
   parameters: JsonValueSchema,
   name: z.string(),
   strict: z.boolean(),
-});
+}) satisfies z.ZodType<Tool>;
 
 export const toolChoiceSchema = z.union([
   z.literal("none"),
diff --git a/ui/e2e_tests/playground.spec.ts b/ui/e2e_tests/playground.spec.ts
diff --git a/ui/fixtures/chat_inference_datapoint_examples.jsonl b/ui/fixtures/chat_inference_datapoint_examples.jsonl
diff --git a/ui/fixtures/model_inference_cache_e2e.jsonl b/ui/fixtures/model_inference_cache_e2e.jsonl

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,6 @@ export async function action({ request }: Route.ActionArgs): Promise<Response> {`
`25`	`25`	`{ status: 400 },`
`26`	`26`	`);`
`27`	`27`	`}`
`28`		`-`
`29`	`28`	`if (isTensorZeroServerError(error)) {`
`30`	`29`	`return Response.json({ error: error.message }, { status: error.status });`
`31`	`30`	`}`