
Commit e0c9de1

pulled feature flagging out of env file so we can use it on the client (tensorzero#2994)
* pulled feature flagging out of env file so we can use it on the client
* updated references to TENSORZERO_FORCE_CACHE_ON
1 parent 4c113bd · commit e0c9de1

7 files changed: 57 additions & 28 deletions


.github/workflows/ui-tests-e2e-model-inference-cache.yml

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ jobs:
           echo "FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/" >> fixtures/.env
           echo "OPENAI_BASE_URL=http://mock-inference-provider:3030/openai/" >> fixtures/.env
           echo "FIREWORKS_ACCOUNT_ID=fake_fireworks_account" >> fixtures/.env
-          echo "TENSORZERO_FORCE_CACHE_ON=1" >> fixtures/.env
+          echo "VITE_TENSORZERO_FORCE_CACHE_ON=1" >> fixtures/.env

      - name: Regenerate model inference cache
        working-directory: ui

ui/app/routes/api/tensorzero/inference.ts

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ import { JSONParseError } from "~/utils/common";
 import type { Route } from "./+types/inference";
 import { getNativeTensorZeroClient } from "~/utils/tensorzero/native_client.server";
 import type { ClientInferenceParams } from "tensorzero-node";
-import { getExtraInferenceOptions } from "~/utils/env.server";
+import { getExtraInferenceOptions } from "~/utils/feature_flags";

 export async function action({ request }: Route.ActionArgs): Promise<Response> {
   const formData = await request.formData();
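
For orientation only: the route handler's body is not shown in this hunk, so the snippet below is a hypothetical sketch of how the flag-driven extras might be folded into request-derived params. It assumes ClientInferenceParams accepts optional cache_options and dryrun fields; none of these names beyond the imports are taken from this commit.

// Hypothetical sketch, not code from this commit.
import type { ClientInferenceParams } from "tensorzero-node";
import { getExtraInferenceOptions } from "~/utils/feature_flags";

function withFeatureFlagOptions(
  params: ClientInferenceParams,
): ClientInferenceParams {
  // getExtraInferenceOptions() returns {} unless VITE_TENSORZERO_FORCE_CACHE_ON=1,
  // in which case it forces cache_options.enabled = "on" and dryrun = false.
  return { ...params, ...getExtraInferenceOptions() };
}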

ui/app/routes/playground/route.tsx

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ import DatapointPlaygroundOutput from "./DatapointPlaygroundOutput";
 import { safeParseInt } from "~/utils/common";
 import { getNativeTensorZeroClient } from "~/utils/tensorzero/native_client.server";
 import type { InferenceResponse } from "tensorzero-node";
-import { getExtraInferenceOptions } from "~/utils/env.server";
+import { getExtraInferenceOptions } from "~/utils/feature_flags";

 const DEFAULT_LIMIT = 5;

ui/app/utils/env.server.ts

Lines changed: 0 additions & 23 deletions
@@ -20,12 +20,6 @@ interface Env {
   OPENAI_BASE_URL: string | null;
   FIREWORKS_BASE_URL: string | null;
   FIREWORKS_ACCOUNT_ID: string | null;
-  /// When set, sets `cache_options.enabled = "on"` on all inference calls
-  /// Normally, we leave this unset, which uses the TensorZero default of 'write_only'
-  /// This is used by e2e tests to allow us to populate the model inference cache
-  /// from regen-fixtures without trampling existing entries, and then to use the cached
-  /// entries from the normal ui e2e tests
-  TENSORZERO_FORCE_CACHE_ON: boolean;
 }

 let _env: Env;
@@ -66,28 +60,11 @@ export function getEnv(): Env {
     TENSORZERO_EVALUATIONS_PATH:
       process.env.TENSORZERO_EVALUATIONS_PATH || "evaluations",
     FIREWORKS_ACCOUNT_ID: process.env.FIREWORKS_ACCOUNT_ID || null,
-    TENSORZERO_FORCE_CACHE_ON: process.env.TENSORZERO_FORCE_CACHE_ON === "1",
   };

   return _env;
 }

-/// Returns an object containing extra parameters that should be passed to
-/// inference calls on our TensorZero client
-export function getExtraInferenceOptions(): object {
-  if (getEnv().TENSORZERO_FORCE_CACHE_ON) {
-    return {
-      // We need to force dryrun off, as it prevents us from writing to the cache
-      // (which we need in order to populate our model inference cache)
-      dryrun: false,
-      cache_options: {
-        enabled: "on",
-      },
-    };
-  }
-  return {};
-}
-
 function getClickhouseUrl() {
   const url = process.env.TENSORZERO_CLICKHOUSE_URL;
   if (url) {

ui/app/utils/feature_flags.ts

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+import { canUseDOM } from "./common";
+
+interface FeatureFlags {
+  /// When set, sets `cache_options.enabled = "on"` on all inference calls
+  /// Normally, we leave this unset, which uses the TensorZero default of 'write_only'
+  /// This is used by e2e tests to allow us to populate the model inference cache
+  /// from regen-fixtures without trampling existing entries, and then to use the cached
+  /// entries from the normal ui e2e tests
+  FORCE_CACHE_ON: boolean;
+}
+
+/**
+ * Get feature flags for the application.
+ * This can be accessed from the client.
+ * @returns FeatureFlags
+ */
+export function getFeatureFlags(): FeatureFlags {
+  const envValue = canUseDOM
+    ? import.meta.env.VITE_TENSORZERO_FORCE_CACHE_ON
+    : process.env.VITE_TENSORZERO_FORCE_CACHE_ON;
+  const FORCE_CACHE_ON = envValue === "1";
+  return {
+    FORCE_CACHE_ON,
+  };
+}
+
+interface ExtraInferenceOptions {
+  cache_options?: {
+    enabled: "on" | "off" | "write_only";
+    max_age_s: number | null;
+  };
+  dryrun?: boolean;
+}
+
+/**
+ * Returns an object containing extra parameters that should be passed to
+ * inference calls on our TensorZero client
+ */
+export function getExtraInferenceOptions(): ExtraInferenceOptions {
+  if (getFeatureFlags().FORCE_CACHE_ON) {
+    return {
+      // We need to force dryrun off, as it prevents us from writing to the
+      // cache (which we need in order to populate our model inference cache)
+      cache_options: {
+        enabled: "on",
+        max_age_s: null,
+      },
+      dryrun: false,
+    };
+  }
+  return {};
+}
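
Because the module no longer depends on env.server.ts, the same helper can now be imported from browser code as well as server code. A small hypothetical usage sketch follows; the call site is illustrative and not part of this commit:

// Hypothetical usage, not from this commit.
import {
  getFeatureFlags,
  getExtraInferenceOptions,
} from "~/utils/feature_flags";

// On the client this reads import.meta.env; on the server it reads process.env.
const { FORCE_CACHE_ON } = getFeatureFlags();

if (FORCE_CACHE_ON) {
  // In e2e runs, every inference call gets cache_options.enabled = "on" and dryrun = false.
  console.log("forcing inference cache:", getExtraInferenceOptions());
}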

ui/fixtures/docker-compose.ui.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ services:
       - OPENAI_BASE_URL
       - FIREWORKS_BASE_URL
       - FIREWORKS_ACCOUNT_ID
-      - TENSORZERO_FORCE_CACHE_ON
+      - VITE_TENSORZERO_FORCE_CACHE_ON
    volumes:
      - ./config:/app/config:ro
    env_file:
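
The rename matters because Vite only exposes environment variables carrying its prefix (VITE_ by default) to client bundles via import.meta.env. A minimal sketch of the client-side read, assuming standard Vite behavior rather than code from this repo:

// Minimal sketch assuming default Vite behavior (not code from this commit):
// only VITE_-prefixed variables are statically injected into the client bundle.
const forceCacheOn = import.meta.env.VITE_TENSORZERO_FORCE_CACHE_ON === "1";
// A variable still named TENSORZERO_FORCE_CACHE_ON would be undefined here,
// which is why the compose file and the CI workflow both pass the new name.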

ui/fixtures/regenerate-model-inference-cache.sh

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ cd "$(dirname "$0")"/../

 docker compose -f ./fixtures/docker-compose.e2e.yml -f ./fixtures/docker-compose.ui.yml down
 docker compose -f ./fixtures/docker-compose.e2e.yml -f ./fixtures/docker-compose.ui.yml rm -f
-OPENAI_BASE_URL=http://mock-inference-provider:3030/openai/ FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/ FIREWORKS_ACCOUNT_ID=fake_fireworks_account TENSORZERO_SKIP_LARGE_FIXTURES=1 TENSORZERO_FORCE_CACHE_ON=1 docker compose -f ./fixtures/docker-compose.e2e.yml -f ./fixtures/docker-compose.ui.yml up --build --force-recreate -d
+OPENAI_BASE_URL=http://mock-inference-provider:3030/openai/ FIREWORKS_BASE_URL=http://mock-inference-provider:3030/fireworks/ FIREWORKS_ACCOUNT_ID=fake_fireworks_account TENSORZERO_SKIP_LARGE_FIXTURES=1 VITE_TENSORZERO_FORCE_CACHE_ON=1 docker compose -f ./fixtures/docker-compose.e2e.yml -f ./fixtures/docker-compose.ui.yml up --build --force-recreate -d
 docker compose -f ./fixtures/docker-compose.e2e.yml -f ./fixtures/docker-compose.ui.yml wait fixtures
 # Wipe the ModelInferenceCache table to ensure that we regenerate everything
 docker run --add-host=host.docker.internal:host-gateway clickhouse/clickhouse-server clickhouse-client --host host.docker.internal --user chuser --password chpassword --database tensorzero_ui_fixtures 'TRUNCATE TABLE ModelInferenceCache SYNC'
