From 6f0244b18d708845dad9342cbb3350eff1f1ee59 Mon Sep 17 00:00:00 2001 From: Ian Ye Date: Wed, 27 May 2026 11:23:40 -0700 Subject: [PATCH] add wafer as a model provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wafer (https://wafer.ai) exposes an OpenAI-compatible chat completions endpoint at https://pass.wafer.ai/v1, so it slots into the existing OpenAI-format passthrough — no new request/response translator needed. Wiring: - new endpoint type "wafer" (schema/models.ts, schema/secrets.ts, schema/index.ts, scripts/verify_proxy_models.ts) - WAFER_API_KEY -> wafer in AISecretTypes - EndpointProviderToBaseURL.wafer = https://pass.wafer.ai/v1 - 7 entries in model_list.json for the public catalog (GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro), each tagged available_providers: ["wafer"]. Pricing/context taken from https://pass.wafer.ai/v1/models on 2026-05-27. --- packages/proxy/schema/index.ts | 2 + packages/proxy/schema/model_list.json | 89 +++++++++++++++++++ packages/proxy/schema/models.ts | 1 + packages/proxy/schema/secrets.ts | 1 + packages/proxy/scripts/verify_proxy_models.ts | 1 + 5 files changed, 94 insertions(+) diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts index e2412ba5..693f7751 100644 --- a/packages/proxy/schema/index.ts +++ b/packages/proxy/schema/index.ts @@ -1090,6 +1090,7 @@ export const AISecretTypes: { [keyName: string]: ModelEndpointType } = { CEREBRAS_API_KEY: "cerebras", REPLICATE_API_KEY: "replicate", BASETEN_API_KEY: "baseten", + WAFER_API_KEY: "wafer", }; export const CloudSecretTypes: { [keyName: string]: ModelEndpointType } = { @@ -1117,6 +1118,7 @@ export const EndpointProviderToBaseURL: { baseten: "https://inference.baseten.co/v1", cerebras: "https://api.cerebras.ai/v1", xAI: "https://api.x.ai/v1", + wafer: "https://pass.wafer.ai/v1", bedrock: null, vertex: null, azure: null, diff --git a/packages/proxy/schema/model_list.json b/packages/proxy/schema/model_list.json index 04bafafe..249fd6ca 100644 --- a/packages/proxy/schema/model_list.json +++ b/packages/proxy/schema/model_list.json @@ -12591,5 +12591,94 @@ "available_providers": [ "databricks" ] + }, + "GLM-5.1": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 1.2, + "output_cost_per_mil_tokens": 3.6, + "input_cache_read_cost_per_mil_tokens": 0.12, + "displayName": "GLM-5.1", + "reasoning": true, + "max_input_tokens": 202752, + "available_providers": [ + "wafer" + ] + }, + "Kimi-K2.6": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.88, + "output_cost_per_mil_tokens": 3.84, + "input_cache_read_cost_per_mil_tokens": 0.09, + "displayName": "Kimi K2.6", + "reasoning": true, + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "Qwen3.5-397B-A17B": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.48, + "output_cost_per_mil_tokens": 2.88, + "input_cache_read_cost_per_mil_tokens": 0.05, + "displayName": "Qwen3.5 397B A17B", + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "Qwen3.6-35B-A3B": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.15, + "output_cost_per_mil_tokens": 1.0, + "input_cache_read_cost_per_mil_tokens": 0.02, + "displayName": "Qwen3.6 35B A3B", + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "qwen3.7-max": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 5.0, + "output_cost_per_mil_tokens": 15.0, + "input_cache_read_cost_per_mil_tokens": 0.5, + "displayName": "Qwen3.7 Max", + "max_input_tokens": 256000, + "available_providers": [ + "wafer" + ] + }, + "deepseek-v4-flash": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 0.14, + "output_cost_per_mil_tokens": 0.28, + "input_cache_read_cost_per_mil_tokens": 0.01, + "displayName": "DeepSeek V4 Flash", + "max_input_tokens": 1000000, + "available_providers": [ + "wafer" + ] + }, + "deepseek-v4-pro": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 1.74, + "output_cost_per_mil_tokens": 3.48, + "input_cache_read_cost_per_mil_tokens": 0.02, + "displayName": "DeepSeek V4 Pro", + "max_input_tokens": 1000000, + "available_providers": [ + "wafer" + ] } } diff --git a/packages/proxy/schema/models.ts b/packages/proxy/schema/models.ts index 7aac8d33..77140b0d 100644 --- a/packages/proxy/schema/models.ts +++ b/packages/proxy/schema/models.ts @@ -37,6 +37,7 @@ export const ModelEndpointType = [ "cerebras", "ollama", "replicate", + "wafer", "js", ] as const; export type ModelEndpointType = (typeof ModelEndpointType)[number]; diff --git a/packages/proxy/schema/secrets.ts b/packages/proxy/schema/secrets.ts index f3f21c33..d5235dbb 100644 --- a/packages/proxy/schema/secrets.ts +++ b/packages/proxy/schema/secrets.ts @@ -212,6 +212,7 @@ export const APISecretSchema = z.union([ "fireworks", "cerebras", "xAI", + "wafer", "js", ]), metadata: BaseMetadataSchema.nullish(), diff --git a/packages/proxy/scripts/verify_proxy_models.ts b/packages/proxy/scripts/verify_proxy_models.ts index 5d6dfc61..afb8562d 100644 --- a/packages/proxy/scripts/verify_proxy_models.ts +++ b/packages/proxy/scripts/verify_proxy_models.ts @@ -23,6 +23,7 @@ type ModelEndpointType = | "cerebras" | "ollama" | "replicate" + | "wafer" | "js"; type ModelFormat =