diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts index 464a3820..b4caaf7c 100644 --- a/packages/proxy/schema/index.ts +++ b/packages/proxy/schema/index.ts @@ -1008,6 +1008,7 @@ export const AISecretTypes: { [keyName: string]: ModelEndpointType } = { CEREBRAS_API_KEY: "cerebras", REPLICATE_API_KEY: "replicate", BASETEN_API_KEY: "baseten", + WAFER_API_KEY: "wafer", }; export const CloudSecretTypes: { [keyName: string]: ModelEndpointType } = { @@ -1035,6 +1036,7 @@ export const EndpointProviderToBaseURL: { baseten: "https://inference.baseten.co/v1", cerebras: "https://api.cerebras.ai/v1", xAI: "https://api.x.ai/v1", + wafer: "https://pass.wafer.ai/v1", bedrock: null, vertex: null, azure: null, diff --git a/packages/proxy/schema/model_list.json b/packages/proxy/schema/model_list.json index 57d7b976..4abe1e88 100644 --- a/packages/proxy/schema/model_list.json +++ b/packages/proxy/schema/model_list.json @@ -11670,5 +11670,94 @@ "available_providers": [ "together" ] + }, + "GLM-5.1": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 1.2, + "output_cost_per_mil_tokens": 3.6, + "input_cache_read_cost_per_mil_tokens": 0.12, + "displayName": "GLM-5.1", + "reasoning": true, + "max_input_tokens": 202752, + "available_providers": [ + "wafer" + ] + }, + "Kimi-K2.6": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.88, + "output_cost_per_mil_tokens": 3.84, + "input_cache_read_cost_per_mil_tokens": 0.09, + "displayName": "Kimi K2.6", + "reasoning": true, + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "Qwen3.5-397B-A17B": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.48, + "output_cost_per_mil_tokens": 2.88, + "input_cache_read_cost_per_mil_tokens": 0.05, + "displayName": "Qwen3.5 397B A17B", + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "Qwen3.6-35B-A3B": { + "format": "openai", + "flavor": "chat", + "multimodal": true, + "input_cost_per_mil_tokens": 0.15, + "output_cost_per_mil_tokens": 1.0, + "input_cache_read_cost_per_mil_tokens": 0.02, + "displayName": "Qwen3.6 35B A3B", + "max_input_tokens": 262144, + "available_providers": [ + "wafer" + ] + }, + "qwen3.7-max": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 5.0, + "output_cost_per_mil_tokens": 15.0, + "input_cache_read_cost_per_mil_tokens": 0.5, + "displayName": "Qwen3.7 Max", + "max_input_tokens": 256000, + "available_providers": [ + "wafer" + ] + }, + "deepseek-v4-flash": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 0.14, + "output_cost_per_mil_tokens": 0.28, + "input_cache_read_cost_per_mil_tokens": 0.01, + "displayName": "DeepSeek V4 Flash", + "max_input_tokens": 1000000, + "available_providers": [ + "wafer" + ] + }, + "deepseek-v4-pro": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 1.74, + "output_cost_per_mil_tokens": 3.48, + "input_cache_read_cost_per_mil_tokens": 0.02, + "displayName": "DeepSeek V4 Pro", + "max_input_tokens": 1000000, + "available_providers": [ + "wafer" + ] } } diff --git a/packages/proxy/schema/models.ts b/packages/proxy/schema/models.ts index 7aac8d33..77140b0d 100644 --- a/packages/proxy/schema/models.ts +++ b/packages/proxy/schema/models.ts @@ -37,6 +37,7 @@ export const ModelEndpointType = [ "cerebras", "ollama", "replicate", + "wafer", "js", ] as const; export type ModelEndpointType = (typeof ModelEndpointType)[number]; diff --git a/packages/proxy/schema/secrets.ts b/packages/proxy/schema/secrets.ts index 59e25622..e86f1b63 100644 --- a/packages/proxy/schema/secrets.ts +++ b/packages/proxy/schema/secrets.ts @@ -201,6 +201,7 @@ export const APISecretSchema = z.union([ "fireworks", "cerebras", "xAI", + "wafer", "js", ]), metadata: BaseMetadataSchema.nullish(), diff --git a/packages/proxy/scripts/verify_proxy_models.ts b/packages/proxy/scripts/verify_proxy_models.ts index 5d6dfc61..afb8562d 100644 --- a/packages/proxy/scripts/verify_proxy_models.ts +++ b/packages/proxy/scripts/verify_proxy_models.ts @@ -23,6 +23,7 @@ type ModelEndpointType = | "cerebras" | "ollama" | "replicate" + | "wafer" | "js"; type ModelFormat =