Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,67 @@ oauth-model-alias:
# protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
# params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
# "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}"

# Model Pooling & Clustering
# Two-level system for aggregating models across providers under virtual model IDs.
#
# Level 1 — Pools: Route the same logical model from multiple providers under one virtual ID.
# Useful when you have the same model available via different subscriptions/providers.
# The gateway selects members according to the pool's strategy (see "Strategies" below) and fails over on 429/503 errors.
#
# Level 2 — Clusters: Group multiple pools (or raw model IDs) under a semantic name.
# Use this to expose intent-based model IDs like "coding-high", "coding-fast", etc.
#
# Strategies:
# - "round-robin" (default for pools): rotate across members on each request
# - "priority" (default for clusters): try members in order, fail over on exhaustion
#
# model-pools:
# enabled: true
# default-strategy: round-robin
#
# # Level 1: Pools — same model, multiple providers
# pools:
# - id: "claude-sonnet-4" # Virtual model ID exposed to clients
# strategy: "priority" # Override default strategy for this pool
# members:
# - model: "kiro-claude-sonnet-4-5" # Actual model name sent to provider
# provider: "kiro" # Provider type
# priority: 0 # Lower = preferred (used with "priority" strategy)
# weight: 2 # Higher = more requests (used with "round-robin")
# - model: "claude-sonnet-4-20250514"
# provider: "claude"
# priority: 1
#
# - id: "gpt-4.1"
# members:
# - model: "gpt-4.1"
# provider: "codex"
#
# # Level 2: Clusters — semantic names grouping multiple pools
# clusters:
# - id: "coding-high" # Semantic model ID exposed to clients
# description: "Best available model for complex coding tasks"
# strategy: "priority"
# members:
# - pool: "claude-sonnet-4" # Reference a Level 1 pool by ID
# priority: 0
# - pool: "gpt-4.1"
# priority: 1
#
# - id: "coding-fast"
# description: "Fast, cost-efficient model for simple coding tasks"
# members:
# - model: "kiro-claude-haiku-4-5" # Can reference raw models directly
# - model: "gpt-4.1-mini"
#
# - id: "chat"
# description: "General-purpose conversational model"
# members:
# - pool: "claude-sonnet-4"
#
# - id: "reasoning"
# description: "Models optimized for complex reasoning and analysis"
# members:
# - model: "claude-sonnet-4-20250514(64000)" # Supports thinking suffixes
# - model: "gpt-4.1"
144 changes: 144 additions & 0 deletions internal/api/handlers/management/model_pools.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package management

import (
"net/http"

"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
)

// GetModelPools responds with the model-pools section of the active
// configuration, encoded as JSON with HTTP 200.
//
// The handler mutex is held until the response has been produced, so the
// mutating handlers in this file (which take the same mutex) cannot change the
// pool or cluster slices while they are being encoded.
func (h *Handler) GetModelPools(c *gin.Context) {
	h.mu.Lock()
	defer h.mu.Unlock()

	current := h.cfg.ModelPools
	c.JSON(http.StatusOK, current)
}

// PutModelPools overwrites the whole model-pools configuration with the JSON
// document carried in the request body.
//
// A body that cannot be bound to config.ModelPoolConfig is rejected with
// HTTP 400 and the binding error message. Otherwise the decoded value is
// stored under the handler mutex and the request is passed to h.persist
// (not shown here; presumably it saves the configuration and writes the
// final response — confirm against its definition).
func (h *Handler) PutModelPools(c *gin.Context) {
	var replacement config.ModelPoolConfig
	err := c.ShouldBindJSON(&replacement)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	h.mu.Lock()
	h.cfg.ModelPools = replacement
	h.mu.Unlock()

	h.persist(c)
}

// PatchModelPool upserts one pool: a pool whose ID matches the request body
// is replaced in place, otherwise the new pool is appended to the list.
//
// Responds with HTTP 400 when the body cannot be bound to config.ModelPool or
// when the pool ID is empty. After the in-memory update the request is passed
// to h.persist (not shown here; presumably it saves the configuration and
// writes the final response — confirm against its definition).
func (h *Handler) PatchModelPool(c *gin.Context) {
	var pool config.ModelPool
	err := c.ShouldBindJSON(&pool)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	if len(pool.ID) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "pool id is required"})
		return
	}

	h.mu.Lock()
	// Locate the first pool with the same ID; -1 means no such pool exists.
	match := -1
	for idx := range h.cfg.ModelPools.Pools {
		if h.cfg.ModelPools.Pools[idx].ID == pool.ID {
			match = idx
			break
		}
	}
	if match >= 0 {
		h.cfg.ModelPools.Pools[match] = pool
	} else {
		h.cfg.ModelPools.Pools = append(h.cfg.ModelPools.Pools, pool)
	}
	h.mu.Unlock()

	h.persist(c)
}

// DeleteModelPool removes the first pool whose ID equals the "id" query
// parameter.
//
// Responds with HTTP 400 when the parameter is missing or empty and with
// HTTP 404 when no pool carries that ID. After a successful removal the
// request is passed to h.persist (not shown here; presumably it saves the
// configuration and writes the final response — confirm against its
// definition).
func (h *Handler) DeleteModelPool(c *gin.Context) {
	target := c.Query("id")
	if target == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "id query parameter is required"})
		return
	}

	// The closure scopes the critical section: the mutex is released before
	// any response is written.
	removed := func() bool {
		h.mu.Lock()
		defer h.mu.Unlock()

		existing := h.cfg.ModelPools.Pools
		for idx := range existing {
			if existing[idx].ID != target {
				continue
			}
			// Shift the tail one slot to the left within the same backing
			// array, then drop the now-duplicated last element.
			copy(existing[idx:], existing[idx+1:])
			h.cfg.ModelPools.Pools = existing[:len(existing)-1]
			return true
		}
		return false
	}()

	if !removed {
		c.JSON(http.StatusNotFound, gin.H{"error": "pool not found"})
		return
	}

	h.persist(c)
}

// PatchModelCluster upserts one cluster: a cluster whose ID matches the
// request body is replaced in place, otherwise the new cluster is appended to
// the list.
//
// Responds with HTTP 400 when the body cannot be bound to config.ModelCluster
// or when the cluster ID is empty. After the in-memory update the request is
// passed to h.persist (not shown here; presumably it saves the configuration
// and writes the final response — confirm against its definition).
func (h *Handler) PatchModelCluster(c *gin.Context) {
	var cluster config.ModelCluster
	err := c.ShouldBindJSON(&cluster)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	if len(cluster.ID) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "cluster id is required"})
		return
	}

	h.mu.Lock()
	// Locate the first cluster with the same ID; -1 means no such cluster exists.
	match := -1
	for idx := range h.cfg.ModelPools.Clusters {
		if h.cfg.ModelPools.Clusters[idx].ID == cluster.ID {
			match = idx
			break
		}
	}
	if match >= 0 {
		h.cfg.ModelPools.Clusters[match] = cluster
	} else {
		h.cfg.ModelPools.Clusters = append(h.cfg.ModelPools.Clusters, cluster)
	}
	h.mu.Unlock()

	h.persist(c)
}

// DeleteModelCluster removes the first cluster whose ID equals the "id" query
// parameter.
//
// Responds with HTTP 400 when the parameter is missing or empty and with
// HTTP 404 when no cluster carries that ID. After a successful removal the
// request is passed to h.persist (not shown here; presumably it saves the
// configuration and writes the final response — confirm against its
// definition).
func (h *Handler) DeleteModelCluster(c *gin.Context) {
	target := c.Query("id")
	if target == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "id query parameter is required"})
		return
	}

	// The closure scopes the critical section: the mutex is released before
	// any response is written.
	removed := func() bool {
		h.mu.Lock()
		defer h.mu.Unlock()

		existing := h.cfg.ModelPools.Clusters
		for idx := range existing {
			if existing[idx].ID != target {
				continue
			}
			// Shift the tail one slot to the left within the same backing
			// array, then drop the now-duplicated last element.
			copy(existing[idx:], existing[idx+1:])
			h.cfg.ModelPools.Clusters = existing[:len(existing)-1]
			return true
		}
		return false
	}()

	if !removed {
		c.JSON(http.StatusNotFound, gin.H{"error": "cluster not found"})
		return
	}

	h.persist(c)
}
8 changes: 8 additions & 0 deletions internal/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,14 @@ func (s *Server) registerManagementRoutes() {
mgmt.PATCH("/ampcode/upstream-api-keys", s.mgmt.PatchAmpUpstreamAPIKeys)
mgmt.DELETE("/ampcode/upstream-api-keys", s.mgmt.DeleteAmpUpstreamAPIKeys)

// Model pools and clusters
mgmt.GET("/model-pools", s.mgmt.GetModelPools)
mgmt.PUT("/model-pools", s.mgmt.PutModelPools)
mgmt.PATCH("/model-pools/pool", s.mgmt.PatchModelPool)
mgmt.DELETE("/model-pools/pool", s.mgmt.DeleteModelPool)
mgmt.PATCH("/model-pools/cluster", s.mgmt.PatchModelCluster)
mgmt.DELETE("/model-pools/cluster", s.mgmt.DeleteModelCluster)

mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
mgmt.PATCH("/request-retry", s.mgmt.PutRequestRetry)
Expand Down
82 changes: 82 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ type Config struct {
// Payload defines default and override rules for provider payload parameters.
Payload PayloadConfig `yaml:"payload" json:"payload"`

// ModelPools defines model pooling and clustering configuration.
// Level 1 pools aggregate the same model across multiple providers under one ID.
// Level 2 clusters group multiple pools under a semantic name (e.g., "coding-high").
ModelPools ModelPoolConfig `yaml:"model-pools" json:"model-pools"`

// IncognitoBrowser enables opening OAuth URLs in incognito/private browsing mode.
// This is useful when you want to login with a different account without logging out
// from your current session. Default: false.
Expand Down Expand Up @@ -364,6 +369,83 @@ type PayloadModelRule struct {
Protocol string `yaml:"protocol" json:"protocol"`
}

// ModelPoolConfig is the root of the model pooling and clustering settings.
// It is serialized under the "model-pools" key in both YAML and JSON.
type ModelPoolConfig struct {
	// Enabled switches the feature on. While it is false, pool and cluster
	// model IDs are not recognized.
	Enabled bool `yaml:"enabled" json:"enabled"`

	// Pools holds the Level 1 pools. Each pool exposes one virtual model ID
	// that resolves to concrete models offered by one or more providers.
	Pools []ModelPool `yaml:"pools,omitempty" json:"pools,omitempty"`

	// Clusters holds the Level 2 clusters. A cluster ID such as "coding-high"
	// resolves to an ordered list of pool IDs or raw model IDs.
	Clusters []ModelCluster `yaml:"clusters,omitempty" json:"clusters,omitempty"`

	// DefaultStrategy names the selection strategy applied to any pool or
	// cluster that does not set its own.
	// Supported values: "round-robin" (default), "priority", "latency" (future).
	DefaultStrategy string `yaml:"default-strategy,omitempty" json:"default-strategy,omitempty"`
}

// ModelPool describes a Level 1 pool: one virtual model ID served by concrete
// models from one or more providers.
type ModelPool struct {
	// ID is the virtual model ID that clients request (e.g., "claude-sonnet-4").
	// If empty, the first member's model name is used.
	ID string `yaml:"id" json:"id"`

	// Members enumerates the concrete model+provider pairs behind this pool.
	Members []PoolMember `yaml:"members" json:"members"`

	// Strategy, when set, replaces the default selection strategy for this
	// pool only.
	Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
}

// PoolMember identifies one concrete model, optionally tied to a provider,
// that backs a pool.
type PoolMember struct {
	// Model is the model ID exactly as the provider knows it
	// (e.g., "kiro-claude-sonnet-4-5").
	Model string `yaml:"model" json:"model"`

	// Provider is the provider type (e.g., "kiro", "claude", "codex").
	// It is optional: when empty, every provider that offers Model is included.
	Provider string `yaml:"provider,omitempty" json:"provider,omitempty"`

	// Priority orders members under the "priority" strategy; a lower number
	// is preferred. Other strategies do not use it.
	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`

	// Weight is the member's share under weighted round-robin. Default is 1.
	Weight int `yaml:"weight,omitempty" json:"weight,omitempty"`
}

// ModelCluster describes a Level 2 cluster: a named group of pools and/or raw
// models.
type ModelCluster struct {
	// ID is the cluster name that clients request (e.g., "coding-high").
	ID string `yaml:"id" json:"id"`

	// Description is free-form, human-readable text; it is optional.
	Description string `yaml:"description,omitempty" json:"description,omitempty"`

	// Members lists the cluster's entries in priority order; the first
	// available one is selected. Each entry references either a pool ID or a
	// raw model ID.
	Members []ClusterMember `yaml:"members" json:"members"`

	// Strategy, when set, replaces the default selection strategy for this
	// cluster only.
	// "priority" (default for clusters): try members in order, use first available.
	// "round-robin": rotate across available members.
	Strategy string `yaml:"strategy,omitempty" json:"strategy,omitempty"`
}

// ClusterMember is one entry of a cluster: a reference to either a pool or a
// raw model.
type ClusterMember struct {
	// Pool is the ID of a Level 1 pool. Mutually exclusive with Model.
	Pool string `yaml:"pool,omitempty" json:"pool,omitempty"`

	// Model is a raw model ID used directly, bypassing pool lookup.
	// Mutually exclusive with Pool.
	Model string `yaml:"model,omitempty" json:"model,omitempty"`

	// Priority orders members under the "priority" strategy; a lower number
	// is preferred. Other strategies do not use it.
	Priority int `yaml:"priority,omitempty" json:"priority,omitempty"`
}

// CloakConfig configures request cloaking for non-Claude-Code clients.
// Cloaking disguises API requests to appear as originating from the official Claude Code CLI.
type CloakConfig struct {
Expand Down
Loading