From bcf6da7d2f9a55640d7d52bbce3742e49df6cb36 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Mon, 6 Apr 2026 13:45:49 -0500 Subject: [PATCH] feat: Add new API fields for gpu clusters --- openapi.yaml | 96 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 11 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index a2b955b..2df71b9 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -8454,7 +8454,8 @@ components: - gpu_type - num_gpus - cluster_name - - driver_version + - cuda_version + - nvidia_driver_version - billing_type type: object properties: @@ -8485,14 +8486,6 @@ components: x-stainless-terraform-configurability: computed description: Duration in days to keep the cluster running. type: integer - driver_version: - description: NVIDIA driver version to use in the cluster. - type: string - enum: - - CUDA_12_5_555 - - CUDA_12_6_560 - - CUDA_12_6_565 - - CUDA_12_8_570 shared_volume: x-stainless-terraform-configurability: computed $ref: '#/components/schemas/GPUClustersSharedVolumeCreateRequest' @@ -8509,6 +8502,69 @@ components: enum: - RESERVED - ON_DEMAND + - SCHEDULED_CAPACITY + gpu_node_failover_enabled: + type: boolean + default: false + description: Whether automated GPU node failover should be enabled for this cluster. By default, it is disabled. + auto_scaled: + type: boolean + default: false + description: Whether the GPU cluster should be auto-scaled based on the workload. By default, it is not auto-scaled. + auto_scale_max_gpus: + type: integer + description: Maximum number of GPUs to which the cluster can be auto-scaled up. This field is required if auto_scaled is true. + format: uint32 + slurm_shm_size_gib: + type: integer + description: Shared memory size in GiB for the Slurm cluster. This field is required if cluster_type is SLURM. + capacity_pool_id: + type: string + description: ID of the capacity pool to use for the cluster. This field is optional and only applicable if the cluster is created from a capacity pool.
+ provision_at_ts: + type: string + description: Provision timestamp of the cluster. This field is required for SCHEDULED_CAPACITY billing to specify the provision time for the cluster. For other billing types, the cluster will be provisioned immediately if it is not provided. + format: date-time + decommission_at_ts: + type: string + description: Decommission timestamp of the cluster. This field is required for SCHEDULED_CAPACITY billing to specify the decommission time for the cluster. + format: date-time + oidc_config: + $ref: '#/components/schemas/GPUClusterOIDCConfig' + install_traefik: + type: boolean + default: false + description: Whether to install the Traefik ingress controller in the cluster. This field is only applicable for Kubernetes clusters and is false by default. + cuda_version: + type: string + description: CUDA version for this cluster. For example, 12.5 + nvidia_driver_version: + type: string + description: NVIDIA driver version for this cluster. For example, 550. Only some combinations of cuda_version and nvidia_driver_version are supported. + GPUClusterOIDCConfig: + type: object + properties: + issuer_url: + type: string + description: OIDC issuer URL for authentication. For example, https://accounts.google.com + client_id: + type: string + description: OIDC client ID for authentication. + username_claim: + type: string + description: JWT claim to use as the username. For example, 'sub' or 'email' + username_prefix: + type: string + description: Prefix to add to the username claim to form the final username. For example, 'oidc:' + group_claim: + type: string + description: JWT claim to use for user groups. For example, 'groups' + group_prefix: + type: string + description: Prefix to add to the group claim to form the final group name. For example, 'oidc:' + ca_cert: + type: string + description: CA certificate in PEM format to validate the OIDC issuer's TLS certificate. This field is optional but recommended if the issuer uses a private CA or self-signed certificate.
GPUClusterGPUWorkerNode: type: object required: @@ -8547,8 +8603,8 @@ components: - region - gpu_type - cluster_name - - duration_hours - - driver_version + - cuda_version + - nvidia_driver_version - volumes - status - control_plane_nodes @@ -8611,6 +8667,24 @@ components: type: string num_gpus: type: integer + slurm_shm_size_gib: + type: integer + capacity_pool_id: + type: string + provision_at_ts: + type: string + format: date-time + decommission_at_ts: + type: string + format: date-time + oidc_config: + $ref: '#/components/schemas/GPUClusterOIDCConfig' + install_traefik: + type: boolean + cuda_version: + type: string + nvidia_driver_version: + type: string GPUClusterUpdateRequest: type: object properties: