Skip to content

Commit 0431976

Browse files
authored
Update RL specs (#204)
* Update RL specs
* add more
1 parent 0a07886 commit 0431976

1 file changed

Lines changed: 13 additions & 53 deletions

File tree

openapi.yaml

Lines changed: 13 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -7265,7 +7265,13 @@ components:
72657265
schemas:
72667266
RL.OptimStepBody:
72677267
type: object
7268-
properties: {}
7268+
properties:
7269+
learning_rate:
7270+
description: Learning rate for this step.
7271+
type: number
7272+
default: 0.0001
7273+
adamw_params:
7274+
$ref: '#/components/schemas/RL.AdamWOptimizerParams'
72697275
RL.ForwardBackwardBody:
72707276
type: object
72717277
required:
@@ -7462,10 +7468,6 @@ components:
74627468
example: checkpoint-123
74637469
lora_config:
74647470
$ref: '#/components/schemas/RL.LoraConfig'
7465-
optimizer_config:
7466-
$ref: '#/components/schemas/RL.OptimizerConfig'
7467-
lr_scheduler_config:
7468-
$ref: '#/components/schemas/RL.LRSchedulerConfig'
74697471
RL.TrainingSessionStatus:
74707472
description: Status of the training session
74717473
default: TRAINING_SESSION_STATUS_UNSPECIFIED
@@ -7489,6 +7491,12 @@ components:
74897491
type: string
74907492
example: meta-llama/Meta-Llama-3-8B-Instruct
74917493
description: Base model used for the training session
7494+
checkpoint_id:
7495+
description: Checkpoint ID to use for the training session
7496+
type: string
7497+
step:
7498+
description: Current training step
7499+
type: integer
74927500
created_at:
74937501
type: string
74947502
format: date-time
@@ -7499,44 +7507,6 @@ components:
74997507
description: Timestamp when the training session was last updated
75007508
lora_config:
75017509
$ref: '#/components/schemas/RL.LoraConfig'
7502-
optimizer_config:
7503-
$ref: '#/components/schemas/RL.OptimizerConfig'
7504-
lr_scheduler_config:
7505-
$ref: '#/components/schemas/RL.LRSchedulerConfig'
7506-
RL.LRSchedulerConfig:
7507-
description: Learning rate scheduler configuration
7508-
type: object
7509-
properties:
7510-
linear:
7511-
$ref: '#/components/schemas/RL.LinearLRScheduler'
7512-
RL.LinearLRScheduler:
7513-
description: Linear learning rate scheduler configuration
7514-
type: object
7515-
properties:
7516-
params:
7517-
$ref: '#/components/schemas/RL.LinearSchedulerParams'
7518-
RL.LinearSchedulerParams:
7519-
description: Linear learning rate scheduler parameters
7520-
type: object
7521-
properties:
7522-
warmup_steps:
7523-
description: Number of warmup steps
7524-
type: integer
7525-
default: 100
7526-
lr_min:
7527-
description: Minimum learning rate at the end of linear decay
7528-
type: number
7529-
default: 0.0
7530-
RL.OptimizerConfig:
7531-
description: Optimizer configuration. If omitted, defaults to AdamW with default parameters.
7532-
type: object
7533-
properties:
7534-
adamw:
7535-
$ref: '#/components/schemas/RL.AdamWOptimizer'
7536-
max_grad_norm:
7537-
description: Maximum gradient norm for gradient clipping. Applies to all optimizer types.
7538-
type: number
7539-
default: 1.0
75407510
RL.LoraConfig:
75417511
type: object
75427512
description: LoRA adapter configuration
@@ -7553,20 +7523,10 @@ components:
75537523
type: number
75547524
default: 0.05
75557525
description: Dropout of the LoRA adapter
7556-
RL.AdamWOptimizer:
7557-
description: AdamW optimizer configuration
7558-
type: object
7559-
properties:
7560-
params:
7561-
$ref: '#/components/schemas/RL.AdamWOptimizerParams'
75627526
RL.AdamWOptimizerParams:
75637527
description: AdamW optimizer parameters
75647528
type: object
75657529
properties:
7566-
lr:
7567-
description: Learning rate
7568-
type: number
7569-
default: 0.0001
75707530
beta1:
75717531
description: First moment decay rate
75727532
type: number

0 commit comments

Comments (0)