@@ -7265,7 +7265,13 @@ components:
72657265 schemas :
72667266 RL.OptimStepBody :
72677267 type : object
7268- properties : {}
7268+ properties :
7269+ learning_rate :
7270+ description : Learning rate for this step.
7271+ type : number
7272+ default : 0.0001
7273+ adamw_params :
7274+ $ref : '#/components/schemas/RL.AdamWOptimizerParams'
72697275 RL.ForwardBackwardBody :
72707276 type : object
72717277 required :
@@ -7462,10 +7468,6 @@ components:
74627468 example : checkpoint-123
74637469 lora_config :
74647470 $ref : '#/components/schemas/RL.LoraConfig'
7465- optimizer_config :
7466- $ref : ' #/components/schemas/RL.OptimizerConfig'
7467- lr_scheduler_config :
7468- $ref : ' #/components/schemas/RL.LRSchedulerConfig'
74697471 RL.TrainingSessionStatus :
74707472 description : Status of the training session
74717473 default : TRAINING_SESSION_STATUS_UNSPECIFIED
@@ -7489,6 +7491,12 @@ components:
74897491 type : string
74907492 example : meta-llama/Meta-Llama-3-8B-Instruct
74917493 description : Base model used for the training session
7494+ checkpoint_id :
7495+ description : Checkpoint ID to use for the training session
7496+ type : string
7497+ step :
7498+ description : Current training step
7499+ type : integer
74927500 created_at :
74937501 type : string
74947502 format : date-time
@@ -7499,44 +7507,6 @@ components:
74997507 description : Timestamp when the training session was last updated
75007508 lora_config :
75017509 $ref : '#/components/schemas/RL.LoraConfig'
7502- optimizer_config :
7503- $ref : ' #/components/schemas/RL.OptimizerConfig'
7504- lr_scheduler_config :
7505- $ref : ' #/components/schemas/RL.LRSchedulerConfig'
7506- RL.LRSchedulerConfig :
7507- description : Learning rate scheduler configuration
7508- type : object
7509- properties :
7510- linear :
7511- $ref : ' #/components/schemas/RL.LinearLRScheduler'
7512- RL.LinearLRScheduler :
7513- description : Linear learning rate scheduler configuration
7514- type : object
7515- properties :
7516- params :
7517- $ref : ' #/components/schemas/RL.LinearSchedulerParams'
7518- RL.LinearSchedulerParams :
7519- description : Linear learning rate scheduler parameters
7520- type : object
7521- properties :
7522- warmup_steps :
7523- description : Number of warmup steps
7524- type : integer
7525- default : 100
7526- lr_min :
7527- description : Minimum learning rate at the end of linear decay
7528- type : number
7529- default : 0.0
7530- RL.OptimizerConfig :
7531- description : Optimizer configuration. If omitted, defaults to AdamW with default parameters.
7532- type : object
7533- properties :
7534- adamw :
7535- $ref : ' #/components/schemas/RL.AdamWOptimizer'
7536- max_grad_norm :
7537- description : Maximum gradient norm for gradient clipping. Applies to all optimizer types.
7538- type : number
7539- default : 1.0
75407510 RL.LoraConfig :
75417511 type : object
75427512 description : LoRA adapter configuration
@@ -7553,20 +7523,10 @@ components:
75537523 type : number
75547524 default : 0.05
75557525 description : Dropout of the LoRA adapter
7556- RL.AdamWOptimizer :
7557- description : AdamW optimizer configuration
7558- type : object
7559- properties :
7560- params :
7561- $ref : ' #/components/schemas/RL.AdamWOptimizerParams'
75627526 RL.AdamWOptimizerParams :
75637527 description : AdamW optimizer parameters
75647528 type : object
75657529 properties :
7566- lr :
7567- description : Learning rate
7568- type : number
7569- default : 0.0001
75707530 beta1 :
75717531 description : First moment decay rate
75727532 type : number
0 commit comments