@@ -80,6 +80,7 @@ def create_finetune_request(
     train_on_inputs: bool | Literal["auto"] = "auto",
     training_method: str = "sft",
     dpo_beta: float | None = None,
+    rpo_alpha: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
     if model is not None and from_checkpoint is not None:
@@ -193,7 +194,7 @@ def create_finetune_request(
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
     if training_method == "dpo":
-        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta)
+        training_method_cls = TrainingMethodDPO(dpo_beta=dpo_beta, rpo_alpha=rpo_alpha)
 
     finetune_request = FinetuneRequest(
         model=model,
@@ -322,6 +323,7 @@ def create(
         train_on_inputs: bool | Literal["auto"] = "auto",
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        rpo_alpha: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -373,6 +375,7 @@ def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            rpo_alpha (float, optional): RPO alpha to control the weight of NLL loss component for chosen responses. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -425,6 +428,7 @@ def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            rpo_alpha=rpo_alpha,
             from_checkpoint=from_checkpoint,
         )
 
@@ -710,6 +714,7 @@ async def create(
         train_on_inputs: bool | Literal["auto"] = "auto",
         training_method: str = "sft",
         dpo_beta: float | None = None,
+        rpo_alpha: float | None = None,
         from_checkpoint: str | None = None,
     ) -> FinetuneResponse:
         """
@@ -761,6 +766,7 @@ async def create(
             training_method (str, optional): Training method. Defaults to "sft".
                 Supported methods: "sft", "dpo".
             dpo_beta (float, optional): DPO beta parameter. Defaults to None.
+            rpo_alpha (float, optional): RPO alpha to control the weight of NLL loss component for chosen responses. Defaults to None.
             from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
                 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
                 The step value is optional, without it the final checkpoint will be used.
@@ -813,6 +819,7 @@ async def create(
             train_on_inputs=train_on_inputs,
             training_method=training_method,
             dpo_beta=dpo_beta,
+            rpo_alpha=rpo_alpha,
             from_checkpoint=from_checkpoint,
         )
 
0 commit comments