@@ -189,10 +189,17 @@ def create_finetune_request(
189189 raise ValueError (
190190 "dpo_normalize_logratios_by_length=True is only supported for DPO training"
191191 )
192- if rpo_alpha is not None and training_method != "dpo" :
193- raise ValueError ("rpo_alpha is only supported for DPO training" )
194- if simpo_gamma is not None and training_method != "dpo" :
195- raise ValueError ("simpo_gamma is only supported for DPO training" )
192+ if rpo_alpha is not None :
193+ if training_method != "dpo" :
194+ raise ValueError ("rpo_alpha is only supported for DPO training" )
195+ if not rpo_alpha >= 0.0 :
196+ raise ValueError (f"rpo_alpha should be non-negative (got { rpo_alpha } )" )
197+
198+ if simpo_gamma is not None :
199+ if training_method != "dpo" :
200+ raise ValueError ("simpo_gamma is only supported for DPO training" )
201+ if not simpo_gamma >= 0.0 :
202+ raise ValueError (f"simpo_gamma should be non-negative (got { simpo_gamma } )" )
196203
197204 lr_scheduler : FinetuneLRScheduler
198205 if lr_scheduler_type == "cosine" :
@@ -221,7 +228,7 @@ def create_finetune_request(
221228 rprint (
222229 f"Parameter simpo_gamma was set to { simpo_gamma } . "
223230 "SimPO training detected. Reference logits will not be used "
224- "and length normalization of logps will be enabled."
231+ "and length normalization of log-probabilities will be enabled."
225232 )
226233 else :
227234 dpo_reference_free = False
0 commit comments