Skip to content

Commit 92d7e01

Browse files
committed
Review fixes
1 parent a082aac commit 92d7e01

1 file changed

Lines changed: 12 additions & 5 deletions

File tree

src/together/resources/finetune.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,10 +189,17 @@ def create_finetune_request(
189189
raise ValueError(
190190
"dpo_normalize_logratios_by_length=True is only supported for DPO training"
191191
)
192-
if rpo_alpha is not None and training_method != "dpo":
193-
raise ValueError("rpo_alpha is only supported for DPO training")
194-
if simpo_gamma is not None and training_method != "dpo":
195-
raise ValueError("simpo_gamma is only supported for DPO training")
192+
if rpo_alpha is not None:
193+
if training_method != "dpo":
194+
raise ValueError("rpo_alpha is only supported for DPO training")
195+
if not rpo_alpha >= 0.0:
196+
raise ValueError(f"rpo_alpha should be non-negative (got {rpo_alpha})")
197+
198+
if simpo_gamma is not None:
199+
if training_method != "dpo":
200+
raise ValueError("simpo_gamma is only supported for DPO training")
201+
if not simpo_gamma >= 0.0:
202+
raise ValueError(f"simpo_gamma should be non-negative (got {simpo_gamma})")
196203

197204
lr_scheduler: FinetuneLRScheduler
198205
if lr_scheduler_type == "cosine":
@@ -221,7 +228,7 @@ def create_finetune_request(
221228
rprint(
222229
f"Parameter simpo_gamma was set to {simpo_gamma}. "
223230
"SimPO training detected. Reference logits will not be used "
224-
"and length normalization of logps will be enabled."
231+
"and length normalization of log-probabilities will be enabled."
225232
)
226233
else:
227234
dpo_reference_free = False

0 commit comments

Comments
 (0)