We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0719212, commit 5f8b188 (Copy full SHA for 5f8b188)
1 file changed
src/together/resources/finetune.py
@@ -215,9 +215,11 @@ def create_finetune_request(
215
elif training_method == "dpo":
216
if simpo_gamma is not None and simpo_gamma > 0:
217
dpo_reference_free = True
218
+ dpo_normalize_logratios_by_length = True
219
rprint(
220
f"Parameter simpo_gamma was set to {simpo_gamma}. "
- "SimPO training detected. Reference logits will not be used."
221
+ "SimPO training detected. Reference logits will not be used "
222
+ "and length normalization of logps will be enabled."
223
)
224
else:
225
dpo_reference_free = False
0 commit comments