1- import numpy as np
1+ import numpy as np
2+ import math
23from sklearn .metrics import precision_recall_fscore_support , accuracy_score
34
45
@@ -25,3 +26,33 @@ def compute_metrics(p):
2526 'true_negatives' : int (true_negatives ),
2627 'false_negatives' : int (false_negatives )
2728 }
29+
30+
31+
def perplexity_metrics(eval_pred):
    """
    Compute mean cross-entropy loss and perplexity for a causal LM.

    Hugging Face's Trainer passes an EvalPrediction of (logits, labels) —
    there is no precomputed loss attribute — so we compute token-level
    cross-entropy ourselves and report PPL = exp(loss). Positions labelled
    -100 (the HF padding/ignore convention) are excluded from the mean.

    Args:
        eval_pred: (logits, labels) pair. logits is (batch, seq, vocab)
            (possibly wrapped in a tuple by the model, e.g. alongside
            past_key_values); labels is (batch, seq).

    Returns:
        dict with:
            "loss": mean cross-entropy over non-ignored tokens (float).
            "perplexity": exp(loss) as a float; ``inf`` for a very large
                loss rather than raising OverflowError.
    """
    logits, labels = eval_pred

    # Some models return logits as the first element of a tuple
    # (e.g. (logits, past_key_values)); unwrap before tensor conversion.
    if isinstance(logits, tuple):
        logits = logits[0]

    logits = torch.as_tensor(logits, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.long)

    # Shift so that the logits at position t predict the token at t+1
    # (standard causal-LM evaluation).
    shift_logits = logits[..., :-1, :].contiguous()
    shift_labels = labels[..., 1:].contiguous()

    # Mean cross-entropy over non-ignored tokens only.
    loss = F.cross_entropy(
        shift_logits.view(-1, shift_logits.size(-1)),
        shift_labels.view(-1),
        ignore_index=-100,  # HF Trainer marks padding with -100
        reduction="mean",
    )

    # torch.exp saturates to inf for huge losses; math.exp would raise
    # OverflowError once loss exceeds ~709 (common for an untrained model).
    return {
        "loss": loss.item(),
        "perplexity": torch.exp(loss).item(),
    }
0 commit comments