1- import numpy as np
1+ import numpy as np
2+ import math
23from sklearn .metrics import precision_recall_fscore_support , accuracy_score
34
45
@@ -25,3 +26,33 @@ def compute_metrics(p):
2526 'true_negatives' : int (true_negatives ),
2627 'false_negatives' : int (false_negatives )
2728 }
29+
30+
31+
def perplexity_metrics(eval_pred):
    """
    Compute mean cross-entropy loss and perplexity for a causal LM.

    Hugging Face's Trainer passes an EvalPrediction of (logits, labels) —
    there is no precomputed loss attribute — so we compute token-level
    cross-entropy ourselves and report PPL = exp(loss). Positions labelled
    -100 (the HF padding/ignore convention) are excluded from the mean.

    Args:
        eval_pred: (logits, labels) pair. logits is (batch, seq, vocab)
            (possibly wrapped in a tuple by the model, e.g. alongside
            past_key_values); labels is (batch, seq).

    Returns:
        dict with:
            "loss": mean cross-entropy over non-ignored tokens (float).
            "perplexity": exp(loss) as a float; ``inf`` for a very large
                loss rather than raising OverflowError.
    """
    logits, labels = eval_pred

    # Some models return logits as the first element of a tuple
    # (e.g. (logits, past_key_values)); unwrap before tensor conversion.
    if isinstance(logits, tuple):
        logits = logits[0]

    logits = torch.as_tensor(logits, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.long)

    # Shift so that the logits at position t predict the token at t+1
    # (standard causal-LM evaluation).
    shift_logits = logits[..., :-1, :].contiguous()
    shift_labels = labels[..., 1:].contiguous()

    # Mean cross-entropy over non-ignored tokens only.
    loss = F.cross_entropy(
        shift_logits.view(-1, shift_logits.size(-1)),
        shift_labels.view(-1),
        ignore_index=-100,  # HF Trainer marks padding with -100
        reduction="mean",
    )

    # torch.exp saturates to inf for huge losses; math.exp would raise
    # OverflowError once loss exceeds ~709 (common for an untrained model).
    return {
        "loss": loss.item(),
        "perplexity": torch.exp(loss).item(),
    }
0 commit comments