fix: centralize ignore_index masking in metrics

Rusheel86 · Rusheel86 · commit b80becac2af1 · 2026-05-26T00:05:55.000+05:30
Signed-off-by: Rusheel Sharma <rusheelhere@gmail.com>
diff --git a/monai/losses/focal_loss.py b/monai/losses/focal_loss.py
@@ -185,8 +185,6 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
         else:
             loss = sigmoid_focal_loss(input, target, self.gamma, alpha_arg)
 
-        num_of_classes = target.shape[1]
-
         if mask is not None:
             loss = loss * mask
 
@@ -213,3 +211,112 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
             else:
                 broadcast_shape = [1, num_classes] + [1] * (loss.ndim - 2)
                 loss = loss * cw.view(broadcast_shape)
+
+        if self.reduction == LossReduction.SUM.value:
+            # Previously there was a mean over the last dimension, which did not
+            # return a compatible BCE loss. To maintain backwards compatible
+            # behavior we have a flag that performs this extra step, disable or
+            # parameterize if necessary. (Or justify why the mean should be there)
+            average_spatial_dims = True
+            if average_spatial_dims:
+                loss = loss.mean(dim=list(range(2, len(target.shape))))
+            loss = loss.sum()
+
+        elif self.reduction == LossReduction.MEAN.value:
+            if mask is not None:
+                # Ensure we only sum the loss where the mask is 1
+                # Then divide by the actual number of 1s in the mask
+                loss = (loss * mask).sum() / mask.sum().clamp(min=1e-5)
+            else:
+                loss = loss.mean()
+
+        elif self.reduction == LossReduction.NONE.value:
+            pass
+
+        return loss
+
+
+def softmax_focal_loss(
+    input: torch.Tensor, target: torch.Tensor, gamma: float = 2.0, alpha: float | torch.Tensor | None = None
+) -> torch.Tensor:
+    """
+    Element-wise (unreduced) softmax focal loss: FL(pt) = -alpha * (1 - pt)**gamma * log(pt).
+
+    p_i = exp(s_i) / sum_j exp(s_j) is taken over dim 1 of `input` (s_j: unnormalized score of class j); `target`
+    multiplies the per-class term. Raises ValueError if a non-scalar `alpha` has length != `target.shape[1]`.
+    """
+    input_ls = input.log_softmax(1)  # log(p_i) for every class
+    loss: torch.Tensor = -(1 - input_ls.exp()).pow(gamma) * input_ls * target
+
+    if alpha is not None:
+        if isinstance(alpha, torch.Tensor):
+            alpha_t = alpha.to(device=input.device, dtype=input.dtype)
+        else:
+            alpha_t = torch.tensor(alpha, device=input.device, dtype=input.dtype)
+
+        if alpha_t.ndim == 0:  # scalar
+            alpha_val = alpha_t.item()
+            # (1-alpha) for class 0 (treated as the background class) and alpha for every other class
+            alpha_fac = torch.tensor([1 - alpha_val] + [alpha_val] * (target.shape[1] - 1)).to(loss)
+        else:  # tensor (sequence)
+            if alpha_t.shape[0] != target.shape[1]:
+                raise ValueError(
+                    f"The length of alpha ({alpha_t.shape[0]}) must match the number of classes ({target.shape[1]})."
+                )
+            alpha_fac = alpha_t
+
+        broadcast_dims = [-1] + [1] * len(target.shape[2:])  # (C, 1, ..., 1): lines up with dim 1 of `target`
+        alpha_fac = alpha_fac.view(broadcast_dims)
+        loss = alpha_fac * loss
+
+    return loss
+
+
+def sigmoid_focal_loss(
+    input: torch.Tensor, target: torch.Tensor, gamma: float = 2.0, alpha: float | torch.Tensor | None = None
+) -> torch.Tensor:
+    """
+    Unreduced sigmoid focal loss, FL(pt) = -alpha * (1 - pt)**gamma * log(pt), computed per element.
+
+    p = sigmoid(x); pt = p if label is 1, else 1 - p. Raises ValueError if a non-scalar `alpha` has the wrong length.
+    """
+    # computing binary cross entropy with logits
+    # equivalent to F.binary_cross_entropy_with_logits(input, target, reduction='none')
+    # see also https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/Loss.cpp#L363
+    loss: torch.Tensor = input - input * target - F.logsigmoid(input)
+
+    # sigmoid(-i) if t==1; sigmoid(i) if t==0 <=>
+    # 1-sigmoid(i) if t==1; sigmoid(i) if t==0 <=>
+    # 1-p if t==1; p if t==0 <=>
+    # pfac, that is, the term (1 - pt)
+    invprobs = F.logsigmoid(-input * (target * 2 - 1))  # log(1 - pt); log space reduces the chance of overflow
+    # (pfac.log() * gamma).exp() <=>
+    # pfac.log().exp() ^ gamma <=>
+    # pfac ^ gamma
+    loss = (invprobs * gamma).exp() * loss
+
+    if alpha is not None:
+        if isinstance(alpha, torch.Tensor):
+            alpha_t = alpha.to(device=input.device, dtype=input.dtype)
+        else:
+            alpha_t = torch.tensor(alpha, device=input.device, dtype=input.dtype)
+
+        if alpha_t.ndim == 0:  # scalar
+            # alpha if t==1; (1-alpha) if t==0
+            alpha_factor = target * alpha_t + (1 - target) * (1 - alpha_t)
+        else:  # tensor (sequence)
+            if alpha_t.shape[0] != target.shape[1]:
+                raise ValueError(
+                    f"The length of alpha ({alpha_t.shape[0]}) must match the number of classes ({target.shape[1]})."
+                )
+            # Reshape alpha to (C, 1, 1...), which broadcasts against `target` as if it were (1, C, 1, 1...)
+            broadcast_dims = [-1] + [1] * len(target.shape[2:])
+            alpha_t = alpha_t.view(broadcast_dims)
+            # Apply per-class weight only to positive samples
+            # For positive samples (target==1): multiply by alpha[c]
+            # For negative samples (target==0): keep weight as 1.0
+            alpha_factor = torch.where(target == 1, alpha_t, torch.ones_like(alpha_t))
+
+        loss = alpha_factor * loss
+
+    return loss
diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py
@@ -116,7 +116,7 @@ def create_ignore_mask(y: torch.Tensor, ignore_index: int | None) -> torch.Tenso
     num_classes = y.shape[1]
     if 0 <= ignore_index < num_classes:
         # Valid class index: exclude that channel
-        return 1.0 - y[:, ignore_index : ignore_index + 1] # type: ignore[no-any-return]
+        return 1.0 - y[:, ignore_index : ignore_index + 1]  # type: ignore[no-any-return]
     else:
         # Sentinel value: exclude where all channels are zero
         return (y.sum(dim=1, keepdim=True) > 0).float()
@@ -353,6 +353,7 @@ def get_edge_surface_distance(
     use_subvoxels: bool = False,
     symmetric: bool = False,
     class_index: int = -1,
+    mask: torch.Tensor | None = None,
 ) -> tuple[
     tuple[torch.Tensor, torch.Tensor],
     tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor],