@@ -290,45 +290,35 @@ def backward(
290290 Mathematical Background:
291291 -----------------------
292292 For the von Mises-Fisher distribution, the gradient of log C_m(κ) with
293- For the von Mises-Fisher distribution, the gradient of log C_m(κ) with
294293 respect to κ is given by the ratio of modified Bessel functions:
295294
296-
297295 ∂/∂κ log C_m(κ) = (m/2-1)/κ - I_{m/2}(κ)/I_{m/2-1}(κ)
298296
299-
300297 For m=3, this simplifies to the exact formula:
301298 ∂/∂κ log C_3(κ) = 1/κ - 1/tanh(κ)
302299
303-
304300 For small κ values, we use the Taylor series approximation:
305301 f(κ) = -κ/3 + κ³/45 - 2κ⁵/945 + O(κ⁷)
306302
307- The first-order approximation -κ/3 provides sufficient accuracy for
308-
309303 The first-order approximation -κ/3 provides sufficient accuracy for
310304 |κ| < 1e-6, with truncation error bounded by |κ|³/45 ≲ O(10⁻²¹).
311305
312-
313306 Implementation Details:
314307 ----------------------
315308 Uses boolean masking to avoid double evaluation and RuntimeWarnings:
316309 - Small κ: |κ| < 1e-6 → gradient = -κ/3 (Taylor approximation)
317310 - Large κ: |κ| ≥ 1e-6 → gradient = 1/κ - 1/tanh(κ) (exact formula)
318311
319-
320312 References:
321313 ----------
322314 [1] von Mises-Fisher distribution: Wikipedia
323315 [2] arXiv:1812.04616, Section 8.2
324316 [3] MIT License (c) 2019 Max Ryabinin - Modified for GraphNeT
325317
326-
327318 Args:
328319 ctx: Autograd context containing saved tensors and metadata.
329320 grad_output: Gradient with respect to the output tensor.
330321
331-
332322 Returns:
333323 Tuple of gradients: (None for m, gradient w.r.t. κ).
334324 """
@@ -340,8 +330,7 @@ def backward(
340330 # Initialize gradient array
341331 grads = np.zeros_like(kappa)
342332
343- # Handle small kappa values (including zero)
344- # to avoid division by zero
333+ # Handle small kappa values (including zero) to avoid division by zero
345334 small_mask = np.abs(kappa) < 1e-6
346335 grads[small_mask] = -kappa[small_mask] / 3
347336
@@ -350,7 +339,6 @@ def backward(
350339 if np.any(large_mask):
351340 kappa_large = kappa[large_mask]
352341 grads[large_mask] = 1 / kappa_large - 1 / np.tanh(kappa_large)
353- grads[large_mask] = 1 / kappa_large - 1 / np.tanh(kappa_large)
354342 else:
355343 grads = -(
356344 (scipy.special.iv(m / 2.0, kappa))
0 commit comments