From 889f20a9bca8462bf1c3482a8704b68188c439d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= <pawel@hepcolgum.band>
Date: Mon, 25 May 2026 22:46:43 +0200
Subject: [PATCH] crypto: BN254 pairing line eval following Beuchat 2010
 literally
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alternative to the ipsilon-convention line eval on master (PRs #1541
+ #1542): reimplement lin_func_and_dbl, lin_func_and_add, and lin_func
to follow Beuchat 2010 Algorithms 26 and 27
(https://eprint.iacr.org/2010/354.pdf) literally.

  lin_func_and_dbl (Alg 26):
    t[0] = -4·y·z³            (negated to compensate for -P.y at the
                               call site; Beuchat uses +y_P)
    t[1] = -6·x²·z²
    t[2] = 6·x³ - 4·y²
    T_out = standard Jacobian doubling (X3, Y3, Z3)

  lin_func_and_add (Alg 27):
    t[0] = +4·Z3              (Z3 = z0·H; call site uses +P.y)
    t[1] = -4·R               (R = y1·z0³ - y0)
    t[2] = 4·(R·x1 - y1·Z3)
    T_out RESCALED by λ=2 to (4·X3, 8·Y3, 2·Z3) — Beuchat's Alg 27
    output Jacobian. Same affine point, different Jacobian Z; required
    for the line scaling to compose consistently across Miller-loop
    iterations.

  lin_func: Alg 27 line eval without the T update.

Per Miller-loop step, this line equals -2 × the line on master (a
uniform F_p factor absorbed by final exponentiation). Both produce
the same pairing — the three discrepancies the ipsilon HackMD note
enumerates between its derivation and Beuchat's published formulas
are not correctness bugs, just a uniform F_p scaling difference that
vanishes under FE. The third claim ("tmp6 = -3X² + 3X³ + 9X⁴ - 4Y²
which is not close to 2Y² - 3X³") is an arithmetic slip in the note:
Beuchat's line 14 expansion needs the (a+b)² - a² - b² = 2ab identity
(which the algorithm uses earlier), and gives 6X³ - 4Y² = -2·(2Y² -
3X³) — the same uniform -2 factor.

Bench (build/clang-tt, 100 reps × 1s, ecpairing precompile):
  master (ipsilon factored):  2892598 ns mean, 2872961 ns median
  this branch (Beuchat lit.): 2861296 ns mean, 2862051 ns median
  Δ: -31k ns mean (-1.1 %), within ~7 % stddev — statistically
  indistinguishable. The DBL-side cost (3 extra Fq2 doublings per
  call × 64 iter/pair) and the ADD-side savings (2 Fq2 muls per ~9
  ADD steps/pair) roughly cancel.

Kept as a branch for reference / future comparison, not intended to
land on master.

Tests: 53/53 unit, EEST state tests 11/11 on every stable fork
(Byzantium / Istanbul / Cancun / Prague / Osaka).
---
 .../pairing/bn254/utils.hpp                   | 63 +++++++++++++++----
 1 file changed, 51 insertions(+), 12 deletions(-)
diff --git a/lib/evmone_precompiles/pairing/bn254/utils.hpp b/lib/evmone_precompiles/pairing/bn254/utils.hpp
index 0361b1d832..e5c44a9eb6 100644
--- a/lib/evmone_precompiles/pairing/bn254/utils.hpp
+++ b/lib/evmone_precompiles/pairing/bn254/utils.hpp
@@ -368,9 +368,20 @@ constexpr ecc::JacPoint<Fq2> lin_func_and_dbl(
     const auto Yp = M * (S - Xp) - (_4y_4 + _4y_4);
     const auto Zp = N * N - y_squared - z_squared;  // 2yz = (y+z)^2 - y^2 - z^2
 
-    t[0] = Zp * z_squared;
-    t[1] = M * z_squared;
-    t[2] = R - M * x;
+    // Beuchat 2010 Algorithm 26 line eval, literal form. Beuchat uses +y_P
+    // for the line; our code passes -P.y at the DBL call site, so t[0]
+    // is negated to compensate.
+    //   t[0] = -4·(Z_T·z²) = -4·y·z³   (Beuchat tmp0 = 2·Z_T·Z²_Q = 4yz³)
+    //   t[1] = -2·(M·z²)   = -6·x²·z²  (Beuchat tmp3 = -2·tmp4·Z²_Q)
+    //   t[2] =  2·(M·x − R) = 6·x³ − 4·y²  (Beuchat tmp6)
+    // Ratio Beuchat / ipsilon (per coefficient) = uniform -2, an F_p
+    // factor absorbed by final exponentiation.
+    const auto Zp_z2 = Zp * z_squared;
+    const auto M_z2 = M * z_squared;
+    const auto Mx_R = M * x - R;
+    t[0] = -(Zp_z2 + Zp_z2);
+    t[1] = -(M_z2 + M_z2);
+    t[2] = Mx_R + Mx_R;
 
     return ecc::JacPoint<Fq2>{Xp, Yp, Zp};
 }
@@ -406,11 +417,29 @@ constexpr ecc::JacPoint<Fq2> lin_func_and_dbl(
     const auto Y3 = R * (V - X3) - y0 * H_cubed;
     const auto Z3 = H * z0;
 
-    t[0] = -H * z0_cubed;   // = x0·z0³ − U2·z0³
-    t[1] = R * z0_squared;  // = S2·z0² − y0·z0²
-    t[2] = y0 * U2 - x0 * S2;
-
-    return ecc::JacPoint<Fq2>{X3, Y3, Z3};
+    // Beuchat 2010 Algorithm 27 line eval, literal form. Code's ADD call
+    // site uses +P.y, matching Beuchat's convention (no sign flip).
+    //   t[0] = +4·Z3   (Beuchat a0[0] = 2·Z_T·y_P, Z_T = 2·z0·H = 2·Z3)
+    //   t[1] = -4·R    (Beuchat a1[0] = 2·t6·x_P, t6_after_neg = -2·R)
+    //   t[2] = 4·(R·x1 - y1·Z3)
+    // T_out RESCALED to (4·X3, 8·Y3, 2·Z3) — Beuchat's λ=2 Jacobian.
+    // Required for self-consistent scaling across Miller-loop iterations.
+    const auto _2Z3 = Z3 + Z3;
+    const auto _2R = R + R;
+    const auto Rx1 = R * x1;
+    const auto y1_Z3 = y1 * Z3;
+    const auto t2_pre = Rx1 - y1_Z3;
+    const auto _2t2_pre = t2_pre + t2_pre;
+    t[0] = _2Z3 + _2Z3;
+    t[1] = -(_2R + _2R);
+    t[2] = _2t2_pre + _2t2_pre;
+
+    const auto _2X3 = X3 + X3;
+    const auto _4X3 = _2X3 + _2X3;
+    const auto _2Y3 = Y3 + Y3;
+    const auto _4Y3 = _2Y3 + _2Y3;
+    const auto _8Y3 = _4Y3 + _4Y3;
+    return ecc::JacPoint<Fq2>{_4X3, _8Y3, _2Z3};
 }
 
 /// Computes points P0 and P1 addition for twisted curve + line defined by untwisted P1 and P2
@@ -431,10 +460,20 @@ constexpr void lin_func(
 
     const auto U2 = x1 * z0_squared;
     const auto S2 = y1 * z0_cubed;
-
-    t[0] = (x0 - U2) * z0_cubed;    // = x0·z0³ − U2·z0³
-    t[1] = (S2 - y0) * z0_squared;  // = S2·z0² − y0·z0²
-    t[2] = y0 * U2 - x0 * S2;
+    const auto H = U2 - x0;
+    const auto R = S2 - y0;
+    const auto Z3 = z0 * H;
+
+    // Beuchat 2010 Algorithm 27 line eval (no-T-update variant).
+    const auto _2Z3 = Z3 + Z3;
+    const auto _2R = R + R;
+    const auto Rx1 = R * x1;
+    const auto y1_Z3 = y1 * Z3;
+    const auto t2_pre = Rx1 - y1_Z3;
+    const auto _2t2_pre = t2_pre + t2_pre;
+    t[0] = _2Z3 + _2Z3;
+    t[1] = -(_2R + _2R);
+    t[2] = _2t2_pre + _2t2_pre;
 }
 
 /// Computes f^2 for f in Fq12. For more ref https://eprint.iacr.org/2010/354.pdf Algorithm 22