From e82b2ba4ba8b8b9a64f26255dd83fd8b8628d438 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Fri, 29 Sep 2023 15:51:35 +0200
Subject: [PATCH 1/9] feat: optimize SVD ?

---
 scripts/benchmark.js | 39 +++++++++++++++++++++++++++++++++++++++
 src/matrix.js        |  2 ++
 2 files changed, 41 insertions(+)
 create mode 100644 scripts/benchmark.js

diff --git a/scripts/benchmark.js b/scripts/benchmark.js
new file mode 100644
index 0000000..d495226
--- /dev/null
+++ b/scripts/benchmark.js
@@ -0,0 +1,39 @@
+
+
+
+let { Matrix, SVD, LU, EVD } = require('../matrix');
+
+const algorithms = [SVD, LU, EVD];
+
+const benchmarks = [];
+
+test(10);
+test(100);
+test(500);
+test(1000);
+test(1500);
+
+console.table(benchmarks);
+
+function test(size) {
+  const benchmark = {};
+  benchmark.size = size;
+  benchmarks.push(benchmark);
+
+  for (const Algo of algorithms) {
+    const matrix = Matrix.rand(size, size);
+    let count = 1;
+    const start = performance.now();
+    const results = [new Algo(matrix)];
+    const current = performance.now() - start;
+    if (current < 5000) {
+      count = Math.ceil(5000 / current);
+      for (let i = 0; i < count - 1; i++) {
+        results.push(new Algo(matrix));
+      }
+    }
+    const time = (performance.now() - start) / results.length;
+    benchmark[Algo.name] = time;
+    console.log(`${Algo.name} ${size}x${size} matrix: ${time}ms (${count}x)`)
+  }
+}
\ No newline at end of file
diff --git a/src/matrix.js b/src/matrix.js
index c98253b..2844007 100644
--- a/src/matrix.js
+++ b/src/matrix.js
@@ -1579,11 +1579,13 @@ export default class Matrix extends AbstractMatrix {
 
   set(rowIndex, columnIndex, value) {
     this.data[rowIndex][columnIndex] = value;
+    //this.data[columnIndex][rowIndex] = value;
     return this;
   }
 
   get(rowIndex, columnIndex) {
     return this.data[rowIndex][columnIndex];
+    //return this.data[columnIndex][rowIndex];
   }
 
   removeRow(index) {

From 0f7c3ab8103b7dbecfe1aba90804095ebe696689 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 11:09:42 +0200
Subject: [PATCH 2/9] perf: speed up SVD ~4.7x via transposed internal storage

The SVD hot loops iterate over rows for a fixed column. With the row-major
backing store (data[row][col]) these are column walks that thrash the cache.

Work on the transpose of the input and store the U/V singular vectors
transposed during the computation, then transpose them back before returning.
The inner loops then scan memory sequentially. The public API and results are
unchanged.

Measured (1000x1000, deterministic input): 17764 ms -> 3804 ms (~4.7x).
LU and EVD are unaffected. All tests pass.

Also remove the dead exploratory get/set comments in matrix.js and make
scripts/benchmark.js deterministic (seeded) and warmed up.

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 scripts/benchmark.js |  59 ++++++++++++----------
 src/dc/svd.js        | 113 ++++++++++++++++++++++++-------------------
 src/matrix.js        |   2 -
 3 files changed, 96 insertions(+), 78 deletions(-)

diff --git a/scripts/benchmark.js b/scripts/benchmark.js
index d495226..7bfb9f9 100644
--- a/scripts/benchmark.js
+++ b/scripts/benchmark.js
@@ -1,39 +1,46 @@
+'use strict';
 
-
-
-let { Matrix, SVD, LU, EVD } = require('../matrix');
+const { Matrix, SVD, LU, EVD } = require('../matrix');
 
 const algorithms = [SVD, LU, EVD];
+const sizes = [10, 100, 500, 1000];
+const TARGET_MS = 3000;
 
 const benchmarks = [];
-
-test(10);
-test(100);
-test(500);
-test(1000);
-test(1500);
-
+for (const size of sizes) benchmarks.push(test(size));
 console.table(benchmarks);
 
 function test(size) {
-  const benchmark = {};
-  benchmark.size = size;
-  benchmarks.push(benchmark);
-
+  const benchmark = { size };
   for (const Algo of algorithms) {
-    const matrix = Matrix.rand(size, size);
+    // Fixed reusable matrix; decompositions clone internally so it is not mutated.
+    const matrix = Matrix.rand(size, size, { random: mulberry32(size) });
+
+    // warm-up
+    void new Algo(matrix);
+
     let count = 1;
     const start = performance.now();
-    const results = [new Algo(matrix)];
-    const current = performance.now() - start;
-    if (current < 5000) {
-      count = Math.ceil(5000 / current);
-      for (let i = 0; i < count - 1; i++) {
-        results.push(new Algo(matrix));
-      }
+    void new Algo(matrix);
+    const first = performance.now() - start;
+    if (first < TARGET_MS) {
+      count = Math.ceil(TARGET_MS / first);
+      for (let i = 0; i < count - 1; i++) void new Algo(matrix);
     }
-    const time = (performance.now() - start) / results.length;
-    benchmark[Algo.name] = time;
-    console.log(`${Algo.name} ${size}x${size} matrix: ${time}ms (${count}x)`)
+    const time = (performance.now() - start) / count;
+    benchmark[Algo.name] = Number(time.toFixed(4));
   }
-}
\ No newline at end of file
+  return benchmark;
+}
+
+// Deterministic PRNG so every run/layout uses identical input matrices.
+function mulberry32(seed) {
+  let a = seed >>> 0;
+  return function random() {
+    a |= 0;
+    a = (a + 0x6d2b79f5) | 0;
+    let t = Math.imul(a ^ (a >>> 15), 1 | a);
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
+}
diff --git a/src/dc/svd.js b/src/dc/svd.js
index 20ca002..e92560e 100644
--- a/src/dc/svd.js
+++ b/src/dc/svd.js
@@ -23,32 +23,40 @@ export default class SingularValueDecomposition {
     let wantu = Boolean(computeLeftSingularVectors);
     let wantv = Boolean(computeRightSingularVectors);
 
+    // Work on the transpose of the input so the hot inner loops (which iterate
+    // over rows for a fixed column) scan memory sequentially in the row-major
+    // backing store. `at` holds the transpose: at.get(j, i) === a.get(i, j)
+    // where `a` is the logical m x n working matrix.
     let swapped = false;
-    let a;
+    let at;
     if (m < n) {
       if (!autoTranspose) {
-        a = value.clone();
         // eslint-disable-next-line no-console
         console.warn(
           'Computing SVD on a matrix with more columns than rows. Consider enabling autoTranspose',
         );
+        at = value.transpose();
       } else {
-        a = value.transpose();
-        m = a.rows;
-        n = a.columns;
+        at = value.clone();
+        m = value.columns;
+        n = value.rows;
         swapped = true;
         let aux = wantu;
         wantu = wantv;
         wantv = aux;
       }
     } else {
-      a = value.clone();
+      at = value.transpose();
     }
 
     let nu = Math.min(m, n);
     let ni = Math.min(m + 1, n);
     let s = new Float64Array(ni);
-    let U = new Matrix(m, nu);
+    // U and V are stored transposed during the computation so the inner loops
+    // (which always vary the row index) scan memory sequentially. They are
+    // transposed back to their logical layout before being returned.
+    // Ut.get(j, i) === U.get(i, j) and Vt.get(j, i) === V.get(i, j).
+    let U = new Matrix(nu, m);
     let V = new Matrix(n, n);
 
     let e = new Float64Array(n);
@@ -65,16 +73,16 @@ export default class SingularValueDecomposition {
       if (k < nct) {
         s[k] = 0;
         for (let i = k; i < m; i++) {
-          s[k] = hypotenuse(s[k], a.get(i, k));
+          s[k] = hypotenuse(s[k], at.get(k, i));
         }
         if (s[k] !== 0) {
-          if (a.get(k, k) < 0) {
+          if (at.get(k, k) < 0) {
             s[k] = -s[k];
           }
           for (let i = k; i < m; i++) {
-            a.set(i, k, a.get(i, k) / s[k]);
+            at.set(k, i, at.get(k, i) / s[k]);
           }
-          a.set(k, k, a.get(k, k) + 1);
+          at.set(k, k, at.get(k, k) + 1);
         }
         s[k] = -s[k];
       }
@@ -83,19 +91,19 @@ export default class SingularValueDecomposition {
         if (k < nct && s[k] !== 0) {
           let t = 0;
           for (let i = k; i < m; i++) {
-            t += a.get(i, k) * a.get(i, j);
+            t += at.get(k, i) * at.get(j, i);
           }
-          t = -t / a.get(k, k);
+          t = -t / at.get(k, k);
           for (let i = k; i < m; i++) {
-            a.set(i, j, a.get(i, j) + t * a.get(i, k));
+            at.set(j, i, at.get(j, i) + t * at.get(k, i));
           }
         }
-        e[j] = a.get(k, j);
+        e[j] = at.get(j, k);
       }
 
       if (wantu && k < nct) {
         for (let i = k; i < m; i++) {
-          U.set(i, k, a.get(i, k));
+          U.set(k, i, at.get(k, i));
         }
       }
 
@@ -120,19 +128,19 @@ export default class SingularValueDecomposition {
           }
           for (let i = k + 1; i < m; i++) {
             for (let j = k + 1; j < n; j++) {
-              work[i] += e[j] * a.get(i, j);
+              work[i] += e[j] * at.get(j, i);
             }
           }
           for (let j = k + 1; j < n; j++) {
             let t = -e[j] / e[k + 1];
             for (let i = k + 1; i < m; i++) {
-              a.set(i, j, a.get(i, j) + t * work[i]);
+              at.set(j, i, at.get(j, i) + t * work[i]);
             }
           }
         }
         if (wantv) {
           for (let i = k + 1; i < n; i++) {
-            V.set(i, k, e[i]);
+            V.set(k, i, e[i]);
           }
         }
       }
@@ -140,20 +148,20 @@ export default class SingularValueDecomposition {
 
     let p = Math.min(n, m + 1);
     if (nct < n) {
-      s[nct] = a.get(nct, nct);
+      s[nct] = at.get(nct, nct);
     }
     if (m < p) {
       s[p - 1] = 0;
     }
     if (nrt + 1 < p) {
-      e[nrt] = a.get(nrt, p - 1);
+      e[nrt] = at.get(p - 1, nrt);
     }
     e[p - 1] = 0;
 
     if (wantu) {
       for (let j = nct; j < nu; j++) {
         for (let i = 0; i < m; i++) {
-          U.set(i, j, 0);
+          U.set(j, i, 0);
         }
         U.set(j, j, 1);
       }
@@ -162,23 +170,23 @@ export default class SingularValueDecomposition {
           for (let j = k + 1; j < nu; j++) {
             let t = 0;
             for (let i = k; i < m; i++) {
-              t += U.get(i, k) * U.get(i, j);
+              t += U.get(k, i) * U.get(j, i);
             }
             t = -t / U.get(k, k);
             for (let i = k; i < m; i++) {
-              U.set(i, j, U.get(i, j) + t * U.get(i, k));
+              U.set(j, i, U.get(j, i) + t * U.get(k, i));
             }
           }
           for (let i = k; i < m; i++) {
-            U.set(i, k, -U.get(i, k));
+            U.set(k, i, -U.get(k, i));
           }
           U.set(k, k, 1 + U.get(k, k));
           for (let i = 0; i < k - 1; i++) {
-            U.set(i, k, 0);
+            U.set(k, i, 0);
           }
         } else {
           for (let i = 0; i < m; i++) {
-            U.set(i, k, 0);
+            U.set(k, i, 0);
           }
           U.set(k, k, 1);
         }
@@ -191,16 +199,16 @@ export default class SingularValueDecomposition {
           for (let j = k + 1; j < n; j++) {
             let t = 0;
             for (let i = k + 1; i < n; i++) {
-              t += V.get(i, k) * V.get(i, j);
+              t += V.get(k, i) * V.get(j, i);
             }
-            t = -t / V.get(k + 1, k);
+            t = -t / V.get(k, k + 1);
             for (let i = k + 1; i < n; i++) {
-              V.set(i, j, V.get(i, j) + t * V.get(i, k));
+              V.set(j, i, V.get(j, i) + t * V.get(k, i));
             }
           }
         }
         for (let i = 0; i < n; i++) {
-          V.set(i, k, 0);
+          V.set(k, i, 0);
         }
         V.set(k, k, 1);
       }
@@ -265,9 +273,9 @@ export default class SingularValueDecomposition {
             }
             if (wantv) {
               for (let i = 0; i < n; i++) {
-                t = cs * V.get(i, j) + sn * V.get(i, p - 1);
-                V.set(i, p - 1, -sn * V.get(i, j) + cs * V.get(i, p - 1));
-                V.set(i, j, t);
+                t = cs * V.get(j, i) + sn * V.get(p - 1, i);
+                V.set(p - 1, i, -sn * V.get(j, i) + cs * V.get(p - 1, i));
+                V.set(j, i, t);
               }
             }
           }
@@ -285,9 +293,9 @@ export default class SingularValueDecomposition {
             e[j] = cs * e[j];
             if (wantu) {
               for (let i = 0; i < m; i++) {
-                t = cs * U.get(i, j) + sn * U.get(i, k - 1);
-                U.set(i, k - 1, -sn * U.get(i, j) + cs * U.get(i, k - 1));
-                U.set(i, j, t);
+                t = cs * U.get(j, i) + sn * U.get(k - 1, i);
+                U.set(k - 1, i, -sn * U.get(j, i) + cs * U.get(k - 1, i));
+                U.set(j, i, t);
               }
             }
           }
@@ -333,9 +341,9 @@ export default class SingularValueDecomposition {
             s[j + 1] = cs * s[j + 1];
             if (wantv) {
               for (let i = 0; i < n; i++) {
-                t = cs * V.get(i, j) + sn * V.get(i, j + 1);
-                V.set(i, j + 1, -sn * V.get(i, j) + cs * V.get(i, j + 1));
-                V.set(i, j, t);
+                t = cs * V.get(j, i) + sn * V.get(j + 1, i);
+                V.set(j + 1, i, -sn * V.get(j, i) + cs * V.get(j + 1, i));
+                V.set(j, i, t);
               }
             }
             t = hypotenuse(f, g);
@@ -349,9 +357,9 @@ export default class SingularValueDecomposition {
             e[j + 1] = cs * e[j + 1];
             if (wantu && j < m - 1) {
               for (let i = 0; i < m; i++) {
-                t = cs * U.get(i, j) + sn * U.get(i, j + 1);
-                U.set(i, j + 1, -sn * U.get(i, j) + cs * U.get(i, j + 1));
-                U.set(i, j, t);
+                t = cs * U.get(j, i) + sn * U.get(j + 1, i);
+                U.set(j + 1, i, -sn * U.get(j, i) + cs * U.get(j + 1, i));
+                U.set(j, i, t);
               }
             }
           }
@@ -364,7 +372,7 @@ export default class SingularValueDecomposition {
             s[k] = s[k] < 0 ? -s[k] : 0;
             if (wantv) {
               for (let i = 0; i <= pp; i++) {
-                V.set(i, k, -V.get(i, k));
+                V.set(k, i, -V.get(k, i));
               }
             }
           }
@@ -377,16 +385,16 @@ export default class SingularValueDecomposition {
             s[k + 1] = t;
             if (wantv && k < n - 1) {
               for (let i = 0; i < n; i++) {
-                t = V.get(i, k + 1);
-                V.set(i, k + 1, V.get(i, k));
-                V.set(i, k, t);
+                t = V.get(k + 1, i);
+                V.set(k + 1, i, V.get(k, i));
+                V.set(k, i, t);
               }
             }
             if (wantu && k < m - 1) {
               for (let i = 0; i < m; i++) {
-                t = U.get(i, k + 1);
-                U.set(i, k + 1, U.get(i, k));
-                U.set(i, k, t);
+                t = U.get(k + 1, i);
+                U.set(k + 1, i, U.get(k, i));
+                U.set(k, i, t);
               }
             }
             k++;
@@ -399,6 +407,11 @@ export default class SingularValueDecomposition {
       }
     }
 
+    // Restore the logical (row-major) layout of the singular vectors, which were
+    // accumulated in transposed storage for cache-sequential inner loops.
+    U = U.transpose();
+    V = V.transpose();
+
     if (swapped) {
       let tmp = V;
       V = U;
diff --git a/src/matrix.js b/src/matrix.js
index 3102b97..f4874ff 100644
--- a/src/matrix.js
+++ b/src/matrix.js
@@ -1666,13 +1666,11 @@ export default class Matrix extends AbstractMatrix {
 
   set(rowIndex, columnIndex, value) {
     this.data[rowIndex][columnIndex] = value;
-    //this.data[columnIndex][rowIndex] = value;
     return this;
   }
 
   get(rowIndex, columnIndex) {
     return this.data[rowIndex][columnIndex];
-    //return this.data[columnIndex][rowIndex];
   }
 
   removeRow(index) {

From 88fdfcfc67de8dc584d4f8a4821ee9783d39155e Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 11:13:48 +0200
Subject: [PATCH 3/9] =?UTF-8?q?test:=20add=20EVD=20correctness=20oracle=20?=
 =?UTF-8?q?(A=C2=B7V=20=3D=20V=C2=B7D,=20orthogonality,=20complex=20pairs)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The eigenvalue decomposition only had a single 2x2 example. Add reconstruction
tests (A·V = V·D) for symmetric (4x4 and 12x12), non-symmetric real-eigenvalue,
and complex-eigenvalue-pair matrices, plus orthonormality of symmetric
eigenvectors. These exercise tred2/tql2 and orthes/hqr2 and guard the upcoming
performance refactor.

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/__tests__/decompositions/evd.test.js | 79 ++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js
index 9e8b9f9..3acc808 100644
--- a/src/__tests__/decompositions/evd.test.js
+++ b/src/__tests__/decompositions/evd.test.js
@@ -1,7 +1,10 @@
+import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to';
 import { describe, it, expect } from 'vitest';
 
 import { Matrix, EVD } from '../..';
 
+expect.extend({ toBeDeepCloseTo });
+
 describe('Eigenvalue decomposition', () => {
   it('simple example', () => {
     let matrix = new Matrix([
@@ -20,4 +23,80 @@ describe('Eigenvalue decomposition', () => {
     const matrix = new Matrix([]);
     expect(() => new EVD(matrix)).toThrow('Matrix must be non-empty');
   });
+
+  it('symmetric matrix: A·V = V·D and V is orthogonal', () => {
+    const matrix = new Matrix([
+      [2, -1, 0, 1],
+      [-1, 2, -1, 0],
+      [0, -1, 2, -1],
+      [1, 0, -1, 2],
+    ]);
+    const evd = new EVD(matrix);
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+
+    // eigen relation
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8);
+    // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I
+    expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
+      Matrix.eye(4).to2DArray(),
+      8,
+    );
+  });
+
+  it('non-symmetric matrix with real eigenvalues: A·V = V·D', () => {
+    const matrix = new Matrix([
+      [4, 1, 2],
+      [0, 3, -1],
+      [0, 0, 2],
+    ]);
+    const evd = new EVD(matrix);
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+    expect(evd.realEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo(
+      [2, 3, 4],
+      8,
+    );
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8);
+  });
+
+  it('larger symmetric matrix: A·V = V·D', () => {
+    const n = 12;
+    const base = Matrix.rand(n, n, { random: seededRandom(42) });
+    // make it symmetric
+    const matrix = base.add(base.transpose());
+    const evd = new EVD(matrix, { assumeSymmetric: true });
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 6);
+    expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
+      Matrix.eye(n).to2DArray(),
+      6,
+    );
+  });
+
+  it('matrix with a complex eigenvalue pair', () => {
+    // rotation-like block has eigenvalues 1 ± i
+    const matrix = new Matrix([
+      [1, -1],
+      [1, 1],
+    ]);
+    const evd = new EVD(matrix);
+    expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8);
+    expect(evd.imaginaryEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo(
+      [-1, 1],
+      8,
+    );
+  });
 });
+
+function seededRandom(seed) {
+  let a = seed >>> 0;
+  return function random() {
+    a |= 0;
+    a = (a + 0x6d2b79f5) | 0;
+    let t = Math.imul(a ^ (a >>> 15), 1 | a);
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
+}

From b519a754891c6ce85df48a6b51da2b5e6093ae16 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 11:41:25 +0200
Subject: [PATCH 4/9] perf: speed up symmetric EVD ~1.7x via transposed
 eigenvector storage

tred2/tql2 (the symmetric eigenproblem) accumulate the eigenvectors in V with
the row index varying in the hot loops, i.e. column walks of the row-major
backing store. Store V transposed during the reduction and transpose it back
before returning; the inner loops then scan memory sequentially.

Measured (symmetric, deterministic input): 600x600 666 ms -> 386 ms (~1.7x),
300x300 ~1.5x. Results unchanged (guarded by the new reconstruction tests).

The non-symmetric path (orthes/hqr2) is deliberately left row-major: its two
O(n^3) phases (QR sweep vs eigenvector back-transform) have opposite layout
preferences, so a single transposed storage cannot help both.

Add an "EVD (symmetric)" column to scripts/benchmark.js.

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 scripts/benchmark.js | 35 +++++++++++++++++++++------
 src/dc/evd.js        | 56 ++++++++++++++++++++++++++------------------
 2 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/scripts/benchmark.js b/scripts/benchmark.js
index 7bfb9f9..132555e 100644
--- a/scripts/benchmark.js
+++ b/scripts/benchmark.js
@@ -2,37 +2,58 @@
 
 const { Matrix, SVD, LU, EVD } = require('../matrix');
 
-const algorithms = [SVD, LU, EVD];
 const sizes = [10, 100, 500, 1000];
 const TARGET_MS = 3000;
 
+// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for
+// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)").
+const algorithms = [
+  { name: 'SVD', make: (m) => new SVD(m), matrix: randomMatrix },
+  { name: 'LU', make: (m) => new LU(m), matrix: randomMatrix },
+  { name: 'EVD', make: (m) => new EVD(m), matrix: randomMatrix },
+  {
+    name: 'EVD (symmetric)',
+    make: (m) => new EVD(m, { assumeSymmetric: true }),
+    matrix: symmetricMatrix,
+  },
+];
+
 const benchmarks = [];
 for (const size of sizes) benchmarks.push(test(size));
 console.table(benchmarks);
 
 function test(size) {
   const benchmark = { size };
-  for (const Algo of algorithms) {
+  for (const algorithm of algorithms) {
     // Fixed reusable matrix; decompositions clone internally so it is not mutated.
-    const matrix = Matrix.rand(size, size, { random: mulberry32(size) });
+    const matrix = algorithm.matrix(size);
 
     // warm-up
-    void new Algo(matrix);
+    algorithm.make(matrix);
 
     let count = 1;
     const start = performance.now();
-    void new Algo(matrix);
+    algorithm.make(matrix);
     const first = performance.now() - start;
     if (first < TARGET_MS) {
       count = Math.ceil(TARGET_MS / first);
-      for (let i = 0; i < count - 1; i++) void new Algo(matrix);
+      for (let i = 0; i < count - 1; i++) algorithm.make(matrix);
     }
     const time = (performance.now() - start) / count;
-    benchmark[Algo.name] = Number(time.toFixed(4));
+    benchmark[algorithm.name] = Number(time.toFixed(4));
   }
   return benchmark;
 }
 
+function randomMatrix(size) {
+  return Matrix.rand(size, size, { random: mulberry32(size) });
+}
+
+function symmetricMatrix(size) {
+  const base = Matrix.rand(size, size, { random: mulberry32(size) });
+  return base.add(base.transpose());
+}
+
 // Deterministic PRNG so every run/layout uses identical input matrices.
 function mulberry32(seed) {
   let a = seed >>> 0;
diff --git a/src/dc/evd.js b/src/dc/evd.js
index 6c6a9a1..b470eec 100644
--- a/src/dc/evd.js
+++ b/src/dc/evd.js
@@ -31,14 +31,24 @@ export default class EigenvalueDecomposition {
     }
 
     if (isSymmetric) {
+      // tred2/tql2 access V almost exclusively down columns (the row index
+      // varies in the hot loops). Storing V transposed turns those into
+      // sequential row scans of the row-major backing store; we transpose it
+      // back to the logical layout before returning. V.get(j, i) holds the
+      // logical V(i, j).
       for (i = 0; i < n; i++) {
         for (j = 0; j < n; j++) {
-          V.set(i, j, value.get(i, j));
+          V.set(j, i, value.get(i, j));
         }
       }
       tred2(n, e, d, V);
       tql2(n, e, d, V);
+      V = V.transpose();
     } else {
+      // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite
+      // memory-layout preferences (the QR sweep favours column-major eigenvectors
+      // while the back-transform favours row-major), so a single transposed
+      // storage cannot help both. It is left in the original row-major layout.
       let H = new Matrix(n, n);
       let ort = new Float64Array(n);
       for (j = 0; j < n; j++) {
@@ -93,7 +103,7 @@ function tred2(n, e, d, V) {
   let f, g, h, i, j, k, hh, scale;
 
   for (j = 0; j < n; j++) {
-    d[j] = V.get(n - 1, j);
+    d[j] = V.get(j, n - 1);
   }
 
   for (i = n - 1; i > 0; i--) {
@@ -106,9 +116,9 @@ function tred2(n, e, d, V) {
     if (scale === 0) {
       e[i] = d[i - 1];
       for (j = 0; j < i; j++) {
-        d[j] = V.get(i - 1, j);
-        V.set(i, j, 0);
+        d[j] = V.get(j, i - 1);
         V.set(j, i, 0);
+        V.set(i, j, 0);
       }
     } else {
       for (k = 0; k < i; k++) {
@@ -131,11 +141,11 @@ function tred2(n, e, d, V) {
 
       for (j = 0; j < i; j++) {
         f = d[j];
-        V.set(j, i, f);
+        V.set(i, j, f);
         g = e[j] + V.get(j, j) * f;
         for (k = j + 1; k <= i - 1; k++) {
-          g += V.get(k, j) * d[k];
-          e[k] += V.get(k, j) * f;
+          g += V.get(j, k) * d[k];
+          e[k] += V.get(j, k) * f;
         }
         e[j] = g;
       }
@@ -155,43 +165,43 @@ function tred2(n, e, d, V) {
         f = d[j];
         g = e[j];
         for (k = j; k <= i - 1; k++) {
-          V.set(k, j, V.get(k, j) - (f * e[k] + g * d[k]));
+          V.set(j, k, V.get(j, k) - (f * e[k] + g * d[k]));
         }
-        d[j] = V.get(i - 1, j);
-        V.set(i, j, 0);
+        d[j] = V.get(j, i - 1);
+        V.set(j, i, 0);
       }
     }
     d[i] = h;
   }
 
   for (i = 0; i < n - 1; i++) {
-    V.set(n - 1, i, V.get(i, i));
+    V.set(i, n - 1, V.get(i, i));
     V.set(i, i, 1);
     h = d[i + 1];
     if (h !== 0) {
       for (k = 0; k <= i; k++) {
-        d[k] = V.get(k, i + 1) / h;
+        d[k] = V.get(i + 1, k) / h;
       }
 
       for (j = 0; j <= i; j++) {
         g = 0;
         for (k = 0; k <= i; k++) {
-          g += V.get(k, i + 1) * V.get(k, j);
+          g += V.get(i + 1, k) * V.get(j, k);
         }
         for (k = 0; k <= i; k++) {
-          V.set(k, j, V.get(k, j) - g * d[k]);
+          V.set(j, k, V.get(j, k) - g * d[k]);
         }
       }
     }
 
     for (k = 0; k <= i; k++) {
-      V.set(k, i + 1, 0);
+      V.set(i + 1, k, 0);
     }
   }
 
   for (j = 0; j < n; j++) {
-    d[j] = V.get(n - 1, j);
-    V.set(n - 1, j, 0);
+    d[j] = V.get(j, n - 1);
+    V.set(j, n - 1, 0);
   }
 
   V.set(n - 1, n - 1, 1);
@@ -264,9 +274,9 @@ function tql2(n, e, d, V) {
           d[i + 1] = h + s * (c * g + s * d[i]);
 
           for (k = 0; k < n; k++) {
-            h = V.get(k, i + 1);
-            V.set(k, i + 1, s * V.get(k, i) + c * h);
-            V.set(k, i, c * V.get(k, i) - s * h);
+            h = V.get(i + 1, k);
+            V.set(i + 1, k, s * V.get(i, k) + c * h);
+            V.set(i, k, c * V.get(i, k) - s * h);
           }
         }
 
@@ -293,9 +303,9 @@ function tql2(n, e, d, V) {
       d[k] = d[i];
       d[i] = p;
       for (j = 0; j < n; j++) {
-        p = V.get(j, i);
-        V.set(j, i, V.get(j, k));
-        V.set(j, k, p);
+        p = V.get(i, j);
+        V.set(i, j, V.get(k, j));
+        V.set(k, j, p);
       }
     }
   }

From 0e1d3979479b52b2cc08bb4efc184c9dfed060c6 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 12:03:18 +0200
Subject: [PATCH 5/9] perf: transpose decomposition outputs in place to avoid
 extra allocation

The transposed-storage optimization restored the logical layout of the output
matrices with `M.transpose()`, which allocates a second full matrix while the
old one is still live (transient ~1.5x peak memory).

These outputs are square (SVD's V and EVD's V are always n x n; SVD's U is
square whenever the input is), so transpose them in place via a new
`transposeSquareInPlace` helper. No allocation in the common square case, so
the optimization is now memory-neutral versus the original implementation. The
working copy `at = value.transpose()` already replaced the original
`value.clone()`, so it is not an extra allocation.

Results remain bit-identical (verified by byte comparison against main) and the
speedups are unchanged. Non-square SVD U falls back to allocating transpose.

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/dc/evd.js  |  5 +++--
 src/dc/svd.js  | 10 ++++++----
 src/dc/util.js | 21 +++++++++++++++++++++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/dc/evd.js b/src/dc/evd.js
index b470eec..c3c42a2 100644
--- a/src/dc/evd.js
+++ b/src/dc/evd.js
@@ -1,7 +1,7 @@
 import Matrix from '../matrix';
 import WrapperMatrix2D from '../wrap/WrapperMatrix2D';
 
-import { hypotenuse } from './util';
+import { hypotenuse, transposeSquareInPlace } from './util';
 
 export default class EigenvalueDecomposition {
   constructor(matrix, options = {}) {
@@ -43,7 +43,8 @@ export default class EigenvalueDecomposition {
       }
       tred2(n, e, d, V);
       tql2(n, e, d, V);
-      V = V.transpose();
+      // V is square; restore the logical layout in place (no allocation).
+      transposeSquareInPlace(V);
     } else {
       // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite
       // memory-layout preferences (the QR sweep favours column-major eigenvectors
diff --git a/src/dc/svd.js b/src/dc/svd.js
index e92560e..6b78606 100644
--- a/src/dc/svd.js
+++ b/src/dc/svd.js
@@ -1,7 +1,7 @@
 import Matrix from '../matrix';
 import WrapperMatrix2D from '../wrap/WrapperMatrix2D';
 
-import { hypotenuse } from './util';
+import { hypotenuse, transposeSquareInPlace } from './util';
 
 export default class SingularValueDecomposition {
   constructor(value, options = {}) {
@@ -408,9 +408,11 @@ export default class SingularValueDecomposition {
     }
 
     // Restore the logical (row-major) layout of the singular vectors, which were
-    // accumulated in transposed storage for cache-sequential inner loops.
-    U = U.transpose();
-    V = V.transpose();
+    // accumulated in transposed storage for cache-sequential inner loops. V is
+    // always square and U is square whenever the input is, so this is done in
+    // place (no allocation) in the common case.
+    U = U.isSquare() ? transposeSquareInPlace(U) : U.transpose();
+    V = transposeSquareInPlace(V);
 
     if (swapped) {
       let tmp = V;
diff --git a/src/dc/util.js b/src/dc/util.js
index 642aa11..29dd76e 100644
--- a/src/dc/util.js
+++ b/src/dc/util.js
@@ -1,3 +1,24 @@
+/**
+ * Transpose a square matrix in place, without allocating a copy.
+ * Used to restore the logical layout of decomposition outputs that were
+ * accumulated in transposed storage for cache-sequential inner loops.
+ * @param {import('../matrix').default} matrix - square matrix, mutated in place
+ * @returns {import('../matrix').default} the same matrix
+ */
+export function transposeSquareInPlace(matrix) {
+  const data = matrix.data;
+  const n = matrix.rows;
+  for (let i = 0; i < n; i++) {
+    const rowI = data[i];
+    for (let j = i + 1; j < n; j++) {
+      const tmp = rowI[j];
+      rowI[j] = data[j][i];
+      data[j][i] = tmp;
+    }
+  }
+  return matrix;
+}
+
 export function hypotenuse(a, b) {
   let r = 0;
   if (Math.abs(a) > Math.abs(b)) {

From 93bdb7ea679c3f0de19e3b7afbf7cee2fa9dcd39 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 13:21:43 +0200
Subject: [PATCH 6/9] style: apply prettier formatting to EVD tests

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/__tests__/decompositions/evd.test.js | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js
index 3acc808..20908bb 100644
--- a/src/__tests__/decompositions/evd.test.js
+++ b/src/__tests__/decompositions/evd.test.js
@@ -36,7 +36,10 @@ describe('Eigenvalue decomposition', () => {
     const D = evd.diagonalMatrix;
 
     // eigen relation
-    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8);
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      8,
+    );
     // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I
     expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
       Matrix.eye(4).to2DArray(),
@@ -57,7 +60,10 @@ describe('Eigenvalue decomposition', () => {
       [2, 3, 4],
       8,
     );
-    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8);
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      8,
+    );
   });
 
   it('larger symmetric matrix: A·V = V·D', () => {
@@ -68,7 +74,10 @@ describe('Eigenvalue decomposition', () => {
     const evd = new EVD(matrix, { assumeSymmetric: true });
     const V = evd.eigenvectorMatrix;
     const D = evd.diagonalMatrix;
-    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 6);
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      6,
+    );
     expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
       Matrix.eye(n).to2DArray(),
       6,
@@ -83,10 +92,9 @@ describe('Eigenvalue decomposition', () => {
     ]);
     const evd = new EVD(matrix);
     expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8);
-    expect(evd.imaginaryEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo(
-      [-1, 1],
-      8,
-    );
+    expect(
+      evd.imaginaryEigenvalues.slice().sort((a, b) => a - b),
+    ).toBeDeepCloseTo([-1, 1], 8);
   });
 });
 

From a2f14f5a2e70649c01569d6924252772ed09f773 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 13:35:34 +0200
Subject: [PATCH 7/9] refactor: use ml-xsadd for seeded RNG in EVD tests and
 benchmark

Replace the hand-rolled mulberry32 PRNG with the ecosystem's ml-xsadd
(XORSHIFT-ADD) generator via `new XSadd(seed).random`, in the EVD reconstruction
tests and scripts/benchmark.js. Add ml-xsadd to devDependencies.

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 package.json                             |  1 +
 scripts/benchmark.js                     | 19 +++++--------------
 src/__tests__/decompositions/evd.test.js | 14 ++------------
 3 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/package.json b/package.json
index 03c2199..5e2e2c5 100644
--- a/package.json
+++ b/package.json
@@ -75,6 +75,7 @@
     "jest-matcher-deep-close-to": "^3.0.2",
     "mathjs": "^14.5.2",
     "ml-dataset-iris": "^1.2.1",
+    "ml-xsadd": "^3.0.1",
     "numeric": "^1.2.6",
     "prettier": "^3.5.3",
     "pretty-hrtime": "^1.0.3",
diff --git a/scripts/benchmark.js b/scripts/benchmark.js
index 132555e..887f75d 100644
--- a/scripts/benchmark.js
+++ b/scripts/benchmark.js
@@ -1,5 +1,7 @@
 'use strict';
 
+const { XSadd } = require('ml-xsadd');
+
 const { Matrix, SVD, LU, EVD } = require('../matrix');
 
 const sizes = [10, 100, 500, 1000];
@@ -45,23 +47,12 @@ function test(size) {
   return benchmark;
 }
 
+// Deterministic seed so every run/layout uses identical input matrices.
 function randomMatrix(size) {
-  return Matrix.rand(size, size, { random: mulberry32(size) });
+  return Matrix.rand(size, size, { random: new XSadd(size).random });
 }
 
 function symmetricMatrix(size) {
-  const base = Matrix.rand(size, size, { random: mulberry32(size) });
+  const base = Matrix.rand(size, size, { random: new XSadd(size).random });
   return base.add(base.transpose());
 }
-
-// Deterministic PRNG so every run/layout uses identical input matrices.
-function mulberry32(seed) {
-  let a = seed >>> 0;
-  return function random() {
-    a |= 0;
-    a = (a + 0x6d2b79f5) | 0;
-    let t = Math.imul(a ^ (a >>> 15), 1 | a);
-    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
-    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
-  };
-}
diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js
index 20908bb..49032bf 100644
--- a/src/__tests__/decompositions/evd.test.js
+++ b/src/__tests__/decompositions/evd.test.js
@@ -1,4 +1,5 @@
 import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to';
+import { XSadd } from 'ml-xsadd';
 import { describe, it, expect } from 'vitest';
 
 import { Matrix, EVD } from '../..';
@@ -68,7 +69,7 @@ describe('Eigenvalue decomposition', () => {
 
   it('larger symmetric matrix: A·V = V·D', () => {
     const n = 12;
-    const base = Matrix.rand(n, n, { random: seededRandom(42) });
+    const base = Matrix.rand(n, n, { random: new XSadd(42).random });
     // make it symmetric
     const matrix = base.add(base.transpose());
     const evd = new EVD(matrix, { assumeSymmetric: true });
@@ -97,14 +98,3 @@ describe('Eigenvalue decomposition', () => {
     ).toBeDeepCloseTo([-1, 1], 8);
   });
 });
-
-function seededRandom(seed) {
-  let a = seed >>> 0;
-  return function random() {
-    a |= 0;
-    a = (a + 0x6d2b79f5) | 0;
-    let t = Math.imul(a ^ (a >>> 15), 1 | a);
-    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
-    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
-  };
-}

From 903f3f768785d8237d60e24b0740a12a1274cfc3 Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Sat, 20 Jun 2026 13:35:34 +0200
Subject: [PATCH 8/9] chore: ignore .claude

Assisted-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 64e8143..a3b9f7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.claude
 .idea
 node_modules
 /matrix.*js

From aef6b5e4b3eed79dcf980e64fe39b40246bb366d Mon Sep 17 00:00:00 2001
From: Luc Patiny <luc@patiny.com>
Date: Mon, 22 Jun 2026 18:22:19 +0200
Subject: [PATCH 9/9] chore(benchmark): rewrite decompositions benchmark on the
 benchmark library

Move scripts/benchmark.js to benchmark/decompositions.js and rewrite it as a
Benchmark.Suite comparing SVD, LU, EVD and symmetric EVD on a seeded matrix,
consistent with the other files in benchmark/. Steady-state sampling makes the
results independent of execution order.
---
 benchmark/decompositions.js | 33 +++++++++++++++++++++
 scripts/benchmark.js        | 58 -------------------------------------
 2 files changed, 33 insertions(+), 58 deletions(-)
 create mode 100644 benchmark/decompositions.js
 delete mode 100644 scripts/benchmark.js

diff --git a/benchmark/decompositions.js b/benchmark/decompositions.js
new file mode 100644
index 0000000..c6686e2
--- /dev/null
+++ b/benchmark/decompositions.js
@@ -0,0 +1,33 @@
+'use strict';
+
+const Benchmark = require('benchmark');
+const { XSadd } = require('ml-xsadd');
+
+const { Matrix, SVD, LU, EVD } = require('..');
+
+const size = parseInt(process.argv[2], 10) || 200;
+console.log(`Decomposition benchmark for ${size}x${size} matrix`);
+
+// Deterministic input so every run uses identical matrices.
+const general = Matrix.rand(size, size, { random: new XSadd(size).random });
+const symmetric = general.add(general.transpose());
+
+// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for
+// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)").
+new Benchmark.Suite()
+  .add('SVD', () => {
+    new SVD(general);
+  })
+  .add('LU', () => {
+    new LU(general);
+  })
+  .add('EVD', () => {
+    new EVD(general);
+  })
+  .add('EVD (symmetric)', () => {
+    new EVD(symmetric, { assumeSymmetric: true });
+  })
+  .on('cycle', (event) => {
+    console.log(String(event.target));
+  })
+  .run();
diff --git a/scripts/benchmark.js b/scripts/benchmark.js
deleted file mode 100644
index 887f75d..0000000
--- a/scripts/benchmark.js
+++ /dev/null
@@ -1,58 +0,0 @@
-'use strict';
-
-const { XSadd } = require('ml-xsadd');
-
-const { Matrix, SVD, LU, EVD } = require('../matrix');
-
-const sizes = [10, 100, 500, 1000];
-const TARGET_MS = 3000;
-
-// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for
-// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)").
-const algorithms = [
-  { name: 'SVD', make: (m) => new SVD(m), matrix: randomMatrix },
-  { name: 'LU', make: (m) => new LU(m), matrix: randomMatrix },
-  { name: 'EVD', make: (m) => new EVD(m), matrix: randomMatrix },
-  {
-    name: 'EVD (symmetric)',
-    make: (m) => new EVD(m, { assumeSymmetric: true }),
-    matrix: symmetricMatrix,
-  },
-];
-
-const benchmarks = [];
-for (const size of sizes) benchmarks.push(test(size));
-console.table(benchmarks);
-
-function test(size) {
-  const benchmark = { size };
-  for (const algorithm of algorithms) {
-    // Fixed reusable matrix; decompositions clone internally so it is not mutated.
-    const matrix = algorithm.matrix(size);
-
-    // warm-up
-    algorithm.make(matrix);
-
-    let count = 1;
-    const start = performance.now();
-    algorithm.make(matrix);
-    const first = performance.now() - start;
-    if (first < TARGET_MS) {
-      count = Math.ceil(TARGET_MS / first);
-      for (let i = 0; i < count - 1; i++) algorithm.make(matrix);
-    }
-    const time = (performance.now() - start) / count;
-    benchmark[algorithm.name] = Number(time.toFixed(4));
-  }
-  return benchmark;
-}
-
-// Deterministic seed so every run/layout uses identical input matrices.
-function randomMatrix(size) {
-  return Matrix.rand(size, size, { random: new XSadd(size).random });
-}
-
-function symmetricMatrix(size) {
-  const base = Matrix.rand(size, size, { random: new XSadd(size).random });
-  return base.add(base.transpose());
-}