diff --git a/.gitignore b/.gitignore
index 64e8143..a3b9f7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.claude
 .idea
 node_modules
 /matrix.*js
diff --git a/benchmark/decompositions.js b/benchmark/decompositions.js
new file mode 100644
index 0000000..c6686e2
--- /dev/null
+++ b/benchmark/decompositions.js
@@ -0,0 +1,33 @@
+'use strict';
+
+const Benchmark = require('benchmark');
+const { XSadd } = require('ml-xsadd');
+
+const { Matrix, SVD, LU, EVD } = require('..');
+
+const size = parseInt(process.argv[2], 10) || 200;
+console.log(`Decomposition benchmark for ${size}x${size} matrix`);
+
+// Deterministic input so every run uses identical matrices.
+const general = Matrix.rand(size, size, { random: new XSadd(size).random });
+const symmetric = general.add(general.transpose());
+
+// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for
+// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)").
+new Benchmark.Suite()
+  .add('SVD', () => {
+    new SVD(general);
+  })
+  .add('LU', () => {
+    new LU(general);
+  })
+  .add('EVD', () => {
+    new EVD(general);
+  })
+  .add('EVD (symmetric)', () => {
+    new EVD(symmetric, { assumeSymmetric: true });
+  })
+  .on('cycle', (event) => {
+    console.log(String(event.target));
+  })
+  .run();
diff --git a/package.json b/package.json
index 03c2199..5e2e2c5 100644
--- a/package.json
+++ b/package.json
@@ -75,6 +75,7 @@
     "jest-matcher-deep-close-to": "^3.0.2",
     "mathjs": "^14.5.2",
     "ml-dataset-iris": "^1.2.1",
+    "ml-xsadd": "^3.0.1",
     "numeric": "^1.2.6",
     "prettier": "^3.5.3",
     "pretty-hrtime": "^1.0.3",
diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js
index 9e8b9f9..49032bf 100644
--- a/src/__tests__/decompositions/evd.test.js
+++ b/src/__tests__/decompositions/evd.test.js
@@ -1,7 +1,11 @@
+import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to';
+import { XSadd } from 'ml-xsadd';
 import { describe, it, expect } from 'vitest';
 
 import { Matrix, EVD } from '../..';
 
+expect.extend({ toBeDeepCloseTo });
+
 describe('Eigenvalue decomposition', () => {
   it('simple example', () => {
     let matrix = new Matrix([
@@ -20,4 +24,77 @@ describe('Eigenvalue decomposition', () => {
     const matrix = new Matrix([]);
     expect(() => new EVD(matrix)).toThrow('Matrix must be non-empty');
   });
+
+  it('symmetric matrix: A·V = V·D and V is orthogonal', () => {
+    const matrix = new Matrix([
+      [2, -1, 0, 1],
+      [-1, 2, -1, 0],
+      [0, -1, 2, -1],
+      [1, 0, -1, 2],
+    ]);
+    const evd = new EVD(matrix);
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+
+    // eigen relation
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      8,
+    );
+    // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I
+    expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
+      Matrix.eye(4).to2DArray(),
+      8,
+    );
+  });
+
+  it('non-symmetric matrix with real eigenvalues: A·V = V·D', () => {
+    const matrix = new Matrix([
+      [4, 1, 2],
+      [0, 3, -1],
+      [0, 0, 2],
+    ]);
+    const evd = new EVD(matrix);
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+    expect(evd.realEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo(
+      [2, 3, 4],
+      8,
+    );
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      8,
+    );
+  });
+
+  it('larger symmetric matrix: A·V = V·D', () => {
+    const n = 12;
+    const base = Matrix.rand(n, n, { random: new XSadd(42).random });
+    // make it symmetric
+    const matrix = base.add(base.transpose());
+    const evd = new EVD(matrix, { assumeSymmetric: true });
+    const V = evd.eigenvectorMatrix;
+    const D = evd.diagonalMatrix;
+    expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(
+      V.mmul(D).to2DArray(),
+      6,
+    );
+    expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo(
+      Matrix.eye(n).to2DArray(),
+      6,
+    );
+  });
+
+  it('matrix with a complex eigenvalue pair', () => {
+    // rotation-like block has eigenvalues 1 ± i
+    const matrix = new Matrix([
+      [1, -1],
+      [1, 1],
+    ]);
+    const evd = new EVD(matrix);
+    expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8);
+    expect(
+      evd.imaginaryEigenvalues.slice().sort((a, b) => a - b),
+    ).toBeDeepCloseTo([-1, 1], 8);
+  });
 });
diff --git a/src/dc/evd.js b/src/dc/evd.js
index 6c6a9a1..c3c42a2 100644
--- a/src/dc/evd.js
+++ b/src/dc/evd.js
@@ -1,7 +1,7 @@
 import Matrix from '../matrix';
 import WrapperMatrix2D from '../wrap/WrapperMatrix2D';
 
-import { hypotenuse } from './util';
+import { hypotenuse, transposeSquareInPlace } from './util';
 
 export default class EigenvalueDecomposition {
   constructor(matrix, options = {}) {
@@ -31,14 +31,25 @@ export default class EigenvalueDecomposition {
     }
 
     if (isSymmetric) {
+      // tred2/tql2 access V almost exclusively down columns (the row index
+      // varies in the hot loops). Storing V transposed turns those into
+      // sequential row scans of the row-major backing store; we transpose it
+      // back to the logical layout before returning. V.get(j, i) holds the
+      // logical V(i, j).
       for (i = 0; i < n; i++) {
         for (j = 0; j < n; j++) {
-          V.set(i, j, value.get(i, j));
+          V.set(j, i, value.get(i, j));
         }
       }
       tred2(n, e, d, V);
       tql2(n, e, d, V);
+      // V is square; restore the logical layout in place (no allocation).
+      transposeSquareInPlace(V);
     } else {
+      // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite
+      // memory-layout preferences (the QR sweep favours column-major eigenvectors
+      // while the back-transform favours row-major), so a single transposed
+      // storage cannot help both. It is left in the original row-major layout.
       let H = new Matrix(n, n);
       let ort = new Float64Array(n);
       for (j = 0; j < n; j++) {
@@ -93,7 +104,7 @@ function tred2(n, e, d, V) {
   let f, g, h, i, j, k, hh, scale;
 
   for (j = 0; j < n; j++) {
-    d[j] = V.get(n - 1, j);
+    d[j] = V.get(j, n - 1);
   }
 
   for (i = n - 1; i > 0; i--) {
@@ -106,9 +117,9 @@ function tred2(n, e, d, V) {
     if (scale === 0) {
       e[i] = d[i - 1];
       for (j = 0; j < i; j++) {
-        d[j] = V.get(i - 1, j);
-        V.set(i, j, 0);
+        d[j] = V.get(j, i - 1);
         V.set(j, i, 0);
+        V.set(i, j, 0);
       }
     } else {
       for (k = 0; k < i; k++) {
@@ -131,11 +142,11 @@ function tred2(n, e, d, V) {
 
       for (j = 0; j < i; j++) {
         f = d[j];
-        V.set(j, i, f);
+        V.set(i, j, f);
         g = e[j] + V.get(j, j) * f;
         for (k = j + 1; k <= i - 1; k++) {
-          g += V.get(k, j) * d[k];
-          e[k] += V.get(k, j) * f;
+          g += V.get(j, k) * d[k];
+          e[k] += V.get(j, k) * f;
         }
         e[j] = g;
       }
@@ -155,43 +166,43 @@ function tred2(n, e, d, V) {
         f = d[j];
         g = e[j];
         for (k = j; k <= i - 1; k++) {
-          V.set(k, j, V.get(k, j) - (f * e[k] + g * d[k]));
+          V.set(j, k, V.get(j, k) - (f * e[k] + g * d[k]));
         }
-        d[j] = V.get(i - 1, j);
-        V.set(i, j, 0);
+        d[j] = V.get(j, i - 1);
+        V.set(j, i, 0);
       }
     }
     d[i] = h;
   }
 
   for (i = 0; i < n - 1; i++) {
-    V.set(n - 1, i, V.get(i, i));
+    V.set(i, n - 1, V.get(i, i));
     V.set(i, i, 1);
     h = d[i + 1];
     if (h !== 0) {
       for (k = 0; k <= i; k++) {
-        d[k] = V.get(k, i + 1) / h;
+        d[k] = V.get(i + 1, k) / h;
       }
 
       for (j = 0; j <= i; j++) {
         g = 0;
         for (k = 0; k <= i; k++) {
-          g += V.get(k, i + 1) * V.get(k, j);
+          g += V.get(i + 1, k) * V.get(j, k);
         }
         for (k = 0; k <= i; k++) {
-          V.set(k, j, V.get(k, j) - g * d[k]);
+          V.set(j, k, V.get(j, k) - g * d[k]);
         }
       }
     }
 
     for (k = 0; k <= i; k++) {
-      V.set(k, i + 1, 0);
+      V.set(i + 1, k, 0);
     }
   }
 
   for (j = 0; j < n; j++) {
-    d[j] = V.get(n - 1, j);
-    V.set(n - 1, j, 0);
+    d[j] = V.get(j, n - 1);
+    V.set(j, n - 1, 0);
   }
 
   V.set(n - 1, n - 1, 1);
@@ -264,9 +275,9 @@ function tql2(n, e, d, V) {
           d[i + 1] = h + s * (c * g + s * d[i]);
 
           for (k = 0; k < n; k++) {
-            h = V.get(k, i + 1);
-            V.set(k, i + 1, s * V.get(k, i) + c * h);
-            V.set(k, i, c * V.get(k, i) - s * h);
+            h = V.get(i + 1, k);
+            V.set(i + 1, k, s * V.get(i, k) + c * h);
+            V.set(i, k, c * V.get(i, k) - s * h);
           }
         }
 
@@ -293,9 +304,9 @@ function tql2(n, e, d, V) {
       d[k] = d[i];
       d[i] = p;
       for (j = 0; j < n; j++) {
-        p = V.get(j, i);
-        V.set(j, i, V.get(j, k));
-        V.set(j, k, p);
+        p = V.get(i, j);
+        V.set(i, j, V.get(k, j));
+        V.set(k, j, p);
       }
     }
   }
diff --git a/src/dc/svd.js b/src/dc/svd.js
index 20ca002..6b78606 100644
--- a/src/dc/svd.js
+++ b/src/dc/svd.js
@@ -1,7 +1,7 @@
 import Matrix from '../matrix';
 import WrapperMatrix2D from '../wrap/WrapperMatrix2D';
 
-import { hypotenuse } from './util';
+import { hypotenuse, transposeSquareInPlace } from './util';
 
 export default class SingularValueDecomposition {
   constructor(value, options = {}) {
@@ -23,32 +23,40 @@ export default class SingularValueDecomposition {
     let wantu = Boolean(computeLeftSingularVectors);
     let wantv = Boolean(computeRightSingularVectors);
 
+    // Work on the transpose of the input so the hot inner loops (which iterate
+    // over rows for a fixed column) scan memory sequentially in the row-major
+    // backing store. `at` holds the transpose: at.get(j, i) === a.get(i, j)
+    // where `a` is the logical m x n working matrix.
     let swapped = false;
-    let a;
+    let at;
     if (m < n) {
       if (!autoTranspose) {
-        a = value.clone();
         // eslint-disable-next-line no-console
         console.warn(
           'Computing SVD on a matrix with more columns than rows. Consider enabling autoTranspose',
         );
+        at = value.transpose();
       } else {
-        a = value.transpose();
-        m = a.rows;
-        n = a.columns;
+        at = value.clone();
+        m = value.columns;
+        n = value.rows;
         swapped = true;
         let aux = wantu;
         wantu = wantv;
         wantv = aux;
       }
     } else {
-      a = value.clone();
+      at = value.transpose();
     }
 
     let nu = Math.min(m, n);
     let ni = Math.min(m + 1, n);
     let s = new Float64Array(ni);
-    let U = new Matrix(m, nu);
+    // U and V are stored transposed during the computation so the inner loops
+    // (which always vary the row index) scan memory sequentially. They are
+    // transposed back to their logical layout before being returned.
+    // Ut.get(j, i) === U.get(i, j) and Vt.get(j, i) === V.get(i, j).
+    let U = new Matrix(nu, m);
     let V = new Matrix(n, n);
 
     let e = new Float64Array(n);
@@ -65,16 +73,16 @@ export default class SingularValueDecomposition {
       if (k < nct) {
         s[k] = 0;
         for (let i = k; i < m; i++) {
-          s[k] = hypotenuse(s[k], a.get(i, k));
+          s[k] = hypotenuse(s[k], at.get(k, i));
         }
         if (s[k] !== 0) {
-          if (a.get(k, k) < 0) {
+          if (at.get(k, k) < 0) {
             s[k] = -s[k];
           }
           for (let i = k; i < m; i++) {
-            a.set(i, k, a.get(i, k) / s[k]);
+            at.set(k, i, at.get(k, i) / s[k]);
           }
-          a.set(k, k, a.get(k, k) + 1);
+          at.set(k, k, at.get(k, k) + 1);
         }
         s[k] = -s[k];
       }
@@ -83,19 +91,19 @@ export default class SingularValueDecomposition {
         if (k < nct && s[k] !== 0) {
           let t = 0;
           for (let i = k; i < m; i++) {
-            t += a.get(i, k) * a.get(i, j);
+            t += at.get(k, i) * at.get(j, i);
           }
-          t = -t / a.get(k, k);
+          t = -t / at.get(k, k);
           for (let i = k; i < m; i++) {
-            a.set(i, j, a.get(i, j) + t * a.get(i, k));
+            at.set(j, i, at.get(j, i) + t * at.get(k, i));
           }
         }
-        e[j] = a.get(k, j);
+        e[j] = at.get(j, k);
       }
 
       if (wantu && k < nct) {
         for (let i = k; i < m; i++) {
-          U.set(i, k, a.get(i, k));
+          U.set(k, i, at.get(k, i));
         }
       }
 
@@ -120,19 +128,19 @@ export default class SingularValueDecomposition {
           }
           for (let i = k + 1; i < m; i++) {
             for (let j = k + 1; j < n; j++) {
-              work[i] += e[j] * a.get(i, j);
+              work[i] += e[j] * at.get(j, i);
             }
           }
           for (let j = k + 1; j < n; j++) {
             let t = -e[j] / e[k + 1];
             for (let i = k + 1; i < m; i++) {
-              a.set(i, j, a.get(i, j) + t * work[i]);
+              at.set(j, i, at.get(j, i) + t * work[i]);
             }
           }
         }
         if (wantv) {
           for (let i = k + 1; i < n; i++) {
-            V.set(i, k, e[i]);
+            V.set(k, i, e[i]);
           }
         }
       }
@@ -140,20 +148,20 @@ export default class SingularValueDecomposition {
 
     let p = Math.min(n, m + 1);
     if (nct < n) {
-      s[nct] = a.get(nct, nct);
+      s[nct] = at.get(nct, nct);
     }
     if (m < p) {
       s[p - 1] = 0;
     }
     if (nrt + 1 < p) {
-      e[nrt] = a.get(nrt, p - 1);
+      e[nrt] = at.get(p - 1, nrt);
     }
     e[p - 1] = 0;
 
     if (wantu) {
       for (let j = nct; j < nu; j++) {
         for (let i = 0; i < m; i++) {
-          U.set(i, j, 0);
+          U.set(j, i, 0);
         }
         U.set(j, j, 1);
       }
@@ -162,23 +170,23 @@ export default class SingularValueDecomposition {
           for (let j = k + 1; j < nu; j++) {
             let t = 0;
             for (let i = k; i < m; i++) {
-              t += U.get(i, k) * U.get(i, j);
+              t += U.get(k, i) * U.get(j, i);
             }
             t = -t / U.get(k, k);
             for (let i = k; i < m; i++) {
-              U.set(i, j, U.get(i, j) + t * U.get(i, k));
+              U.set(j, i, U.get(j, i) + t * U.get(k, i));
             }
           }
           for (let i = k; i < m; i++) {
-            U.set(i, k, -U.get(i, k));
+            U.set(k, i, -U.get(k, i));
           }
           U.set(k, k, 1 + U.get(k, k));
           for (let i = 0; i < k - 1; i++) {
-            U.set(i, k, 0);
+            U.set(k, i, 0);
           }
         } else {
           for (let i = 0; i < m; i++) {
-            U.set(i, k, 0);
+            U.set(k, i, 0);
           }
           U.set(k, k, 1);
         }
@@ -191,16 +199,16 @@ export default class SingularValueDecomposition {
           for (let j = k + 1; j < n; j++) {
             let t = 0;
             for (let i = k + 1; i < n; i++) {
-              t += V.get(i, k) * V.get(i, j);
+              t += V.get(k, i) * V.get(j, i);
             }
-            t = -t / V.get(k + 1, k);
+            t = -t / V.get(k, k + 1);
             for (let i = k + 1; i < n; i++) {
-              V.set(i, j, V.get(i, j) + t * V.get(i, k));
+              V.set(j, i, V.get(j, i) + t * V.get(k, i));
             }
           }
         }
         for (let i = 0; i < n; i++) {
-          V.set(i, k, 0);
+          V.set(k, i, 0);
         }
         V.set(k, k, 1);
       }
@@ -265,9 +273,9 @@ export default class SingularValueDecomposition {
             }
             if (wantv) {
               for (let i = 0; i < n; i++) {
-                t = cs * V.get(i, j) + sn * V.get(i, p - 1);
-                V.set(i, p - 1, -sn * V.get(i, j) + cs * V.get(i, p - 1));
-                V.set(i, j, t);
+                t = cs * V.get(j, i) + sn * V.get(p - 1, i);
+                V.set(p - 1, i, -sn * V.get(j, i) + cs * V.get(p - 1, i));
+                V.set(j, i, t);
               }
             }
           }
@@ -285,9 +293,9 @@ export default class SingularValueDecomposition {
             e[j] = cs * e[j];
             if (wantu) {
               for (let i = 0; i < m; i++) {
-                t = cs * U.get(i, j) + sn * U.get(i, k - 1);
-                U.set(i, k - 1, -sn * U.get(i, j) + cs * U.get(i, k - 1));
-                U.set(i, j, t);
+                t = cs * U.get(j, i) + sn * U.get(k - 1, i);
+                U.set(k - 1, i, -sn * U.get(j, i) + cs * U.get(k - 1, i));
+                U.set(j, i, t);
               }
             }
           }
@@ -333,9 +341,9 @@ export default class SingularValueDecomposition {
             s[j + 1] = cs * s[j + 1];
             if (wantv) {
               for (let i = 0; i < n; i++) {
-                t = cs * V.get(i, j) + sn * V.get(i, j + 1);
-                V.set(i, j + 1, -sn * V.get(i, j) + cs * V.get(i, j + 1));
-                V.set(i, j, t);
+                t = cs * V.get(j, i) + sn * V.get(j + 1, i);
+                V.set(j + 1, i, -sn * V.get(j, i) + cs * V.get(j + 1, i));
+                V.set(j, i, t);
               }
             }
             t = hypotenuse(f, g);
@@ -349,9 +357,9 @@ export default class SingularValueDecomposition {
             e[j + 1] = cs * e[j + 1];
             if (wantu && j < m - 1) {
               for (let i = 0; i < m; i++) {
-                t = cs * U.get(i, j) + sn * U.get(i, j + 1);
-                U.set(i, j + 1, -sn * U.get(i, j) + cs * U.get(i, j + 1));
-                U.set(i, j, t);
+                t = cs * U.get(j, i) + sn * U.get(j + 1, i);
+                U.set(j + 1, i, -sn * U.get(j, i) + cs * U.get(j + 1, i));
+                U.set(j, i, t);
               }
             }
           }
@@ -364,7 +372,7 @@ export default class SingularValueDecomposition {
             s[k] = s[k] < 0 ? -s[k] : 0;
             if (wantv) {
               for (let i = 0; i <= pp; i++) {
-                V.set(i, k, -V.get(i, k));
+                V.set(k, i, -V.get(k, i));
               }
             }
           }
@@ -377,16 +385,16 @@ export default class SingularValueDecomposition {
             s[k + 1] = t;
             if (wantv && k < n - 1) {
               for (let i = 0; i < n; i++) {
-                t = V.get(i, k + 1);
-                V.set(i, k + 1, V.get(i, k));
-                V.set(i, k, t);
+                t = V.get(k + 1, i);
+                V.set(k + 1, i, V.get(k, i));
+                V.set(k, i, t);
               }
             }
             if (wantu && k < m - 1) {
               for (let i = 0; i < m; i++) {
-                t = U.get(i, k + 1);
-                U.set(i, k + 1, U.get(i, k));
-                U.set(i, k, t);
+                t = U.get(k + 1, i);
+                U.set(k + 1, i, U.get(k, i));
+                U.set(k, i, t);
               }
             }
             k++;
@@ -399,6 +407,13 @@ export default class SingularValueDecomposition {
       }
     }
 
+    // Restore the logical (row-major) layout of the singular vectors, which were
+    // accumulated in transposed storage for cache-sequential inner loops. V is
+    // always square and U is square whenever the input is, so this is done in
+    // place (no allocation) in the common case.
+    U = U.isSquare() ? transposeSquareInPlace(U) : U.transpose();
+    V = transposeSquareInPlace(V);
+
     if (swapped) {
       let tmp = V;
       V = U;
diff --git a/src/dc/util.js b/src/dc/util.js
index 642aa11..29dd76e 100644
--- a/src/dc/util.js
+++ b/src/dc/util.js
@@ -1,3 +1,24 @@
+/**
+ * Transpose a square matrix in place, without allocating a copy.
+ * Used to restore the logical layout of decomposition outputs that were
+ * accumulated in transposed storage for cache-sequential inner loops.
+ * @param {import('../matrix').default} matrix - square matrix, mutated in place
+ * @returns {import('../matrix').default} the same matrix
+ */
+export function transposeSquareInPlace(matrix) {
+  const data = matrix.data;
+  const n = matrix.rows;
+  for (let i = 0; i < n; i++) {
+    const rowI = data[i];
+    for (let j = i + 1; j < n; j++) {
+      const tmp = rowI[j];
+      rowI[j] = data[j][i];
+      data[j][i] = tmp;
+    }
+  }
+  return matrix;
+}
+
 export function hypotenuse(a, b) {
   let r = 0;
   if (Math.abs(a) > Math.abs(b)) {