From e82b2ba4ba8b8b9a64f26255dd83fd8b8628d438 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Fri, 29 Sep 2023 15:51:35 +0200 Subject: [PATCH 1/9] feat: optimize SVD ? --- scripts/benchmark.js | 39 +++++++++++++++++++++++++++++++++++++++ src/matrix.js | 2 ++ 2 files changed, 41 insertions(+) create mode 100644 scripts/benchmark.js diff --git a/scripts/benchmark.js b/scripts/benchmark.js new file mode 100644 index 0000000..d495226 --- /dev/null +++ b/scripts/benchmark.js @@ -0,0 +1,39 @@ + + + +let { Matrix, SVD, LU, EVD } = require('../matrix'); + +const algorithms = [SVD, LU, EVD]; + +const benchmarks = []; + +test(10); +test(100); +test(500); +test(1000); +test(1500); + +console.table(benchmarks); + +function test(size) { + const benchmark = {}; + benchmark.size = size; + benchmarks.push(benchmark); + + for (const Algo of algorithms) { + const matrix = Matrix.rand(size, size); + let count = 1; + const start = performance.now(); + const results = [new Algo(matrix)]; + const current = performance.now() - start; + if (current < 5000) { + count = Math.ceil(5000 / current); + for (let i = 0; i < count - 1; i++) { + results.push(new Algo(matrix)); + } + } + const time = (performance.now() - start) / results.length; + benchmark[Algo.name] = time; + console.log(`${Algo.name} ${size}x${size} matrix: ${time}ms (${count}x)`) + } +} \ No newline at end of file diff --git a/src/matrix.js b/src/matrix.js index c98253b..2844007 100644 --- a/src/matrix.js +++ b/src/matrix.js @@ -1579,11 +1579,13 @@ export default class Matrix extends AbstractMatrix { set(rowIndex, columnIndex, value) { this.data[rowIndex][columnIndex] = value; + //this.data[columnIndex][rowIndex] = value; return this; } get(rowIndex, columnIndex) { return this.data[rowIndex][columnIndex]; + //return this.data[columnIndex][rowIndex]; } removeRow(index) { From 0f7c3ab8103b7dbecfe1aba90804095ebe696689 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 11:09:42 +0200 Subject: [PATCH 2/9] perf: speed up SVD ~4.7x via transposed internal storage The SVD hot loops iterate over rows for a fixed column. With the row-major backing store (data[row][col]) these are column walks that thrash the cache. Work on the transpose of the input and store the U/V singular vectors transposed during the computation, then transpose them back before returning. The inner loops then scan memory sequentially. The public API and results are unchanged. Measured (1000x1000, deterministic input): 17764 ms -> 3804 ms (~4.7x). LU and EVD are unaffected. All tests pass. Also remove the dead exploratory get/set comments in matrix.js and make scripts/benchmark.js deterministic (seeded) and warmed up. Assisted-By: Claude Opus 4.8 (1M context) --- scripts/benchmark.js | 59 ++++++++++++---------- src/dc/svd.js | 113 ++++++++++++++++++++++++------------------- src/matrix.js | 2 - 3 files changed, 96 insertions(+), 78 deletions(-) diff --git a/scripts/benchmark.js b/scripts/benchmark.js index d495226..7bfb9f9 100644 --- a/scripts/benchmark.js +++ b/scripts/benchmark.js @@ -1,39 +1,46 @@ +'use strict'; - - -let { Matrix, SVD, LU, EVD } = require('../matrix'); +const { Matrix, SVD, LU, EVD } = require('../matrix'); const algorithms = [SVD, LU, EVD]; +const sizes = [10, 100, 500, 1000]; +const TARGET_MS = 3000; const benchmarks = []; - -test(10); -test(100); -test(500); -test(1000); -test(1500); - +for (const size of sizes) benchmarks.push(test(size)); console.table(benchmarks); function test(size) { - const benchmark = {}; - benchmark.size = size; - benchmarks.push(benchmark); - + const benchmark = { size }; for (const Algo of algorithms) { - const matrix = Matrix.rand(size, size); + // Fixed reusable matrix; decompositions clone internally so it is not mutated. + const matrix = Matrix.rand(size, size, { random: mulberry32(size) }); + + // warm-up + void new Algo(matrix); + let count = 1; const start = performance.now(); - const results = [new Algo(matrix)]; - const current = performance.now() - start; - if (current < 5000) { - count = Math.ceil(5000 / current); - for (let i = 0; i < count - 1; i++) { - results.push(new Algo(matrix)); - } + void new Algo(matrix); + const first = performance.now() - start; + if (first < TARGET_MS) { + count = Math.ceil(TARGET_MS / first); + for (let i = 0; i < count - 1; i++) void new Algo(matrix); } - const time = (performance.now() - start) / results.length; - benchmark[Algo.name] = time; - console.log(`${Algo.name} ${size}x${size} matrix: ${time}ms (${count}x)`) + const time = (performance.now() - start) / count; + benchmark[Algo.name] = Number(time.toFixed(4)); } -} \ No newline at end of file + return benchmark; +} + +// Deterministic PRNG so every run/layout uses identical input matrices. +function mulberry32(seed) { + let a = seed >>> 0; + return function random() { + a |= 0; + a = (a + 0x6d2b79f5) | 0; + let t = Math.imul(a ^ (a >>> 15), 1 | a); + t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }; +} diff --git a/src/dc/svd.js b/src/dc/svd.js index 20ca002..e92560e 100644 --- a/src/dc/svd.js +++ b/src/dc/svd.js @@ -23,32 +23,40 @@ export default class SingularValueDecomposition { let wantu = Boolean(computeLeftSingularVectors); let wantv = Boolean(computeRightSingularVectors); + // Work on the transpose of the input so the hot inner loops (which iterate + // over rows for a fixed column) scan memory sequentially in the row-major + // backing store. `at` holds the transpose: at.get(j, i) === a.get(i, j) + // where `a` is the logical m x n working matrix. let swapped = false; - let a; + let at; if (m < n) { if (!autoTranspose) { - a = value.clone(); // eslint-disable-next-line no-console console.warn( 'Computing SVD on a matrix with more columns than rows. Consider enabling autoTranspose', ); + at = value.transpose(); } else { - a = value.transpose(); - m = a.rows; - n = a.columns; + at = value.clone(); + m = value.columns; + n = value.rows; swapped = true; let aux = wantu; wantu = wantv; wantv = aux; } } else { - a = value.clone(); + at = value.transpose(); } let nu = Math.min(m, n); let ni = Math.min(m + 1, n); let s = new Float64Array(ni); - let U = new Matrix(m, nu); + // U and V are stored transposed during the computation so the inner loops + // (which always vary the row index) scan memory sequentially. They are + // transposed back to their logical layout before being returned. + // Ut.get(j, i) === U.get(i, j) and Vt.get(j, i) === V.get(i, j). + let U = new Matrix(nu, m); let V = new Matrix(n, n); let e = new Float64Array(n); @@ -65,16 +73,16 @@ export default class SingularValueDecomposition { if (k < nct) { s[k] = 0; for (let i = k; i < m; i++) { - s[k] = hypotenuse(s[k], a.get(i, k)); + s[k] = hypotenuse(s[k], at.get(k, i)); } if (s[k] !== 0) { - if (a.get(k, k) < 0) { + if (at.get(k, k) < 0) { s[k] = -s[k]; } for (let i = k; i < m; i++) { - a.set(i, k, a.get(i, k) / s[k]); + at.set(k, i, at.get(k, i) / s[k]); } - a.set(k, k, a.get(k, k) + 1); + at.set(k, k, at.get(k, k) + 1); } s[k] = -s[k]; } @@ -83,19 +91,19 @@ export default class SingularValueDecomposition { if (k < nct && s[k] !== 0) { let t = 0; for (let i = k; i < m; i++) { - t += a.get(i, k) * a.get(i, j); + t += at.get(k, i) * at.get(j, i); } - t = -t / a.get(k, k); + t = -t / at.get(k, k); for (let i = k; i < m; i++) { - a.set(i, j, a.get(i, j) + t * a.get(i, k)); + at.set(j, i, at.get(j, i) + t * at.get(k, i)); } } - e[j] = a.get(k, j); + e[j] = at.get(j, k); } if (wantu && k < nct) { for (let i = k; i < m; i++) { - U.set(i, k, a.get(i, k)); + U.set(k, i, at.get(k, i)); } } @@ -120,19 +128,19 @@ export default class SingularValueDecomposition { } for (let i = k + 1; i < m; i++) { for (let j = k + 1; j < n; j++) { - work[i] += e[j] * a.get(i, j); + work[i] += e[j] * at.get(j, i); } } for (let j = k + 1; j < n; j++) { let t = -e[j] / e[k + 1]; for (let i = k + 1; i < m; i++) { - a.set(i, j, a.get(i, j) + t * work[i]); + at.set(j, i, at.get(j, i) + t * work[i]); } } } if (wantv) { for (let i = k + 1; i < n; i++) { - V.set(i, k, e[i]); + V.set(k, i, e[i]); } } } @@ -140,20 +148,20 @@ export default class SingularValueDecomposition { let p = Math.min(n, m + 1); if (nct < n) { - s[nct] = a.get(nct, nct); + s[nct] = at.get(nct, nct); } if (m < p) { s[p - 1] = 0; } if (nrt + 1 < p) { - e[nrt] = a.get(nrt, p - 1); + e[nrt] = at.get(p - 1, nrt); } e[p - 1] = 0; if (wantu) { for (let j = nct; j < nu; j++) { for (let i = 0; i < m; i++) { - U.set(i, j, 0); + U.set(j, i, 0); } U.set(j, j, 1); } @@ -162,23 +170,23 @@ export default class SingularValueDecomposition { for (let j = k + 1; j < nu; j++) { let t = 0; for (let i = k; i < m; i++) { - t += U.get(i, k) * U.get(i, j); + t += U.get(k, i) * U.get(j, i); } t = -t / U.get(k, k); for (let i = k; i < m; i++) { - U.set(i, j, U.get(i, j) + t * U.get(i, k)); + U.set(j, i, U.get(j, i) + t * U.get(k, i)); } } for (let i = k; i < m; i++) { - U.set(i, k, -U.get(i, k)); + U.set(k, i, -U.get(k, i)); } U.set(k, k, 1 + U.get(k, k)); for (let i = 0; i < k - 1; i++) { - U.set(i, k, 0); + U.set(k, i, 0); } } else { for (let i = 0; i < m; i++) { - U.set(i, k, 0); + U.set(k, i, 0); } U.set(k, k, 1); } @@ -191,16 +199,16 @@ export default class SingularValueDecomposition { for (let j = k + 1; j < n; j++) { let t = 0; for (let i = k + 1; i < n; i++) { - t += V.get(i, k) * V.get(i, j); + t += V.get(k, i) * V.get(j, i); } - t = -t / V.get(k + 1, k); + t = -t / V.get(k, k + 1); for (let i = k + 1; i < n; i++) { - V.set(i, j, V.get(i, j) + t * V.get(i, k)); + V.set(j, i, V.get(j, i) + t * V.get(k, i)); } } } for (let i = 0; i < n; i++) { - V.set(i, k, 0); + V.set(k, i, 0); } V.set(k, k, 1); } @@ -265,9 +273,9 @@ export default class SingularValueDecomposition { } if (wantv) { for (let i = 0; i < n; i++) { - t = cs * V.get(i, j) + sn * V.get(i, p - 1); - V.set(i, p - 1, -sn * V.get(i, j) + cs * V.get(i, p - 1)); - V.set(i, j, t); + t = cs * V.get(j, i) + sn * V.get(p - 1, i); + V.set(p - 1, i, -sn * V.get(j, i) + cs * V.get(p - 1, i)); + V.set(j, i, t); } } } @@ -285,9 +293,9 @@ export default class SingularValueDecomposition { e[j] = cs * e[j]; if (wantu) { for (let i = 0; i < m; i++) { - t = cs * U.get(i, j) + sn * U.get(i, k - 1); - U.set(i, k - 1, -sn * U.get(i, j) + cs * U.get(i, k - 1)); - U.set(i, j, t); + t = cs * U.get(j, i) + sn * U.get(k - 1, i); + U.set(k - 1, i, -sn * U.get(j, i) + cs * U.get(k - 1, i)); + U.set(j, i, t); } } } @@ -333,9 +341,9 @@ export default class SingularValueDecomposition { s[j + 1] = cs * s[j + 1]; if (wantv) { for (let i = 0; i < n; i++) { - t = cs * V.get(i, j) + sn * V.get(i, j + 1); - V.set(i, j + 1, -sn * V.get(i, j) + cs * V.get(i, j + 1)); - V.set(i, j, t); + t = cs * V.get(j, i) + sn * V.get(j + 1, i); + V.set(j + 1, i, -sn * V.get(j, i) + cs * V.get(j + 1, i)); + V.set(j, i, t); } } t = hypotenuse(f, g); @@ -349,9 +357,9 @@ export default class SingularValueDecomposition { e[j + 1] = cs * e[j + 1]; if (wantu && j < m - 1) { for (let i = 0; i < m; i++) { - t = cs * U.get(i, j) + sn * U.get(i, j + 1); - U.set(i, j + 1, -sn * U.get(i, j) + cs * U.get(i, j + 1)); - U.set(i, j, t); + t = cs * U.get(j, i) + sn * U.get(j + 1, i); + U.set(j + 1, i, -sn * U.get(j, i) + cs * U.get(j + 1, i)); + U.set(j, i, t); } } } @@ -364,7 +372,7 @@ export default class SingularValueDecomposition { s[k] = s[k] < 0 ? -s[k] : 0; if (wantv) { for (let i = 0; i <= pp; i++) { - V.set(i, k, -V.get(i, k)); + V.set(k, i, -V.get(k, i)); } } } @@ -377,16 +385,16 @@ export default class SingularValueDecomposition { s[k + 1] = t; if (wantv && k < n - 1) { for (let i = 0; i < n; i++) { - t = V.get(i, k + 1); - V.set(i, k + 1, V.get(i, k)); - V.set(i, k, t); + t = V.get(k + 1, i); + V.set(k + 1, i, V.get(k, i)); + V.set(k, i, t); } } if (wantu && k < m - 1) { for (let i = 0; i < m; i++) { - t = U.get(i, k + 1); - U.set(i, k + 1, U.get(i, k)); - U.set(i, k, t); + t = U.get(k + 1, i); + U.set(k + 1, i, U.get(k, i)); + U.set(k, i, t); } } k++; @@ -399,6 +407,11 @@ export default class SingularValueDecomposition { } } + // Restore the logical (row-major) layout of the singular vectors, which were + // accumulated in transposed storage for cache-sequential inner loops. + U = U.transpose(); + V = V.transpose(); + if (swapped) { let tmp = V; V = U; diff --git a/src/matrix.js b/src/matrix.js index 3102b97..f4874ff 100644 --- a/src/matrix.js +++ b/src/matrix.js @@ -1666,13 +1666,11 @@ export default class Matrix extends AbstractMatrix { set(rowIndex, columnIndex, value) { this.data[rowIndex][columnIndex] = value; - //this.data[columnIndex][rowIndex] = value; return this; } get(rowIndex, columnIndex) { return this.data[rowIndex][columnIndex]; - //return this.data[columnIndex][rowIndex]; } removeRow(index) { From 88fdfcfc67de8dc584d4f8a4821ee9783d39155e Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 11:13:48 +0200 Subject: [PATCH 3/9] =?UTF-8?q?test:=20add=20EVD=20correctness=20oracle=20?= =?UTF-8?q?(A=C2=B7V=20=3D=20V=C2=B7D,=20orthogonality,=20complex=20pairs)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eigenvalue decomposition only had a single 2x2 example. Add reconstruction tests (A·V = V·D) for symmetric (4x4 and 12x12), non-symmetric real-eigenvalue, and complex-eigenvalue-pair matrices, plus orthonormality of symmetric eigenvectors. These exercise tred2/tql2 and orthes/hqr2 and guard the upcoming performance refactor. Assisted-By: Claude Opus 4.8 (1M context) --- src/__tests__/decompositions/evd.test.js | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js index 9e8b9f9..3acc808 100644 --- a/src/__tests__/decompositions/evd.test.js +++ b/src/__tests__/decompositions/evd.test.js @@ -1,7 +1,10 @@ +import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to'; import { describe, it, expect } from 'vitest'; import { Matrix, EVD } from '../..'; +expect.extend({ toBeDeepCloseTo }); + describe('Eigenvalue decomposition', () => { it('simple example', () => { let matrix = new Matrix([ @@ -20,4 +23,80 @@ describe('Eigenvalue decomposition', () => { const matrix = new Matrix([]); expect(() => new EVD(matrix)).toThrow('Matrix must be non-empty'); }); + + it('symmetric matrix: A·V = V·D and V is orthogonal', () => { + const matrix = new Matrix([ + [2, -1, 0, 1], + [-1, 2, -1, 0], + [0, -1, 2, -1], + [1, 0, -1, 2], + ]); + const evd = new EVD(matrix); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + + // eigen relation + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8); + // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I + expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( + Matrix.eye(4).to2DArray(), + 8, + ); + }); + + it('non-symmetric matrix with real eigenvalues: A·V = V·D', () => { + const matrix = new Matrix([ + [4, 1, 2], + [0, 3, -1], + [0, 0, 2], + ]); + const evd = new EVD(matrix); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + expect(evd.realEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo( + [2, 3, 4], + 8, + ); + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8); + }); + + it('larger symmetric matrix: A·V = V·D', () => { + const n = 12; + const base = Matrix.rand(n, n, { random: seededRandom(42) }); + // make it symmetric + const matrix = base.add(base.transpose()); + const evd = new EVD(matrix, { assumeSymmetric: true }); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 6); + expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( + Matrix.eye(n).to2DArray(), + 6, + ); + }); + + it('matrix with a complex eigenvalue pair', () => { + // rotation-like block has eigenvalues 1 ± i + const matrix = new Matrix([ + [1, -1], + [1, 1], + ]); + const evd = new EVD(matrix); + expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8); + expect(evd.imaginaryEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo( + [-1, 1], + 8, + ); + }); }); + +function seededRandom(seed) { + let a = seed >>> 0; + return function random() { + a |= 0; + a = (a + 0x6d2b79f5) | 0; + let t = Math.imul(a ^ (a >>> 15), 1 | a); + t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }; +} From b519a754891c6ce85df48a6b51da2b5e6093ae16 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 11:41:25 +0200 Subject: [PATCH 4/9] perf: speed up symmetric EVD ~1.7x via transposed eigenvector storage tred2/tql2 (the symmetric eigenproblem) accumulate the eigenvectors in V with the row index varying in the hot loops, i.e. column walks of the row-major backing store. Store V transposed during the reduction and transpose it back before returning; the inner loops then scan memory sequentially. Measured (symmetric, deterministic input): 600x600 666 ms -> 386 ms (~1.7x), 300x300 ~1.5x. Results unchanged (guarded by the new reconstruction tests). The non-symmetric path (orthes/hqr2) is deliberately left row-major: its two O(n^3) phases (QR sweep vs eigenvector back-transform) have opposite layout preferences, so a single transposed storage cannot help both. Add an "EVD (symmetric)" column to scripts/benchmark.js. Assisted-By: Claude Opus 4.8 (1M context) --- scripts/benchmark.js | 35 +++++++++++++++++++++------ src/dc/evd.js | 56 ++++++++++++++++++++++++++------------------ 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/scripts/benchmark.js b/scripts/benchmark.js index 7bfb9f9..132555e 100644 --- a/scripts/benchmark.js +++ b/scripts/benchmark.js @@ -2,37 +2,58 @@ const { Matrix, SVD, LU, EVD } = require('../matrix'); -const algorithms = [SVD, LU, EVD]; const sizes = [10, 100, 500, 1000]; const TARGET_MS = 3000; +// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for +// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)"). +const algorithms = [ + { name: 'SVD', make: (m) => new SVD(m), matrix: randomMatrix }, + { name: 'LU', make: (m) => new LU(m), matrix: randomMatrix }, + { name: 'EVD', make: (m) => new EVD(m), matrix: randomMatrix }, + { + name: 'EVD (symmetric)', + make: (m) => new EVD(m, { assumeSymmetric: true }), + matrix: symmetricMatrix, + }, +]; + const benchmarks = []; for (const size of sizes) benchmarks.push(test(size)); console.table(benchmarks); function test(size) { const benchmark = { size }; - for (const Algo of algorithms) { + for (const algorithm of algorithms) { // Fixed reusable matrix; decompositions clone internally so it is not mutated. - const matrix = Matrix.rand(size, size, { random: mulberry32(size) }); + const matrix = algorithm.matrix(size); // warm-up - void new Algo(matrix); + algorithm.make(matrix); let count = 1; const start = performance.now(); - void new Algo(matrix); + algorithm.make(matrix); const first = performance.now() - start; if (first < TARGET_MS) { count = Math.ceil(TARGET_MS / first); - for (let i = 0; i < count - 1; i++) void new Algo(matrix); + for (let i = 0; i < count - 1; i++) algorithm.make(matrix); } const time = (performance.now() - start) / count; - benchmark[Algo.name] = Number(time.toFixed(4)); + benchmark[algorithm.name] = Number(time.toFixed(4)); } return benchmark; } +function randomMatrix(size) { + return Matrix.rand(size, size, { random: mulberry32(size) }); +} + +function symmetricMatrix(size) { + const base = Matrix.rand(size, size, { random: mulberry32(size) }); + return base.add(base.transpose()); +} + // Deterministic PRNG so every run/layout uses identical input matrices. function mulberry32(seed) { let a = seed >>> 0; diff --git a/src/dc/evd.js b/src/dc/evd.js index 6c6a9a1..b470eec 100644 --- a/src/dc/evd.js +++ b/src/dc/evd.js @@ -31,14 +31,24 @@ export default class EigenvalueDecomposition { } if (isSymmetric) { + // tred2/tql2 access V almost exclusively down columns (the row index + // varies in the hot loops). Storing V transposed turns those into + // sequential row scans of the row-major backing store; we transpose it + // back to the logical layout before returning. V.get(j, i) holds the + // logical V(i, j). for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { - V.set(i, j, value.get(i, j)); + V.set(j, i, value.get(i, j)); } } tred2(n, e, d, V); tql2(n, e, d, V); + V = V.transpose(); } else { + // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite + // memory-layout preferences (the QR sweep favours column-major eigenvectors + // while the back-transform favours row-major), so a single transposed + // storage cannot help both. It is left in the original row-major layout. let H = new Matrix(n, n); let ort = new Float64Array(n); for (j = 0; j < n; j++) { @@ -93,7 +103,7 @@ function tred2(n, e, d, V) { let f, g, h, i, j, k, hh, scale; for (j = 0; j < n; j++) { - d[j] = V.get(n - 1, j); + d[j] = V.get(j, n - 1); } for (i = n - 1; i > 0; i--) { @@ -106,9 +116,9 @@ function tred2(n, e, d, V) { if (scale === 0) { e[i] = d[i - 1]; for (j = 0; j < i; j++) { - d[j] = V.get(i - 1, j); - V.set(i, j, 0); + d[j] = V.get(j, i - 1); V.set(j, i, 0); + V.set(i, j, 0); } } else { for (k = 0; k < i; k++) { @@ -131,11 +141,11 @@ function tred2(n, e, d, V) { for (j = 0; j < i; j++) { f = d[j]; - V.set(j, i, f); + V.set(i, j, f); g = e[j] + V.get(j, j) * f; for (k = j + 1; k <= i - 1; k++) { - g += V.get(k, j) * d[k]; - e[k] += V.get(k, j) * f; + g += V.get(j, k) * d[k]; + e[k] += V.get(j, k) * f; } e[j] = g; } @@ -155,43 +165,43 @@ function tred2(n, e, d, V) { f = d[j]; g = e[j]; for (k = j; k <= i - 1; k++) { - V.set(k, j, V.get(k, j) - (f * e[k] + g * d[k])); + V.set(j, k, V.get(j, k) - (f * e[k] + g * d[k])); } - d[j] = V.get(i - 1, j); - V.set(i, j, 0); + d[j] = V.get(j, i - 1); + V.set(j, i, 0); } } d[i] = h; } for (i = 0; i < n - 1; i++) { - V.set(n - 1, i, V.get(i, i)); + V.set(i, n - 1, V.get(i, i)); V.set(i, i, 1); h = d[i + 1]; if (h !== 0) { for (k = 0; k <= i; k++) { - d[k] = V.get(k, i + 1) / h; + d[k] = V.get(i + 1, k) / h; } for (j = 0; j <= i; j++) { g = 0; for (k = 0; k <= i; k++) { - g += V.get(k, i + 1) * V.get(k, j); + g += V.get(i + 1, k) * V.get(j, k); } for (k = 0; k <= i; k++) { - V.set(k, j, V.get(k, j) - g * d[k]); + V.set(j, k, V.get(j, k) - g * d[k]); } } } for (k = 0; k <= i; k++) { - V.set(k, i + 1, 0); + V.set(i + 1, k, 0); } } for (j = 0; j < n; j++) { - d[j] = V.get(n - 1, j); - V.set(n - 1, j, 0); + d[j] = V.get(j, n - 1); + V.set(j, n - 1, 0); } V.set(n - 1, n - 1, 1); @@ -264,9 +274,9 @@ function tql2(n, e, d, V) { d[i + 1] = h + s * (c * g + s * d[i]); for (k = 0; k < n; k++) { - h = V.get(k, i + 1); - V.set(k, i + 1, s * V.get(k, i) + c * h); - V.set(k, i, c * V.get(k, i) - s * h); + h = V.get(i + 1, k); + V.set(i + 1, k, s * V.get(i, k) + c * h); + V.set(i, k, c * V.get(i, k) - s * h); } } @@ -293,9 +303,9 @@ function tql2(n, e, d, V) { d[k] = d[i]; d[i] = p; for (j = 0; j < n; j++) { - p = V.get(j, i); - V.set(j, i, V.get(j, k)); - V.set(j, k, p); + p = V.get(i, j); + V.set(i, j, V.get(k, j)); + V.set(k, j, p); } } } From 0e1d3979479b52b2cc08bb4efc184c9dfed060c6 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 12:03:18 +0200 Subject: [PATCH 5/9] perf: transpose decomposition outputs in place to avoid extra allocation The transposed-storage optimization restored the logical layout of the output matrices with `M.transpose()`, which allocates a second full matrix while the old one is still live (transient ~1.5x peak memory). These outputs are square (SVD's V and EVD's V are always n x n; SVD's U is square whenever the input is), so transpose them in place via a new `transposeSquareInPlace` helper. No allocation in the common square case, so the optimization is now memory-neutral versus the original implementation. The working copy `at = value.transpose()` already replaced the original `value.clone()`, so it is not an extra allocation. Results remain bit-identical (verified by byte comparison against main) and the speedups are unchanged. Non-square SVD U falls back to allocating transpose. Assisted-By: Claude Opus 4.8 (1M context) --- src/dc/evd.js | 5 +++-- src/dc/svd.js | 10 ++++++---- src/dc/util.js | 21 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/dc/evd.js b/src/dc/evd.js index b470eec..c3c42a2 100644 --- a/src/dc/evd.js +++ b/src/dc/evd.js @@ -1,7 +1,7 @@ import Matrix from '../matrix'; import WrapperMatrix2D from '../wrap/WrapperMatrix2D'; -import { hypotenuse } from './util'; +import { hypotenuse, transposeSquareInPlace } from './util'; export default class EigenvalueDecomposition { constructor(matrix, options = {}) { @@ -43,7 +43,8 @@ export default class EigenvalueDecomposition { } tred2(n, e, d, V); tql2(n, e, d, V); - V = V.transpose(); + // V is square; restore the logical layout in place (no allocation). + transposeSquareInPlace(V); } else { // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite // memory-layout preferences (the QR sweep favours column-major eigenvectors diff --git a/src/dc/svd.js b/src/dc/svd.js index e92560e..6b78606 100644 --- a/src/dc/svd.js +++ b/src/dc/svd.js @@ -1,7 +1,7 @@ import Matrix from '../matrix'; import WrapperMatrix2D from '../wrap/WrapperMatrix2D'; -import { hypotenuse } from './util'; +import { hypotenuse, transposeSquareInPlace } from './util'; export default class SingularValueDecomposition { constructor(value, options = {}) { @@ -408,9 +408,11 @@ export default class SingularValueDecomposition { } // Restore the logical (row-major) layout of the singular vectors, which were - // accumulated in transposed storage for cache-sequential inner loops. - U = U.transpose(); - V = V.transpose(); + // accumulated in transposed storage for cache-sequential inner loops. V is + // always square and U is square whenever the input is, so this is done in + // place (no allocation) in the common case. + U = U.isSquare() ? transposeSquareInPlace(U) : U.transpose(); + V = transposeSquareInPlace(V); if (swapped) { let tmp = V; diff --git a/src/dc/util.js b/src/dc/util.js index 642aa11..29dd76e 100644 --- a/src/dc/util.js +++ b/src/dc/util.js @@ -1,3 +1,24 @@ +/** + * Transpose a square matrix in place, without allocating a copy. + * Used to restore the logical layout of decomposition outputs that were + * accumulated in transposed storage for cache-sequential inner loops. + * @param {import('../matrix').default} matrix - square matrix, mutated in place + * @returns {import('../matrix').default} the same matrix + */ +export function transposeSquareInPlace(matrix) { + const data = matrix.data; + const n = matrix.rows; + for (let i = 0; i < n; i++) { + const rowI = data[i]; + for (let j = i + 1; j < n; j++) { + const tmp = rowI[j]; + rowI[j] = data[j][i]; + data[j][i] = tmp; + } + } + return matrix; +} + export function hypotenuse(a, b) { let r = 0; if (Math.abs(a) > Math.abs(b)) { From 93bdb7ea679c3f0de19e3b7afbf7cee2fa9dcd39 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 13:21:43 +0200 Subject: [PATCH 6/9] style: apply prettier formatting to EVD tests Assisted-By: Claude Opus 4.8 (1M context) --- src/__tests__/decompositions/evd.test.js | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js index 3acc808..20908bb 100644 --- a/src/__tests__/decompositions/evd.test.js +++ b/src/__tests__/decompositions/evd.test.js @@ -36,7 +36,10 @@ describe('Eigenvalue decomposition', () => { const D = evd.diagonalMatrix; // eigen relation - expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8); + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 8, + ); // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( Matrix.eye(4).to2DArray(), @@ -57,7 +60,10 @@ describe('Eigenvalue decomposition', () => { [2, 3, 4], 8, ); - expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 8); + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 8, + ); }); it('larger symmetric matrix: A·V = V·D', () => { @@ -68,7 +74,10 @@ describe('Eigenvalue decomposition', () => { const evd = new EVD(matrix, { assumeSymmetric: true }); const V = evd.eigenvectorMatrix; const D = evd.diagonalMatrix; - expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo(V.mmul(D).to2DArray(), 6); + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 6, + ); expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( Matrix.eye(n).to2DArray(), 6, @@ -83,10 +92,9 @@ describe('Eigenvalue decomposition', () => { ]); const evd = new EVD(matrix); expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8); - expect(evd.imaginaryEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo( - [-1, 1], - 8, - ); + expect( + evd.imaginaryEigenvalues.slice().sort((a, b) => a - b), + ).toBeDeepCloseTo([-1, 1], 8); }); }); From a2f14f5a2e70649c01569d6924252772ed09f773 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 13:35:34 +0200 Subject: [PATCH 7/9] refactor: use ml-xsadd for seeded RNG in EVD tests and benchmark Replace the hand-rolled mulberry32 PRNG with the ecosystem's ml-xsadd (XORSHIFT-ADD) generator via `new XSadd(seed).random`, in the EVD reconstruction tests and scripts/benchmark.js. Add ml-xsadd to devDependencies. Assisted-By: Claude Opus 4.8 (1M context) --- package.json | 1 + scripts/benchmark.js | 19 +++++-------------- src/__tests__/decompositions/evd.test.js | 14 ++------------ 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/package.json b/package.json index 03c2199..5e2e2c5 100644 --- a/package.json +++ b/package.json @@ -75,6 +75,7 @@ "jest-matcher-deep-close-to": "^3.0.2", "mathjs": "^14.5.2", "ml-dataset-iris": "^1.2.1", + "ml-xsadd": "^3.0.1", "numeric": "^1.2.6", "prettier": "^3.5.3", "pretty-hrtime": "^1.0.3", diff --git a/scripts/benchmark.js b/scripts/benchmark.js index 132555e..887f75d 100644 --- a/scripts/benchmark.js +++ b/scripts/benchmark.js @@ -1,5 +1,7 @@ 'use strict'; +const { XSadd } = require('ml-xsadd'); + const { Matrix, SVD, LU, EVD } = require('../matrix'); const sizes = [10, 100, 500, 1000]; @@ -45,23 +47,12 @@ function test(size) { return benchmark; } +// Deterministic seed so every run/layout uses identical input matrices. function randomMatrix(size) { - return Matrix.rand(size, size, { random: mulberry32(size) }); + return Matrix.rand(size, size, { random: new XSadd(size).random }); } function symmetricMatrix(size) { - const base = Matrix.rand(size, size, { random: mulberry32(size) }); + const base = Matrix.rand(size, size, { random: new XSadd(size).random }); return base.add(base.transpose()); } - -// Deterministic PRNG so every run/layout uses identical input matrices. -function mulberry32(seed) { - let a = seed >>> 0; - return function random() { - a |= 0; - a = (a + 0x6d2b79f5) | 0; - let t = Math.imul(a ^ (a >>> 15), 1 | a); - t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; - return ((t ^ (t >>> 14)) >>> 0) / 4294967296; - }; -} diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js index 20908bb..49032bf 100644 --- a/src/__tests__/decompositions/evd.test.js +++ b/src/__tests__/decompositions/evd.test.js @@ -1,4 +1,5 @@ import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to'; +import { XSadd } from 'ml-xsadd'; import { describe, it, expect } from 'vitest'; import { Matrix, EVD } from '../..'; @@ -68,7 +69,7 @@ describe('Eigenvalue decomposition', () => { it('larger symmetric matrix: A·V = V·D', () => { const n = 12; - const base = Matrix.rand(n, n, { random: seededRandom(42) }); + const base = Matrix.rand(n, n, { random: new XSadd(42).random }); // make it symmetric const matrix = base.add(base.transpose()); const evd = new EVD(matrix, { assumeSymmetric: true }); @@ -97,14 +98,3 @@ describe('Eigenvalue decomposition', () => { ).toBeDeepCloseTo([-1, 1], 8); }); }); - -function seededRandom(seed) { - let a = seed >>> 0; - return function random() { - a |= 0; - a = (a + 0x6d2b79f5) | 0; - let t = Math.imul(a ^ (a >>> 15), 1 | a); - t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; - return ((t ^ (t >>> 14)) >>> 0) / 4294967296; - }; -} From 903f3f768785d8237d60e24b0740a12a1274cfc3 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Sat, 20 Jun 2026 13:35:34 +0200 Subject: [PATCH 8/9] chore: ignore .claude Assisted-By: Claude Opus 4.8 (1M context) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 64e8143..a3b9f7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.claude .idea node_modules /matrix.*js From aef6b5e4b3eed79dcf980e64fe39b40246bb366d Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Mon, 22 Jun 2026 18:22:19 +0200 Subject: [PATCH 9/9] chore(benchmark): rewrite decompositions benchmark on the benchmark library Move scripts/benchmark.js to benchmark/decompositions.js and rewrite it as a Benchmark.Suite comparing SVD, LU, EVD and symmetric EVD on a seeded matrix, consistent with the other files in benchmark/. Steady-state sampling makes the results independent of execution order. --- benchmark/decompositions.js | 33 +++++++++++++++++++++ scripts/benchmark.js | 58 ------------------------------------- 2 files changed, 33 insertions(+), 58 deletions(-) create mode 100644 benchmark/decompositions.js delete mode 100644 scripts/benchmark.js diff --git a/benchmark/decompositions.js b/benchmark/decompositions.js new file mode 100644 index 0000000..c6686e2 --- /dev/null +++ b/benchmark/decompositions.js @@ -0,0 +1,33 @@ +'use strict'; + +const Benchmark = require('benchmark'); +const { XSadd } = require('ml-xsadd'); + +const { Matrix, SVD, LU, EVD } = require('..'); + +const size = parseInt(process.argv[2], 10) || 200; +console.log(`Decomposition benchmark for ${size}x${size} matrix`); + +// Deterministic input so every run uses identical matrices. +const general = Matrix.rand(size, size, { random: new XSadd(size).random }); +const symmetric = general.add(general.transpose()); + +// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for +// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)"). +new Benchmark.Suite() + .add('SVD', () => { + new SVD(general); + }) + .add('LU', () => { + new LU(general); + }) + .add('EVD', () => { + new EVD(general); + }) + .add('EVD (symmetric)', () => { + new EVD(symmetric, { assumeSymmetric: true }); + }) + .on('cycle', (event) => { + console.log(String(event.target)); + }) + .run(); diff --git a/scripts/benchmark.js b/scripts/benchmark.js deleted file mode 100644 index 887f75d..0000000 --- a/scripts/benchmark.js +++ /dev/null @@ -1,58 +0,0 @@ -'use strict'; - -const { XSadd } = require('ml-xsadd'); - -const { Matrix, SVD, LU, EVD } = require('../matrix'); - -const sizes = [10, 100, 500, 1000]; -const TARGET_MS = 3000; - -// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for -// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)"). -const algorithms = [ - { name: 'SVD', make: (m) => new SVD(m), matrix: randomMatrix }, - { name: 'LU', make: (m) => new LU(m), matrix: randomMatrix }, - { name: 'EVD', make: (m) => new EVD(m), matrix: randomMatrix }, - { - name: 'EVD (symmetric)', - make: (m) => new EVD(m, { assumeSymmetric: true }), - matrix: symmetricMatrix, - }, -]; - -const benchmarks = []; -for (const size of sizes) benchmarks.push(test(size)); -console.table(benchmarks); - -function test(size) { - const benchmark = { size }; - for (const algorithm of algorithms) { - // Fixed reusable matrix; decompositions clone internally so it is not mutated. - const matrix = algorithm.matrix(size); - - // warm-up - algorithm.make(matrix); - - let count = 1; - const start = performance.now(); - algorithm.make(matrix); - const first = performance.now() - start; - if (first < TARGET_MS) { - count = Math.ceil(TARGET_MS / first); - for (let i = 0; i < count - 1; i++) algorithm.make(matrix); - } - const time = (performance.now() - start) / count; - benchmark[algorithm.name] = Number(time.toFixed(4)); - } - return benchmark; -} - -// Deterministic seed so every run/layout uses identical input matrices. -function randomMatrix(size) { - return Matrix.rand(size, size, { random: new XSadd(size).random }); -} - -function symmetricMatrix(size) { - const base = Matrix.rand(size, size, { random: new XSadd(size).random }); - return base.add(base.transpose()); -}