diff --git a/.gitignore b/.gitignore index 64e8143..a3b9f7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.claude .idea node_modules /matrix.*js diff --git a/benchmark/decompositions.js b/benchmark/decompositions.js new file mode 100644 index 0000000..c6686e2 --- /dev/null +++ b/benchmark/decompositions.js @@ -0,0 +1,33 @@ +'use strict'; + +const Benchmark = require('benchmark'); +const { XSadd } = require('ml-xsadd'); + +const { Matrix, SVD, LU, EVD } = require('..'); + +const size = parseInt(process.argv[2], 10) || 200; +console.log(`Decomposition benchmark for ${size}x${size} matrix`); + +// Deterministic input so every run uses identical matrices. +const general = Matrix.rand(size, size, { random: new XSadd(size).random }); +const symmetric = general.add(general.transpose()); + +// EVD has two code paths: orthes/hqr2 for general matrices and tred2/tql2 for +// symmetric ones. They are benchmarked separately ("EVD" vs "EVD (symmetric)"). +new Benchmark.Suite() + .add('SVD', () => { + new SVD(general); + }) + .add('LU', () => { + new LU(general); + }) + .add('EVD', () => { + new EVD(general); + }) + .add('EVD (symmetric)', () => { + new EVD(symmetric, { assumeSymmetric: true }); + }) + .on('cycle', (event) => { + console.log(String(event.target)); + }) + .run(); diff --git a/package.json b/package.json index 03c2199..5e2e2c5 100644 --- a/package.json +++ b/package.json @@ -75,6 +75,7 @@ "jest-matcher-deep-close-to": "^3.0.2", "mathjs": "^14.5.2", "ml-dataset-iris": "^1.2.1", + "ml-xsadd": "^3.0.1", "numeric": "^1.2.6", "prettier": "^3.5.3", "pretty-hrtime": "^1.0.3", diff --git a/src/__tests__/decompositions/evd.test.js b/src/__tests__/decompositions/evd.test.js index 9e8b9f9..49032bf 100644 --- a/src/__tests__/decompositions/evd.test.js +++ b/src/__tests__/decompositions/evd.test.js @@ -1,7 +1,11 @@ +import { toBeDeepCloseTo } from 'jest-matcher-deep-close-to'; +import { XSadd } from 'ml-xsadd'; import { describe, it, expect } from 'vitest'; import { Matrix, EVD } from '../..'; +expect.extend({ toBeDeepCloseTo }); + describe('Eigenvalue decomposition', () => { it('simple example', () => { let matrix = new Matrix([ @@ -20,4 +24,77 @@ describe('Eigenvalue decomposition', () => { const matrix = new Matrix([]); expect(() => new EVD(matrix)).toThrow('Matrix must be non-empty'); }); + + it('symmetric matrix: A·V = V·D and V is orthogonal', () => { + const matrix = new Matrix([ + [2, -1, 0, 1], + [-1, 2, -1, 0], + [0, -1, 2, -1], + [1, 0, -1, 2], + ]); + const evd = new EVD(matrix); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + + // eigen relation + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 8, + ); + // eigenvectors of a symmetric matrix are orthonormal: VᵀV = I + expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( + Matrix.eye(4).to2DArray(), + 8, + ); + }); + + it('non-symmetric matrix with real eigenvalues: A·V = V·D', () => { + const matrix = new Matrix([ + [4, 1, 2], + [0, 3, -1], + [0, 0, 2], + ]); + const evd = new EVD(matrix); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + expect(evd.realEigenvalues.slice().sort((a, b) => a - b)).toBeDeepCloseTo( + [2, 3, 4], + 8, + ); + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 8, + ); + }); + + it('larger symmetric matrix: A·V = V·D', () => { + const n = 12; + const base = Matrix.rand(n, n, { random: new XSadd(42).random }); + // make it symmetric + const matrix = base.add(base.transpose()); + const evd = new EVD(matrix, { assumeSymmetric: true }); + const V = evd.eigenvectorMatrix; + const D = evd.diagonalMatrix; + expect(matrix.mmul(V).to2DArray()).toBeDeepCloseTo( + V.mmul(D).to2DArray(), + 6, + ); + expect(V.transpose().mmul(V).to2DArray()).toBeDeepCloseTo( + Matrix.eye(n).to2DArray(), + 6, + ); + }); + + it('matrix with a complex eigenvalue pair', () => { + // rotation-like block has eigenvalues 1 ± i + const matrix = new Matrix([ + [1, -1], + [1, 1], + ]); + const evd = new EVD(matrix); + expect(evd.realEigenvalues).toBeDeepCloseTo([1, 1], 8); + expect( + evd.imaginaryEigenvalues.slice().sort((a, b) => a - b), + ).toBeDeepCloseTo([-1, 1], 8); + }); }); diff --git a/src/dc/evd.js b/src/dc/evd.js index 6c6a9a1..c3c42a2 100644 --- a/src/dc/evd.js +++ b/src/dc/evd.js @@ -1,7 +1,7 @@ import Matrix from '../matrix'; import WrapperMatrix2D from '../wrap/WrapperMatrix2D'; -import { hypotenuse } from './util'; +import { hypotenuse, transposeSquareInPlace } from './util'; export default class EigenvalueDecomposition { constructor(matrix, options = {}) { @@ -31,14 +31,25 @@ export default class EigenvalueDecomposition { } if (isSymmetric) { + // tred2/tql2 access V almost exclusively down columns (the row index + // varies in the hot loops). Storing V transposed turns those into + // sequential row scans of the row-major backing store; we transpose it + // back to the logical layout before returning. V.get(j, i) holds the + // logical V(i, j). for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { - V.set(i, j, value.get(i, j)); + V.set(j, i, value.get(i, j)); } } tred2(n, e, d, V); tql2(n, e, d, V); + // V is square; restore the logical layout in place (no allocation). + transposeSquareInPlace(V); } else { + // The non-symmetric path (orthes/hqr2) has two O(n^3) phases with opposite + // memory-layout preferences (the QR sweep favours column-major eigenvectors + // while the back-transform favours row-major), so a single transposed + // storage cannot help both. It is left in the original row-major layout. let H = new Matrix(n, n); let ort = new Float64Array(n); for (j = 0; j < n; j++) { @@ -93,7 +104,7 @@ function tred2(n, e, d, V) { let f, g, h, i, j, k, hh, scale; for (j = 0; j < n; j++) { - d[j] = V.get(n - 1, j); + d[j] = V.get(j, n - 1); } for (i = n - 1; i > 0; i--) { @@ -106,9 +117,9 @@ function tred2(n, e, d, V) { if (scale === 0) { e[i] = d[i - 1]; for (j = 0; j < i; j++) { - d[j] = V.get(i - 1, j); - V.set(i, j, 0); + d[j] = V.get(j, i - 1); V.set(j, i, 0); + V.set(i, j, 0); } } else { for (k = 0; k < i; k++) { @@ -131,11 +142,11 @@ function tred2(n, e, d, V) { for (j = 0; j < i; j++) { f = d[j]; - V.set(j, i, f); + V.set(i, j, f); g = e[j] + V.get(j, j) * f; for (k = j + 1; k <= i - 1; k++) { - g += V.get(k, j) * d[k]; - e[k] += V.get(k, j) * f; + g += V.get(j, k) * d[k]; + e[k] += V.get(j, k) * f; } e[j] = g; } @@ -155,43 +166,43 @@ function tred2(n, e, d, V) { f = d[j]; g = e[j]; for (k = j; k <= i - 1; k++) { - V.set(k, j, V.get(k, j) - (f * e[k] + g * d[k])); + V.set(j, k, V.get(j, k) - (f * e[k] + g * d[k])); } - d[j] = V.get(i - 1, j); - V.set(i, j, 0); + d[j] = V.get(j, i - 1); + V.set(j, i, 0); } } d[i] = h; } for (i = 0; i < n - 1; i++) { - V.set(n - 1, i, V.get(i, i)); + V.set(i, n - 1, V.get(i, i)); V.set(i, i, 1); h = d[i + 1]; if (h !== 0) { for (k = 0; k <= i; k++) { - d[k] = V.get(k, i + 1) / h; + d[k] = V.get(i + 1, k) / h; } for (j = 0; j <= i; j++) { g = 0; for (k = 0; k <= i; k++) { - g += V.get(k, i + 1) * V.get(k, j); + g += V.get(i + 1, k) * V.get(j, k); } for (k = 0; k <= i; k++) { - V.set(k, j, V.get(k, j) - g * d[k]); + V.set(j, k, V.get(j, k) - g * d[k]); } } } for (k = 0; k <= i; k++) { - V.set(k, i + 1, 0); + V.set(i + 1, k, 0); } } for (j = 0; j < n; j++) { - d[j] = V.get(n - 1, j); - V.set(n - 1, j, 0); + d[j] = V.get(j, n - 1); + V.set(j, n - 1, 0); } V.set(n - 1, n - 1, 1); @@ -264,9 +275,9 @@ function tql2(n, e, d, V) { d[i + 1] = h + s * (c * g + s * d[i]); for (k = 0; k < n; k++) { - h = V.get(k, i + 1); - V.set(k, i + 1, s * V.get(k, i) + c * h); - V.set(k, i, c * V.get(k, i) - s * h); + h = V.get(i + 1, k); + V.set(i + 1, k, s * V.get(i, k) + c * h); + V.set(i, k, c * V.get(i, k) - s * h); } } @@ -293,9 +304,9 @@ function tql2(n, e, d, V) { d[k] = d[i]; d[i] = p; for (j = 0; j < n; j++) { - p = V.get(j, i); - V.set(j, i, V.get(j, k)); - V.set(j, k, p); + p = V.get(i, j); + V.set(i, j, V.get(k, j)); + V.set(k, j, p); } } } diff --git a/src/dc/svd.js b/src/dc/svd.js index 20ca002..6b78606 100644 --- a/src/dc/svd.js +++ b/src/dc/svd.js @@ -1,7 +1,7 @@ import Matrix from '../matrix'; import WrapperMatrix2D from '../wrap/WrapperMatrix2D'; -import { hypotenuse } from './util'; +import { hypotenuse, transposeSquareInPlace } from './util'; export default class SingularValueDecomposition { constructor(value, options = {}) { @@ -23,32 +23,40 @@ export default class SingularValueDecomposition { let wantu = Boolean(computeLeftSingularVectors); let wantv = Boolean(computeRightSingularVectors); + // Work on the transpose of the input so the hot inner loops (which iterate + // over rows for a fixed column) scan memory sequentially in the row-major + // backing store. `at` holds the transpose: at.get(j, i) === a.get(i, j) + // where `a` is the logical m x n working matrix. let swapped = false; - let a; + let at; if (m < n) { if (!autoTranspose) { - a = value.clone(); // eslint-disable-next-line no-console console.warn( 'Computing SVD on a matrix with more columns than rows. Consider enabling autoTranspose', ); + at = value.transpose(); } else { - a = value.transpose(); - m = a.rows; - n = a.columns; + at = value.clone(); + m = value.columns; + n = value.rows; swapped = true; let aux = wantu; wantu = wantv; wantv = aux; } } else { - a = value.clone(); + at = value.transpose(); } let nu = Math.min(m, n); let ni = Math.min(m + 1, n); let s = new Float64Array(ni); - let U = new Matrix(m, nu); + // U and V are stored transposed during the computation so the inner loops + // (which always vary the row index) scan memory sequentially. They are + // transposed back to their logical layout before being returned. + // Ut.get(j, i) === U.get(i, j) and Vt.get(j, i) === V.get(i, j). + let U = new Matrix(nu, m); let V = new Matrix(n, n); let e = new Float64Array(n); @@ -65,16 +73,16 @@ export default class SingularValueDecomposition { if (k < nct) { s[k] = 0; for (let i = k; i < m; i++) { - s[k] = hypotenuse(s[k], a.get(i, k)); + s[k] = hypotenuse(s[k], at.get(k, i)); } if (s[k] !== 0) { - if (a.get(k, k) < 0) { + if (at.get(k, k) < 0) { s[k] = -s[k]; } for (let i = k; i < m; i++) { - a.set(i, k, a.get(i, k) / s[k]); + at.set(k, i, at.get(k, i) / s[k]); } - a.set(k, k, a.get(k, k) + 1); + at.set(k, k, at.get(k, k) + 1); } s[k] = -s[k]; } @@ -83,19 +91,19 @@ export default class SingularValueDecomposition { if (k < nct && s[k] !== 0) { let t = 0; for (let i = k; i < m; i++) { - t += a.get(i, k) * a.get(i, j); + t += at.get(k, i) * at.get(j, i); } - t = -t / a.get(k, k); + t = -t / at.get(k, k); for (let i = k; i < m; i++) { - a.set(i, j, a.get(i, j) + t * a.get(i, k)); + at.set(j, i, at.get(j, i) + t * at.get(k, i)); } } - e[j] = a.get(k, j); + e[j] = at.get(j, k); } if (wantu && k < nct) { for (let i = k; i < m; i++) { - U.set(i, k, a.get(i, k)); + U.set(k, i, at.get(k, i)); } } @@ -120,19 +128,19 @@ export default class SingularValueDecomposition { } for (let i = k + 1; i < m; i++) { for (let j = k + 1; j < n; j++) { - work[i] += e[j] * a.get(i, j); + work[i] += e[j] * at.get(j, i); } } for (let j = k + 1; j < n; j++) { let t = -e[j] / e[k + 1]; for (let i = k + 1; i < m; i++) { - a.set(i, j, a.get(i, j) + t * work[i]); + at.set(j, i, at.get(j, i) + t * work[i]); } } } if (wantv) { for (let i = k + 1; i < n; i++) { - V.set(i, k, e[i]); + V.set(k, i, e[i]); } } } @@ -140,20 +148,20 @@ export default class SingularValueDecomposition { let p = Math.min(n, m + 1); if (nct < n) { - s[nct] = a.get(nct, nct); + s[nct] = at.get(nct, nct); } if (m < p) { s[p - 1] = 0; } if (nrt + 1 < p) { - e[nrt] = a.get(nrt, p - 1); + e[nrt] = at.get(p - 1, nrt); } e[p - 1] = 0; if (wantu) { for (let j = nct; j < nu; j++) { for (let i = 0; i < m; i++) { - U.set(i, j, 0); + U.set(j, i, 0); } U.set(j, j, 1); } @@ -162,23 +170,23 @@ export default class SingularValueDecomposition { for (let j = k + 1; j < nu; j++) { let t = 0; for (let i = k; i < m; i++) { - t += U.get(i, k) * U.get(i, j); + t += U.get(k, i) * U.get(j, i); } t = -t / U.get(k, k); for (let i = k; i < m; i++) { - U.set(i, j, U.get(i, j) + t * U.get(i, k)); + U.set(j, i, U.get(j, i) + t * U.get(k, i)); } } for (let i = k; i < m; i++) { - U.set(i, k, -U.get(i, k)); + U.set(k, i, -U.get(k, i)); } U.set(k, k, 1 + U.get(k, k)); for (let i = 0; i < k - 1; i++) { - U.set(i, k, 0); + U.set(k, i, 0); } } else { for (let i = 0; i < m; i++) { - U.set(i, k, 0); + U.set(k, i, 0); } U.set(k, k, 1); } @@ -191,16 +199,16 @@ export default class SingularValueDecomposition { for (let j = k + 1; j < n; j++) { let t = 0; for (let i = k + 1; i < n; i++) { - t += V.get(i, k) * V.get(i, j); + t += V.get(k, i) * V.get(j, i); } - t = -t / V.get(k + 1, k); + t = -t / V.get(k, k + 1); for (let i = k + 1; i < n; i++) { - V.set(i, j, V.get(i, j) + t * V.get(i, k)); + V.set(j, i, V.get(j, i) + t * V.get(k, i)); } } } for (let i = 0; i < n; i++) { - V.set(i, k, 0); + V.set(k, i, 0); } V.set(k, k, 1); } @@ -265,9 +273,9 @@ export default class SingularValueDecomposition { } if (wantv) { for (let i = 0; i < n; i++) { - t = cs * V.get(i, j) + sn * V.get(i, p - 1); - V.set(i, p - 1, -sn * V.get(i, j) + cs * V.get(i, p - 1)); - V.set(i, j, t); + t = cs * V.get(j, i) + sn * V.get(p - 1, i); + V.set(p - 1, i, -sn * V.get(j, i) + cs * V.get(p - 1, i)); + V.set(j, i, t); } } } @@ -285,9 +293,9 @@ export default class SingularValueDecomposition { e[j] = cs * e[j]; if (wantu) { for (let i = 0; i < m; i++) { - t = cs * U.get(i, j) + sn * U.get(i, k - 1); - U.set(i, k - 1, -sn * U.get(i, j) + cs * U.get(i, k - 1)); - U.set(i, j, t); + t = cs * U.get(j, i) + sn * U.get(k - 1, i); + U.set(k - 1, i, -sn * U.get(j, i) + cs * U.get(k - 1, i)); + U.set(j, i, t); } } } @@ -333,9 +341,9 @@ export default class SingularValueDecomposition { s[j + 1] = cs * s[j + 1]; if (wantv) { for (let i = 0; i < n; i++) { - t = cs * V.get(i, j) + sn * V.get(i, j + 1); - V.set(i, j + 1, -sn * V.get(i, j) + cs * V.get(i, j + 1)); - V.set(i, j, t); + t = cs * V.get(j, i) + sn * V.get(j + 1, i); + V.set(j + 1, i, -sn * V.get(j, i) + cs * V.get(j + 1, i)); + V.set(j, i, t); } } t = hypotenuse(f, g); @@ -349,9 +357,9 @@ export default class SingularValueDecomposition { e[j + 1] = cs * e[j + 1]; if (wantu && j < m - 1) { for (let i = 0; i < m; i++) { - t = cs * U.get(i, j) + sn * U.get(i, j + 1); - U.set(i, j + 1, -sn * U.get(i, j) + cs * U.get(i, j + 1)); - U.set(i, j, t); + t = cs * U.get(j, i) + sn * U.get(j + 1, i); + U.set(j + 1, i, -sn * U.get(j, i) + cs * U.get(j + 1, i)); + U.set(j, i, t); } } } @@ -364,7 +372,7 @@ export default class SingularValueDecomposition { s[k] = s[k] < 0 ? -s[k] : 0; if (wantv) { for (let i = 0; i <= pp; i++) { - V.set(i, k, -V.get(i, k)); + V.set(k, i, -V.get(k, i)); } } } @@ -377,16 +385,16 @@ export default class SingularValueDecomposition { s[k + 1] = t; if (wantv && k < n - 1) { for (let i = 0; i < n; i++) { - t = V.get(i, k + 1); - V.set(i, k + 1, V.get(i, k)); - V.set(i, k, t); + t = V.get(k + 1, i); + V.set(k + 1, i, V.get(k, i)); + V.set(k, i, t); } } if (wantu && k < m - 1) { for (let i = 0; i < m; i++) { - t = U.get(i, k + 1); - U.set(i, k + 1, U.get(i, k)); - U.set(i, k, t); + t = U.get(k + 1, i); + U.set(k + 1, i, U.get(k, i)); + U.set(k, i, t); } } k++; @@ -399,6 +407,13 @@ export default class SingularValueDecomposition { } } + // Restore the logical (row-major) layout of the singular vectors, which were + // accumulated in transposed storage for cache-sequential inner loops. V is + // always square and U is square whenever the input is, so this is done in + // place (no allocation) in the common case. + U = U.isSquare() ? transposeSquareInPlace(U) : U.transpose(); + V = transposeSquareInPlace(V); + if (swapped) { let tmp = V; V = U; diff --git a/src/dc/util.js b/src/dc/util.js index 642aa11..29dd76e 100644 --- a/src/dc/util.js +++ b/src/dc/util.js @@ -1,3 +1,24 @@ +/** + * Transpose a square matrix in place, without allocating a copy. + * Used to restore the logical layout of decomposition outputs that were + * accumulated in transposed storage for cache-sequential inner loops. + * @param {import('../matrix').default} matrix - square matrix, mutated in place + * @returns {import('../matrix').default} the same matrix + */ +export function transposeSquareInPlace(matrix) { + const data = matrix.data; + const n = matrix.rows; + for (let i = 0; i < n; i++) { + const rowI = data[i]; + for (let j = i + 1; j < n; j++) { + const tmp = rowI[j]; + rowI[j] = data[j][i]; + data[j][i] = tmp; + } + } + return matrix; +} + export function hypotenuse(a, b) { let r = 0; if (Math.abs(a) > Math.abs(b)) {