Skip to content

Commit 4227ebe

Browse files
Abdeltotocaiopizzolcaio-pizzol
authored
feat(math): implement m:eqArr equation-array converter (#2754)
* feat(math): implement m:eqArr equation-array converter (closes #2607)

  Made-with: Cursor

* fix(math): strip & alignment markers from m:eqArr rows

  Per ECMA-376 §22.1.2.34, '&' characters inside m:t elements within an equation array are alignment markers, not literal text. Without mapping them to MathML <maligngroup>/<malignmark> (poorly supported in browsers), the previous implementation rendered them as literal ampersands. Strip '&' from m:t text before recursing into row children so real-world documents with aligned equations render cleanly.

  Also adds tests covering the m:eqArrPr filter and nested-math recursion paths.

* test(math): add behavior tests for m:eqArr converter

  Adds math-eqarr-tests.docx fixture with 5 Word-native equation arrays: basic, nested fraction, nested subscript, alignment markers, and m:eqArrPr properties. New test.describe block follows the convention established by limLow/limUpp, delimiter, radical, and func suites — verifying mtable structure, nested-math recursion, alignment-marker stripping, and m:eqArrPr property filtering.

  Also registers the fixture in the R2 rendering corpus (sd-2754) so layout and visual regression suites will auto-discover it.

* style(math): wrap m:eqArr cell content in explicit <mrow>

  Align with sibling math converters (fraction, subscript, radical, bar, function, limits) which all create an explicit <mrow> inside their container elements. MathML's implicit mrow semantics mean the rendered output is unchanged, but this matches the directory convention and makes the JSDoc shape match the code.

---------

Co-authored-by: Caio Pizzol <caiopizzol@icloud.com>
Co-authored-by: Caio Pizzol <caio@superdoc.dev>
1 parent d2d0b0f commit 4227ebe

6 files changed

Lines changed: 309 additions & 1 deletion

File tree

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import type { MathObjectConverter, OmmlJsonNode } from '../types.js';
2+
3+
/** XML namespace URI handed to `createElementNS` for every MathML element this module creates. */
const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
4+
5+
/**
 * Return a copy of a single OMML JSON node with `&` removed from its text.
 *
 * - A text node whose text contains `&` is shallow-copied with every `&` dropped.
 * - Otherwise, a node that has `elements` is shallow-copied with its children
 *   processed recursively.
 * - Any other node (including null/undefined entries) is returned as-is, by
 *   reference.
 */
const stripMarkersFromNode = (node: OmmlJsonNode): OmmlJsonNode => {
  const text = node?.text;
  if (node?.type === 'text' && typeof text === 'string' && text.includes('&')) {
    return { ...node, text: text.split('&').join('') };
  }

  const children = node?.elements;
  return children ? { ...node, elements: stripAlignmentMarkers(children) } : node;
};

/**
 * Produce a copy of an equation-array row's children in which no text node
 * contains `&`.
 *
 * ECMA-376 §22.1.2.34: inside m:t, `&` is an alignment marker (odd = align,
 * even = spacer) rather than literal text. The converter does not yet map
 * these markers to MathML <maligngroup>/<malignmark>, so they are removed to
 * keep literal ampersands out of the rendered output.
 *
 * The input tree is never mutated. Container nodes are shallow-copied on the
 * way down; leaf nodes that need no change are shared with the input.
 *
 * @param nodes - OMML JSON nodes to clean (typically the children of an m:e row).
 * @returns A new array of the same length with alignment markers removed.
 */
const stripAlignmentMarkers = (nodes: OmmlJsonNode[]): OmmlJsonNode[] =>
  nodes.map((node) => stripMarkersFromNode(node));
23+
24+
/**
 * Convert an OMML equation array (m:eqArr) into a MathML <mtable>.
 *
 * Input shape:
 *   m:eqArr → optional m:eqArrPr, followed by one m:e per row
 *
 * Output shape:
 *   <mtable columnalign="left">
 *     <mtr> <mtd> <mrow>row-content</mrow> </mtd> </mtr>
 *     ...
 *   </mtable>
 *
 * Only m:e children become rows; anything else (such as m:eqArrPr) is skipped.
 * Each row yields exactly one cell, in contrast to a matrix (m:m). `&`
 * alignment markers are stripped from row text before the row's children
 * are converted.
 *
 * @returns The populated <mtable>, or null when the array has no m:e rows.
 * @spec ECMA-376 §22.1.2.34
 */
export const convertEquationArray: MathObjectConverter = (node, doc, convertChildren) => {
  const createMathElement = (tag: string) => doc.createElementNS(MATHML_NS, tag);

  const table = createMathElement('mtable');
  table.setAttribute('columnalign', 'left');

  for (const child of node.elements ?? []) {
    if (child.name !== 'm:e') continue;

    const tableRow = createMathElement('mtr');
    const cell = createMathElement('mtd');
    const content = createMathElement('mrow');

    // Alignment markers are removed first so they never reach the row converter.
    const rowChildren = stripAlignmentMarkers(child.elements ?? []);
    content.appendChild(convertChildren(rowChildren));

    cell.appendChild(content);
    tableRow.appendChild(cell);
    table.appendChild(tableRow);
  }

  // A table without rows is reported as "nothing to render".
  return table.childNodes.length === 0 ? null : table;
};

packages/layout-engine/painters/dom/src/features/math/converters/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ export { convertSuperscript } from './superscript.js';
1616
export { convertSubSuperscript } from './sub-superscript.js';
1717
export { convertAccent } from './accent.js';
1818
export { convertPreSubSuperscript } from './pre-sub-superscript.js';
19+
export { convertEquationArray } from './equation-array.js';
1920
export { convertRadical } from './radical.js';
2021
export { convertLowerLimit } from './lower-limit.js';
2122
export { convertUpperLimit } from './upper-limit.js';

packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,3 +2569,142 @@ describe('m:limUpp converter', () => {
25692569
expect(mover!.children[1]!.textContent).toBe('x');
25702570
});
25712571
});
2572+
2573+
describe('m:eqArr converter', () => {
  /** Builds an m:r run wrapping a single m:t element that holds `text`. */
  const run = (text: string) => ({
    name: 'm:r',
    elements: [{ name: 'm:t', elements: [{ type: 'text', text }] }],
  });

  /** Builds an m:e row containing one run per given string. */
  const row = (...texts: string[]) => ({
    name: 'm:e',
    elements: texts.map((text) => run(text)),
  });

  /** Wraps the given m:eqArr children in an m:oMath root. */
  const mathWithEqArr = <T>(children: T[]) => ({
    name: 'm:oMath',
    elements: [{ name: 'm:eqArr', elements: children }],
  });

  it('converts equation array to left-aligned <mtable>', () => {
    const omml = mathWithEqArr([row('x', '=', '1'), row('y', '=', '2')]);

    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();
    const mtable = result!.querySelector('mtable');
    expect(mtable).not.toBeNull();
    expect(mtable!.getAttribute('columnalign')).toBe('left');
    const rows = mtable!.querySelectorAll('mtr');
    expect(rows.length).toBe(2);
    expect(rows[0]!.textContent).toBe('x=1');
    expect(rows[1]!.textContent).toBe('y=2');
  });

  it('returns null for empty equation array', () => {
    const omml = mathWithEqArr([]);

    const result = convertOmmlToMathml(omml, doc);
    expect(result).toBeNull();
  });

  it('strips & alignment markers from row content', () => {
    // ECMA-376 §22.1.2.34: `&` inside m:t is an alignment marker, not literal text.
    // The converter doesn't yet map these to MathML alignment elements, so they
    // should be stripped rather than rendered.
    const omml = mathWithEqArr([row('x', '&=', '1')]);

    const result = convertOmmlToMathml(omml, doc);
    // Fail with a clear expectation (not a TypeError) if conversion yields nothing.
    expect(result).not.toBeNull();
    const rows = result!.querySelectorAll('mtr');
    expect(rows.length).toBe(1);
    expect(rows[0]!.textContent).toBe('x=1');
    expect(rows[0]!.textContent).not.toContain('&');
  });

  it('ignores m:eqArrPr properties element', () => {
    const omml = mathWithEqArr([
      { name: 'm:eqArrPr', elements: [{ name: 'm:ctrlPr' }] },
      row('x'),
      row('y'),
    ]);

    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();
    const rows = result!.querySelectorAll('mtr');
    expect(rows.length).toBe(2);
    expect(rows[0]!.textContent).toBe('x');
    expect(rows[1]!.textContent).toBe('y');
  });

  it('preserves nested math (fraction) inside rows', () => {
    const fraction = {
      name: 'm:f',
      elements: [
        { name: 'm:num', elements: [run('a')] },
        { name: 'm:den', elements: [run('b')] },
      ],
    };
    const omml = mathWithEqArr([{ name: 'm:e', elements: [fraction] }]);

    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();
    const mfrac = result!.querySelector('mtable mtr mtd mfrac');
    expect(mfrac).not.toBeNull();
  });
});

packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import {
2121
convertSubSuperscript,
2222
convertAccent,
2323
convertPreSubSuperscript,
24+
convertEquationArray,
2425
convertRadical,
2526
convertLowerLimit,
2627
convertUpperLimit,
@@ -47,6 +48,7 @@ const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
4748
'm:acc': convertAccent, // Accent (diacritical mark above base)
4849
'm:bar': convertBar, // Bar (overbar/underbar)
4950
'm:d': convertDelimiter, // Delimiter (parentheses, brackets, braces)
51+
'm:eqArr': convertEquationArray, // Equation array (vertical array of equations)
5052
'm:f': convertFraction, // Fraction (numerator/denominator)
5153
'm:func': convertFunction, // Function apply (sin, cos, log, etc.)
5254
'm:limLow': convertLowerLimit, // Lower limit (e.g., lim)
@@ -60,7 +62,6 @@ const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
6062
// ── Not yet implemented (community contributions welcome) ────────────────
6163
'm:borderBox': null, // Border box (border around math content)
6264
'm:box': null, // Box (invisible grouping container)
63-
'm:eqArr': null, // Equation array (vertical array of equations)
6465
'm:groupChr': null, // Group character (overbrace, underbrace)
6566
'm:m': null, // Matrix (grid of elements)
6667
'm:nary': null, // N-ary operator (integral, summation, product)
11.2 KB
Binary file not shown.

tests/behavior/tests/importing/math-equations.spec.ts

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const SPRE_DOC = path.resolve(__dirname, 'fixtures/math-spre-tests.docx');
99
const DELIMITER_DOC = path.resolve(__dirname, 'fixtures/math-delimiter-tests.docx');
1010
const RADICAL_DOC = path.resolve(__dirname, 'fixtures/math-radical-tests.docx');
1111
const LIMIT_DOC = path.resolve(__dirname, 'fixtures/math-limit-tests.docx');
12+
const EQARR_DOC = path.resolve(__dirname, 'fixtures/math-eqarr-tests.docx');
1213
// Single-object test docs are used for focused verification by community contributors.
1314
// The all-objects doc is used for behavior tests since it exercises the full pipeline.
1415

@@ -775,3 +776,109 @@ test.describe('m:limLow / m:limUpp (limit object) rendering', () => {
775776
expect(leaked).toEqual([]);
776777
});
777778
});
779+
780+
test.describe('m:eqArr (equation array) rendering', () => {
  // The math-eqarr-tests.docx fixture holds 5 Word-native equation arrays:
  //   1. Basic 2-row                — x=1 / y=2
  //   2. Row with nested fraction   — a/b=c / x=y
  //   3. Row with subscript         — x_1=a / y=b
  //   4. Alignment markers (&)      — x&=1 / yy&=22 (ampersands must be stripped)
  //   5. With m:eqArrPr properties  — x=1 / y=2 (Pr element must be filtered)
  //
  // Every callback passed to page.evaluate is kept self-contained: it refers
  // only to names declared inside itself and to browser globals.

  test('renders all 5 equation arrays as <mtable columnalign="left">', async ({ superdoc }) => {
    await superdoc.loadDocument(EQARR_DOC);
    await superdoc.waitForStable();

    const tables = await superdoc.page.evaluate(() =>
      Array.from(document.querySelectorAll('mtable'), (table) => ({
        columnalign: table.getAttribute('columnalign'),
        mtrCount: table.querySelectorAll(':scope > mtr').length,
      })),
    );

    expect(tables.length).toBe(5);
    for (const { columnalign, mtrCount } of tables) {
      expect(columnalign).toBe('left');
      expect(mtrCount).toBe(2);
    }
  });

  test('preserves nested <mfrac> inside an equation array row (case 2)', async ({ superdoc }) => {
    await superdoc.loadDocument(EQARR_DOC);
    await superdoc.waitForStable();

    const hasFracInRow = await superdoc.page.evaluate(() =>
      Array.from(document.querySelectorAll('mtable')).some((table) => {
        const frac = table.querySelector(':scope > mtr > mtd mfrac');
        if (!frac || frac.children.length !== 2) {
          return false;
        }
        const [numerator, denominator] = Array.from(frac.children);
        return numerator?.textContent === 'a' && denominator?.textContent === 'b';
      }),
    );

    expect(hasFracInRow).toBe(true);
  });

  test('preserves nested <msub> inside an equation array row (case 3)', async ({ superdoc }) => {
    await superdoc.loadDocument(EQARR_DOC);
    await superdoc.waitForStable();

    const hasSubInRow = await superdoc.page.evaluate(() => {
      for (const table of Array.from(document.querySelectorAll('mtable'))) {
        if (table.querySelector(':scope > mtr > mtd msub') !== null) {
          return true;
        }
      }
      return false;
    });

    expect(hasSubInRow).toBe(true);
  });

  test('strips & alignment markers from row content (case 4)', async ({ superdoc }) => {
    await superdoc.loadDocument(EQARR_DOC);
    await superdoc.waitForStable();

    // Per ECMA-376 §22.1.2.34 a `&` inside m:t marks an alignment point and is
    // not literal text. Until the converter maps it to MathML alignment groups,
    // it must be dropped instead of showing up as an ampersand.
    const alignmentData = await superdoc.page.evaluate(() => {
      const cellTexts: string[] = [];
      for (const table of Array.from(document.querySelectorAll('mtable'))) {
        for (const cell of Array.from(table.querySelectorAll(':scope > mtr > mtd'))) {
          cellTexts.push(cell.textContent ?? '');
        }
      }
      return {
        anyContainsAmpersand: cellTexts.some((text) => text.includes('&')),
        hasStrippedRow: cellTexts.includes('yy=22'),
      };
    });

    expect(alignmentData.anyContainsAmpersand).toBe(false);
    expect(alignmentData.hasStrippedRow).toBe(true);
  });

  test('m:eqArrPr property element is filtered out (case 5)', async ({ superdoc }) => {
    await superdoc.loadDocument(EQARR_DOC);
    await superdoc.waitForStable();

    // Word writes an m:eqArrPr wrapper around m:baseJc / m:maxDist / m:rSp /
    // m:ctrlPr and friends. None of them may surface in the rendered math as
    // DOM elements named "eqarrpr" / "basejc" / "maxdist" / "ctrlpr".
    const leaked = await superdoc.page.evaluate(() => {
      const propertyNames = ['eqarrpr', 'basejc', 'maxdist', 'objdist', 'rsp', 'rsprule', 'ctrlpr'];
      return Array.from(document.querySelectorAll('math *'))
        .map((el) => el.localName.toLowerCase())
        .filter((name) => propertyNames.includes(name));
    });

    expect(leaked).toEqual([]);
  });
});

0 commit comments

Comments
 (0)