diff --git a/lib/doc/pivot-table.js b/lib/doc/pivot-table.js index 9de9688f6..50960dda8 100644 --- a/lib/doc/pivot-table.js +++ b/lib/doc/pivot-table.js @@ -33,10 +33,14 @@ function makePivotTable(worksheet, model) { let {rows, columns, values, pages = []} = model; const {metric, pageDefaults = {}} = model; - // Generate sharedItems for ALL fields in the source, not just the ones used by this pivot table - // This ensures Excel can properly display any field configuration + // Generate sharedItems only for fields used as pivot axes (rows, columns, pages). + // Value fields and unused fields use lightweight sharedItems (no enumeration) — Excel + // flags pivot caches that materialize sharedItems for unused/long-text fields with a + // "Repaired Records: PivotTable report from /xl/pivotCache/pivotCacheDefinition*.xml" + // warning on open (see protobi/exceljs#42). const allHeaderNames = sourceSheet.getRow(1).values.slice(1); - const cacheFields = makeCacheFields(sourceSheet, allHeaderNames); + const axialFieldNames = [...rows, ...columns, ...pages]; + const cacheFields = makeCacheFields(sourceSheet, axialFieldNames, allHeaderNames); // let {rows, columns, values, pages} use indices instead of names; // names can then be accessed via `pivotTable.cacheFields[index].name`. @@ -140,8 +144,10 @@ function validate(worksheet, model) { } } -function makeCacheFields(worksheet, fieldNamesWithSharedItems) { +function makeCacheFields(worksheet, fieldNamesWithSharedItems, allFieldNames /* reserved */) { // Cache fields are used in pivot tables to reference source data. + // `fieldNamesWithSharedItems` = axial fields (rows/columns/pages) that need full enumeration. + // `allFieldNames` is reserved for future use; iteration uses worksheet.getRow(1). // // Example // ------- @@ -193,12 +199,35 @@ function makeCacheFields(worksheet, fieldNamesWithSharedItems) { return toSortedArray(uniqueValues); }; + // Inspect a column's body values to derive lightweight sharedItems type hints + // WITHOUT enumerating every unique value. Used for non-axial fields — Excel can + // rebuild the index on refresh from the inline values in pivotCacheRecords. + const inspect = columnIndex => { + const columnValues = worksheet.getColumn(columnIndex).values.slice(2); + const hints = {containsBlank: false, containsString: false, containsNumber: false}; + for (const value of columnValues) { + if (value === null || value === undefined) { + hints.containsBlank = true; + } else if (typeof value === 'number') { + hints.containsNumber = true; + } else { + hints.containsString = true; + } + } + return hints; + }; + // make result const result = []; for (const columnIndex of range(1, names.length)) { const name = names[columnIndex]; - const sharedItems = nameToHasSharedItems[name] ? aggregate(columnIndex) : null; - result.push({name, sharedItems}); + if (nameToHasSharedItems[name]) { + // Axial field — full enumeration so pivotCacheRecords can reference values by index. + result.push({name, sharedItems: aggregate(columnIndex)}); + } else { + // Non-axial field — lightweight sharedItems with type hints only. + result.push({name, sharedItems: null, hints: inspect(columnIndex)}); + } } return result; } diff --git a/lib/xlsx/xform/pivot-table/cache-field.js b/lib/xlsx/xform/pivot-table/cache-field.js index 8dcea835e..5096beea1 100644 --- a/lib/xlsx/xform/pivot-table/cache-field.js +++ b/lib/xlsx/xform/pivot-table/cache-field.js @@ -1,22 +1,18 @@ class CacheField { - constructor({name, sharedItems}) { - // string type + constructor({name, sharedItems, hints}) { + // Axial field (used as pivot row/column/page): + // { name: 'A', sharedItems: ['a1', 'a2', 'a3'] } // - // { - // 'name': 'A', - // 'sharedItems': ['a1', 'a2', 'a3'] - // } - // - // or - // - // integer type - // - // { - // 'name': 'D', - // 'sharedItems': null - // } + // Non-axial field (value field or unused) — sharedItems is null and `hints` + // describes the column's value types so we emit a lightweight + // without enumerating every unique value. + // This avoids triggering Excel's "Repaired Records: PivotTable report" + // warning on pivot caches with high-cardinality or long-text non-axial + // fields (see protobi/exceljs#42). + // { name: 'D', sharedItems: null, hints: { containsBlank, containsString, containsNumber } } this.name = name; this.sharedItems = sharedItems; + this.hints = hints || null; } // Helper function to escape XML special characters @@ -34,11 +30,28 @@ class CacheField { // PivotCache Field: http://www.datypic.com/sc/ooxml/e-ssml_cacheField-1.html // Shared Items: http://www.datypic.com/sc/ooxml/e-ssml_sharedItems-1.html - // integer types + // Non-axial field — emit lightweight based on type hints. + // pivotCacheRecords inlines values for these fields (no indices). if (this.sharedItems === null) { - // TK(2023-07-18): left out attributes... minValue="5" maxValue="45" + const hints = this.hints || {}; + const attrs = []; + if (hints.containsNumber && hints.containsString) { + attrs.push('containsSemiMixedTypes="1"'); + attrs.push('containsNumber="1"'); + attrs.push('containsString="1"'); + } else if (hints.containsNumber) { + attrs.push('containsSemiMixedTypes="0"'); + attrs.push('containsString="0"'); + attrs.push('containsNumber="1"'); + } else if (hints.containsString) { + attrs.push('containsSemiMixedTypes="0"'); + attrs.push('containsNumber="0"'); + attrs.push('containsString="1"'); + } + if (hints.containsBlank) attrs.push('containsBlank="1"'); + const attrStr = attrs.length ? ` ${attrs.join(' ')}` : ''; return ` - + `; } diff --git a/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js b/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js index 18f4ef379..d7b46a89e 100644 --- a/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js +++ b/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js @@ -21,6 +21,13 @@ class PivotCacheDefinitionXform extends BaseXform { render(xmlStream, model) { const {sourceSheet, cacheFields} = model; + // recordCount must match the count attribute of — the + // number of data rows in the source sheet (header excluded). Excel uses + // this to validate the cache; a mismatch triggers the + // "Repaired Records: PivotTable report from /xl/pivotCache/ + // pivotCacheDefinition*.xml" warning (see protobi/exceljs#42). + const recordCount = Math.max(0, sourceSheet.getSheetValues().length - 2); + xmlStream.openXml(XmlStream.StdDocAttributes); xmlStream.openNode(this.tag, { ...PivotCacheDefinitionXform.PIVOT_CACHE_DEFINITION_ATTRIBUTES, @@ -31,7 +38,7 @@ class PivotCacheDefinitionXform extends BaseXform { createdVersion: '8', refreshedVersion: '8', minRefreshableVersion: '3', - recordCount: cacheFields.length + 1, + recordCount, }); xmlStream.openNode('cacheSource', {type: 'worksheet'}); diff --git a/spec/integration/issues/issue-42-pivot-cache-repair.spec.js b/spec/integration/issues/issue-42-pivot-cache-repair.spec.js new file mode 100644 index 000000000..c8936548c --- /dev/null +++ b/spec/integration/issues/issue-42-pivot-cache-repair.spec.js @@ -0,0 +1,121 @@ +// Regression test for protobi/exceljs#42: +// "[BUG] Pivot cache triggers Excel repair" +// +// When a workbook contains a pivot table whose source sheet has fields that +// are NOT used as pivot axes (rows/columns/pages) and contain high-cardinality +// or long-text values, Excel opens the file with: +// +// "Repaired Records: PivotTable report from /xl/pivotCache/ +// pivotCacheDefinition1.xml part (PivotTable cache)" +// +// Two bugs caused this: +// 1. `recordCount` on pivotCacheDefinition was set to `cacheFields.length + 1` +// instead of the number of data rows. Excel validates this matches the +// `` attribute. +// 2. `makeCacheFields` enumerated sharedItems for ALL source fields, including +// non-axial value/unused fields. Excel flags full sharedItems on non-axial +// fields as a repair condition for high-cardinality or long-text columns. +// +// Fix: only enumerate sharedItems for axial fields (rows/columns/pages). +// Non-axial fields get a lightweight with type hints. + +const fs = require('fs'); +const {promisify} = require('util'); +const fsReadFileAsync = promisify(fs.readFile); +const JSZip = require('jszip'); + +const ExcelJS = verquire('exceljs'); + +const TEST_XLSX_FILEPATH = './spec/out/wb.issue-42.xlsx'; + +function findCacheField(xml, name) { + const re = new RegExp(`]*>[\\s\\S]*?`); + return xml.match(re); +} + +describe('Workbook', () => { + describe('Pivot Tables (issue #42 — Excel repair on cache definition)', () => { + let cacheDefXml; + let cacheRecordsXml; + const ROW_COUNT = 20; + + before(async () => { + const workbook = new ExcelJS.Workbook(); + const dataSheet = workbook.addWorksheet('Data'); + dataSheet.columns = [ + {header: 'Repo', key: 'repo', width: 20}, + {header: 'Severity', key: 'severity', width: 20}, + // Description: long-text, high-cardinality, NOT used as a pivot axis. + {header: 'Description', key: 'description', width: 80}, + {header: 'Counter', key: 'count', width: 10}, + ]; + + for (let i = 0; i < ROW_COUNT; i++) { + dataSheet.addRow({ + repo: `repo-${i % 5}`, + severity: ['HIGH', 'LOW'][i % 2], + description: `long-description-${i}-${'x'.repeat(100)}`, + count: 1, + }); + } + + const pivotSheet = workbook.addWorksheet('Summary'); + pivotSheet.addPivotTable({ + sourceSheet: dataSheet, + rows: ['Repo'], + columns: ['Severity'], + values: ['Counter'], + metric: 'sum', + }); + + await workbook.xlsx.writeFile(TEST_XLSX_FILEPATH); + const buffer = await fsReadFileAsync(TEST_XLSX_FILEPATH); + const zip = await JSZip.loadAsync(buffer); + cacheDefXml = await zip.file('xl/pivotCache/pivotCacheDefinition1.xml').async('string'); + cacheRecordsXml = await zip.file('xl/pivotCache/pivotCacheRecords1.xml').async('string'); + }); + + it('emits a pivot cache definition', () => { + expect(cacheDefXml).to.be.a('string'); + expect(cacheDefXml).to.include(' { + // Was previously `cacheFields.length + 1` (= 5 for this fixture); + // Excel expects it to match which is ROW_COUNT. + const match = cacheDefXml.match(/recordCount="(\d+)"/); + expect(match).to.not.be.null(); + expect(Number(match[1])).to.equal(ROW_COUNT); + + const recordsCountMatch = cacheRecordsXml.match(/]*count="(\d+)"/); + expect(recordsCountMatch).to.not.be.null(); + expect(Number(recordsCountMatch[1])).to.equal(ROW_COUNT); + }); + + it('does NOT enumerate sharedItems for the non-axial Description field', () => { + const descMatch = findCacheField(cacheDefXml, 'Description'); + expect(descMatch, 'Description cacheField missing').to.not.be.null(); + const descBlock = descMatch[0]; + // Must not contain inline children. + expect(descBlock).to.not.include(' { + const reloaded = new ExcelJS.Workbook(); + await reloaded.xlsx.readFile(TEST_XLSX_FILEPATH); + expect(reloaded.worksheets.map(ws => ws.name)).to.include('Data'); + expect(reloaded.worksheets.map(ws => ws.name)).to.include('Summary'); + }); + }); +});