diff --git a/lib/doc/pivot-table.js b/lib/doc/pivot-table.js
index 9de9688f6..50960dda8 100644
--- a/lib/doc/pivot-table.js
+++ b/lib/doc/pivot-table.js
@@ -33,10 +33,14 @@ function makePivotTable(worksheet, model) {
let {rows, columns, values, pages = []} = model;
const {metric, pageDefaults = {}} = model;
- // Generate sharedItems for ALL fields in the source, not just the ones used by this pivot table
- // This ensures Excel can properly display any field configuration
+ // Generate sharedItems only for fields used as pivot axes (rows, columns, pages).
+ // Value fields and unused fields use lightweight sharedItems (no enumeration) — Excel
+ // flags pivot caches that materialize sharedItems for unused/long-text fields with a
+ // "Repaired Records: PivotTable report from /xl/pivotCache/pivotCacheDefinition*.xml"
+ // warning on open (see protobi/exceljs#42).
const allHeaderNames = sourceSheet.getRow(1).values.slice(1);
- const cacheFields = makeCacheFields(sourceSheet, allHeaderNames);
+ const axialFieldNames = [...rows, ...columns, ...pages];
+ const cacheFields = makeCacheFields(sourceSheet, axialFieldNames, allHeaderNames);
// let {rows, columns, values, pages} use indices instead of names;
// names can then be accessed via `pivotTable.cacheFields[index].name`.
@@ -140,8 +144,10 @@ function validate(worksheet, model) {
}
}
-function makeCacheFields(worksheet, fieldNamesWithSharedItems) {
+function makeCacheFields(worksheet, fieldNamesWithSharedItems, allFieldNames /* reserved */) {
// Cache fields are used in pivot tables to reference source data.
+ // `fieldNamesWithSharedItems` = axial fields (rows/columns/pages) that need full enumeration.
+ // `allFieldNames` is reserved for future use; iteration uses worksheet.getRow(1).
//
// Example
// -------
@@ -193,12 +199,35 @@ function makeCacheFields(worksheet, fieldNamesWithSharedItems) {
return toSortedArray(uniqueValues);
};
+ // Inspect a column's body values to derive lightweight sharedItems type hints
+ // WITHOUT enumerating every unique value. Used for non-axial fields — Excel can
+ // rebuild the index on refresh from the inline values in pivotCacheRecords.
+ const inspect = columnIndex => {
+ const columnValues = worksheet.getColumn(columnIndex).values.slice(2);
+ const hints = {containsBlank: false, containsString: false, containsNumber: false};
+ for (const value of columnValues) {
+ if (value === null || value === undefined) {
+ hints.containsBlank = true;
+ } else if (typeof value === 'number') {
+ hints.containsNumber = true;
+ } else {
+ hints.containsString = true;
+ }
+ }
+ return hints;
+ };
+
// make result
const result = [];
for (const columnIndex of range(1, names.length)) {
const name = names[columnIndex];
- const sharedItems = nameToHasSharedItems[name] ? aggregate(columnIndex) : null;
- result.push({name, sharedItems});
+ if (nameToHasSharedItems[name]) {
+ // Axial field — full enumeration so pivotCacheRecords can reference values by index.
+ result.push({name, sharedItems: aggregate(columnIndex)});
+ } else {
+ // Non-axial field — lightweight sharedItems with type hints only.
+ result.push({name, sharedItems: null, hints: inspect(columnIndex)});
+ }
}
return result;
}
diff --git a/lib/xlsx/xform/pivot-table/cache-field.js b/lib/xlsx/xform/pivot-table/cache-field.js
index 8dcea835e..5096beea1 100644
--- a/lib/xlsx/xform/pivot-table/cache-field.js
+++ b/lib/xlsx/xform/pivot-table/cache-field.js
@@ -1,22 +1,18 @@
class CacheField {
- constructor({name, sharedItems}) {
- // string type
+ constructor({name, sharedItems, hints}) {
+ // Axial field (used as pivot row/column/page):
+ // { name: 'A', sharedItems: ['a1', 'a2', 'a3'] }
//
- // {
- // 'name': 'A',
- // 'sharedItems': ['a1', 'a2', 'a3']
- // }
- //
- // or
- //
- // integer type
- //
- // {
- // 'name': 'D',
- // 'sharedItems': null
- // }
+ // Non-axial field (value field or unused) — sharedItems is null and `hints`
+ // describes the column's value types so we emit a lightweight
+ // without enumerating every unique value.
+ // This avoids triggering Excel's "Repaired Records: PivotTable report"
+ // warning on pivot caches with high-cardinality or long-text non-axial
+ // fields (see protobi/exceljs#42).
+ // { name: 'D', sharedItems: null, hints: { containsBlank, containsString, containsNumber } }
this.name = name;
this.sharedItems = sharedItems;
+ this.hints = hints || null;
}
// Helper function to escape XML special characters
@@ -34,11 +30,28 @@ class CacheField {
// PivotCache Field: http://www.datypic.com/sc/ooxml/e-ssml_cacheField-1.html
// Shared Items: http://www.datypic.com/sc/ooxml/e-ssml_sharedItems-1.html
- // integer types
+ // Non-axial field — emit lightweight based on type hints.
+ // pivotCacheRecords inlines values for these fields (no indices).
if (this.sharedItems === null) {
- // TK(2023-07-18): left out attributes... minValue="5" maxValue="45"
+ const hints = this.hints || {};
+ const attrs = [];
+ if (hints.containsNumber && hints.containsString) {
+ attrs.push('containsSemiMixedTypes="1"');
+ attrs.push('containsNumber="1"');
+ attrs.push('containsString="1"');
+ } else if (hints.containsNumber) {
+ attrs.push('containsSemiMixedTypes="0"');
+ attrs.push('containsString="0"');
+ attrs.push('containsNumber="1"');
+ } else if (hints.containsString) {
+ attrs.push('containsSemiMixedTypes="0"');
+ attrs.push('containsNumber="0"');
+ attrs.push('containsString="1"');
+ }
+ if (hints.containsBlank) attrs.push('containsBlank="1"');
+ const attrStr = attrs.length ? ` ${attrs.join(' ')}` : '';
return `
-
+
`;
}
diff --git a/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js b/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js
index 18f4ef379..d7b46a89e 100644
--- a/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js
+++ b/lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js
@@ -21,6 +21,13 @@ class PivotCacheDefinitionXform extends BaseXform {
render(xmlStream, model) {
const {sourceSheet, cacheFields} = model;
+ // recordCount must match the count attribute of — the
+ // number of data rows in the source sheet (header excluded). Excel uses
+ // this to validate the cache; a mismatch triggers the
+ // "Repaired Records: PivotTable report from /xl/pivotCache/
+ // pivotCacheDefinition*.xml" warning (see protobi/exceljs#42).
+ const recordCount = Math.max(0, sourceSheet.getSheetValues().length - 2);
+
xmlStream.openXml(XmlStream.StdDocAttributes);
xmlStream.openNode(this.tag, {
...PivotCacheDefinitionXform.PIVOT_CACHE_DEFINITION_ATTRIBUTES,
@@ -31,7 +38,7 @@ class PivotCacheDefinitionXform extends BaseXform {
createdVersion: '8',
refreshedVersion: '8',
minRefreshableVersion: '3',
- recordCount: cacheFields.length + 1,
+ recordCount,
});
xmlStream.openNode('cacheSource', {type: 'worksheet'});
diff --git a/spec/integration/issues/issue-42-pivot-cache-repair.spec.js b/spec/integration/issues/issue-42-pivot-cache-repair.spec.js
new file mode 100644
index 000000000..c8936548c
--- /dev/null
+++ b/spec/integration/issues/issue-42-pivot-cache-repair.spec.js
@@ -0,0 +1,121 @@
+// Regression test for protobi/exceljs#42:
+// "[BUG] Pivot cache triggers Excel repair"
+//
+// When a workbook contains a pivot table whose source sheet has fields that
+// are NOT used as pivot axes (rows/columns/pages) and contain high-cardinality
+// or long-text values, Excel opens the file with:
+//
+// "Repaired Records: PivotTable report from /xl/pivotCache/
+// pivotCacheDefinition1.xml part (PivotTable cache)"
+//
+// Two bugs caused this:
+// 1. `recordCount` on pivotCacheDefinition was set to `cacheFields.length + 1`
+// instead of the number of data rows. Excel validates this matches the
+// `` attribute.
+// 2. `makeCacheFields` enumerated sharedItems for ALL source fields, including
+// non-axial value/unused fields. Excel flags full sharedItems on non-axial
+// fields as a repair condition for high-cardinality or long-text columns.
+//
+// Fix: only enumerate sharedItems for axial fields (rows/columns/pages).
+// Non-axial fields get a lightweight with type hints.
+
+const fs = require('fs');
+const {promisify} = require('util');
+const fsReadFileAsync = promisify(fs.readFile);
+const JSZip = require('jszip');
+
+const ExcelJS = verquire('exceljs');
+
+const TEST_XLSX_FILEPATH = './spec/out/wb.issue-42.xlsx';
+
+function findCacheField(xml, name) {
+ const re = new RegExp(`]*>[\\s\\S]*?`);
+ return xml.match(re);
+}
+
+describe('Workbook', () => {
+ describe('Pivot Tables (issue #42 — Excel repair on cache definition)', () => {
+ let cacheDefXml;
+ let cacheRecordsXml;
+ const ROW_COUNT = 20;
+
+ before(async () => {
+ const workbook = new ExcelJS.Workbook();
+ const dataSheet = workbook.addWorksheet('Data');
+ dataSheet.columns = [
+ {header: 'Repo', key: 'repo', width: 20},
+ {header: 'Severity', key: 'severity', width: 20},
+ // Description: long-text, high-cardinality, NOT used as a pivot axis.
+ {header: 'Description', key: 'description', width: 80},
+ {header: 'Counter', key: 'count', width: 10},
+ ];
+
+ for (let i = 0; i < ROW_COUNT; i++) {
+ dataSheet.addRow({
+ repo: `repo-${i % 5}`,
+ severity: ['HIGH', 'LOW'][i % 2],
+ description: `long-description-${i}-${'x'.repeat(100)}`,
+ count: 1,
+ });
+ }
+
+ const pivotSheet = workbook.addWorksheet('Summary');
+ pivotSheet.addPivotTable({
+ sourceSheet: dataSheet,
+ rows: ['Repo'],
+ columns: ['Severity'],
+ values: ['Counter'],
+ metric: 'sum',
+ });
+
+ await workbook.xlsx.writeFile(TEST_XLSX_FILEPATH);
+ const buffer = await fsReadFileAsync(TEST_XLSX_FILEPATH);
+ const zip = await JSZip.loadAsync(buffer);
+ cacheDefXml = await zip.file('xl/pivotCache/pivotCacheDefinition1.xml').async('string');
+ cacheRecordsXml = await zip.file('xl/pivotCache/pivotCacheRecords1.xml').async('string');
+ });
+
+ it('emits a pivot cache definition', () => {
+ expect(cacheDefXml).to.be.a('string');
+ expect(cacheDefXml).to.include(' {
+ // Was previously `cacheFields.length + 1` (= 5 for this fixture);
+ // Excel expects it to match which is ROW_COUNT.
+ const match = cacheDefXml.match(/recordCount="(\d+)"/);
+ expect(match).to.not.be.null();
+ expect(Number(match[1])).to.equal(ROW_COUNT);
+
+ const recordsCountMatch = cacheRecordsXml.match(/]*count="(\d+)"/);
+ expect(recordsCountMatch).to.not.be.null();
+ expect(Number(recordsCountMatch[1])).to.equal(ROW_COUNT);
+ });
+
+ it('does NOT enumerate sharedItems for the non-axial Description field', () => {
+ const descMatch = findCacheField(cacheDefXml, 'Description');
+ expect(descMatch, 'Description cacheField missing').to.not.be.null();
+ const descBlock = descMatch[0];
+ // Must not contain inline children.
+ expect(descBlock).to.not.include(' {
+ const reloaded = new ExcelJS.Workbook();
+ await reloaded.xlsx.readFile(TEST_XLSX_FILEPATH);
+ expect(reloaded.worksheets.map(ws => ws.name)).to.include('Data');
+ expect(reloaded.worksheets.map(ws => ws.name)).to.include('Summary');
+ });
+ });
+});