diff --git a/spreadsheet-formula-provenance-guard/README.md b/spreadsheet-formula-provenance-guard/README.md
new file mode 100644
index 00000000..466ada07
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/README.md
@@ -0,0 +1,17 @@
+# Spreadsheet Formula Provenance Guard
+
+Self-contained SCIBASE Scientific/Engineering Data & Code Hosting slice for issue #14. The guard checks whether uploaded spreadsheet datasets are reproducible and safe to publish before metadata-aware previews, normalized exports, or DOI-ready research packets are generated.
+
+## Why this slice is distinct
+
+Existing #14 submissions cover broad FAIR manifests, artifact package integrity, preview cache/version drift, raw-instrument previews, notebook previews, retention/tombstones, model-card lineage, license metadata, sensitive redaction, schema evolution, data dictionaries, persistent identifiers, SBOM/advisory checks, upload checkpoints, replica consistency, column sensitivity, malware/archive quarantine, executable sandbox egress policy, and supplementary media accessibility previews. This module focuses only on spreadsheet formula provenance: formula cells, volatile functions, stale calculated values, hidden sheets, external workbook links, macro-enabled files, normalized export checksums, and reviewer-ready transformation evidence.
+
+## Run
+
+```bash
+npm test
+npm run demo
+npm run demo:video
+```
+
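+Demo artifacts are written to `reports/`: `npm run demo` writes the JSON, Markdown, and SVG reports, and `npm run demo:video` (which needs Python with `imageio`, `numpy`, and Pillow installed) writes the GIF and MP4 files.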
diff --git a/spreadsheet-formula-provenance-guard/demo.js b/spreadsheet-formula-provenance-guard/demo.js
new file mode 100644
index 00000000..b0d166b3
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/demo.js
@@ -0,0 +1,61 @@
+const fs = require("fs");
+const path = require("path");
+
+const { assessSpreadsheetProvenance } = require("./index");
+const { cleanSpreadsheet, riskySpreadsheet } = require("./sample-data");
+
+// All demo artifacts are written to a "reports" directory beside this script.
+const reportsDir = path.join(__dirname, "reports");
+// recursive: true keeps this from failing when the directory already exists.
+fs.mkdirSync(reportsDir, { recursive: true });
+
+/**
+ * Render an assessment report as a Markdown document.
+ *
+ * @param {string} name - Scenario label printed under the title.
+ * @param {object} report - Result of assessSpreadsheetProvenance().
+ * @returns {string} Markdown text that ends with a newline.
+ */
+function markdownReport(name, report) {
+  let findingLines = "- No spreadsheet provenance findings.";
+  if (report.findings.length) {
+    findingLines = report.findings
+      .map(({ severity, code, message }) => `- ${severity.toUpperCase()} ${code}: ${message}`)
+      .join("\n");
+  }
+
+  // The trailing empty entry yields the final newline after the criteria list.
+  return [
+    "# Spreadsheet Formula Provenance Guard",
+    "",
+    `Scenario: ${name}`,
+    "",
+    `Dataset: ${report.datasetId}`,
+    `File: ${report.filename}`,
+    `Decision: ${report.decision.toUpperCase()}`,
+    "",
+    `Reviewed ${report.summary.sheetsReviewed} sheets, ${report.summary.formulaCellsReviewed} formula cells, and ${report.summary.exportsReviewed} exports.`,
+    "",
+    "## Findings",
+    "",
+    findingLines,
+    "",
+    "## Release Criteria",
+    "",
+    report.releaseCriteria.map((criterion) => `- ${criterion}`).join("\n"),
+    "",
+  ].join("\n");
+}
+
+/**
+ * Render a small SVG status card for an assessment report.
+ *
+ * The card is tinted by decision: red for "hold", orange for "revise", and
+ * green for any other decision.
+ *
+ * @param {object} report - Result of assessSpreadsheetProvenance().
+ * @returns {string} A standalone SVG document.
+ */
+function svgReport(report) {
+  const color = report.decision === "hold" ? "#b91c1c" : report.decision === "revise" ? "#c2410c" : "#15803d";
+  // datasetId and filename come from the uploaded manifest, so XML
+  // metacharacters are escaped before they are interpolated into markup.
+  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
+  const { summary } = report;
+  return `<svg xmlns="http://www.w3.org/2000/svg" width="720" height="220" viewBox="0 0 720 220" role="img">
+  <title>Spreadsheet Formula Provenance Guard: ${esc(report.decision)}</title>
+  <rect width="720" height="220" fill="#f8fafc"/>
+  <rect x="0" y="0" width="12" height="220" fill="${color}"/>
+  <text x="36" y="48" font-family="sans-serif" font-size="22" fill="#0f172a">Spreadsheet Formula Provenance Guard</text>
+  <text x="36" y="84" font-family="sans-serif" font-size="16" fill="#334155">${esc(report.datasetId)} / ${esc(report.filename)}</text>
+  <rect x="36" y="104" width="160" height="44" rx="6" fill="${color}"/>
+  <text x="116" y="133" font-family="sans-serif" font-size="20" font-weight="bold" text-anchor="middle" fill="#ffffff">${esc(String(report.decision).toUpperCase())}</text>
+  <text x="36" y="184" font-family="sans-serif" font-size="15" fill="#334155">${esc(summary.findings)} findings: ${esc(summary.high)} high, ${esc(summary.medium)} medium, ${esc(summary.low)} low</text>
+</svg>
+`;
+}
+
+// Assess each bundled sample workbook and write its JSON, Markdown, and SVG
+// artifacts into the reports directory, logging one summary line per scenario.
+const scenarios = [
+  { name: "clean-spreadsheet", spreadsheet: cleanSpreadsheet },
+  { name: "risky-spreadsheet", spreadsheet: riskySpreadsheet },
+];
+
+scenarios.forEach(({ name, spreadsheet }) => {
+  const report = assessSpreadsheetProvenance(spreadsheet);
+  // Renderers are wrapped in thunks so each artifact is rendered immediately
+  // before it is written, in JSON -> Markdown -> SVG order.
+  const artifacts = [
+    ["json", () => JSON.stringify(report, null, 2)],
+    ["md", () => markdownReport(name, report)],
+    ["svg", () => svgReport(report)],
+  ];
+  for (const [extension, render] of artifacts) {
+    fs.writeFileSync(path.join(reportsDir, `${name}.${extension}`), render());
+  }
+  console.log(`${name}: ${report.decision} (${report.summary.findings} findings)`);
+});
diff --git a/spreadsheet-formula-provenance-guard/demo_video.py b/spreadsheet-formula-provenance-guard/demo_video.py
new file mode 100644
index 00000000..9b525849
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/demo_video.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+
+import imageio.v3 as iio
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+
+
+# Directory containing this script; artifacts go in its "reports" subdirectory.
+ROOT = Path(__file__).resolve().parent
+REPORTS = ROOT / "reports"
+# exist_ok=True avoids an error when the directory is already present.
+REPORTS.mkdir(exist_ok=True)
+
+
+def font(size):
+    """Return a font at ``size``, preferring Arial, then Segoe UI.
+
+    Falls back to Pillow's built-in default font when neither TrueType file
+    can be opened.
+    """
+    candidates = ("arial.ttf", "segoeui.ttf")
+    for candidate in candidates:
+        try:
+            loaded = ImageFont.truetype(candidate, size)
+        except OSError:
+            # This font file is not available here; try the next candidate.
+            continue
+        return loaded
+    return ImageFont.load_default()
+
+
+# (title, subtitle) pairs; one slide is rendered per entry, in this order.
+slides = [
+    ("Spreadsheet Formula Guard", "Scientific/Engineering Data & Code Hosting #14"),
+    ("Checks", "formula cells, volatile functions, stale cached values"),
+    ("Checks", "external workbook links, hidden sheets, macro-enabled files"),
+    ("Decision", "hold publication until spreadsheet datasets are reproducible"),
+]
+
+# Load each font size once rather than once per slide.
+title_font = font(40)
+subtitle_font = font(25)
+banner_font = font(22)
+footer_font = font(20)
+
+frames = []
+for index, (title, subtitle) in enumerate(slides, start=1):
+    image = Image.new("RGB", (960, 544), "#10201b")
+    draw = ImageDraw.Draw(image)
+    draw.rectangle((44, 52, 916, 492), outline="#34d399", width=3)
+    draw.text((80, 124), title, fill="#f8fafc", font=title_font)
+    draw.text((80, 206), subtitle, fill="#d1fae5", font=subtitle_font)
+    draw.rectangle((80, 326, 818, 382), fill="#065f46")
+    draw.text((104, 342), "spreadsheet previews must be reproducible outside Excel", fill="#ecfdf5", font=banner_font)
+    # Derive the slide total from the list so the caption stays correct when
+    # slides are added or removed.
+    draw.text((80, 438), f"Slide {index}/{len(slides)} - synthetic reviewer artifact", fill="#cbd5e1", font=footer_font)
+    # Repeat the still image so each slide occupies 14 consecutive frames.
+    frames.extend([image] * 14)
+
+gif_path = REPORTS / "demo.gif"
+mp4_path = REPORTS / "demo.mp4"
+# The GIF is written by Pillow, the MP4 by imageio from the same frame list.
+frames[0].save(gif_path, save_all=True, append_images=frames[1:], duration=120, loop=0)
+iio.imwrite(mp4_path, [np.asarray(frame) for frame in frames], fps=8, codec="libx264")
+print(f"wrote {gif_path}")
+print(f"wrote {mp4_path}")
diff --git a/spreadsheet-formula-provenance-guard/index.js b/spreadsheet-formula-provenance-guard/index.js
new file mode 100644
index 00000000..7d103f02
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/index.js
@@ -0,0 +1,246 @@
+// Finding severities. Any HIGH finding yields a "hold" decision, MEDIUM
+// findings without a HIGH yield "revise", and LOW findings are only counted
+// in the report summary.
+const HIGH = "high";
+const MEDIUM = "medium";
+const LOW = "low";
+
+// Spreadsheet functions whose result can change on recalculation without any
+// input changing, which makes cached values unreproducible. CELL and INFO are
+// included because they can report environment state rather than data.
+const VOLATILE_FUNCTIONS = [
+  "NOW",
+  "TODAY",
+  "RAND",
+  "RANDBETWEEN",
+  "RANDARRAY",
+  "OFFSET",
+  "INDIRECT",
+  "CELL",
+  "INFO",
+];
+
+/**
+ * Validate that a value is a string with non-whitespace content.
+ *
+ * @param {*} value - Candidate value.
+ * @param {string} field - Field path used in the error message.
+ * @returns {string} The value with surrounding whitespace removed.
+ * @throws {TypeError} If the value is not a string or is blank.
+ */
+function requiredString(value, field) {
+  const trimmed = typeof value === "string" ? value.trim() : "";
+  if (trimmed !== "") {
+    return trimmed;
+  }
+  throw new TypeError(`${field} must be a non-empty string`);
+}
+
+/**
+ * Validate that a value is an array and return it unchanged.
+ *
+ * @param {*} value - Candidate value.
+ * @param {string} field - Field path used in the error message.
+ * @returns {Array} The same array instance that was passed in.
+ * @throws {TypeError} If the value is not an array.
+ */
+function array(value, field) {
+  if (Array.isArray(value)) {
+    return value;
+  }
+  throw new TypeError(`${field} must be an array`);
+}
+
+/**
+ * Convert every entry to a string and drop duplicates, keeping first-seen order.
+ *
+ * @param {Array} values - Values to stringify and de-duplicate.
+ * @returns {string[]} Distinct string values.
+ */
+function unique(values) {
+  const seen = new Set();
+  for (const text of values.map(String)) {
+    seen.add(text);
+  }
+  return Array.from(seen);
+}
+
+/**
+ * Check whether a value looks like a SHA-256 digest: exactly 64 lowercase
+ * hexadecimal characters.
+ *
+ * @param {*} value - Candidate digest.
+ * @returns {boolean} True only for a 64-character lowercase hex string.
+ */
+function isSha256(value) {
+  if (typeof value !== "string") {
+    return false;
+  }
+  return /^[a-f0-9]{64}$/.test(value);
+}
+
+/**
+ * Normalize one raw cell record into a predictable shape.
+ *
+ * @param {object} raw - Raw cell record from the spreadsheet manifest.
+ * @param {string} sheetName - Owning sheet name, used in error messages.
+ * @param {number} index - Position of the cell within the sheet's cell list.
+ * @returns {{address: string, formula: string, cachedValue: *, recalculatedAt: string, sourceRefs: string[]}}
+ *   formula and recalculatedAt default to "", cachedValue to null, and
+ *   sourceRefs to an empty list.
+ * @throws {TypeError} If address is not a non-empty string or sourceRefs is
+ *   present but not an array.
+ */
+function normalizeCell(raw, sheetName, index) {
+  const fieldPath = `${sheetName}.cells[${index}]`;
+  return {
+    address: requiredString(raw.address, `${fieldPath}.address`),
+    formula: raw.formula ? String(raw.formula) : "",
+    // Only undefined maps to null, so falsy cached values such as 0 or "" survive.
+    cachedValue: raw.cachedValue === undefined ? null : raw.cachedValue,
+    recalculatedAt: raw.recalculatedAt ? String(raw.recalculatedAt) : "",
+    // Validate like the other list fields so a stray string produces a
+    // descriptive error instead of "map is not a function".
+    sourceRefs: unique(array(raw.sourceRefs || [], `${fieldPath}.sourceRefs`)),
+  };
+}
+
+/**
+ * Normalize one raw sheet record, including all of its cells.
+ *
+ * @param {object} raw - Raw sheet record from the spreadsheet manifest.
+ * @param {number} index - Position of the sheet in the workbook's sheet list.
+ * @returns {{name: string, hidden: boolean, purpose: string, cells: object[]}}
+ * @throws {TypeError} If the name is blank, cells is not an array, or a cell is invalid.
+ */
+function normalizeSheet(raw, index) {
+  const name = requiredString(raw.name, `sheets[${index}].name`);
+  const hidden = Boolean(raw.hidden);
+  const purpose = raw.purpose ? String(raw.purpose) : "";
+  const rawCells = array(raw.cells || [], `${name}.cells`);
+  const cells = rawCells.map((cell, cellIndex) => normalizeCell(cell, name, cellIndex));
+  return { name, hidden, purpose, cells };
+}
+
+/**
+ * Validate and normalize a raw spreadsheet manifest.
+ *
+ * Digests are lower-cased so they can be checked against a lowercase-hex
+ * pattern; missing list fields default to empty lists.
+ *
+ * @param {object} raw - Raw spreadsheet manifest.
+ * @returns {object} Manifest with datasetId, filename, uploadedAt, sha256,
+ *   macroEnabled, externalLinks, exports, and sheets.
+ * @throws {TypeError} If a required string is blank or a list field is not an array.
+ */
+function normalizeSpreadsheet(raw) {
+  return {
+    datasetId: requiredString(raw.datasetId, "datasetId"),
+    filename: requiredString(raw.filename, "filename"),
+    uploadedAt: requiredString(raw.uploadedAt, "uploadedAt"),
+    sha256: requiredString(raw.sha256, "sha256").toLowerCase(),
+    macroEnabled: Boolean(raw.macroEnabled),
+    // Validate like exports and sheets so a stray string produces a
+    // descriptive error instead of "map is not a function".
+    externalLinks: unique(array(raw.externalLinks || [], "externalLinks")),
+    exports: array(raw.exports || [], "exports").map((item, index) => ({
+      format: requiredString(item.format, `exports[${index}].format`),
+      path: requiredString(item.path, `exports[${index}].path`),
+      sha256: requiredString(item.sha256, `exports[${index}].sha256`).toLowerCase(),
+    })),
+    sheets: array(raw.sheets || [], "sheets").map(normalizeSheet),
+  };
+}
+
+/**
+ * Build a finding record.
+ *
+ * @param {string} code - Machine-readable finding code.
+ * @param {string} severity - Severity label.
+ * @param {string} sourceId - File, sheet, cell, or export the finding refers to.
+ * @param {string} message - Human-readable description.
+ * @param {string} remediation - Suggested corrective action.
+ * @returns {{code: string, severity: string, sourceId: string, message: string, remediation: string}}
+ */
+function finding(code, severity, sourceId, message, remediation) {
+  // Keys are assigned in the order they appear in serialized reports.
+  const entry = {};
+  entry.code = code;
+  entry.severity = severity;
+  entry.sourceId = sourceId;
+  entry.message = message;
+  entry.remediation = remediation;
+  return entry;
+}
+
+/**
+ * List the distinct function names called in a formula, upper-cased and in
+ * first-seen order.
+ *
+ * Storage prefixes such as "_xlfn." and "_xlws." are stripped, so
+ * "_xlfn.RANDARRAY(" is reported as "RANDARRAY" rather than "XLFN.RANDARRAY".
+ *
+ * @param {*} formula - Formula text; other values are converted with String().
+ * @returns {string[]} Distinct upper-case function names.
+ */
+function formulaFunctions(formula) {
+  // Group 1 is the bare name; any leading _XL*. prefixes are consumed, not captured.
+  const callPattern = /(?:_XL[A-Z]+\.)*([A-Z][A-Z0-9_.]*)\s*\(/g;
+  const names = new Set();
+  for (const match of String(formula).toUpperCase().matchAll(callPattern)) {
+    names.add(match[1]);
+  }
+  return [...names];
+}
+
+/**
+ * Detect formulas that point outside the workbook.
+ *
+ * Matches an http(s) URL, or a bracketed workbook name followed by a sheet
+ * reference, e.g. [Book.xlsx]Sheet1!A1, '[Book.xlsx]Sheet 1'!A1, or [1]Sheet1!A1.
+ * A bracket pair that is not followed by a sheet reference, such as the
+ * structured table reference Table1[Amount], is local and is not flagged.
+ *
+ * @param {*} formula - Formula text; other values are converted with String().
+ * @returns {boolean} True when the formula references another workbook or a URL.
+ */
+function hasExternalReference(formula) {
+  // After "]": an unquoted sheet name (letters, digits, _ or .) then "!", or the
+  // rest of a quoted sheet name ('' is an escaped quote) then "'!".
+  const externalPattern = /\[[^\[\]]+\](?:[\p{L}\p{N}_.]*|(?:[^\[\]'!]|'')*')!|\bhttps?:\/\//iu;
+  return externalPattern.test(String(formula));
+}
+
+/**
+ * Assess whether a spreadsheet dataset is reproducible enough to publish.
+ *
+ * Checks run in a fixed order (workbook, sheets, formula cells, exports) and
+ * findings are reported in that order. The decision is "hold" when any HIGH
+ * finding exists, "revise" when only MEDIUM findings exist, and "release"
+ * otherwise.
+ *
+ * @param {object} rawSpreadsheet - Raw spreadsheet manifest.
+ * @returns {{datasetId: string, filename: string, decision: string, summary: object, findings: object[], releaseCriteria: string[]}}
+ * @throws {TypeError} If the manifest fails normalization.
+ */
+function assessSpreadsheetProvenance(rawSpreadsheet) {
+  const spreadsheet = normalizeSpreadsheet(rawSpreadsheet);
+  const findings = [];
+  // Only cells with a formula are reviewed; each carries a Sheet!Address id.
+  const formulaCells = spreadsheet.sheets.flatMap((sheet) =>
+    sheet.cells
+      .filter((cell) => cell.formula)
+      .map((cell) => ({ ...cell, sheet: sheet.name, sourceId: `${sheet.name}!${cell.address}` }))
+  );
+
+  // Workbook-level checks.
+  if (!isSha256(spreadsheet.sha256)) {
+    findings.push(
+      finding(
+        "INVALID_SPREADSHEET_DIGEST",
+        HIGH,
+        spreadsheet.filename,
+        `${spreadsheet.filename} does not have a valid SHA-256 upload digest.`,
+        "Record the original spreadsheet hash before accepting it as a citable dataset artifact."
+      )
+    );
+  }
+
+  if (spreadsheet.macroEnabled) {
+    findings.push(
+      finding(
+        "MACRO_ENABLED_DATASET",
+        HIGH,
+        spreadsheet.filename,
+        `${spreadsheet.filename} is macro-enabled and cannot be treated as inert tabular data.`,
+        "Quarantine macro-enabled files or publish a macro-free normalized export with reviewed transformation notes."
+      )
+    );
+  }
+
+  if (spreadsheet.externalLinks.length > 0) {
+    findings.push(
+      finding(
+        "EXTERNAL_WORKBOOK_LINKS",
+        HIGH,
+        spreadsheet.filename,
+        `${spreadsheet.filename} depends on external workbook links: ${spreadsheet.externalLinks.join(", ")}.`,
+        "Bundle source workbooks or replace formulas with reproducible local references before publication."
+      )
+    );
+  }
+
+  // Sheet-level checks: a hidden sheet is only a finding when it holds cells.
+  for (const sheet of spreadsheet.sheets) {
+    if (sheet.hidden && sheet.cells.length > 0) {
+      findings.push(
+        finding(
+          "HIDDEN_SHEET_WITH_DATA",
+          MEDIUM,
+          sheet.name,
+          `${sheet.name} is hidden but contains data or formulas.`,
+          "Expose the sheet, document its purpose, or exclude it from the release with an audit note."
+        )
+      );
+    }
+  }
+
+  // Formula-level checks; one cell can raise several findings.
+  for (const cell of formulaCells) {
+    const functions = formulaFunctions(cell.formula);
+    const volatile = functions.filter((name) => VOLATILE_FUNCTIONS.includes(name));
+    if (volatile.length > 0) {
+      findings.push(
+        finding(
+          "VOLATILE_FORMULA",
+          HIGH,
+          cell.sourceId,
+          `${cell.sourceId} uses volatile functions: ${volatile.join(", ")}.`,
+          "Replace volatile formulas with fixed values or a scripted, versioned transformation."
+        )
+      );
+    }
+
+    if (hasExternalReference(cell.formula)) {
+      findings.push(
+        finding(
+          "FORMULA_EXTERNAL_REFERENCE",
+          HIGH,
+          cell.sourceId,
+          `${cell.sourceId} references an external workbook or URL.`,
+          "Bundle the dependency and record its checksum, or normalize the formula into a local reproducible dataset."
+        )
+      );
+    }
+
+    // Either piece of evidence missing is enough to raise the finding.
+    if (cell.cachedValue === null || cell.recalculatedAt === "") {
+      findings.push(
+        finding(
+          "STALE_OR_MISSING_CALCULATION_EVIDENCE",
+          MEDIUM,
+          cell.sourceId,
+          `${cell.sourceId} lacks cached value or recalculation timestamp evidence.`,
+          "Recalculate the workbook in a controlled environment and record cached values before preview/export."
+        )
+      );
+    }
+
+    if (cell.sourceRefs.length === 0) {
+      findings.push(
+        finding(
+          "FORMULA_WITHOUT_PROVENANCE",
+          MEDIUM,
+          cell.sourceId,
+          `${cell.sourceId} has no provenance references for its inputs.`,
+          "Attach source ranges, instruments, upstream datasets, or transformation tickets for formula-derived values."
+        )
+      );
+    }
+  }
+
+  // Export checks. Formats are compared case-insensitively so "CSV" or
+  // "Parquet" is recognized as a normalized export.
+  const exportFormats = new Set(spreadsheet.exports.map((item) => item.format.toLowerCase()));
+  if (!exportFormats.has("csv") && !exportFormats.has("parquet")) {
+    findings.push(
+      finding(
+        "MISSING_NORMALIZED_EXPORT",
+        HIGH,
+        "exports",
+        "Spreadsheet dataset has no CSV or parquet normalized export.",
+        "Publish a normalized tabular export so reviewers can diff and re-use the dataset outside spreadsheet software."
+      )
+    );
+  }
+
+  for (const exported of spreadsheet.exports) {
+    if (!isSha256(exported.sha256)) {
+      findings.push(
+        finding(
+          "INVALID_EXPORT_DIGEST",
+          HIGH,
+          exported.path,
+          `${exported.path} does not have a valid SHA-256 digest.`,
+          "Record a digest for every normalized export before publication."
+        )
+      );
+    }
+  }
+
+  const high = findings.filter((item) => item.severity === HIGH).length;
+  const medium = findings.filter((item) => item.severity === MEDIUM).length;
+  return {
+    datasetId: spreadsheet.datasetId,
+    filename: spreadsheet.filename,
+    decision: high > 0 ? "hold" : medium > 0 ? "revise" : "release",
+    summary: {
+      sheetsReviewed: spreadsheet.sheets.length,
+      formulaCellsReviewed: formulaCells.length,
+      exportsReviewed: spreadsheet.exports.length,
+      findings: findings.length,
+      high,
+      medium,
+      low: findings.filter((item) => item.severity === LOW).length,
+    },
+    findings,
+    // Static checklist; it is the same for every report regardless of findings.
+    releaseCriteria: [
+      "Original spreadsheet and normalized exports have stable SHA-256 digests.",
+      "Formula-derived cells avoid volatile functions and external workbook references.",
+      "Formula cells carry cached value, recalculation timestamp, and provenance evidence.",
+      "Hidden sheets, macros, and external links are disclosed or blocked before publication.",
+      "At least one normalized CSV or parquet export is available for reproducible reuse.",
+    ],
+  };
+}
+
+module.exports = {
+ assessSpreadsheetProvenance,
+ normalizeSpreadsheet,
+};
diff --git a/spreadsheet-formula-provenance-guard/package.json b/spreadsheet-formula-provenance-guard/package.json
new file mode 100644
index 00000000..dfbb7ebf
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/package.json
@@ -0,0 +1,13 @@
+{
+ "name": "spreadsheet-formula-provenance-guard",
+ "version": "1.0.0",
+ "description": "Spreadsheet formula provenance guard for SCIBASE scientific data hosting",
+ "main": "index.js",
+ "type": "commonjs",
+ "scripts": {
+ "test": "node test.js",
+ "demo": "node demo.js",
+ "demo:video": "python demo_video.py"
+ },
+ "license": "MIT"
+}
diff --git a/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.json b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.json
new file mode 100644
index 00000000..ab52a18e
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.json
@@ -0,0 +1,22 @@
+{
+ "datasetId": "dataset-climate-yield-001",
+ "filename": "climate-yield-model.xlsx",
+ "decision": "release",
+ "summary": {
+ "sheetsReviewed": 2,
+ "formulaCellsReviewed": 1,
+ "exportsReviewed": 2,
+ "findings": 0,
+ "high": 0,
+ "medium": 0,
+ "low": 0
+ },
+ "findings": [],
+ "releaseCriteria": [
+ "Original spreadsheet and normalized exports have stable SHA-256 digests.",
+ "Formula-derived cells avoid volatile functions and external workbook references.",
+ "Formula cells carry cached value, recalculation timestamp, and provenance evidence.",
+ "Hidden sheets, macros, and external links are disclosed or blocked before publication.",
+ "At least one normalized CSV or parquet export is available for reproducible reuse."
+ ]
+}
\ No newline at end of file
diff --git a/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.md b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.md
new file mode 100644
index 00000000..7db3f7a1
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.md
@@ -0,0 +1,21 @@
+# Spreadsheet Formula Provenance Guard
+
+Scenario: clean-spreadsheet
+
+Dataset: dataset-climate-yield-001
+File: climate-yield-model.xlsx
+Decision: RELEASE
+
+Reviewed 2 sheets, 1 formula cells, and 2 exports.
+
+## Findings
+
+- No spreadsheet provenance findings.
+
+## Release Criteria
+
+- Original spreadsheet and normalized exports have stable SHA-256 digests.
+- Formula-derived cells avoid volatile functions and external workbook references.
+- Formula cells carry cached value, recalculation timestamp, and provenance evidence.
+- Hidden sheets, macros, and external links are disclosed or blocked before publication.
+- At least one normalized CSV or parquet export is available for reproducible reuse.
diff --git a/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.svg b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.svg
new file mode 100644
index 00000000..00b0b893
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/clean-spreadsheet.svg
@@ -0,0 +1,12 @@
+
\ No newline at end of file
diff --git a/spreadsheet-formula-provenance-guard/reports/demo.gif b/spreadsheet-formula-provenance-guard/reports/demo.gif
new file mode 100644
index 00000000..6f3bd928
Binary files /dev/null and b/spreadsheet-formula-provenance-guard/reports/demo.gif differ
diff --git a/spreadsheet-formula-provenance-guard/reports/demo.mp4 b/spreadsheet-formula-provenance-guard/reports/demo.mp4
new file mode 100644
index 00000000..67bcab77
Binary files /dev/null and b/spreadsheet-formula-provenance-guard/reports/demo.mp4 differ
diff --git a/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.json b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.json
new file mode 100644
index 00000000..3a202983
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.json
@@ -0,0 +1,114 @@
+{
+ "datasetId": "dataset-fragile-xlsx-002",
+ "filename": "fragile-field-results.xlsm",
+ "decision": "hold",
+ "summary": {
+ "sheetsReviewed": 2,
+ "formulaCellsReviewed": 2,
+ "exportsReviewed": 1,
+ "findings": 13,
+ "high": 8,
+ "medium": 5,
+ "low": 0
+ },
+ "findings": [
+ {
+ "code": "INVALID_SPREADSHEET_DIGEST",
+ "severity": "high",
+ "sourceId": "fragile-field-results.xlsm",
+ "message": "fragile-field-results.xlsm does not have a valid SHA-256 upload digest.",
+ "remediation": "Record the original spreadsheet hash before accepting it as a citable dataset artifact."
+ },
+ {
+ "code": "MACRO_ENABLED_DATASET",
+ "severity": "high",
+ "sourceId": "fragile-field-results.xlsm",
+ "message": "fragile-field-results.xlsm is macro-enabled and cannot be treated as inert tabular data.",
+ "remediation": "Quarantine macro-enabled files or publish a macro-free normalized export with reviewed transformation notes."
+ },
+ {
+ "code": "EXTERNAL_WORKBOOK_LINKS",
+ "severity": "high",
+ "sourceId": "fragile-field-results.xlsm",
+ "message": "fragile-field-results.xlsm depends on external workbook links: ../lab-shared/master-calibration.xlsx.",
+ "remediation": "Bundle source workbooks or replace formulas with reproducible local references before publication."
+ },
+ {
+ "code": "HIDDEN_SHEET_WITH_DATA",
+ "severity": "medium",
+ "sourceId": "hidden-calibration",
+ "message": "hidden-calibration is hidden but contains data or formulas.",
+ "remediation": "Expose the sheet, document its purpose, or exclude it from the release with an audit note."
+ },
+ {
+ "code": "VOLATILE_FORMULA",
+ "severity": "high",
+ "sourceId": "hidden-calibration!C7",
+ "message": "hidden-calibration!C7 uses volatile functions: RAND.",
+ "remediation": "Replace volatile formulas with fixed values or a scripted, versioned transformation."
+ },
+ {
+ "code": "FORMULA_EXTERNAL_REFERENCE",
+ "severity": "high",
+ "sourceId": "hidden-calibration!C7",
+ "message": "hidden-calibration!C7 references an external workbook or URL.",
+ "remediation": "Bundle the dependency and record its checksum, or normalize the formula into a local reproducible dataset."
+ },
+ {
+ "code": "STALE_OR_MISSING_CALCULATION_EVIDENCE",
+ "severity": "medium",
+ "sourceId": "hidden-calibration!C7",
+ "message": "hidden-calibration!C7 lacks cached value or recalculation timestamp evidence.",
+ "remediation": "Recalculate the workbook in a controlled environment and record cached values before preview/export."
+ },
+ {
+ "code": "FORMULA_WITHOUT_PROVENANCE",
+ "severity": "medium",
+ "sourceId": "hidden-calibration!C7",
+ "message": "hidden-calibration!C7 has no provenance references for its inputs.",
+ "remediation": "Attach source ranges, instruments, upstream datasets, or transformation tickets for formula-derived values."
+ },
+ {
+ "code": "FORMULA_EXTERNAL_REFERENCE",
+ "severity": "high",
+ "sourceId": "hidden-calibration!C8",
+ "message": "hidden-calibration!C8 references an external workbook or URL.",
+ "remediation": "Bundle the dependency and record its checksum, or normalize the formula into a local reproducible dataset."
+ },
+ {
+ "code": "STALE_OR_MISSING_CALCULATION_EVIDENCE",
+ "severity": "medium",
+ "sourceId": "hidden-calibration!C8",
+ "message": "hidden-calibration!C8 lacks cached value or recalculation timestamp evidence.",
+ "remediation": "Recalculate the workbook in a controlled environment and record cached values before preview/export."
+ },
+ {
+ "code": "FORMULA_WITHOUT_PROVENANCE",
+ "severity": "medium",
+ "sourceId": "hidden-calibration!C8",
+ "message": "hidden-calibration!C8 has no provenance references for its inputs.",
+ "remediation": "Attach source ranges, instruments, upstream datasets, or transformation tickets for formula-derived values."
+ },
+ {
+ "code": "MISSING_NORMALIZED_EXPORT",
+ "severity": "high",
+ "sourceId": "exports",
+ "message": "Spreadsheet dataset has no CSV or parquet normalized export.",
+ "remediation": "Publish a normalized tabular export so reviewers can diff and re-use the dataset outside spreadsheet software."
+ },
+ {
+ "code": "INVALID_EXPORT_DIGEST",
+ "severity": "high",
+ "sourceId": "exports/fragile-field-results.xlsx",
+ "message": "exports/fragile-field-results.xlsx does not have a valid SHA-256 digest.",
+ "remediation": "Record a digest for every normalized export before publication."
+ }
+ ],
+ "releaseCriteria": [
+ "Original spreadsheet and normalized exports have stable SHA-256 digests.",
+ "Formula-derived cells avoid volatile functions and external workbook references.",
+ "Formula cells carry cached value, recalculation timestamp, and provenance evidence.",
+ "Hidden sheets, macros, and external links are disclosed or blocked before publication.",
+ "At least one normalized CSV or parquet export is available for reproducible reuse."
+ ]
+}
\ No newline at end of file
diff --git a/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.md b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.md
new file mode 100644
index 00000000..9eb71ddf
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.md
@@ -0,0 +1,33 @@
+# Spreadsheet Formula Provenance Guard
+
+Scenario: risky-spreadsheet
+
+Dataset: dataset-fragile-xlsx-002
+File: fragile-field-results.xlsm
+Decision: HOLD
+
+Reviewed 2 sheets, 2 formula cells, and 1 exports.
+
+## Findings
+
+- HIGH INVALID_SPREADSHEET_DIGEST: fragile-field-results.xlsm does not have a valid SHA-256 upload digest.
+- HIGH MACRO_ENABLED_DATASET: fragile-field-results.xlsm is macro-enabled and cannot be treated as inert tabular data.
+- HIGH EXTERNAL_WORKBOOK_LINKS: fragile-field-results.xlsm depends on external workbook links: ../lab-shared/master-calibration.xlsx.
+- MEDIUM HIDDEN_SHEET_WITH_DATA: hidden-calibration is hidden but contains data or formulas.
+- HIGH VOLATILE_FORMULA: hidden-calibration!C7 uses volatile functions: RAND.
+- HIGH FORMULA_EXTERNAL_REFERENCE: hidden-calibration!C7 references an external workbook or URL.
+- MEDIUM STALE_OR_MISSING_CALCULATION_EVIDENCE: hidden-calibration!C7 lacks cached value or recalculation timestamp evidence.
+- MEDIUM FORMULA_WITHOUT_PROVENANCE: hidden-calibration!C7 has no provenance references for its inputs.
+- HIGH FORMULA_EXTERNAL_REFERENCE: hidden-calibration!C8 references an external workbook or URL.
+- MEDIUM STALE_OR_MISSING_CALCULATION_EVIDENCE: hidden-calibration!C8 lacks cached value or recalculation timestamp evidence.
+- MEDIUM FORMULA_WITHOUT_PROVENANCE: hidden-calibration!C8 has no provenance references for its inputs.
+- HIGH MISSING_NORMALIZED_EXPORT: Spreadsheet dataset has no CSV or parquet normalized export.
+- HIGH INVALID_EXPORT_DIGEST: exports/fragile-field-results.xlsx does not have a valid SHA-256 digest.
+
+## Release Criteria
+
+- Original spreadsheet and normalized exports have stable SHA-256 digests.
+- Formula-derived cells avoid volatile functions and external workbook references.
+- Formula cells carry cached value, recalculation timestamp, and provenance evidence.
+- Hidden sheets, macros, and external links are disclosed or blocked before publication.
+- At least one normalized CSV or parquet export is available for reproducible reuse.
diff --git a/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.svg b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.svg
new file mode 100644
index 00000000..7a597b1b
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/reports/risky-spreadsheet.svg
@@ -0,0 +1,12 @@
+
\ No newline at end of file
diff --git a/spreadsheet-formula-provenance-guard/requirements-map.md b/spreadsheet-formula-provenance-guard/requirements-map.md
new file mode 100644
index 00000000..7450d321
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/requirements-map.md
@@ -0,0 +1,13 @@
+# Requirements Map
+
+Issue #14 asks for scientific data and code hosting with major file-type support, metadata-aware previews, upload versioning and diffing, FAIR-oriented metadata, executable/reusable artifacts, and machine-discoverable exports.
+
+This slice covers a focused spreadsheet dataset gate:
+
+- Major file types: targets `.xlsx`/`.xlsm` spreadsheet datasets before they are accepted as research artifacts.
+- Metadata-aware previews: blocks previews when formula-derived values lack recalculation evidence or provenance.
+- Upload versioning and diffing: requires stable SHA-256 digests for the original workbook and normalized exports.
+- Reusable data: requires CSV or parquet normalized exports so reviewers can inspect data without spreadsheet software.
+- FAIR and provenance: requires formula source references for derived values and flags hidden sheets, volatile functions, macros, and external workbook links.
+
+Out of scope by design: broad repository manifests, executable sandboxes, model cards, data dictionaries, malware/archive scans, access embargoes, and accessibility previews, because those are already covered by separate same-issue slices.
diff --git a/spreadsheet-formula-provenance-guard/sample-data.js b/spreadsheet-formula-provenance-guard/sample-data.js
new file mode 100644
index 00000000..7fa860ab
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/sample-data.js
@@ -0,0 +1,89 @@
+// Placeholder digests: 64 repeated hex characters, shaped like SHA-256 values.
+const digestA = "a".repeat(64);
+const digestB = "b".repeat(64);
+const digestC = "c".repeat(64);
+
+// Fixture that test.js expects to be assessed as "release" with zero findings:
+// valid digests, no macros or external links, CSV and parquet exports, and a
+// single formula cell with a cached value, timestamp, and source references.
+const cleanSpreadsheet = {
+ datasetId: "dataset-climate-yield-001",
+ filename: "climate-yield-model.xlsx",
+ uploadedAt: "2026-06-01T19:00:00Z",
+ sha256: digestA,
+ macroEnabled: false,
+ externalLinks: [],
+ exports: [
+ { format: "csv", path: "exports/climate-yield-model.csv", sha256: digestB },
+ { format: "parquet", path: "exports/climate-yield-model.parquet", sha256: digestC },
+ ],
+ sheets: [
+ {
+ name: "observations",
+ hidden: false,
+ purpose: "Raw normalized observations",
+ cells: [
+ { address: "A2", cachedValue: "MX-001", sourceRefs: ["instrument:station-mx-001"] },
+ { address: "B2", cachedValue: 18.2, sourceRefs: ["instrument:station-mx-001"] },
+ ],
+ },
+ {
+ name: "model",
+ hidden: false,
+ purpose: "Derived yield score",
+ cells: [
+ {
+ address: "D2",
+ formula: "=ROUND(observations!B2*1.8,2)",
+ cachedValue: 32.76,
+ recalculatedAt: "2026-06-01T19:05:00Z",
+ sourceRefs: ["observations!B2", "protocols/yield-transform-v3.md"],
+ },
+ ],
+ },
+ ],
+};
+
+// Fixture that test.js expects to be assessed as "hold": malformed digests, a
+// macro-enabled file, an external workbook link, an xlsx-only export, and a
+// hidden sheet whose formulas use RAND(), an external workbook, and a URL
+// without cached values, timestamps, or source references.
+const riskySpreadsheet = {
+ datasetId: "dataset-fragile-xlsx-002",
+ filename: "fragile-field-results.xlsm",
+ uploadedAt: "2026-06-01T19:00:00Z",
+ sha256: "not-a-digest",
+ macroEnabled: true,
+ externalLinks: ["../lab-shared/master-calibration.xlsx"],
+ exports: [
+ { format: "xlsx", path: "exports/fragile-field-results.xlsx", sha256: "bad-export-digest" },
+ ],
+ sheets: [
+ {
+ name: "raw",
+ hidden: false,
+ purpose: "Raw observations",
+ cells: [
+ { address: "A2", cachedValue: "BR-204", sourceRefs: [] },
+ ],
+ },
+ {
+ name: "hidden-calibration",
+ hidden: true,
+ purpose: "Undocumented calibration",
+ cells: [
+ {
+ address: "C7",
+ formula: "='[master-calibration.xlsx]Sheet1'!B4+RAND()",
+ cachedValue: null,
+ recalculatedAt: "",
+ sourceRefs: [],
+ },
+ {
+ address: "C8",
+ formula: "=WEBSERVICE(\"https://example.invalid/current-factor\")",
+ cachedValue: null,
+ recalculatedAt: "",
+ sourceRefs: [],
+ },
+ ],
+ },
+ ],
+};
+
+module.exports = {
+ cleanSpreadsheet,
+ riskySpreadsheet,
+};
diff --git a/spreadsheet-formula-provenance-guard/test.js b/spreadsheet-formula-provenance-guard/test.js
new file mode 100644
index 00000000..bb06073f
--- /dev/null
+++ b/spreadsheet-formula-provenance-guard/test.js
@@ -0,0 +1,47 @@
+const assert = require("assert");
+
+const { assessSpreadsheetProvenance, normalizeSpreadsheet } = require("./index");
+const { cleanSpreadsheet, riskySpreadsheet } = require("./sample-data");
+
+// Clean fixture: released with no findings and exactly one formula cell reviewed.
+const clean = assessSpreadsheetProvenance(cleanSpreadsheet);
+assert.strictEqual(clean.decision, "release");
+assert.strictEqual(clean.summary.findings, 0);
+assert.strictEqual(clean.summary.formulaCellsReviewed, 1);
+
+// Risky fixture: held, and every finding code must appear at least once.
+const risky = assessSpreadsheetProvenance(riskySpreadsheet);
+assert.strictEqual(risky.decision, "hold");
+for (const code of [
+ "INVALID_SPREADSHEET_DIGEST",
+ "MACRO_ENABLED_DATASET",
+ "EXTERNAL_WORKBOOK_LINKS",
+ "HIDDEN_SHEET_WITH_DATA",
+ "VOLATILE_FORMULA",
+ "FORMULA_EXTERNAL_REFERENCE",
+ "STALE_OR_MISSING_CALCULATION_EVIDENCE",
+ "FORMULA_WITHOUT_PROVENANCE",
+ "MISSING_NORMALIZED_EXPORT",
+ "INVALID_EXPORT_DIGEST",
+]) {
+ assert(risky.findings.some((finding) => finding.code === code), `missing ${code}`);
+}
+
+// Clean fixture with the model sheet's recalculation timestamp blanked: the
+// only finding is MEDIUM, so the decision drops to "revise" rather than "hold".
+const reviseOnly = assessSpreadsheetProvenance({
+ ...cleanSpreadsheet,
+ sheets: cleanSpreadsheet.sheets.map((sheet) =>
+ sheet.name === "model"
+ ? {
+ ...sheet,
+ cells: sheet.cells.map((cell) => ({ ...cell, recalculatedAt: "" })),
+ }
+ : sheet
+ ),
+});
+assert.strictEqual(reviseOnly.decision, "revise");
+assert(reviseOnly.findings.some((finding) => finding.code === "STALE_OR_MISSING_CALCULATION_EVIDENCE"));
+
+// Normalization rejects a blank required string with a field-specific message.
+assert.throws(
+ () => normalizeSpreadsheet({ ...cleanSpreadsheet, datasetId: "" }),
+ /datasetId must be a non-empty string/
+);
+
+console.log("spreadsheet formula provenance guard tests passed");