From d668bcace7d7effd18ff59bb303ba4f99d959ffe Mon Sep 17 00:00:00 2001 From: David Pine <7679720+IEvangelist@users.noreply.github.com> Date: Tue, 30 Jun 2026 09:29:40 -0500 Subject: [PATCH 1/2] Fix OOM in aspire-version-placeholders build hook The astro:build:done hook re-walked the entire dist tree with a single recursive Promise.all, holding the contents of every .html/.md/.txt file (tens of thousands across all locales, including the large llms-full.txt assets) in memory at once. On the full production build that exhausted the default ~4 GB Node heap and crashed with 'JavaScript heap out of memory'. That broad walk was almost entirely redundant. The remarkAspireVersionPlaceholders remark plugin is already wired into markdown.remarkPlugins, so placeholders are replaced before render: .html pages are correct, and llms*.txt is sourced from rendered HTML via render(entry). The reference/**/*.md endpoints come from API/sample data, not docs content. The only generated artifact that still contains raw placeholders is the per-page .md copies emitted by starlight-page-actions, which viteStaticCopy's raw src/content/docs/** through a regex-only transform that bypasses the remark pipeline. Scope the post-build pass to .md files only and stream them through a bounded worker pool (default concurrency 16). Peak memory is now proportional to the concurrency limit, and the bulk of dist (.html plus the large .txt assets) is no longer re-read. Replacement semantics are unchanged. Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com> --- ...spire-version-placeholders-integration.mjs | 83 ++++++++++++++----- ...aspire-version-placeholders.vitest.test.ts | 69 ++++++++++++++- 2 files changed, 130 insertions(+), 22 deletions(-) diff --git a/src/frontend/config/aspire-version-placeholders-integration.mjs b/src/frontend/config/aspire-version-placeholders-integration.mjs index f17dbb9f5..a189b6f62 100644 --- a/src/frontend/config/aspire-version-placeholders-integration.mjs +++ b/src/frontend/config/aspire-version-placeholders-integration.mjs @@ -3,7 +3,28 @@ import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { replaceAspireVersionPlaceholders } from './remark-aspire-version-placeholders.mjs'; -const generatedAssetExtensions = new Set(['.html', '.md', '.txt']); +// Per-page Markdown copies emitted by `starlight-page-actions` are the only +// generated artifacts that still contain raw `%ASPIRE_VERSION%` placeholders: +// that plugin `viteStaticCopy`s `src/content/docs/**/*.{md,mdx}` straight to +// `dist/**/*.md` through a regex-only transform, so it never runs through the +// `remarkAspireVersionPlaceholders` remark plugin. +// +// Everything else is already handled before it reaches `dist`: +// - `.html` pages -> rendered via the remark pipeline (placeholders replaced +// in the mdast before expressive-code renders code blocks) +// - `llms*.txt` -> `starlight-llms-txt` sources rendered HTML (`render(entry)`) +// - `reference/**.md` -> generated from API/sample data, not docs content +// +// So this post-build pass only needs to touch `.md` files. Scoping it this way +// (instead of walking every `.html`/`.txt` in `dist`) avoids re-reading the bulk +// of the output — including the large `llms-full.txt` assets — which is what +// previously exhausted the Node heap. +const placeholderCopyExtensions = new Set(['.md']); + +// Process the Markdown copies through a small worker pool rather than a single +// recursive `Promise.all` over the whole tree, so peak memory stays proportional +// to the concurrency limit instead of the number of files held open at once. +const DEFAULT_CONCURRENCY = 16; export function aspireVersionPlaceholdersIntegration() { return { @@ -16,28 +37,52 @@ export function aspireVersionPlaceholdersIntegration() { }; } -export async function replaceAspireVersionPlaceholdersInDirectory(directory) { +export async function replaceAspireVersionPlaceholdersInDirectory( + directory, + concurrency = DEFAULT_CONCURRENCY +) { + const files = []; + await collectMarkdownCopies(directory, files); + + if (files.length === 0) { + return; + } + + const workerCount = Math.min(Math.max(1, concurrency), files.length); + let cursor = 0; + + const runWorker = async () => { + while (cursor < files.length) { + const filePath = files[cursor++]; + await replaceAspireVersionPlaceholdersInFile(filePath); + } + }; + + await Promise.all(Array.from({ length: workerCount }, runWorker)); +} + +async function collectMarkdownCopies(directory, files) { const entries = await readdir(directory, { withFileTypes: true }); - await Promise.all( - entries.map(async (entry) => { - const resolvedPath = path.join(directory, entry.name); + for (const entry of entries) { + const resolvedPath = path.join(directory, entry.name); - if (entry.isDirectory()) { - await replaceAspireVersionPlaceholdersInDirectory(resolvedPath); - return; - } + if (entry.isDirectory()) { + await collectMarkdownCopies(resolvedPath, files); + continue; + } - if (!entry.isFile() || !generatedAssetExtensions.has(path.extname(entry.name))) { - return; - } + if (entry.isFile() && placeholderCopyExtensions.has(path.extname(entry.name))) { + files.push(resolvedPath); + } + } +} - const content = await readFile(resolvedPath, 'utf8'); - const updated = replaceAspireVersionPlaceholders(content); +async function replaceAspireVersionPlaceholdersInFile(filePath) { + const content = await readFile(filePath, 'utf8'); + const updated = replaceAspireVersionPlaceholders(content); - if (updated !== content) { - await writeFile(resolvedPath, updated); - } - }) - ); + if (updated !== content) { + await writeFile(filePath, updated); + } } diff --git a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts index 690379f69..f72f92b7c 100644 --- a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts +++ b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts @@ -1,4 +1,4 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import os from 'node:os'; import path from 'node:path'; import { describe, expect, test } from 'vitest'; @@ -48,26 +48,89 @@ describe('Aspire version placeholders', () => { expect(tree.children[2].attributes[0].value).toBe(currentAspireVersion); }); - test('replaces placeholders in generated markdown assets', async () => { + test('replaces placeholders only in Markdown copies, leaving other assets untouched', async () => { const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); try { const markdownPath = path.join(tempDir, 'example.md'); + const htmlPath = path.join(tempDir, 'example.html'); + const textPath = path.join(tempDir, 'example.txt'); + const mdxPath = path.join(tempDir, 'example.mdx'); const jsonPath = path.join(tempDir, 'example.json'); + const placeholderContent = 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'; + await Promise.all([ - writeFile(markdownPath, 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'), + writeFile(markdownPath, placeholderContent), + writeFile(htmlPath, placeholderContent), + writeFile(textPath, placeholderContent), + writeFile(mdxPath, placeholderContent), writeFile(jsonPath, '{"version":"%ASPIRE_VERSION%"}'), ]); await replaceAspireVersionPlaceholdersInDirectory(tempDir); + // Only the `.md` copy (which bypasses the remark pipeline) is rewritten. await expect(readFile(markdownPath, 'utf8')).resolves.toBe( `Aspire ${currentAspireMajorMinorVersion}: ${currentAspireVersion}` ); + + // `.html`/`.txt`/`.mdx` already have placeholders replaced by the remark + // pipeline, and `.json` is never a placeholder target — all are left as-is. + await expect(readFile(htmlPath, 'utf8')).resolves.toBe(placeholderContent); + await expect(readFile(textPath, 'utf8')).resolves.toBe(placeholderContent); + await expect(readFile(mdxPath, 'utf8')).resolves.toBe(placeholderContent); await expect(readFile(jsonPath, 'utf8')).resolves.toBe('{"version":"%ASPIRE_VERSION%"}'); } finally { await rm(tempDir, { recursive: true, force: true }); } }); + + test('replaces Markdown placeholders recursively under a bounded concurrency limit', async () => { + const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); + + try { + // Spread more `.md` files than the worker-pool concurrency across nested + // directories so the bounded recursive traversal is exercised, alongside + // non-`.md` assets that must be left untouched. + const placeholderContent = 'Aspire %ASPIRE_VERSION_MAJOR_MINOR% is %ASPIRE_VERSION%.'; + const ignoredExtensions = ['.html', '.txt', '.mdx', '.json']; + const markdownPaths: string[] = []; + const ignoredPaths: string[] = []; + + for (let depth = 0; depth < 4; depth++) { + const dir = path.join(tempDir, ...Array.from({ length: depth }, (_, i) => `level-${i}`)); + await mkdir(dir, { recursive: true }); + + for (let index = 0; index < 5; index++) { + const markdownPath = path.join(dir, `asset-${index}.md`); + await writeFile(markdownPath, placeholderContent); + markdownPaths.push(markdownPath); + + const extension = ignoredExtensions[index % ignoredExtensions.length]; + const ignoredPath = path.join(dir, `asset-${index}${extension}`); + await writeFile(ignoredPath, placeholderContent); + ignoredPaths.push(ignoredPath); + } + } + + await replaceAspireVersionPlaceholdersInDirectory(tempDir, 2); + + await Promise.all( + markdownPaths.map(async (markdownPath) => { + await expect(readFile(markdownPath, 'utf8')).resolves.toBe( + `Aspire ${currentAspireMajorMinorVersion} is ${currentAspireVersion}.` + ); + }) + ); + + await Promise.all( + ignoredPaths.map(async (ignoredPath) => { + await expect(readFile(ignoredPath, 'utf8')).resolves.toBe(placeholderContent); + }) + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); }); From ddd042e7736345ac5cacacb51dc8a653e21817e5 Mon Sep 17 00:00:00 2001 From: David Pine <7679720+IEvangelist@users.noreply.github.com> Date: Tue, 30 Jun 2026 10:02:23 -0500 Subject: [PATCH 2/2] Address PR review: normalize concurrency and clarify test comment - Guard against a non-finite/0/negative concurrency value: normalize to a finite positive integer (falling back to the default) before computing the worker count, so a stray NaN can't collapse the pool to an empty array and silently skip every file. Adds a regression test passing NaN. - Reword the scoping test comment so it no longer implies the seeded .html/.txt/.mdx fixtures were already replaced; clarify the assertion is that this pass intentionally leaves every non-.md extension untouched. Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com> --- ...spire-version-placeholders-integration.mjs | 5 +++- ...aspire-version-placeholders.vitest.test.ts | 26 +++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/frontend/config/aspire-version-placeholders-integration.mjs b/src/frontend/config/aspire-version-placeholders-integration.mjs index a189b6f62..ae1a1dc38 100644 --- a/src/frontend/config/aspire-version-placeholders-integration.mjs +++ b/src/frontend/config/aspire-version-placeholders-integration.mjs @@ -48,7 +48,10 @@ export async function replaceAspireVersionPlaceholdersInDirectory( return; } - const workerCount = Math.min(Math.max(1, concurrency), files.length); + // Normalize to a finite positive integer so a stray NaN/0/negative value can't + // collapse the worker pool to an empty array and silently skip every file. + const limit = Number.isFinite(concurrency) ? Math.floor(concurrency) : DEFAULT_CONCURRENCY; + const workerCount = Math.min(Math.max(1, limit), files.length); let cursor = 0; const runWorker = async () => { diff --git a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts index f72f92b7c..20ef2e16f 100644 --- a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts +++ b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts @@ -75,8 +75,11 @@ describe('Aspire version placeholders', () => { `Aspire ${currentAspireMajorMinorVersion}: ${currentAspireVersion}` ); - // `.html`/`.txt`/`.mdx` already have placeholders replaced by the remark - // pipeline, and `.json` is never a placeholder target — all are left as-is. + // The post-build pass intentionally rewrites only `.md` files. In the real + // build `.html`/`.txt`/`.mdx` are produced through the remark pipeline (so + // they're already replaced before this pass runs) and `.json` is never a + // placeholder target; this test seeds them with raw placeholders to assert + // that this pass leaves every non-`.md` extension untouched. await expect(readFile(htmlPath, 'utf8')).resolves.toBe(placeholderContent); await expect(readFile(textPath, 'utf8')).resolves.toBe(placeholderContent); await expect(readFile(mdxPath, 'utf8')).resolves.toBe(placeholderContent); @@ -133,4 +136,23 @@ describe('Aspire version placeholders', () => { await rm(tempDir, { recursive: true, force: true }); } }); + + test('falls back to a valid worker count when given a non-finite concurrency', async () => { + const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); + + try { + const markdownPath = path.join(tempDir, 'example.md'); + await writeFile(markdownPath, 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'); + + // A non-finite concurrency must not collapse the worker pool to an empty + // array and silently skip every file. + await replaceAspireVersionPlaceholdersInDirectory(tempDir, Number.NaN); + + await expect(readFile(markdownPath, 'utf8')).resolves.toBe( + `Aspire ${currentAspireMajorMinorVersion}: ${currentAspireVersion}` + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); });