diff --git a/src/frontend/config/aspire-version-placeholders-integration.mjs b/src/frontend/config/aspire-version-placeholders-integration.mjs index f17dbb9f5..ae1a1dc38 100644 --- a/src/frontend/config/aspire-version-placeholders-integration.mjs +++ b/src/frontend/config/aspire-version-placeholders-integration.mjs @@ -3,7 +3,28 @@ import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { replaceAspireVersionPlaceholders } from './remark-aspire-version-placeholders.mjs'; -const generatedAssetExtensions = new Set(['.html', '.md', '.txt']); +// Per-page Markdown copies emitted by `starlight-page-actions` are the only +// generated artifacts that still contain raw `%ASPIRE_VERSION%` placeholders: +// that plugin `viteStaticCopy`s `src/content/docs/**/*.{md,mdx}` straight to +// `dist/**/*.md` through a regex-only transform, so it never runs through the +// `remarkAspireVersionPlaceholders` remark plugin. +// +// Everything else is already handled before it reaches `dist`: +// - `.html` pages -> rendered via the remark pipeline (placeholders replaced +// in the mdast before expressive-code renders code blocks) +// - `llms*.txt` -> `starlight-llms-txt` sources rendered HTML (`render(entry)`) +// - `reference/**.md` -> generated from API/sample data, not docs content +// +// So this post-build pass only needs to touch `.md` files. Scoping it this way +// (instead of walking every `.html`/`.txt` in `dist`) avoids re-reading the bulk +// of the output — including the large `llms-full.txt` assets — which is what +// previously exhausted the Node heap. +const placeholderCopyExtensions = new Set(['.md']); + +// Process the Markdown copies through a small worker pool rather than a single +// recursive `Promise.all` over the whole tree, so peak memory stays proportional +// to the concurrency limit instead of the number of files held open at once. 
+const DEFAULT_CONCURRENCY = 16; export function aspireVersionPlaceholdersIntegration() { return { @@ -16,28 +37,55 @@ export function aspireVersionPlaceholdersIntegration() { }; } -export async function replaceAspireVersionPlaceholdersInDirectory(directory) { +export async function replaceAspireVersionPlaceholdersInDirectory( + directory, + concurrency = DEFAULT_CONCURRENCY +) { + const files = []; + await collectMarkdownCopies(directory, files); + + if (files.length === 0) { + return; + } + + // Normalize to a finite positive integer so a stray NaN/0/negative value can't + // collapse the worker pool to an empty array and silently skip every file. + const limit = Number.isFinite(concurrency) ? Math.floor(concurrency) : DEFAULT_CONCURRENCY; + const workerCount = Math.min(Math.max(1, limit), files.length); + let cursor = 0; + + const runWorker = async () => { + while (cursor < files.length) { + const filePath = files[cursor++]; + await replaceAspireVersionPlaceholdersInFile(filePath); + } + }; + + await Promise.all(Array.from({ length: workerCount }, runWorker)); +} + +async function collectMarkdownCopies(directory, files) { const entries = await readdir(directory, { withFileTypes: true }); - await Promise.all( - entries.map(async (entry) => { - const resolvedPath = path.join(directory, entry.name); + for (const entry of entries) { + const resolvedPath = path.join(directory, entry.name); - if (entry.isDirectory()) { - await replaceAspireVersionPlaceholdersInDirectory(resolvedPath); - return; - } + if (entry.isDirectory()) { + await collectMarkdownCopies(resolvedPath, files); + continue; + } - if (!entry.isFile() || !generatedAssetExtensions.has(path.extname(entry.name))) { - return; - } + if (entry.isFile() && placeholderCopyExtensions.has(path.extname(entry.name))) { + files.push(resolvedPath); + } + } +} - const content = await readFile(resolvedPath, 'utf8'); - const updated = replaceAspireVersionPlaceholders(content); +async function 
replaceAspireVersionPlaceholdersInFile(filePath) { + const content = await readFile(filePath, 'utf8'); + const updated = replaceAspireVersionPlaceholders(content); - if (updated !== content) { - await writeFile(resolvedPath, updated); - } - }) - ); + if (updated !== content) { + await writeFile(filePath, updated); + } } diff --git a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts index 690379f69..20ef2e16f 100644 --- a/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts +++ b/src/frontend/tests/unit/aspire-version-placeholders.vitest.test.ts @@ -1,4 +1,4 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import os from 'node:os'; import path from 'node:path'; import { describe, expect, test } from 'vitest'; @@ -48,26 +48,111 @@ describe('Aspire version placeholders', () => { expect(tree.children[2].attributes[0].value).toBe(currentAspireVersion); }); - test('replaces placeholders in generated markdown assets', async () => { + test('replaces placeholders only in Markdown copies, leaving other assets untouched', async () => { const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); try { const markdownPath = path.join(tempDir, 'example.md'); + const htmlPath = path.join(tempDir, 'example.html'); + const textPath = path.join(tempDir, 'example.txt'); + const mdxPath = path.join(tempDir, 'example.mdx'); const jsonPath = path.join(tempDir, 'example.json'); + const placeholderContent = 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'; + await Promise.all([ - writeFile(markdownPath, 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'), + writeFile(markdownPath, placeholderContent), + writeFile(htmlPath, placeholderContent), + writeFile(textPath, placeholderContent), + writeFile(mdxPath, placeholderContent), writeFile(jsonPath, 
'{"version":"%ASPIRE_VERSION%"}'), ]); await replaceAspireVersionPlaceholdersInDirectory(tempDir); + // Only the `.md` copy (which bypasses the remark pipeline) is rewritten. await expect(readFile(markdownPath, 'utf8')).resolves.toBe( `Aspire ${currentAspireMajorMinorVersion}: ${currentAspireVersion}` ); + + // The post-build pass intentionally rewrites only `.md` files. In the real + // build `.html`/`.txt` output already has its placeholders replaced before + // this pass runs, and `.mdx`/`.json` files are not targets of this pass; + // this test seeds them all with raw placeholders to assert that this pass + // leaves every non-`.md` extension untouched. + await expect(readFile(htmlPath, 'utf8')).resolves.toBe(placeholderContent); + await expect(readFile(textPath, 'utf8')).resolves.toBe(placeholderContent); + await expect(readFile(mdxPath, 'utf8')).resolves.toBe(placeholderContent); await expect(readFile(jsonPath, 'utf8')).resolves.toBe('{"version":"%ASPIRE_VERSION%"}'); } finally { await rm(tempDir, { recursive: true, force: true }); } }); + + test('replaces Markdown placeholders recursively under a bounded concurrency limit', async () => { + const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); + + try { + // Spread more `.md` files than the worker-pool concurrency across nested + // directories so the bounded recursive traversal is exercised, alongside + // non-`.md` assets that must be left untouched. 
+ const placeholderContent = 'Aspire %ASPIRE_VERSION_MAJOR_MINOR% is %ASPIRE_VERSION%.'; + const ignoredExtensions = ['.html', '.txt', '.mdx', '.json']; + const markdownPaths: string[] = []; + const ignoredPaths: string[] = []; + + for (let depth = 0; depth < 4; depth++) { + const dir = path.join(tempDir, ...Array.from({ length: depth }, (_, i) => `level-${i}`)); + await mkdir(dir, { recursive: true }); + + for (let index = 0; index < 5; index++) { + const markdownPath = path.join(dir, `asset-${index}.md`); + await writeFile(markdownPath, placeholderContent); + markdownPaths.push(markdownPath); + + const extension = ignoredExtensions[index % ignoredExtensions.length]; + const ignoredPath = path.join(dir, `asset-${index}${extension}`); + await writeFile(ignoredPath, placeholderContent); + ignoredPaths.push(ignoredPath); + } + } + + await replaceAspireVersionPlaceholdersInDirectory(tempDir, 2); + + await Promise.all( + markdownPaths.map(async (markdownPath) => { + await expect(readFile(markdownPath, 'utf8')).resolves.toBe( + `Aspire ${currentAspireMajorMinorVersion} is ${currentAspireVersion}.` + ); + }) + ); + + await Promise.all( + ignoredPaths.map(async (ignoredPath) => { + await expect(readFile(ignoredPath, 'utf8')).resolves.toBe(placeholderContent); + }) + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + test('falls back to a valid worker count when given a non-finite concurrency', async () => { + const tempDir = await mkdtemp(path.join(os.tmpdir(), 'aspire-version-placeholders-')); + + try { + const markdownPath = path.join(tempDir, 'example.md'); + await writeFile(markdownPath, 'Aspire %ASPIRE_VERSION_MAJOR_MINOR%: %ASPIRE_VERSION%'); + + // A non-finite concurrency must not collapse the worker pool to an empty + // array and silently skip every file. 
+ await replaceAspireVersionPlaceholdersInDirectory(tempDir, Number.NaN); + + await expect(readFile(markdownPath, 'utf8')).resolves.toBe( + `Aspire ${currentAspireMajorMinorVersion}: ${currentAspireVersion}` + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); });