Per-Aspera-LLC · perasperaactual · Jun 1, 2026 · May 31, 2026 · May 31, 2026 · Jun 1, 2026
diff --git a/.beads/interactions.jsonl b/.beads/interactions.jsonl
@@ -53,3 +53,5 @@
 {"id":"int-db886ffc","kind":"field_change","created_at":"2026-05-30T17:58:07.630601725Z","actor":"Stackwright Bot","issue_id":"stackwright-rqj","extra":{"field":"status","new_value":"closed","old_value":"in_progress","reason":"Closed"}}
 {"id":"int-34488cdd","kind":"field_change","created_at":"2026-05-31T01:52:40.569772239Z","actor":"Stackwright Bot","issue_id":"stackwright-b2w","extra":{"field":"status","new_value":"closed","old_value":"in_progress","reason":"Closed"}}
 {"id":"int-13dc0a3a","kind":"field_change","created_at":"2026-05-31T13:04:03.328783799Z","actor":"Stackwright Bot","issue_id":"stackwright-70q","extra":{"field":"status","new_value":"closed","old_value":"in_progress","reason":"Closed"}}
+{"id":"int-e4836528","kind":"field_change","created_at":"2026-05-31T14:56:36.319940229Z","actor":"Stackwright Bot","issue_id":"stackwright-nw6","extra":{"field":"status","new_value":"closed","old_value":"in_progress","reason":"Closed"}}
+{"id":"int-7d9d52ed","kind":"field_change","created_at":"2026-05-31T23:33:00.059300042Z","actor":"Stackwright Bot","issue_id":"stackwright-11p","extra":{"field":"status","new_value":"closed","old_value":"in_progress","reason":"Closed"}}
diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl
diff --git a/.changeset/seo-autopilot.md b/.changeset/seo-autopilot.md
@@ -0,0 +1,16 @@
+---
+"@stackwright/build-scripts": minor
+"@stackwright/core": minor
+---
+
+feat: SEO Autopilot — auto-generate sitemap.xml, robots.txt, and JSON-LD structured data
+
+Prebuild now generates `sitemap.xml` and `robots.txt` in `public/` when `meta.base_url` is set in `stackwright.yml`. Pages with `noindex: true` are excluded from the sitemap. Locale variants get `xhtml:link` alternate entries.
+
+Content types with natural schema.org mappings now emit `<script type="application/ld+json">` tags:
+- `faq` → FAQPage schema
+- `pricing_table` → Product with AggregateOffer schema
+
+New exports:
+- `@stackwright/build-scripts`: `generateSitemap`, `generateRobotsTxt`, `collectPageMeta`
+- `@stackwright/core`: `generatePageJsonLd`, `generateFaqJsonLd`, `generatePricingJsonLd`, `generateArticleJsonLd`, `JsonLdScript`
diff --git a/packages/build-scripts/src/index.ts b/packages/build-scripts/src/index.ts
@@ -8,3 +8,5 @@ export { runPrebuild } from './prebuild';
 export { runWatch } from './watch';
 export type { PrebuildOptions, PrebuildPlugin, PrebuildPluginContext } from '@stackwright/types';
 export type { SBOMOptions, SBOM, SBOMFormat } from '@stackwright/sbom-generator';
+export { generateSitemap, generateRobotsTxt, collectPageMeta } from './seo';
+export type { PageEntry, PageMeta, SitemapOptions } from './seo';
diff --git a/packages/build-scripts/src/prebuild.ts b/packages/build-scripts/src/prebuild.ts
@@ -42,6 +42,7 @@ import type {
   PrebuildPlugin,
   PrebuildPluginContext,
 } from '@stackwright/types';
+import { generateSitemap, generateRobotsTxt, collectPageMeta } from './seo';
 
 /**
  * Recursively resolve environment variable references in config values.
@@ -1596,6 +1597,29 @@ export async function runPrebuild(options?: string | PrebuildOptions): Promise<v
   console.log('\nGenerating icon manifest...');
   await generateIconManifest(contentOutDir, projectRoot);
 
+  // 3b. Generate sitemap.xml and robots.txt (SEO Autopilot)
+  const siteMetaConfig = (configWithEnvResolved as Record<string, unknown>).meta as
+    | Record<string, unknown>
+    | undefined;
+  const baseUrl = siteMetaConfig?.base_url as string | undefined;
+
+  if (baseUrl) {
+    const pages = collectPageMeta(contentOutDir);
+    const buildDate = new Date().toISOString().split('T')[0];
+
+    const sitemapXml = generateSitemap({ pages, baseUrl, buildDate });
+    fs.writeFileSync(path.join(publicDir, 'sitemap.xml'), sitemapXml);
+    console.log(`  [OK] sitemap.xml (${pages.filter((p) => !p.meta?.noindex).length} pages)`);
+
+    const robotsTxt = generateRobotsTxt(baseUrl);
+    fs.writeFileSync(path.join(publicDir, 'robots.txt'), robotsTxt);
+    console.log('  [OK] robots.txt');
+  } else {
+    console.log(
+      '  [INFO] Skipping sitemap.xml/robots.txt — set meta.base_url in stackwright.yml to enable'
+    );
+  }
+
   // Run afterBuild plugin hooks
   if (plugins.length > 0) {
     console.log('\nRunning afterBuild plugins...');

diff --git a/packages/build-scripts/src/seo.ts b/packages/build-scripts/src/seo.ts
@@ -0,0 +1,310 @@
+/**
+ * seo.ts
+ *
+ * SEO Autopilot utilities for generating sitemap.xml and robots.txt
+ * during the Stackwright prebuild step.
+ *
+ * The generators are pure string builders; `collectPageMeta` additionally reads
+ * the filesystem. Nothing here depends on packages beyond Node builtins.
+ */
+
+import fs from 'fs';
+import path from 'path';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/** SEO-related flags extracted from a page's prebuilt JSON (`content.meta`). */
+export interface PageMeta {
+  /** When truthy, `generateSitemap` leaves the page out of the sitemap. */
+  noindex?: boolean;
+}
+
+/** One page discovered in the prebuild output, as consumed by `generateSitemap`. */
+export interface PageEntry {
+  /** Page path without a leading slash (e.g. `blog/my-post`); `null` for a root page. */
+  slug: string | null;
+  /** Locale directory the page was found under (e.g. `fr`); absent for unprefixed pages. */
+  locale?: string;
+  /** SEO flags read from the page file; absent when the file declares none. */
+  meta?: PageMeta;
+}
+
+/** Input for `generateSitemap`. */
+export interface SitemapOptions {
+  /** Pages to consider; entries flagged `noindex` are filtered out by the generator. */
+  pages: PageEntry[];
+  /** Absolute base URL prepended to every page path; trailing slashes are stripped before use. */
+  baseUrl: string;
+  /** Value written (XML-escaped) into every `<lastmod>` element. */
+  buildDate: string;
+}
+
+// ---------------------------------------------------------------------------
+// Reserved files / directories that are NOT page content
+// ---------------------------------------------------------------------------
+
+/** File names in the content output directory that `collectPageMeta` never treats as pages. */
+const RESERVED_BASENAMES = new Set([
+  '_site.json',
+  '_font-links.json',
+  'search-index.json',
+  '_icon-manifest.json',
+]);
+
+/** Matches locale-specific site configs like `_site.fr.json`, `_site.de.json` */
+const LOCALE_SITE_RE = /^_site\..+\.json$/;
+
+/** Directory names that `collectPageMeta` does not descend into, at any depth. */
+const SKIP_DIRS = new Set(['collections']);
+
+// ---------------------------------------------------------------------------
+// collectPageMeta
+// ---------------------------------------------------------------------------
+
+/**
+ * Recursively scan the prebuild content output directory and build a flat
+ * list of page entries, each carrying SEO metadata when its file declares any.
+ *
+ * Expected layout of the directory written by `stackwright-prebuild`:
+ * ```
+ *   _root.json          → slug null  (root page)
+ *   about.json          → slug 'about'
+ *   blog/my-post.json   → slug 'blog/my-post'
+ *   fr/_root.json       → slug null,  locale 'fr'
+ *   fr/about.json       → slug 'about', locale 'fr'
+ * ```
+ *
+ * Non-JSON files, reserved files (`_site.json`, `_font-links.json`, …),
+ * locale site configs (`_site.<locale>.json`) and any `collections/`
+ * directory are ignored. A directory that cannot be read contributes no
+ * entries instead of raising.
+ *
+ * @param contentOutDir - Absolute path to the prebuild output directory
+ *                        (typically `public/stackwright-content`).
+ * @returns Array of page entries suitable for {@link generateSitemap}, in
+ *          the order the filesystem enumerates them (depth-first).
+ */
+export function collectPageMeta(contentOutDir: string): PageEntry[] {
+  const collected: PageEntry[] = [];
+
+  // A JSON file counts as a page unless it is one of the framework's own artefacts.
+  const isPageFile = (name: string): boolean =>
+    name.endsWith('.json') && !RESERVED_BASENAMES.has(name) && !LOCALE_SITE_RE.test(name);
+
+  const visit = (currentDir: string): void => {
+    let children: fs.Dirent[];
+    try {
+      children = fs.readdirSync(currentDir, { withFileTypes: true });
+    } catch {
+      // Directory is missing or unreadable — nothing to index here.
+      return;
+    }
+
+    for (const child of children) {
+      const childPath = path.join(currentDir, child.name);
+
+      if (child.isDirectory()) {
+        if (!SKIP_DIRS.has(child.name)) visit(childPath);
+      } else if (isPageFile(child.name)) {
+        const entry = parsePageFile(path.relative(contentOutDir, childPath), childPath);
+        if (entry) collected.push(entry);
+      }
+    }
+  };
+
+  visit(contentOutDir);
+  return collected;
+}
+
+/**
+ * Turn one prebuilt JSON content file into a {@link PageEntry}.
+ *
+ * Slug and locale are derived purely from the file's relative path; the file
+ * is then read to pick up `content.meta.noindex` when present.
+ *
+ * @param relPath  - Path relative to contentOutDir (e.g. `fr/about.json`)
+ * @param filePath - Absolute path used to read the file
+ * @returns The page entry. This implementation always produces an entry; the
+ *          `null` in the signature is never returned here.
+ */
+function parsePageFile(relPath: string, filePath: string): PageEntry | null {
+  // Windows separators → forward slashes, then drop the `.json` extension.
+  const segments = relPath
+    .replace(/\\/g, '/')
+    .replace(/\.json$/, '')
+    .split('/');
+
+  // Locale detection is a path heuristic, not a config lookup: a nested file
+  // whose first directory is 2–3 lowercase letters (see looksLikeLocale) is
+  // assumed to live in a locale directory:
+  //   fr/_root.json      → locale 'fr', slug null
+  //   fr/about.json      → locale 'fr', slug 'about'
+  //   docs/intro.json    → slug 'docs/intro' (4 letters, so not a locale)
+  // NOTE(review): a content directory with a 2–3 letter name (e.g. `faq/`)
+  // is indistinguishable from a locale here — confirm against the i18n config.
+  const hasLocalePrefix = segments.length > 1 && looksLikeLocale(segments[0]);
+  const locale = hasLocalePrefix ? segments[0] : undefined;
+  const pathSegments = hasLocalePrefix ? segments.slice(1) : segments;
+
+  // A trailing `_root` stands for the index page of its directory: drop it,
+  // and when nothing else is left the page is the (locale) root → slug null.
+  let slug: string | null;
+  if (pathSegments[pathSegments.length - 1] !== '_root') {
+    slug = pathSegments.join('/');
+  } else if (pathSegments.length === 1) {
+    slug = null;
+  } else {
+    slug = pathSegments.slice(0, -1).join('/');
+  }
+
+  // Pull `content.meta.noindex` out of the file, if it declares one.
+  let meta: PageMeta | undefined;
+  try {
+    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
+    const declared = parsed?.content?.meta;
+    if (declared && declared.noindex !== undefined) {
+      meta = { noindex: declared.noindex };
+    }
+  } catch {
+    // Unreadable or malformed file: keep the page, just without meta.
+    // Listing it in the sitemap is preferred over silently dropping it.
+  }
+
+  return {
+    slug,
+    ...(locale ? { locale } : {}),
+    ...(meta ? { meta } : {}),
+  };
+}
+
+/**
+ * Heuristic locale test: true when `s` consists of exactly two or three
+ * lowercase ASCII letters, i.e. the shape of a BCP-47 primary language
+ * subtag such as `en`, `fr`, `de` or `zh`. Region-qualified tags like
+ * `en-US` are not accepted.
+ */
+function looksLikeLocale(s: string): boolean {
+  if (s.length < 2 || s.length > 3) return false;
+
+  for (let i = 0; i < s.length; i++) {
+    const code = s.charCodeAt(i);
+    // 97..122 is 'a'..'z'
+    if (code < 97 || code > 122) return false;
+  }
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// generateSitemap
+// ---------------------------------------------------------------------------
+
+/**
+ * Render an XML sitemap for the given pages.
+ *
+ * Behaviour:
+ * - Pages whose `meta.noindex` is truthy are left out entirely
+ * - Pages sharing a slug are treated as locale variants of one another; when
+ *   a slug has more than one variant, every variant's `<url>` lists all of
+ *   them as `<xhtml:link rel="alternate">` (a variant without a locale is
+ *   advertised as `hreflang="x-default"`)
+ * - Trailing slashes on `baseUrl` are removed before URLs are built
+ * - An empty page list still yields a well-formed, empty `<urlset>`
+ *
+ * @param options - Sitemap generation options
+ * @returns A complete XML sitemap string ready to write to `public/sitemap.xml`
+ */
+export function generateSitemap(options: SitemapOptions): string {
+  const origin = stripTrailingSlashes(options.baseUrl);
+
+  // Bucket the indexable pages by slug; Map keeps first-seen slug order.
+  const variantsBySlug = new Map<string, PageEntry[]>();
+  for (const candidate of options.pages) {
+    if (candidate.meta?.noindex) continue;
+
+    const key = candidate.slug ?? '__root__';
+    const bucket = variantsBySlug.get(key);
+    if (bucket) {
+      bucket.push(candidate);
+    } else {
+      variantsBySlug.set(key, [candidate]);
+    }
+  }
+
+  const urlBlocks: string[] = [];
+  variantsBySlug.forEach((variants) => {
+    // Alternates are only emitted when the slug exists in several variants.
+    const alternateLines =
+      variants.length > 1
+        ? variants.map((variant) => {
+            const target = buildAbsoluteUrl(origin, variant);
+            const lang = variant.locale ?? 'x-default';
+            return `    <xhtml:link rel="alternate" hreflang="${escapeXml(lang)}" href="${escapeXml(target)}" />`;
+          })
+        : [];
+
+    for (const variant of variants) {
+      const lines = [
+        '  <url>',
+        `    <loc>${escapeXml(buildAbsoluteUrl(origin, variant))}</loc>`,
+        ...alternateLines,
+        `    <lastmod>${escapeXml(options.buildDate)}</lastmod>`,
+        '  </url>',
+      ];
+      urlBlocks.push(lines.join('\n'));
+    }
+  });
+
+  const header = [
+    '<?xml version="1.0" encoding="UTF-8"?>',
+    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
+    '        xmlns:xhtml="http://www.w3.org/1999/xhtml">',
+  ].join('\n');
+  const body = urlBlocks.map((block) => `${block}\n`).join('');
+
+  // Every line, including the closing tag, ends with a newline.
+  return `${header}\n${body}</urlset>\n`;
+}
+
+// ---------------------------------------------------------------------------
+// generateRobotsTxt
+// ---------------------------------------------------------------------------
+
+/**
+ * Produce the default `robots.txt` for a Stackwright site.
+ *
+ * The single rule group applies to every crawler: the site is allowed as a
+ * whole, the paths `/api/`, `/_next/` and `/stackwright-content/` are
+ * disallowed, and a `Sitemap:` line points at `<baseUrl>/sitemap.xml`.
+ *
+ * @param baseUrl - The production base URL (trailing slashes are stripped).
+ * @returns A robots.txt string ready to write to `public/robots.txt`.
+ */
+export function generateRobotsTxt(baseUrl: string): string {
+  const origin = stripTrailingSlashes(baseUrl);
+
+  const rules = [
+    'User-agent: *',
+    'Allow: /',
+    'Disallow: /api/',
+    'Disallow: /_next/',
+    'Disallow: /stackwright-content/',
+  ];
+
+  // Rule group, a blank separator line, the sitemap pointer, trailing newline.
+  return `${rules.join('\n')}\n\nSitemap: ${origin}/sitemap.xml\n`;
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Build the absolute URL for a page, taking locale into account.
+ *
+ * - Root page (slug null, no locale) → `https://example.com/`
+ * - Locale root (slug null, locale fr) → `https://example.com/fr`
+ * - Regular page → `https://example.com/about`
+ * - Locale page  → `https://example.com/fr/about`
+ *
+ * Every path segment is percent-encoded, because the sitemap protocol
+ * requires URLs to be URL-escaped: a slug such as `blog/hello world` becomes
+ * `blog/hello%20world`. Slugs made of letters, digits, `-`, `_` and `.` come
+ * out unchanged. The `/` separators inside a slug are preserved.
+ *
+ * @param baseUrl - Base URL without a trailing slash.
+ * @param page    - Page whose `locale` and `slug` form the path.
+ * @returns The absolute, percent-encoded URL (not yet XML-escaped).
+ */
+function buildAbsoluteUrl(baseUrl: string, page: PageEntry): string {
+  const segments: string[] = [];
+  if (page.locale) segments.push(page.locale);
+  // Split the slug so its separators survive while each segment is encoded.
+  if (page.slug) segments.push(...page.slug.split('/'));
+
+  if (segments.length === 0) return `${baseUrl}/`;
+
+  const encodedPath = segments.map((segment) => encodeURIComponent(segment)).join('/');
+  return `${baseUrl}/${encodedPath}`;
+}
+
+/**
+ * Remove every trailing `/` from a URL.
+ *
+ * Implemented as a plain backwards scan rather than a regex so that static
+ * analysers such as CodeQL do not raise ReDoS false positives.
+ */
+function stripTrailingSlashes(url: string): string {
+  let keep = url.length;
+  for (; keep > 0; keep--) {
+    if (url.charAt(keep - 1) !== '/') break;
+  }
+  return keep < url.length ? url.slice(0, keep) : url;
+}
+
+/**
+ * Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) so the
+ * string can be placed in element text or a quoted attribute value.
+ */
+function escapeXml(s: string): string {
+  // Single pass: each special character maps straight to its entity, so an
+  // `&` produced by one replacement is never escaped a second time.
+  return s.replace(/[&<>"']/g, (ch) => {
+    switch (ch) {
+      case '&':
+        return '&amp;';
+      case '<':
+        return '&lt;';
+      case '>':
+        return '&gt;';
+      case '"':
+        return '&quot;';
+      default:
+        return '&apos;';
+    }
+  });
+}