diff --git a/package.json b/package.json index a856ccf..b577a23 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,8 @@ "lighthouse": "node scripts/lighthouse.mjs", "perf-budget": "node scripts/perf-budget.mjs", "hue-rotate": "node scripts/hue-rotate.mjs", - "validate:all": "npm run validate:brand && npm run typecheck && npm run test && npm run prompt-evals && npm run build && npm run perf-budget" + "validate:all": "npm run validate:brand && npm run typecheck && npm run test && npm run prompt-evals && npm run build && npm run perf-budget", + "validate": "node scripts/build_validators.mjs dist" }, "dependencies": { "@radix-ui/react-accordion": "^1.2.0", diff --git a/scripts/build_validators.mjs b/scripts/build_validators.mjs new file mode 100644 index 0000000..33d8b49 --- /dev/null +++ b/scripts/build_validators.mjs @@ -0,0 +1,26 @@ +#!/usr/bin/env node +// build_validators.mjs — orchestrator. Runs every build-gate validator over dist/ and exits +// non-zero on any error-level finding (blocks deploy). Spec: heymegabyte-claude-skills +// rules/build-validators-manifest.md. Add validators here as they are implemented. 
/**
 * Recursively list every `.html` file under `dir`.
 *
 * @param {string} dir - Directory to scan (the validators pass the build output, e.g. `dist`).
 * @returns {string[]} Paths of all entries whose name ends in `.html`, each built with
 *   `path.join` from `dir`, sorted so the result does not depend on filesystem
 *   enumeration order. Empty when `dir` does not exist or is not a directory.
 */
export function htmlFiles(dir) {
  // existsSync alone is not enough: a regular file at `dir` would make readdirSync throw ENOTDIR.
  if (!existsSync(dir) || !statSync(dir).isDirectory()) return [];

  const found = [];
  const walk = (current) => {
    for (const entry of readdirSync(current, { withFileTypes: true })) {
      const entryPath = join(current, entry.name);
      if (entry.isDirectory()) walk(entryPath);
      else if (entry.name.endsWith('.html')) found.push(entryPath);
    }
  };
  walk(dir);

  // Stable order keeps validator reports identical from one machine/filesystem to the next.
  return found.sort();
}
/**
 * Build an error-level finding. The orchestrator treats any `level: 'error'` finding as a
 * build break and exits non-zero.
 *
 * @param {string} validator - Name of the validator that produced the finding.
 * @param {string} code - Machine-readable finding code (e.g. `assets.missing`).
 * @param {string} route - Page the finding applies to.
 * @param {string} detail - Human-readable explanation.
 * @returns {{validator: string, code: string, route: string, detail: string, level: 'error'}}
 */
export const fail = (validator, code, route, detail) => ({ validator, code, route, detail, level: 'error' });

/**
 * Build a warn-level finding; same shape as `fail` but with `level: 'warn'`.
 *
 * @returns {{validator: string, code: string, route: string, detail: string, level: 'warn'}}
 */
export const warn = (validator, code, route, detail) => ({ validator, code, route, detail, level: 'warn' });

// Root-relative src/href whose PATH ends in a known asset extension. An optional ?query or
// #fragment may follow the extension (cache-busting, SVG sprite ids) and is not captured.
// `(?!\/)` rejects protocol-relative URLs (`//cdn.example.com/x.js`), which are external.
const ASSET_REF = /\b(?:src|href)=["'](\/(?!\/)[^"'?#]+\.(?:png|jpe?g|webp|avif|svg|gif|ico|css|js|woff2?|json|webmanifest|xml|txt))(?:[?#][^"']*)?["']/gi;

/**
 * validate-assets — every local asset reference (root-relative `src`/`href` to images, CSS,
 * JS, fonts, manifests, …) in every HTML page must resolve to a file in `dist/`.
 *
 * @param {string} dist - Build output directory.
 * @returns {Array<object>} One `assets.missing` finding per page per distinct unresolved ref.
 */
export function validateAssets(dist) {
  const out = [];
  // htmlFiles() builds paths with path.join, which normalizes `dist` ('./dist' → 'dist').
  // Measure the prefix on a path built the same way instead of on the raw argument.
  const prefixLength = join(dist, 'x').length - 1;

  for (const file of htmlFiles(dist)) {
    const html = readFileSync(file, 'utf8');
    const route = `/${file.slice(prefixLength)}`;
    const seen = new Set(); // report each distinct ref once per page

    for (const [, ref] of html.matchAll(ASSET_REF)) {
      if (seen.has(ref)) continue;
      seen.add(ref);
      const target = join(dist, ref);
      if (!(existsSync(target) && statSync(target).isFile())) {
        out.push(fail('validate-assets', 'assets.missing', route, `asset ref does not resolve: ${ref}`));
      }
    }
  }
  return out;
}
/**
 * validate-links — every internal `<a href>` in every HTML page must resolve to a real
 * file in `dist/`.
 *
 * A link resolves when the path itself, the path + `.html`, or the path + `/index.html`
 * is a file. Query strings and fragments are ignored for resolution. Hrefs that carry a
 * URL scheme (`https:`, `mailto:`, `javascript:`, …) or are protocol-relative (`//host`)
 * are external and skipped; fragment-only / query-only hrefs point at the current page
 * and are skipped too.
 *
 * @param {string} dist - Build output directory.
 * @returns {Array<object>} One `routes.dead_link` finding per unresolved href occurrence.
 */
export function validateLinks(dist) {
  // Anchor tags only; `href` must be a whole attribute name (not `data-href`). The value is
  // captured INCLUDING any #fragment so that links such as `/about#team` are still checked.
  const ANCHOR_HREF = /<a\s(?:[^>]*?\s)?href\s*=\s*["']([^"']*)["']/gi;
  // Any RFC 3986 scheme, or a protocol-relative URL.
  const EXTERNAL = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
  const isFile = (p) => existsSync(p) && statSync(p).isFile();

  // htmlFiles() builds paths with path.join, which normalizes `dist` ('./dist' → 'dist').
  // Measure the prefix on a path built the same way instead of on the raw argument.
  const prefixLength = join(dist, 'x').length - 1;

  const out = [];
  for (const file of htmlFiles(dist)) {
    const html = readFileSync(file, 'utf8');
    const route = `/${file.slice(prefixLength)}`;

    for (const [, rawHref] of html.matchAll(ANCHOR_HREF)) {
      const href = rawHref.trim();
      if (EXTERNAL.test(href)) continue;

      const clean = href.split(/[?#]/)[0];
      if (clean === '') continue; // '', '#top', '?page=2' → the current page

      // Root-relative links resolve against dist; others against the linking file's directory.
      const target = clean.startsWith('/') ? join(dist, clean) : join(file, '..', clean);
      const candidates = [target, `${target}.html`, join(target, 'index.html')];
      if (!candidates.some(isFile)) {
        out.push(fail('validate-links', 'routes.dead_link', route, `unresolved internal href: ${href}`));
      }
    }
  }
  return out;
}
// Whole <meta …> tags. Quoted attribute values are consumed as units so a '>' inside a
// value (e.g. content="a > b") does not end the tag early.
const META_TAG = /<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi;

/** Read one quoted attribute from a tag's source text; `undefined` when it is absent. */
function readAttribute(tag, name) {
  // The value ends at the quote character that OPENED it, so "it's" and 'say "hi"' survive.
  const match = new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i').exec(tag);
  return match ? (match[1] ?? match[2]) : undefined;
}

/** Trimmed content of the first `<meta name="description">`, in any attribute order; '' if none. */
function metaDescription(html) {
  for (const [tag] of html.matchAll(META_TAG)) {
    if (readAttribute(tag, 'name')?.trim().toLowerCase() === 'description') {
      return readAttribute(tag, 'content')?.trim() ?? '';
    }
  }
  return '';
}

/**
 * validate-route-metadata — every HTML page must have a non-empty `<title>` and a
 * non-empty meta description. Pages named 404.html / 500.html / offline.html are skipped.
 *
 * @param {string} dist - Build output directory.
 * @returns {Array<object>} `meta.no_title` / `meta.no_description` findings, one per
 *   missing item per page.
 */
export function validateRouteMetadata(dist) {
  const out = [];
  for (const file of htmlFiles(dist)) {
    if (/\b(404|500|offline)\.html$/.test(file)) continue; // utility pages are exempt
    const html = readFileSync(file, 'utf8');
    // NOTE(review): the label assumes `dist` is passed in the same normalized form that
    // htmlFiles() produces (e.g. 'dist', not './dist') — confirm against callers.
    const route = '/' + file.slice(dist.length).replace(/^\/+/, '');
    const title = (html.match(/<title\b[^>]*>([^<]*)<\/title>/i) || [])[1]?.trim();
    const desc = metaDescription(html);
    if (!title) out.push(fail('validate-route-metadata', 'meta.no_title', route, 'missing/empty <title>'));
    if (!desc) out.push(fail('validate-route-metadata', 'meta.no_description', route, 'missing/empty meta description'));
  }
  return out;
}

/**
 * validate-ssr-head — detect the SEO-collapse anti-pattern: a multi-route SPA that serves
 * every route from one index.html (SPA fallback) with NO per-route server head — no
 * prerendered route HTML AND no edge HTMLRewriter.
 *
 * @param {string} dist - Build output directory.
 * @param {string} [root='.'] - Project root searched for edge-function sources
 *   (`functions`, `src/worker`, `workers`).
 * @returns {Array<object>} A single `seo.client_only_head` finding, or an empty array.
 */
export function validateSsrHead(dist, root = '.') {
  // 1. How many routes does the site claim? (sitemap <loc> count)
  const sitemap = join(dist, 'sitemap.xml');
  const routeCount = existsSync(sitemap)
    ? (readFileSync(sitemap, 'utf8').match(/<loc>/g) || []).length
    : 0;

  // 2. How many prerendered route HTML files exist? Only the ROOT index.html is the SPA
  //    shell; a nested about/index.html is a prerendered route and must be counted.
  //    htmlFiles() builds paths with path.join (which normalizes `dist`), so the prefix
  //    is measured on a path built the same way.
  const prefixLength = join(dist, 'x').length - 1;
  const UTILITY = /(?:^|[\\/])(?:404|500|offline)\.html$/;
  const prerendered = htmlFiles(dist).filter((file) => {
    const rel = file.slice(prefixLength);
    return rel !== 'index.html' && !UTILITY.test(rel);
  }).length;

  // 3. SPA catch-all fallback present? (_redirects /* → /index.html)
  const redirects = join(dist, '_redirects');
  const spaFallback = existsSync(redirects) && /\/\*\s+\/index\.html\s+200/.test(readFileSync(redirects, 'utf8'));

  // 4. Any edge worker that rewrites the per-route <head>?
  const fnDirs = ['functions', 'src/worker', 'workers'].map((d) => join(root, d)).filter(existsSync);
  const grepDir = (dir) => readdirSync(dir, { withFileTypes: true }).some((entry) => {
    const entryPath = join(dir, entry.name);
    if (entry.isDirectory()) return grepDir(entryPath);
    return /\.(ts|js|mjs)$/.test(entry.name) && /HTMLRewriter|getMeta\(|rewrite.*<title|on\(['"]title/.test(readFileSync(entryPath, 'utf8'));
  });
  const edgeRewriter = fnDirs.some(grepDir);

  // Verdict: multi-route + SPA-fallback + no prerender + no edge rewrite = client-only head.
  if (routeCount > 1 && spaFallback && prerendered === 0 && !edgeRewriter) {
    return [fail('validate-ssr-head', 'seo.client_only_head', '/*',
      `${routeCount} routes (sitemap) all serve one index.html (SPA fallback) with NO per-route server head — no prerendered route HTML, no edge HTMLRewriter. Crawlers read the homepage head on every URL → SEO collapse. Add SSG prerender (vite-ssg) OR a Worker HTMLRewriter keyed on getMeta(pathname).`)];
  }
  return [];
}