diff --git a/bun.lock b/bun.lock index 8c86b1b31..2d52b7a44 100644 --- a/bun.lock +++ b/bun.lock @@ -22,7 +22,7 @@ }, "packages/aws-lambda": { "name": "@hyperframes/aws-lambda", - "version": "0.6.27", + "version": "0.6.29", "dependencies": { "@aws-sdk/client-s3": "^3.700.0", "@aws-sdk/client-sfn": "^3.700.0", @@ -54,7 +54,7 @@ }, "packages/cli": { "name": "@hyperframes/cli", - "version": "0.6.27", + "version": "0.6.29", "bin": { "hyperframes": "./dist/cli.js", }, @@ -65,6 +65,7 @@ "citty": "^0.2.1", "compare-versions": "^6.1.1", "esbuild": "^0.25.12", + "fontkit": "^2.0.4", "giget": "^3.2.0", "hono": "^4.0.0", "onnxruntime-node": "^1.20.0", @@ -72,7 +73,7 @@ "postcss": "^8.5.8", "prettier": "^3.8.1", "puppeteer-core": "^24.39.1", - "sharp": "^0.34.0", + "sharp": "^0.34.5", }, "devDependencies": { "@clack/prompts": "^1.1.0", @@ -82,6 +83,7 @@ "@hyperframes/producer": "workspace:*", "@hyperframes/studio": "workspace:*", "@types/adm-zip": "^0.5.7", + "@types/fontkit": "^2.0.9", "@types/mime-types": "^3.0.1", "@types/node": "^25.0.10", "linkedom": "^0.18.12", @@ -97,7 +99,7 @@ }, "packages/core": { "name": "@hyperframes/core", - "version": "0.6.27", + "version": "0.6.29", "dependencies": { "@chenglou/pretext": "^0.0.5", "postcss": "^8.5.8", @@ -124,7 +126,7 @@ }, "packages/engine": { "name": "@hyperframes/engine", - "version": "0.6.27", + "version": "0.6.29", "dependencies": { "@hono/node-server": "^1.13.0", "@hyperframes/core": "workspace:^", @@ -142,7 +144,7 @@ }, "packages/player": { "name": "@hyperframes/player", - "version": "0.6.27", + "version": "0.6.29", "devDependencies": { "@types/bun": "^1.1.0", "gsap": "^3.12.5", @@ -154,7 +156,7 @@ }, "packages/producer": { "name": "@hyperframes/producer", - "version": "0.6.27", + "version": "0.6.29", "dependencies": { "@fontsource/archivo-black": "^5.2.8", "@fontsource/eb-garamond": "^5.2.7", @@ -194,7 +196,7 @@ }, "packages/shader-transitions": { "name": "@hyperframes/shader-transitions", - "version": "0.6.27", + 
"version": "0.6.29", "dependencies": { "html2canvas": "^1.4.1", }, @@ -206,7 +208,7 @@ }, "packages/studio": { "name": "@hyperframes/studio", - "version": "0.6.27", + "version": "0.6.29", "dependencies": { "@codemirror/autocomplete": "^6.20.1", "@codemirror/commands": "^6.10.3", @@ -941,6 +943,8 @@ "@sparticuz/chromium": ["@sparticuz/chromium@148.0.0", "", { "dependencies": { "tar-fs": "^3.1.2" } }, "sha512-na5beDSZkrlcEWEMt+eHu4Xe+MLUgCtHBjHaXGsNaQu5tJWwXE+McxAcMtyumEM/JzXrxGpkO5vAPD9TWhil3g=="], + "@swc/helpers": ["@swc/helpers@0.5.21", "", { "dependencies": { "tslib": "^2.8.0" } }, "sha512-jI/VAmtdjB/RnI8GTnokyX7Ug8c+g+ffD6QRLa6XQewtnGyukKkKSk3wLTM3b5cjt1jNh9x0jfVlagdN2gDKQg=="], + "@tootallnate/quickjs-emscripten": ["@tootallnate/quickjs-emscripten@0.23.0", "", {}, "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="], "@tybys/wasm-util": ["@tybys/wasm-util@0.10.2", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg=="], @@ -969,6 +973,8 @@ "@types/estree": ["@types/estree@1.0.8", "", {}, "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w=="], + "@types/fontkit": ["@types/fontkit@2.0.9", "", { "dependencies": { "@types/node": "*" } }, "sha512-qNYerFky3muCmZPq+R+B3cUDRA5OONw/oh6aGGFxx2LOBz6yu8eamKusrhkHnC6rc2fm76+G9z9QoWSB2SaQaw=="], + "@types/jsdom": ["@types/jsdom@28.0.2", "", { "dependencies": { "@types/node": "*", "@types/tough-cookie": "*", "parse5": "^8.0.0", "undici-types": "^7.21.0" } }, "sha512-zZYItekplnGirFhVDrcB0+103TMakXfKfIp7uECxaFzFG3Ws5kYQSwVb1d4pQfJMMjQda6pfuZxueAv9CMiJbw=="], "@types/mime-types": ["@types/mime-types@3.0.1", "", {}, "sha512-xRMsfuQbnRq1Ef+C+RKaENOxXX87Ygl38W1vDfPHRku02TgQr+Qd8iivLtAMcR0KF5/29xlnFihkTlbqFrGOVQ=="], @@ -1083,6 +1089,8 @@ "braces": ["braces@3.0.3", "", { "dependencies": { "fill-range": "^7.1.1" } }, 
"sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA=="], + "brotli": ["brotli@1.3.3", "", { "dependencies": { "base64-js": "^1.1.2" } }, "sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg=="], + "browserslist": ["browserslist@4.28.2", "", { "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", "electron-to-chromium": "^1.5.328", "node-releases": "^2.0.36", "update-browserslist-db": "^1.2.3" }, "bin": { "browserslist": "cli.js" } }, "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg=="], "buffer-crc32": ["buffer-crc32@0.2.13", "", {}, "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ=="], @@ -1125,6 +1133,8 @@ "cliui": ["cliui@8.0.1", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" } }, "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ=="], + "clone": ["clone@2.1.2", "", {}, "sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w=="], + "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], "color-name": ["color-name@1.1.4", "", {}, "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="], @@ -1203,6 +1213,8 @@ "devtools-protocol": ["devtools-protocol@0.0.1608973", "", {}, "sha512-Tpm17fxYzt+J7VrGdc1k8YdRqS3YV7se/M6KeemEqvUbq/n7At1rWVuXMxQgpWkdwSdIEKYbU//Bve+Shm4YNQ=="], + "dfa": ["dfa@1.2.0", "", {}, "sha512-ED3jP8saaweFTjeGX8HQPjeC1YYyZs98jGNZx6IiBvxW7JG5v492kamAQB3m2wop07CvU/RQmzcKr6bgcC5D/Q=="], + "didyoumean": ["didyoumean@1.2.2", "", {}, "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw=="], "dlv": 
["dlv@1.1.3", "", {}, "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA=="], @@ -1303,6 +1315,8 @@ "fix-dts-default-cjs-exports": ["fix-dts-default-cjs-exports@1.0.1", "", { "dependencies": { "magic-string": "^0.30.17", "mlly": "^1.7.4", "rollup": "^4.34.8" } }, "sha512-pVIECanWFC61Hzl2+oOCtoJ3F17kglZC/6N94eRWycFgBH35hHx0Li604ZIzhseh97mf2p0cv7vVrOZGoqhlEg=="], + "fontkit": ["fontkit@2.0.4", "", { "dependencies": { "@swc/helpers": "^0.5.12", "brotli": "^1.3.2", "clone": "^2.1.2", "dfa": "^1.2.0", "fast-deep-equal": "^3.1.3", "restructure": "^3.0.0", "tiny-inflate": "^1.0.3", "unicode-properties": "^1.4.0", "unicode-trie": "^2.0.0" } }, "sha512-syetQadaUEDNdxdugga9CpEYVaQIxOwk7GlwZWWZ19//qW4zE5bknOKeMBDYAASwnpaSHKJITRLMF9m1fp3s6g=="], + "foreground-child": ["foreground-child@3.3.1", "", { "dependencies": { "cross-spawn": "^7.0.6", "signal-exit": "^4.0.1" } }, "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw=="], "formatly": ["formatly@0.3.0", "", { "dependencies": { "fd-package-json": "^2.0.0" }, "bin": { "formatly": "bin/index.mjs" } }, "sha512-9XNj/o4wrRFyhSMJOvsuyMwy8aUfBaZ1VrqHVfohyXf0Sw0e+yfKG+xZaY3arGCOMdwFsqObtzVOc1gU9KiT9w=="], @@ -1581,6 +1595,8 @@ "package-json-from-dist": ["package-json-from-dist@1.0.1", "", {}, "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="], + "pako": ["pako@0.2.9", "", {}, "sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA=="], + "parent-module": ["parent-module@1.0.1", "", { "dependencies": { "callsites": "^3.0.0" } }, "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g=="], "parse-cache-control": ["parse-cache-control@1.0.1", "", {}, "sha512-60zvsJReQPX5/QP0Kzfd/VrpjScIQ7SHBW6bFCYfEP+fp0Eppr1SHhIO5nd1PjZtvclzSzES9D/p5nFJurwfWg=="], @@ -1669,6 +1685,8 @@ "resolve-pkg-maps": ["resolve-pkg-maps@1.0.0", "", {}, 
"sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw=="], + "restructure": ["restructure@3.0.2", "", {}, "sha512-gSfoiOEA0VPE6Tukkrr7I0RBdE0s7H1eFCDBk05l1KIQT1UIKNc5JZy6jdyW6eYH3aR3g5b3PuL77rq0hvwtAw=="], + "retry": ["retry@0.13.1", "", {}, "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg=="], "reusify": ["reusify@1.1.0", "", {}, "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw=="], @@ -1775,6 +1793,8 @@ "thenify-all": ["thenify-all@1.6.0", "", { "dependencies": { "thenify": ">= 3.1.0 < 4" } }, "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA=="], + "tiny-inflate": ["tiny-inflate@1.0.3", "", {}, "sha512-pkY1fj1cKHb2seWDy0B16HeWyczlJA9/WW3u3c4z/NiWDsO3DOU5D7nhTLE9CF0yXv/QZFY7sEJmj24dK+Rrqw=="], + "tinybench": ["tinybench@2.9.0", "", {}, "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg=="], "tinyexec": ["tinyexec@1.1.2", "", {}, "sha512-dAqSqE/RabpBKI8+h26GfLq6Vb3JVXs30XYQjdMjaj/c2tS8IYYMbIzP599KtRj7c57/wYApb3QjgRgXmrCukA=="], @@ -1825,6 +1845,10 @@ "undici-types": ["undici-types@7.21.0", "", {}, "sha512-w9IMgQrz4O0YN1LtB7K5P63vhlIOvC7opSmouCJ+ZywlPAlO9gIkJ+otk6LvGpAs2wg4econaCz3TvQ9xPoyuQ=="], + "unicode-properties": ["unicode-properties@1.4.1", "", { "dependencies": { "base64-js": "^1.3.0", "unicode-trie": "^2.0.0" } }, "sha512-CLjCCLQ6UuMxWnbIylkisbRj31qxHPAurvena/0iwSVbQ2G1VY5/HjV0IRabOEbDHlzZlRdCrD4NhB0JtU40Pg=="], + + "unicode-trie": ["unicode-trie@2.0.0", "", { "dependencies": { "pako": "^0.2.5", "tiny-inflate": "^1.0.0" } }, "sha512-x7bc76x0bm4prf1VLg79uhAzKw8DVboClSN5VxJuQ+LKDOVEW9CdH+VY7SP+vX7xCYQqzzgQpFqz15zeLvAtZQ=="], + "universalify": ["universalify@2.0.1", "", {}, "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw=="], "update-browserslist-db": ["update-browserslist-db@1.2.3", "", { 
/**
 * Unit tests for the font metadata extractor: weight inference from subfamily
 * names, family-name canonicalization, and the manifest written by
 * extractFontMetadata.
 */

/**
 * Run `fn` with a fresh temporary directory and remove it afterwards, even
 * when `fn` throws (e.g. on a failed expectation).
 */
function withTempDir(fn: (dir: string) => void): void {
  const dir = mkdtempSync(join(tmpdir(), "hf-font-test-"));
  try {
    fn(dir);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

describe("inferWeightFromSubfamily", () => {
  // The concatenated forms were always handled. The spaced and hyphenated
  // forms are regression coverage for the bug flagged on PR #987: "Extra Light"
  // used to fall through to the 400 default before separators were normalized.
  describe("concatenated forms (already handled)", () => {
    it.each([
      ["Thin", 100],
      ["ExtraLight", 200],
      ["UltraLight", 200],
      ["Light", 300],
      ["Regular", 400],
      ["Medium", 500],
      ["SemiBold", 600],
      ["DemiBold", 600],
      ["Bold", 700],
      ["ExtraBold", 800],
      ["UltraBold", 800],
      ["Black", 900],
      ["Heavy", 900],
    ])("%s → %d", (subfamily, expected) => {
      expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
    });
  });

  describe("spaced forms (the bug fix)", () => {
    it.each([
      ["Extra Light", 200],
      ["Ultra Light", 200],
      ["Semi Bold", 600],
      ["Demi Bold", 600],
      ["Extra Bold", 800],
      ["Ultra Bold", 800],
    ])("%s → %d", (subfamily, expected) => {
      expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
    });
  });

  describe("hyphenated forms (the bug fix)", () => {
    it.each([
      ["Extra-Light", 200],
      ["Ultra-Light", 200],
      ["Semi-Bold", 600],
      ["Demi-Bold", 600],
      ["Extra-Bold", 800],
      ["Ultra-Bold", 800],
    ])("%s → %d", (subfamily, expected) => {
      expect(inferWeightFromSubfamily(subfamily)).toBe(expected);
    });
  });

  describe("composite styles", () => {
    it("Bold Italic still detects Bold", () => {
      expect(inferWeightFromSubfamily("Bold Italic")).toBe(700);
    });
    it("Semi Bold Italic still detects SemiBold (priority over Bold)", () => {
      expect(inferWeightFromSubfamily("Semi Bold Italic")).toBe(600);
    });
    it("ExtraBold Italic still detects ExtraBold (priority over Bold)", () => {
      expect(inferWeightFromSubfamily("ExtraBold Italic")).toBe(800);
    });
  });

  it("unknown subfamily falls back to 400 (Regular)", () => {
    expect(inferWeightFromSubfamily("Headline")).toBe(400);
    expect(inferWeightFromSubfamily("")).toBe(400);
    expect(inferWeightFromSubfamily("Some Random Style")).toBe(400);
  });

  it("is case-insensitive", () => {
    expect(inferWeightFromSubfamily("EXTRA LIGHT")).toBe(200);
    expect(inferWeightFromSubfamily("extra light")).toBe(200);
    expect(inferWeightFromSubfamily("ExTrA LiGhT")).toBe(200);
  });
});

describe("canonicalizeFamily", () => {
  it("returns family unchanged when no weight token is trailing", () => {
    expect(canonicalizeFamily("Inter")).toEqual({
      canonical: "Inter",
      inferredWeight: null,
    });
    expect(canonicalizeFamily("Tiempos Headline")).toEqual({
      canonical: "Tiempos Headline",
      inferredWeight: null,
    });
    expect(canonicalizeFamily("Söhne Breit")).toEqual({
      canonical: "Söhne Breit",
      inferredWeight: null,
    });
  });

  it("strips trailing weight tokens and surfaces the implied weight", () => {
    expect(canonicalizeFamily("Inter Medium")).toEqual({
      canonical: "Inter",
      inferredWeight: 500,
    });
    expect(canonicalizeFamily("Inter Light")).toEqual({
      canonical: "Inter",
      inferredWeight: 300,
    });
    expect(canonicalizeFamily("Inter Bold")).toEqual({
      canonical: "Inter",
      inferredWeight: 700,
    });
    expect(canonicalizeFamily("Funnel Display Light")).toEqual({
      canonical: "Funnel Display",
      inferredWeight: 300,
    });
  });

  it("preserves width modifiers before the weight token", () => {
    expect(canonicalizeFamily("Inter Tight Medium")).toEqual({
      canonical: "Inter Tight",
      inferredWeight: 500,
    });
  });

  it("emits 950 for ExtraBlack / UltraBlack (mirrors foundry intent)", () => {
    expect(canonicalizeFamily("Inter ExtraBlack")).toEqual({
      canonical: "Inter",
      inferredWeight: 950,
    });
  });

  it("returns empty input unchanged", () => {
    expect(canonicalizeFamily("")).toEqual({
      canonical: "",
      inferredWeight: null,
    });
  });
});

describe("extractFontMetadata", () => {
  // Light integration tests against the public surface. They use a real temp
  // directory and verify the manifest shape without needing fixture font
  // binaries: the missing-directory and empty-directory cases exercise the
  // no-fonts paths, which must still produce a well-formed manifest on disk.

  it("returns an empty manifest when the fonts directory doesn't exist", () => {
    withTempDir((tmp) => {
      const outputPath = join(tmp, "manifest.json");
      const manifest = extractFontMetadata(join(tmp, "does-not-exist"), outputPath);
      expect(manifest.files).toEqual([]);
      expect(manifest.families).toEqual([]);
      expect(manifest.unidentified).toEqual([]);
      expect(existsSync(outputPath)).toBe(true);
      const written = JSON.parse(readFileSync(outputPath, "utf-8")) as typeof manifest;
      expect(written.files).toEqual([]);
      expect(written.meta.tool).toBe("fontkit");
      expect(typeof written.meta.generatedAt).toBe("string");
    });
  });

  it("writes a manifest with the documented meta shape", () => {
    withTempDir((tmp) => {
      const outputPath = join(tmp, "manifest.json");
      const manifest = extractFontMetadata(tmp, outputPath);
      expect(manifest.meta.tool).toBe("fontkit"); // no version hardcoded — moves with the dep
      // generatedAt is an ISO string
      expect(manifest.meta.generatedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
      // The title says "writes": the file on disk must round-trip to the returned value.
      expect(existsSync(outputPath)).toBe(true);
      expect(JSON.parse(readFileSync(outputPath, "utf-8"))).toEqual(manifest);
    });
  });
});
/** Type guard: true when fontkit returned a multi-font container (TTC / DFont) rather than one font. */
function isFontCollection(value: Font | FontCollection): value is FontCollection {
  return value.type === "TTC" || value.type === "DFont";
}

export interface FontFileMetadata {
  /** Filename relative to capture/assets/fonts/ (e.g. "19cfc7226ec3afaa-s.woff2") */
  file: string;
  /**
   * Canonical family name. Many static-weight font files package each weight as
   * a separate "family" (e.g. "Inter Medium" instead of "Inter"). This field has
   * trailing weight tokens stripped so multiple weights of the same typographic
   * family aggregate cleanly. See rawFamily for the unmodified value.
   */
  family: string;
  /**
   * Family name before canonicalization. Source precedence:
   *   1. fontkit's `familyName` for the font
   *   2. Fallback: the PostScript name up to the first `-`
   *      (e.g. PostScript "Inter-Regular" → "Inter")
   * Empty string when both are absent (i.e. when `identified` is false).
   *
   * NOTE(review): whether fontkit's `familyName` prefers nameID 16 over
   * nameID 1 is not visible from this file — confirm against fontkit.
   */
  rawFamily: string;
  /** Subfamily / style name as reported by fontkit's `subfamilyName` (e.g. "Regular", "Bold Italic") */
  subfamily: string;
  /** PostScript name as reported by fontkit's `postscriptName` (e.g. "Inter-Regular") */
  postscript: string;
  /**
   * Weight value. Taken from the OS/2 `usWeightClass` when that is a positive
   * number; otherwise from a weight token stripped off the family name, and
   * finally guessed from the subfamily.
   * Other values you may see:
   *   - `0`: returned when the file is `identified: false` (nothing to infer
   *     from); treat as unknown.
   *   - `950`: emitted by the family-name canonicalization when a foundry
   *     packaged "ExtraBlack" or "UltraBlack" as its own family. This is
   *     outside the 100-900 standard range but mirrors the foundry intent.
   * For variable fonts, this is presumably the file's default axis position —
   * see `variationAxes` for the axes present.
   */
  weight: number;
  /** "normal" or "italic" — derived from the subfamily text and the OS/2 fsSelection italic/oblique bits */
  style: "normal" | "italic";
  /** If this is a variable font, the axes present (e.g. ["wght", "slnt"]). Empty for static fonts. */
  variationAxes: string[];
  /** True when a family or PostScript name could be read from the font binary. */
  identified: boolean;
}

export interface FontFamilySummary {
  /** Canonical family name shared by every file in this group */
  family: string;
  /** Distinct non-zero weights seen across the family's files, ascending */
  weights: number[];
  /** Whether any file in this family is a variable font */
  variable: boolean;
  /** Number of files in this family (typically subsets of the same weight) */
  fileCount: number;
  /** Files in this family, sorted — useful for picking the @font-face src */
  files: string[];
}

export interface FontsManifest {
  /** Per-file metadata, one entry per font file found (identified or not) */
  files: FontFileMetadata[];
  /** Aggregated per-family summary — most useful for DESIGN.md authoring */
  families: FontFamilySummary[];
  /** Files where identification failed entirely. Should be empty for typical captures. */
  unidentified: string[];
  /** Generated-at timestamp + tool name for debugging */
  meta: { generatedAt: string; tool: string };
}

/** Extensions treated as font files when scanning the fonts directory. */
const FONT_FILE_RE = /\.(woff2?|ttf|otf)$/i;

/**
 * Read all font files in fontsDir, extract metadata via fontkit, and write
 * the manifest to outputPath. Returns the manifest in case callers want to log it.
 *
 * Per-font failures are non-fatal: a font that cannot be parsed or named is
 * still listed in `files` (with `identified: false`) and its filename is added
 * to `unidentified`. If the fonts directory doesn't exist, an empty manifest is
 * written and returned. Filenames are processed in sorted order so the
 * manifest does not depend on directory-listing order.
 *
 * Errors from writing the manifest itself are not caught here and propagate
 * to the caller.
 *
 * @param fontsDir   Directory to scan (non-recursive) for .woff/.woff2/.ttf/.otf files.
 * @param outputPath Path of the JSON manifest to write.
 * @returns The manifest that was written.
 */
export function extractFontMetadata(fontsDir: string, outputPath: string): FontsManifest {
  const files: FontFileMetadata[] = [];

  if (existsSync(fontsDir)) {
    const fontFiles = readdirSync(fontsDir)
      .filter((name) => FONT_FILE_RE.test(name))
      .sort();
    for (const filename of fontFiles) {
      files.push(readSingleFont(join(fontsDir, filename), filename));
    }
  }

  const unidentified = files.filter((meta) => !meta.identified).map((meta) => meta.file);

  const manifest: FontsManifest = {
    files,
    families: aggregateFamilies(files),
    unidentified,
    meta: {
      generatedAt: new Date().toISOString(),
      // Record just the tool name; the version moves with the dep and would
      // otherwise drift from a hardcoded string on every fontkit bump.
      tool: "fontkit",
    },
  };

  writeFileSync(outputPath, JSON.stringify(manifest, null, 2), "utf-8");
  return manifest;
}

/**
 * Parse one font file and describe it. Never throws: any read/parse failure,
 * an empty collection, or a font with neither family nor PostScript name
 * yields the "empty" record (`identified: false`, weight 0).
 */
// fallow-ignore-next-line complexity
function readSingleFont(fullPath: string, filename: string): FontFileMetadata {
  const empty: FontFileMetadata = {
    file: filename,
    family: "",
    rawFamily: "",
    subfamily: "",
    postscript: "",
    weight: 0,
    style: "normal",
    variationAxes: [],
    identified: false,
  };

  try {
    const buf = readFileSync(fullPath);
    // fontkit.create returns Font | FontCollection. For TTC/DFont collections,
    // take the first font inside; otherwise the value is already a single Font.
    const created: Font | FontCollection = fontkit.create(buf);
    const font: Font | undefined = isFontCollection(created) ? created.fonts[0] : created;
    if (!font) return empty;

    const rawFamily = (font.familyName || "").trim();
    const subfamily = (font.subfamilyName || "").trim();
    const postscript = (font.postscriptName || "").trim();
    if (!rawFamily && !postscript) return empty; // nothing to identify the font by

    const os2 = font["OS/2"];
    const italicBit = Boolean(os2?.fsSelection?.italic || os2?.fsSelection?.oblique);
    const style: "normal" | "italic" =
      italicBit || /italic|oblique/i.test(subfamily) ? "italic" : "normal";
    const variationAxes = font.variationAxes ? Object.keys(font.variationAxes) : [];

    const familyForCanonicalization = rawFamily || deriveFamilyFromPostscript(postscript);
    const { canonical, inferredWeight } = canonicalizeFamily(familyForCanonicalization);

    // Weight 0 is this module's "unknown" marker, so a zero (or otherwise
    // non-positive) usWeightClass must not block the name-based fallbacks.
    const os2Weight = os2?.usWeightClass;
    const weight =
      typeof os2Weight === "number" && os2Weight > 0
        ? os2Weight
        : (inferredWeight ?? inferWeightFromSubfamily(subfamily));

    return {
      file: filename,
      family: canonical || familyForCanonicalization,
      rawFamily: familyForCanonicalization,
      subfamily,
      postscript,
      weight,
      style,
      variationAxes,
      identified: true,
    };
  } catch {
    // Deliberate best-effort: one unreadable or corrupt font must not abort the
    // whole manifest. The caller records the file as unidentified.
    return empty;
  }
}

/**
 * Aggregate per-file entries into per-family summaries — most useful shape for
 * DESIGN.md. Files with an empty family are skipped; weight 0 (unknown) is not
 * listed. Families are returned sorted by name, with weights ascending and
 * filenames sorted.
 */
// fallow-ignore-next-line complexity
function aggregateFamilies(files: FontFileMetadata[]): FontFamilySummary[] {
  const byFamily = new Map<string, FontFamilySummary>();
  for (const f of files) {
    if (!f.family) continue;
    let entry = byFamily.get(f.family);
    if (!entry) {
      entry = { family: f.family, weights: [], variable: false, fileCount: 0, files: [] };
      byFamily.set(f.family, entry);
    }
    entry.fileCount++;
    entry.files.push(f.file);
    if (f.variationAxes.length > 0) entry.variable = true;
    if (f.weight && !entry.weights.includes(f.weight)) entry.weights.push(f.weight);
  }
  for (const entry of byFamily.values()) {
    entry.weights.sort((a, b) => a - b);
    entry.files.sort();
  }
  return Array.from(byFamily.values()).sort((a, b) => a.family.localeCompare(b.family));
}

/**
 * PostScript names follow the convention `Family-Style`. When the family name
 * is missing but PostScript is present, recover the family portion as a
 * best-effort fallback: everything before the first `-`, or the whole name
 * when there is no dash (or the dash is the first character).
 */
function deriveFamilyFromPostscript(postscript: string): string {
  if (!postscript) return "";
  const dashIdx = postscript.indexOf("-");
  return (dashIdx > 0 ? postscript.slice(0, dashIdx) : postscript).trim();
}
/**
 * Fallback used when a font has no usable OS/2 weight: guess the numeric
 * weight from style words in the subfamily ("Bold", "Extra Light", ...).
 *
 * Whitespace and hyphens are removed before matching, so "Extra Light",
 * "Extra-Light" and "ExtraLight" all resolve to 200. Matching is by substring,
 * which lets composite styles such as "Semi Bold Italic" work; the checks run
 * most-specific-first ("extralight" before "light", "semibold"/"extrabold"
 * before "bold") so a compound never degrades to its base word.
 *
 * "Hairline" maps to 100, in line with WEIGHT_TOKEN_TO_VALUE. Black variants
 * (including "ExtraBlack") are reported as 900 by this function.
 *
 * @param subfamily Style name such as "Regular" or "Bold Italic"; may be empty.
 * @returns A weight in 100–900; 400 when no known weight word is present.
 *
 * Exported for unit testing.
 */
// fallow-ignore-next-line complexity
export function inferWeightFromSubfamily(subfamily: string): number {
  const s = subfamily.toLowerCase().replace(/[\s-]+/g, "");
  if (s.includes("thin") || s.includes("hairline")) return 100;
  if (s.includes("extralight") || s.includes("ultralight")) return 200;
  if (s.includes("light")) return 300;
  if (s.includes("medium")) return 500;
  if (s.includes("semibold") || s.includes("demibold")) return 600;
  if (s.includes("extrabold") || s.includes("ultrabold")) return 800;
  if (s.includes("black") || s.includes("heavy")) return 900;
  if (s.includes("bold")) return 700;
  return 400;
}

/**
 * Map of trailing weight tokens found in family names (e.g. "Inter Medium" →
 * "Inter") to their numeric weight. Used to canonicalize family names when a
 * foundry packaged each weight as a separate "family".
 *
 * Conservative: only well-known English weight tokens are listed. Width
 * modifiers like "Tight", "Condensed", "Extended" are intentionally absent —
 * they denote separate typographic families, not weight variants. Localized
 * weight tokens (German "Fett", "Extrafett"; French "Maigre"; etc.) and
 * abbreviations ("ExtBd", "ExtBlk") are absent too, so such families stay
 * separate, which is an honest representation of what's in the file.
 */
const WEIGHT_TOKEN_TO_VALUE: Record<string, number> = {
  Thin: 100,
  Hairline: 100,
  ExtraLight: 200,
  UltraLight: 200,
  Light: 300,
  Book: 400,
  Regular: 400,
  Normal: 400,
  Medium: 500,
  SemiBold: 600,
  DemiBold: 600,
  Bold: 700,
  ExtraBold: 800,
  UltraBold: 800,
  Black: 900,
  Heavy: 900,
  ExtraBlack: 950,
  UltraBlack: 950,
};

/** Case-insensitive view of WEIGHT_TOKEN_TO_VALUE, keyed by lowercased token. */
const WEIGHT_BY_LOWERCASE_TOKEN = new Map<string, number>(
  Object.entries(WEIGHT_TOKEN_TO_VALUE).map(([token, value]): [string, number] => [
    token.toLowerCase(),
    value,
  ]),
);

/** A single known weight token at the end of the name, preceded by whitespace. */
const WEIGHT_TOKEN_RE = new RegExp(`\\s+(${Object.keys(WEIGHT_TOKEN_TO_VALUE).join("|")})$`, "i");

/**
 * A two-word weight at the end of the name ("Semi Bold", "Extra-Light"),
 * preceded by whitespace. The words may be separated by spaces or hyphens.
 */
const COMPOUND_WEIGHT_RE = /\s+(Semi|Extra|Ultra|Demi)[\s-]+(Bold|Black|Light)$/i;

/**
 * Strip a trailing weight token from a family name and return both the
 * canonicalized form and the weight value the stripped token implied.
 *
 * Examples:
 *   "Inter Medium"          → { canonical: "Inter",          inferredWeight: 500 }
 *   "Inter Tight Medium"    → { canonical: "Inter Tight",    inferredWeight: 500 }
 *   "Inter Semi-Bold"       → { canonical: "Inter",          inferredWeight: 600 }
 *   "Funnel Display Light"  → { canonical: "Funnel Display", inferredWeight: 300 }
 *   "Tiempos Headline"      → { canonical: "Tiempos Headline", inferredWeight: null }
 *   "Söhne Breit Extrafett" → { canonical: "Söhne Breit Extrafett", inferredWeight: null }
 *
 * Trailing "Italic"/"Oblique" is stripped before weight detection so families
 * like "Inter Italic" or "Inter Medium Italic" canonicalize correctly.
 *
 * A two-word ending whose joined form is not a known token (e.g. "Semi Light")
 * is left exactly as written with a null weight: stripping only "Light" would
 * report the wrong weight and leave a dangling "Semi" in the family name.
 *
 * The token must follow at least one other word, so a family that consists
 * solely of a weight word (e.g. "Bold") is returned unchanged.
 *
 * @param family Family name as read from the font; may be empty.
 * @returns `canonical` (the name without trailing style/weight words) and
 *          `inferredWeight` (the stripped token's weight, or null if none).
 */
// Exported for unit testing.
// fallow-ignore-next-line complexity
export function canonicalizeFamily(family: string): {
  canonical: string;
  inferredWeight: number | null;
} {
  if (!family) return { canonical: family, inferredWeight: null };

  // Italic/oblique is carried by the style field, so drop it before looking at weight.
  const base = family
    .trim()
    .replace(/\s+(Italic|Oblique)$/i, "")
    .trim();

  // Two-word weights first, so "Semi Bold" is never mistaken for a bare "Bold".
  const compound = COMPOUND_WEIGHT_RE.exec(base);
  if (compound) {
    const joined = `${compound[1] ?? ""}${compound[2] ?? ""}`.toLowerCase();
    const weight = WEIGHT_BY_LOWERCASE_TOKEN.get(joined);
    if (weight === undefined) return { canonical: base, inferredWeight: null };
    return { canonical: base.slice(0, compound.index).trim(), inferredWeight: weight };
  }

  const single = WEIGHT_TOKEN_RE.exec(base);
  if (!single) return { canonical: base, inferredWeight: null };
  return {
    canonical: base.slice(0, single.index).trim(),
    inferredWeight: WEIGHT_BY_LOWERCASE_TOKEN.get((single[1] ?? "").toLowerCase()) ?? null,
  };
}
" (variable)" : ""} × ${f.fileCount}`) + .join(", "); + console.log(`Font metadata extracted: ${summary}`); + if (fontsManifest.unidentified.length > 0) { + console.warn( + ` ${fontsManifest.unidentified.length} font file(s) could not be identified — DESIGN.md should flag these explicitly.`, + ); + } + } + } catch (err) { + console.warn( + "Font metadata extraction failed (non-fatal):", + err instanceof Error ? err.message : err, + ); + } + // Save animation catalog — lean version for the agent (not 745 raw CSS declarations) if (animationCatalog) { // Extract just what's useful: counts, named animations, a few representative keyframed entries