diff --git a/.github/workflows/monthly-vision-eval.yml b/.github/workflows/monthly-vision-eval.yml index fe9a6b7..6b38e03 100644 --- a/.github/workflows/monthly-vision-eval.yml +++ b/.github/workflows/monthly-vision-eval.yml @@ -12,18 +12,18 @@ # # Why monthly not weekly: vision scoring costs real money # (Anthropic vision API, billed per image input token). At Haiku -# defaults ~$0.001/image; 600 text images + 40 image-gen images -# ~= $0.60-$1.00/run. Monthly cadence keeps the annual bill +# defaults ~$0.001/image; 300 text images + 40 image-gen images +# ~= $0.30-$0.40/run. Monthly cadence keeps the annual bill # bounded and still catches drift well ahead of any six-month # signal loss. # -# Budget breakdown: -# - Text gen (600 samples): free (CF OSS) -# - Text vision (600 images): ~$0.60 +# Budget breakdown (5 models, n=30, raw + compiled = 300 samples): +# - Text gen (300 samples): free (CF OSS) +# - Text vision (300 images): ~$0.30 # - Image gen (40 images): free (CF free tier, <10k # neurons/day) # - Image vision (40 images): ~$0.04 -# Total: ~$0.60-$1 / month +# Total: ~$0.30-$0.40 / month # # Secrets required on the repo: # - CF_API_TOKEN + CF_ACCOUNT_ID: text generation via CF Workers AI @@ -105,6 +105,7 @@ jobs: --brief briefs/landing.yml \ --models "$MODELS" \ --n "$N" \ + --sample-concurrency 6 \ --out evals \ --report "docs/evals/monthly/${REPORT_DATE}-source.md" echo "REPORT_DATE=$REPORT_DATE" >> "$GITHUB_ENV" diff --git a/.github/workflows/weekly-eval.yml b/.github/workflows/weekly-eval.yml index 4c38d84..2893d77 100644 --- a/.github/workflows/weekly-eval.yml +++ b/.github/workflows/weekly-eval.yml @@ -67,7 +67,7 @@ jobs: CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }} CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }} run: | - # Five OSS cells at n=30 = 300 raw + 300 compiled = 600 calls. + # Five OSS models at n=30 = 150 raw + 150 compiled = 300 calls. # Well within CF Workers AI free tier (10k neurons/day). 
If the # model list needs updating, bump the comma list below; the # workflow is intentionally explicit rather than auto-discovered @@ -81,6 +81,7 @@ jobs: --brief briefs/landing.yml \ --models cf:@cf/google/gemma-4-26b-a4b-it,cf:@cf/meta/llama-4-scout-17b-16e-instruct,cf:@cf/mistralai/mistral-small-3.1-24b-instruct,cf:@cf/openai/gpt-oss-120b,cf:@cf/qwen/qwen3-30b-a3b-fp8 \ --n 30 \ + --sample-concurrency 6 \ --out "$OUT_DIR" \ --report "$REPORT_PATH" echo "REPORT_PATH=$REPORT_PATH" >> "$GITHUB_ENV" diff --git a/docs/artwork/stickers/02-rule-final.svg b/docs/artwork/stickers/02-rule-final.svg new file mode 100644 index 0000000..8b2ade7 --- /dev/null +++ b/docs/artwork/stickers/02-rule-final.svg @@ -0,0 +1,377 @@ + + + + + + + + + + + + Get Started → + + + + + no-vibes-buttons + + + + AHD + Artificial Human Design + + ahd.adastra.computer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/artwork/stickers/02-rule-print.pdf b/docs/artwork/stickers/02-rule-print.pdf new file mode 100644 index 0000000..23a936b Binary files /dev/null and b/docs/artwork/stickers/02-rule-print.pdf differ diff --git a/docs/artwork/stickers/02-rule-print.png b/docs/artwork/stickers/02-rule-print.png new file mode 100644 index 0000000..3b3fef6 Binary files /dev/null and b/docs/artwork/stickers/02-rule-print.png differ diff --git a/docs/artwork/stickers/02-rule-print.svg 
b/docs/artwork/stickers/02-rule-print.svg new file mode 100644 index 0000000..7e9c4bf --- /dev/null +++ b/docs/artwork/stickers/02-rule-print.svg @@ -0,0 +1,2389 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/artwork/stickers/03-manifest-final.svg b/docs/artwork/stickers/03-manifest-final.svg new file mode 100644 index 0000000..26b0c33 --- /dev/null +++ b/docs/artwork/stickers/03-manifest-final.svg @@ -0,0 +1,379 @@ + + + + + + + + + + rules/no-vibes-buttons.yml + + + + + id: no-vibes-buttons + level: error + family: editorial + applies_to: + - token_pack: !default + reason: > + purple gradient CTAs + are the AI-design tell. 
+ + + + + + + AHD + Artificial Human Design + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/artwork/stickers/03-manifest-print.pdf b/docs/artwork/stickers/03-manifest-print.pdf new file mode 100644 index 0000000..86e1622 Binary files /dev/null and b/docs/artwork/stickers/03-manifest-print.pdf differ diff --git a/docs/artwork/stickers/03-manifest-print.png b/docs/artwork/stickers/03-manifest-print.png new file mode 100644 index 0000000..bb820c8 Binary files /dev/null and b/docs/artwork/stickers/03-manifest-print.png differ diff --git a/docs/artwork/stickers/03-manifest-print.svg b/docs/artwork/stickers/03-manifest-print.svg new file mode 100644 index 0000000..601d3fd --- /dev/null +++ b/docs/artwork/stickers/03-manifest-print.svg @@ -0,0 +1,2443 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/artwork/stickers/README.md b/docs/artwork/stickers/README.md new file mode 100644 index 0000000..ae21f0c --- /dev/null +++ b/docs/artwork/stickers/README.md @@ -0,0 +1,42 @@ +# Stickers + +Conference-distribution stickers for AHD. Two directions, both 3" × 3" die-cut matte vinyl, pure black on natural cream. + +## Files + +| File | Purpose | +| --- | --- | +| `02-rule-final.svg` | Editable source for *The Rule*. Live `` elements, font references intact. | +| `02-rule-print.svg` | Production. Text outlined to paths. No font dependency. | +| `02-rule-print.pdf` | Production. Upload to printer. Vector, 3" page. | +| `02-rule-print.png` | Preview only. 900 × 900 px (= 3" at 300 DPI). | +| `03-manifest-final.svg` | Editable source for *The Manifest*. | +| `03-manifest-print.svg` | Production. Text outlined. | +| `03-manifest-print.pdf` | Production. Upload to printer. | +| `03-manifest-print.png` | Preview only. | + +QR codes encode `https://ahd.adastra.computer` and are scanned-verified. 
+ +## Regenerating from sources + +If you edit either `-final.svg`, regenerate the `-print` outputs with Inkscape under nix-shell so fonts resolve correctly: + +```sh +nix-shell -E 'with import <nixpkgs> {}; let + fontsConf = makeFontsConf { fontDirectories = [ inter jetbrains-mono ]; }; +in mkShell { + buildInputs = [ inkscape fontconfig ]; + shellHook = "export FONTCONFIG_FILE=" + fontsConf; +}' --run ' +for stem in 02-rule-final 03-manifest-final; do + out="${stem%-final}-print" + inkscape "${stem}.svg" --export-text-to-path --export-plain-svg --export-type=svg --export-filename="${out}.svg" + inkscape "${out}.svg" --export-type=pdf --export-filename="${out}.pdf" + inkscape "${out}.svg" --export-type=png --export-dpi=300 --export-filename="${out}.png" +done +' +``` + +## Font note + +The editable sources reference Inter (SIL OFL) and JetBrains Mono (Apache 2.0). Both are free for commercial print. The live AHD site uses Neue Haas Grotesk (Linotype, paid); if you have a license, swap `font-family` in the `-final.svg` files before regenerating. diff --git a/src/eval/runners/openai.ts b/src/eval/runners/openai.ts index 89f3b2c..4b470d4 100644 --- a/src/eval/runners/openai.ts +++ b/src/eval/runners/openai.ts @@ -19,9 +19,20 @@ export function openaiRunner(options: { * fields to the underlying model as extra generation params. */ extraBody?: Record; + /** + * Per-request wall-clock cap in milliseconds. A request that hasn't + * resolved by this point is aborted so it surfaces as a caught error + * (the caller writes a `.error.txt` and moves on) rather than hanging + * the whole run. Without it a single stalled upstream connection + * blocks a serial eval forever — the failure mode that silently ate + * the full 60-minute CI ceiling. Defaults to 120s, comfortably above + * the slowest legitimate generation (~25-30s) observed on CF. + */ + timeoutMs?: number; }): ModelRunner { const model = options.model ?? "gpt-5"; const baseURL = options.baseURL ??
"https://api.openai.com/v1"; + const timeoutMs = options.timeoutMs ?? 120_000; return { id: model, provider: "openai", @@ -31,20 +42,29 @@ export function openaiRunner(options: { if (input.systemPrompt) messages.push({ role: "system", content: input.systemPrompt }); messages.push({ role: "user", content: input.userPrompt }); - const res = await fetch(`${baseURL}/chat/completions`, { - method: "POST", - headers: { - Authorization: `Bearer ${options.apiKey}`, - "content-type": "application/json", - }, - body: JSON.stringify({ - model, - messages, - max_completion_tokens: input.maxTokens ?? 4096, - seed: input.seed, - ...(options.extraBody ?? {}), - }), - }); + let res: Response; + try { + res = await fetch(`${baseURL}/chat/completions`, { + method: "POST", + headers: { + Authorization: `Bearer ${options.apiKey}`, + "content-type": "application/json", + }, + body: JSON.stringify({ + model, + messages, + max_completion_tokens: input.maxTokens ?? 4096, + seed: input.seed, + ...(options.extraBody ?? {}), + }), + signal: AbortSignal.timeout(timeoutMs), + }); + } catch (err) { + if (err instanceof Error && err.name === "TimeoutError") { + throw new Error(`openai ${model}: request timed out after ${timeoutMs}ms`); + } + throw err; + } if (!res.ok) { throw new Error(`openai ${model}: ${res.status} ${await res.text()}`); } diff --git a/src/eval/runners/workers-ai.ts b/src/eval/runners/workers-ai.ts index 4721f22..3b52be4 100644 --- a/src/eval/runners/workers-ai.ts +++ b/src/eval/runners/workers-ai.ts @@ -8,7 +8,7 @@ export const WORKERS_AI_DEFAULTS = [ "@cf/qwen/qwq-32b", "@cf/qwen/qwen2.5-coder-32b-instruct", "@cf/mistralai/mistral-small-3.1-24b-instruct", - "@cf/google/gemma-3-12b-it", + "@cf/google/gemma-4-26b-a4b-it", ] as const; // Model-family-specific generation knobs. Cloudflare Workers AI's