diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock
deleted file mode 100644
index a2cec532..00000000
--- a/.claude/scheduled_tasks.lock
+++ /dev/null
@@ -1 +0,0 @@
-{"sessionId":"f2210cba-a155-40b7-87ba-f76b37114205","pid":12996,"acquiredAt":1775866616902}
\ No newline at end of file
diff --git a/.claude/worktrees/semantic-poc b/.claude/worktrees/semantic-poc
deleted file mode 160000
index f1236cc0..00000000
--- a/.claude/worktrees/semantic-poc
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit f1236cc0216d1ee66c60497ec8e021ba2c100884
diff --git a/.gitignore b/.gitignore
index 4eb02ade..4b359265 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,9 @@
# Working session docs (internal notes)
docs/
+# Design exploration HTML mockups (local only)
+public/design-explorations/
+
# Node.js / Frontend
node_modules/
.pnpm-store/
@@ -55,3 +58,4 @@ scripts/.fonts/
# OG mode images: regenerate with `uv run python scripts/generate_mode_og_images.py --all`
# Committed to git (~5MB) since Render build env lacks Python/uv.
+.claude
diff --git a/TODO.md b/TODO.md
index ae84d4e0..82d395cf 100644
--- a/TODO.md
+++ b/TODO.md
@@ -448,3 +448,68 @@ const stats = computed(() => {
});
```
Then `calculateStats(key, max)` becomes `setStatsKey(key, max)` — just sets the refs, Vue handles the rest. Eliminates the entire class of "forgot to recalculate" bugs. Touches: `stores/stats.ts`, `composables/useGamePage.ts`, `pages/profile.vue`.
+
+### 16. Semantic Explorer: viewport-locked layout (like other game modes)
+The semantic page uses a scrollable layout (`overflow-y: auto` on `.semantic-body`) while every other game mode uses `h-[100dvh]` viewport-locked layout via `PageShell`. This causes:
+- Double scrollbar on short desktops (inner `.semantic-body` scroll + page-level scroll)
+- Map SVG overflows its `max-height` container because it renders at intrinsic 520px
+- `min-height: 280px` fights `max-height: calc(100dvh - 310px)` on short viewports
+- Input gets pushed below the fold
+
+**Proper fix:** Refactor `semantic.vue` to use viewport-locked layout like `PageShell`:
+- Left column (map + input): flex column, map grows to fill, input pinned to bottom
+- Right column (compass + hint + leaderboard): flex column with overflow scroll
+- No page-level scroll — everything fits in viewport
+- Expand button goes truly fullscreen (overlay), not just "fill the column"
+
+Current band-aid: `min-height: min(200px, calc(100dvh - 310px))` prevents `min-height` from exceeding viewport, but the SVG still overflows on short desktops. `:deep()` CSS hacks were tried and reverted because they broke the expanded map aspect ratio.
+
+---
+
+## DB Migration — Remove Disk Fallback Paths
+
+**Added**: 2026-04-12
+**Status**: Monitoring — disk fallback paths emit console.warn when hit
+
+Data has been migrated from Render's persistent disk to Postgres:
+- 253K definitions (77K kaikki native + 98K kaikki-en + 77K LLM, source/model provenance tracked)
+- 2.8K word stats
+- 50K embeddings with UMAP/PCA2D coordinates, 70 axes, 4.4M neighbor ranks
+- `model` column added to definitions table (gpt-5.2, wiktionary-kaikki-2024, legacy-unknown)
+
+### Phase 1: Remove disk fallback code (after 2 weeks stable, ~2026-04-26)
+
+- [ ] `server/utils/definitions.ts` — remove Tier 1 disk read, disk write, kaikki in-memory cache (`_kaikkiCache`, `loadKaikkiFile`, `lookupKaikki`, `resolveDefinitionsDir`, `DEFINITIONS_DIR`). Kaikki data is now in the `definitions` table with source='kaikki'/'kaikki-en'.
+- [ ] `server/utils/word-stats.ts` — remove disk read/write fallback + `proper-lockfile` dependency
+- [ ] `server/utils/wiktionary.ts` — remove `readCache`/`writeCache` disk functions
+- [ ] `server/api/[lang]/semantic/hint.post.ts` — remove disk read/write for hints
+- [ ] `server/utils/data-loader.ts` — remove `WORD_DEFS_DIR`, `WORD_STATS_DIR` exports
+- [ ] Remove `proper-lockfile` from package.json
+- [ ] Remove fs imports (`existsSync`, `readFileSync`, `writeFileSync`, `mkdirSync`) from all above files
+
+### Phase 2: Migrate remaining disk-dependent features
+
+- [ ] **Word history** → new DB table `(lang, day_idx, word)`. ~136K rows (80 langs × 1700 days). Eliminates 546MB of `.txt` files on Render disk and disk reads in `word-selection.ts`. Algorithm is deterministic but cache is a safety net against word list changes.
+- [ ] **Word images** → decide: keep on Render persistent disk ($0.40/month for 1.5GB), or move to Cloudflare R2 (free egress, ~$0.003/month for 204MB). Only feature still requiring the persistent disk. No urgency — current setup works.
+- [ ] **`semantic.ts` legacy in-memory loader** — `start.post.ts` and `word/[slug].get.ts` still import `loadSemanticData` which loads the 98MB embedding matrix. Migrate these two endpoints to use `_semantic-db.ts` (DB-backed), then delete `loadSemanticData`/`loadSemanticDataSafe`/`loadEmbeddings` and the entire in-memory path.
+
+### Phase 3: Remove committed heavy files from git
+
+- [ ] `data/semantic/embeddings.f32` + `embeddings.meta.json` (~99MB) — in pgvector
+- [ ] `data/semantic/embeddings.json` (~230MB if present) — in pgvector
+- [ ] `data/semantic/axes.json` — in `semantic_axes` table
+- [ ] `data/semantic/umap.json`, `pca2d.json` — in `word_embeddings` columns
+- [ ] `data/semantic/targets.json`, `vocabulary.json` — queryable from `word_embeddings`
+- [ ] Keep `data/semantic/valid_words.json` (loaded into memory for spellcheck, no DB table)
+- [ ] Keep `data/definitions/` as archive (kaikki data now in DB, but files are small and useful for re-seeding)
+
+### 17. Semantic Explorer OG image
+Design and add `public/images/og-semantic.png` (1200x630) showing the meaning map with dots, compass needle, and the editorial aesthetic. Currently falls back to generic `og-image.png`.
+
+### 18. Semantic best starting words
+Add semantic-specific content to the `/en/best-starting-words` page — tips for first guesses in semantic mode (broad category words, high-information starters). Could be a separate section or tab.
+
+### 19. useGameSeo refactor
+- Silent 60-char truncation: configured titles are dropped without warning when too long. Should either warn at build time or use the configured title regardless.
+- Hardcoded `| Wordle English` suffix: not all modes benefit from the Wordle brand. Add a configurable suffix per mode.
+- No length validation at config time — easy to write titles/descriptions that get silently truncated.
diff --git a/assets/css/design-system.css b/assets/css/design-system.css
index 2ff1e696..db2cf680 100644
--- a/assets/css/design-system.css
+++ b/assets/css/design-system.css
@@ -230,6 +230,25 @@
border-color: var(--color-ink);
}
+/* Thin editorial scrollbar — used on modals, leaderboards, and any scrollable panel. The first rule uses the standard scrollbar-width/scrollbar-color properties; the ::-webkit-scrollbar rules below style the -webkit- scrollbar pseudo-elements. NOTE(review): engines that honour the standard properties may ignore the ::-webkit-scrollbar rules — confirm the 4px width and hover colour still apply where both are supported. */
+.editorial-scroll {
+ scrollbar-width: thin;
+ scrollbar-color: var(--color-rule) transparent;
+}
+.editorial-scroll::-webkit-scrollbar {
+ width: 4px;
+}
+.editorial-scroll::-webkit-scrollbar-track {
+ background: transparent;
+}
+.editorial-scroll::-webkit-scrollbar-thumb {
+ background: var(--color-rule);
+ border-radius: 2px;
+}
+.editorial-scroll::-webkit-scrollbar-thumb:hover {
+ background: var(--color-muted);
+}
+
/* Flag icon — consistent circular flag display */
.flag-icon {
width: 24px;
diff --git a/components/LbAvatar.vue b/components/LbAvatar.vue
new file mode 100644
index 00000000..bc941818
--- /dev/null
+++ b/components/LbAvatar.vue
@@ -0,0 +1,41 @@
+
+
+
diff --git a/components/app/GameModePicker.vue b/components/app/GameModePicker.vue
index e6cf080c..e60e9b29 100644
--- a/components/app/GameModePicker.vue
+++ b/components/app/GameModePicker.vue
@@ -4,14 +4,19 @@
size="lg"
align="top"
no-padding
- :aria-label="`Choose a game mode for ${languageName}`"
+ :aria-label="ui?.choose_game_mode || 'Choose a Game Mode'"
@close="$emit('close')"
>
-
Choose a Game Mode
+
+ {{ ui?.choose_game_mode || 'Choose a Game Mode' }}
+
- Different ways to play — same language, new challenges.
+ {{
+ ui?.game_mode_subtitle ||
+ 'Different ways to play — same language, new challenges.'
+ }}
- Your compass will appear after your first guess.
+ {{ ui?.semantic_compass_empty }}
- No clear bearing
+ {{ ui?.semantic_compass_no_bearing }}
- The compass can't place this one on a known axis — try a word from a different
- corner of meaning.
+ {{
+ ui?.semantic_compass_no_bearing_sub ||
+ "The compass can't place this one on a known axis — try a word from a different corner of meaning."
+ }}
- Play Wordle
+ {{ ui.play_wordle || 'Play Wordle' }}
-
Or pick a language
+
+ {{ ui.or_pick_language || 'Or pick a language' }}
+
) || {};
-const coverageLabel = ui.coverage_label || 'Coverage';
+const coverageLabel = ui.coverage_label;
// SEO templates from language_config.json meta.best_starting_words (merged with defaults)
const meta = (pageData.value.meta as Record) || {};
diff --git a/pages/[lang]/semantic.vue b/pages/[lang]/semantic.vue
index ec566408..4bd69f94 100644
--- a/pages/[lang]/semantic.vue
+++ b/pages/[lang]/semantic.vue
@@ -2,7 +2,7 @@
/**
* Semantic Explorer — production game mode page.
*
- * Navigate meaning space to find a hidden target word in 15 guesses. Each
+ * Navigate meaning space to find a hidden target word. Each
* guess receives a rank (#1 = target) based on its cosine position in the
* 50k-word neighbour list. The map is target-centered with radius proportional
* to log-rank and angle from the UMAP 2D projection.
@@ -17,10 +17,11 @@ import { createGameConfig } from '~/utils/game-modes';
import MapFrame from '~/components/shared/MapFrame.vue';
import MeaningMap, { type MapDot } from '~/components/shared/MeaningMap.vue';
import { buildSemanticGradientFromCSS, sampleGradient } from '~/utils/semanticColor';
+import { interpolate } from '~/utils/interpolate';
definePageMeta({
layout: 'game',
- key: (route) => `${route.params.lang}-semantic-${route.query.play || 'daily'}`,
+ key: (route) => `${route.params.lang}-semantic-${route.query.play || 'daily'}`, // keep the 'daily' default: a missing ?play must not yield a "...-semantic-undefined" key
});
const route = useRoute();
@@ -103,10 +104,14 @@ const latestGuessWord = computed(() => {
);
});
+// --- UI strings (localized; optional chaining means this can be undefined, so use sites supply English fallbacks) ---
+const ui = computed(() => langStore.config?.ui);
+
// --- Header meta ---
-const headerTitle = computed(() => 'Semantic Explorer');
+const headerTitle = computed(() => ui.value?.semantic_title || 'Semantic Explorer');
const headerSubtitle = computed(() => {
- if (isUnlimited.value) return `${configVal.name_native || lang} · Unlimited`;
+ const unlimitedLabel = ui.value?.semantic_unlimited || 'Unlimited'; // fallback so the subtitle never renders "· undefined"
+ if (isUnlimited.value) return `${configVal.name_native || lang} · ${unlimitedLabel}`;
return sem.dayIdx.value
? `${configVal.name_native || lang} · #${sem.dayIdx.value}`
: configVal.name_native || lang;
@@ -332,10 +337,11 @@ const { shareResults } = useGameShare();
async function onShare() {
const bestRank = sem.bestGuess.value?.rank;
const attemptsText = sem.won.value ? String(sem.guesses.value.length) : 'x';
+ const semanticTitle = ui.value?.semantic_title || 'Semantic Explorer'; // English fallback: shared text must never contain "undefined"
const shareText =
- `Semantic Explorer ${langStore.languageCode.toUpperCase()} #${sem.dayIdx.value}` +
+ `${semanticTitle} ${langStore.languageCode.toUpperCase()} #${sem.dayIdx.value}` +
` · ${sem.won.value ? `${sem.guesses.value.length}/${sem.maxGuesses.value}` : 'X/' + sem.maxGuesses.value}` +
- (bestRank ? `\nBest rank: #${bestRank.toLocaleString()}` : '');
+ (bestRank ? `\n${ui.value?.semantic_best_rank || 'Best rank'}: #${bestRank.toLocaleString()}` : '');
await shareResults({
shareText,
@@ -393,20 +399,24 @@ function onKeepPlaying() {
v-if="gameUnavailable"
class="flex flex-col items-center justify-center flex-1 px-6 py-20 text-center"
>
-
Semantic Explorer
+
+ {{ ui?.semantic_title }}
+
- This mode is temporarily unavailable — the word embedding data is being generated.
- Check back in a few minutes.
+ {{
+ ui?.semantic_unavailable ||
+ 'This mode is temporarily unavailable — the word embedding data is being generated. Check back in a few minutes.'
+ }}
+
+ All-time records across all languages and modes.
+
+
+ Consecutive days with any daily win. Play every day to climb.
+
+
+ Average score per day — higher is better. Min {{ minDays }} days played to
+ qualify.
+
+
+ Average guesses per day — lower is better. Min {{ minDays }} days played
+ to qualify.
+
+
+ Highest score wins. Points from words solved, combos, and speed.
+
+ Fewest guesses wins. Ties broken by who played first.
+
+
+
+
+
+
+
+
+
+
+ No records yet
+
+
+ Play more games to set records.
+
+
+
+
+
+ {{ rec.label }}
+
+
+ {{ rec.value }}
+
+
+
+
+ {{ rec.username }}
+
+
+
+
+
+
+
+
+
+
+ Sign in to compete
+
+
+ Join 1.6 million players worldwide. Create a free account to see where
+ you rank
+
+ among today's {{ total.toLocaleString() }} players.
+
+ on today's leaderboard.
+
+
+ 6.4M+ games played · 80 languages
+
+
+ Sign in
+
+
+
+
+
Today's rankings
+
+
+
+
+
+
+
+
+
+ New day, empty board
+
+
+ Be the first to solve today's word and claim the #1 spot.
+
+
+ Play now
+
+
+
+
+
+
+
+
+
+
+
+ You haven't played today
+
+
+ Solve today's puzzle to see where you rank
+
+
+ Play now
+
+
+
+
+
+
+
+
+
+
+
+
+
⋮
+
+
#{{ displayRank }}
+
+
+
+ {{ you.username }}
+ YOU
+
+
+ {{ you.daysPlayed }} days
+
+
+
+
+ {{ formatScore(you) }}
+
+
+ #{{ you.rank }} of {{ total.toLocaleString() }}
+
- How players discover, choose, and move between play types across every game mode, every surface, and every screen.
-
-
- April 2026
- ·
- Wordle Global
-
-
-
-
-
-
-
01
-
The Problem
-
Daily and unlimited are tangled into the mode list
-
-
- Today the sidebar lists 10 game modes as flat items. Five of those are
- multi-board variants (Dordle through Duotrigordle) that differ only in board count.
- "Daily Puzzle" and "Unlimited" are separate modes, but they're actually
- play types of the same Classic game.
-
-
- As we add daily variants to multi-board modes, Speed, and unlimited to Semantic,
- the sidebar would double. The conceptual model is wrong: Dordle Daily and Dordle
- Unlimited aren't different games. The navigation should reflect that.
-
-
-
- Core insight: the game mode matrix has two independent axes —
- board type (Classic, Multi-Board, Semantic, Speed) and play type
- (Daily, Unlimited). The sidebar conflates them. Untangle them.
-
-
-
What needs to work:
-
- • Player finishes daily Quordle → discovers unlimited Quordle exists
- • Sidebar shows 5 items, not 14
- • Archive shows daily history for every mode at /en/archive
- • One product-wide streak across all daily modes and languages
- • Best Starting Words, Word Explorer, and Archive are discoverable from sidebar
- • Language switching works via modal, not sidebar navigation
-
-
-
-
-
-
-
-
02
-
Taxonomy
-
What gets daily, what gets unlimited, what stays as-is
-
-
-
Board Type
Play Types
Sidebar
Notes
-
-
-
Classic
-
DailyUnlim
-
2 top-level rows (legacy)
-
"Unlimited" has its own identity. Don't merge.
-
-
-
Speed Streak
-
Both
-
1 row, expandable
-
Daily: same word sequence for everyone — comparable scores/leaderboard. Unlimited: random.
-
-
-
Semantic
-
Both
-
1 row, expandable
-
Daily: deterministic target per day. Unlimited: random target.
5 modes → 1 row. Click opens board picker modal with play type selection.
-
-
-
-
-
- Result: sidebar goes from 10 Play items to 5
- (Daily Puzzle, Unlimited, Speed, Multi-Board, Semantic). A new Learn
- section adds Best Starting Words, Word Explorer, and Archive.
-
-
-
-
-
-
-
-
03
-
Design Principles
-
Rules that resolve ambiguity
-
-
1. Play type is chosen before the game, not during.
- No in-game toggle. No config bar. The subtitle says which type you're in.
-
-
2. Daily is the default. Unlimited is the upsell.
- Clicking any mode goes to daily (when available). Unlimited is discovered through
- post-game CTAs and sidebar expansion.
-
-
3. Post-game is the highest-conversion entry point.
- Three surfaces: stats modal, keyboard flip panel, scroll-down section. All three
- cross-pollinate between daily and unlimited.
-
-
4. One product-wide streak.
- Playing ANY daily mode in ANY language continues your streak. You don't need to play
- the same mode every day. Unlimited has no streak — it has play count and win rate.
-
-
5. The archive is daily-only.
- Lives at /en/archive (301 redirect from /en/words).
- Mode filter tabs at the top. Unlimited games are ephemeral.
-
-
6. Hidden interactions reward power users.
- Tapping the header subtitle opens the sidebar with that mode's daily/unlimited expanded.
- No visual affordance — the cursor changes to pointer, and that's it.
-
-
7. Language switching is a modal, not navigation.
- Clicking the language item in the sidebar (or the flag in the header subtitle) opens
- a language picker modal. No page navigation, no losing game state.
-
-
-
-
-
-
-
04 / Surface
-
In-Game Header
-
Mode name leads. Language + play type below.
-
-
- H1: the game mode name ("Dordle", "Semantic Explorer", "Speed Streak").
- Subtitle: flag icon + language + play type indicator.
- Daily shows the day number: "English · #142".
- Unlimited shows the label in accent: "English · Unlimited".
-
-
-
Hidden interaction: tapping the subtitle opens the sidebar with the
- current mode's daily/unlimited sub-items expanded. No visual button — cursor: pointer
- is the only hint. Power users discover it; new users use the sidebar or post-game CTAs.
-
-
-
-
-
-
9:41•••
-
- ☰
-
-
Dordle
-
-
- English · #142
-
-
- ⚙
-
-
- 2 boards · game area
-
-
-
Daily — tap subtitle to open sidebar
-
-
-
-
9:41•••
-
- ☰
-
-
Dordle
-
-
- English · Unlimited
-
-
- ⚙
-
-
- 2 boards · game area
-
-
-
Unlimited — accent color, no day number
-
-
-
-
-
-
-
-
05 / Surface
-
Sidebar
-
5 Play items + Learn section. Multi-Board opens a picker modal.
-
-
- Multi-board modes collapse into one "Multi-Board" row. Clicking it opens
- the board picker modal (not inline expansion — 5 sub-items
- with daily/unlimited each would be too much inline). Speed and Semantic expand inline
- to show daily/unlimited sub-items.
-
-
- New Learn section: Best Starting Words, Word Explorer, Archive.
- These are content pages accessible from the sidebar but not game modes.
-
-
- Expand is 2 levels only (section → items). Sub-items are just
- indented text rows, no colored pills or badges. The active sub-item gets the same
- border-left: 3px indicator as any sidebar item. Expand/collapse
- uses useAutoHeight for a smooth vertical tween — sidebar width is
- fixed at 280px, so only height changes.
-
- Note on mockup icons: production uses Lucide SVG icons (Square, Infinity,
- Zap, Grid, Compass, etc.). The mockups below use Unicode placeholders for the same
- icon slots. The layout, spacing, and typography match the production sidebar exactly.
-
-
-
-
-
-
-
-
Wordle.Global
-
Play
-
-
■
-
Daily Puzzle
-
🔥 14
-
-
-
∞
-
Unlimited
-
-
-
⚡
-
Speed Streak
-
▶
-
-
-
▦▦
-
Multi-Board
-
▶
-
-
-
☉
-
Semantic
-
▶
-
-
Learn
-
★
Best Starting Words
-
📚
Word Explorer
-
📅
Archive
-
Language
-
-
🇬🇧
-
English
-
change
-
-
You
-
▤
Statistics
-
⚙
Settings
-
-
Collapsed — Learn section is new. Language opens modal.
The post-game experience has three surfaces that surface the daily/unlimited CTA:
-
-
A. Stats Modal
-
- Auto-opens after game over. Matches the existing 2-button pattern
- (bg-ink primary + border-ink secondary, side by side).
- After unlimited, the daily nudge is a subtle text link below the buttons, not a third button.
- Compare to Screen 03.
-
After unlimited — 2 buttons + subtle text link for daily nudge
-
-
-
-
-
B. Keyboard Flip Panel
-
- After game over, the keyboard flips to reveal a discovery panel. Currently shows
- mode suggestions. Add: "Keep playing: Unlimited Dordle" as a prominent
- card alongside other mode suggestions. This surface reaches players who dismiss the
- stats modal quickly.
-
-
-
C. Scroll-Down Section
-
- Below the game board, the SEO content area has FAQ, tips, recent words, and mode links.
- The "More Game Modes" section should surface daily/unlimited variants
- with contextual text: "Finished today's Dordle? Play unlimited rounds — same rules,
- random words." This is the lowest-friction surface (always visible, no modal).
-
-
-
-
-
-
-
-
07 / Surface
-
Homepage
-
Mode cards adapt to player state
-
-
- Each mode card shows available play types via small tags. Returning users see active
- game state with contextual CTAs. Compare to
- F1 (Homepage Hub) and
- Screen 01.
-
-
-
-
-
-
-
9:41•••
-
-
Wordle.Global
- ⚙
-
-
-
Game Modes
-
■
Classic
The original. 6 guesses.
Daily
🔥 14
-
▦▦
Multi-Board
2–32 boards at once
Daily·Unlim
-
☉
Semantic
Navigate meaning space
Daily·Unlim
-
⚡
Speed Streak
Race the clock
Daily·Unlim
-
-
-
New visitor — click → daily by default
-
-
-
-
9:41•••
-
-
Wordle.Global
- ⚙
-
-
-
Game Modes
-
■
Classic #1756
Solved ✓ · 3/6
🔥 14
-
▦▦
Dordle #142
In progress · 3/7
Continue →
-
☉
Semantic #99
Solved ✓
Play Unlimited →
-
⚡
Speed Streak
Race the clock
Daily·Unlim
-
-
-
Returning — cards show game state. Solved modes nudge unlimited.
-
-
-
-
-
-
-
-
08 / Surface
-
Board Picker Modal
-
Repurposed from GameModePicker. Opens when clicking "Multi-Board" in sidebar.
-
-
- The existing GameModePicker.vue modal evolves into a board picker.
- When clicking "Multi-Board" in the sidebar, this modal opens showing the 5 board
- options (Dordle through Duotrigordle). Each row shows the board name, board count,
- and Daily / Unlim tags.
- Clicking a tag navigates directly.
-
-
- This is better than inline sidebar expansion because 5 modes × 2 play types = 10
- sub-items would bloat the sidebar. The modal gives each option breathing room.
-
-
-
-
-
-
-
9:41•••
-
- ← Back
-
Multi-Board
- ✕
-
-
-
▦▦
Dordle
2 boards · 7 guesses
Daily·Unlim
-
▦×4
Quordle
4 boards · 9 guesses
Daily·Unlim
-
▦×8
Octordle
8 boards · 13 guesses
Daily·Unlim
-
▦16
Sedecordle
16 boards · 21 guesses
Daily·Unlim
-
▦32
Duotrigordle
32 boards · 37 guesses
Daily·Unlim
-
-
-
Board picker — click Daily or Unlim tag to navigate directly
-
-
-
-
-
-
-
-
09 / Surface
-
Language Picker Modal
-
Opens from sidebar or header flag. No page navigation.
-
-
- When in a game, clicking the language item in the sidebar (or the flag icon in the
- header subtitle) opens a modal showing the language grid. Selecting a language
- navigates to the same mode + same play type in the new language.
-
-
- The modal filters languages by what's available for the current mode (e.g. Semantic
- only shows English). On the homepage/landing page, the language picker remains
- inline (the flag grid is already there).
-
-
-
-
-
-
-
9:41•••
-
-
-
Choose Language
- ✕
-
-
-
-
-
-
🇬🇧
English
Current language
✓
-
🇫🇷
Français
French
-
🇩🇪
Deutsch
German
-
🇪🇸
Español
Spanish
-
🇮🇹
Italiano
Italian
-
+ 75 more languages
-
-
-
Language modal — search + grid. Selecting navigates to same mode in new language.
-
-
-
-
-
-
-
-
10 / System
-
Streaks
-
Product-wide. Any daily mode, any language.
-
-
- One global streak. Playing ANY daily mode in ANY language continues your
- streak. You don't need to play the same mode or language every day — just complete
- at least one daily game per calendar day.
-
-
-
- Example:
- Day 12: Classic English → streak continues
- Day 13: Dordle Finnish + Semantic English → streak continues
- Day 14: Quordle German → streak continues
- Day 15: (nothing) → streak breaks
-
-
-
- The streak badge in the header (🔥 14) shows one global number. The streak dropdown
- (see Streak exploration) can show a
- calendar heatmap with sub-info per day: which modes and languages were played.
-
-
-
-
Play Type
Streak
Win %
Games Played
Best Streak
-
-
Daily (any mode)
🔥 Product-wide
Per-mode
Per-mode
Product-wide
-
Unlimited
—
Per-mode
Per-mode (rounds)
—
-
-
-
-
- Per-mode stats still exist: win rate, games played, guess distribution.
- They just don't have their own streak. The streak is the one number that represents
- your overall engagement with Wordle Global.
-
-
-
-
-
-
-
-
11 / System
-
Archive
-
Daily-only. Per-mode. At /en/archive.
-
-
- URL:/en/archive is the canonical URL. /en/words
- gets a 301 redirect (preserve SEO equity from existing indexed pages).
-
-
- Mode filter tabs at the top: Classic, Dordle, Quordle, Octordle, Sedecordle, Semantic, Speed.
- Each tab shows that mode's daily word history with the player's result.
- Unlimited games don't appear — they're ephemeral.
-
-
- The existing archive page (/en/words) already has a good design —
- paginated word cards with mini tiles, definitions, AI art thumbnails, and community stats.
- Don't redesign the Classic archive. Changes:
-
-
- • URL rename:/en/archive canonical, 301 from /en/words
- • Mode filter tabs at the top: Classic, Dordle, Quordle, Semantic, Speed
- • When viewing "All" modes, each card gains a small mode label
-
-
-
Multi-board archive cards
-
- The Classic archive card shows 1 word with tiles + definition + art. That works for
- Classic, Semantic, and Speed (1 word per day). Multi-board modes need a different
- card format since one day has N words:
-
-
-
-
Mode
Words/Day
Archive Card Format
-
-
Classic
1
Existing card — word, tiles, definition, art. No change.
-
Semantic
1
Target word + best rank + guess count. Same card shape.
-
Speed
varies
Summary: "8 words solved in 5:00". No individual word cards.
-
Dordle
2
Two words side by side on one card. Compact tiles per word.
-
Quordle
4
2×2 mini grid. Word name + result per cell. No full tiles.
-
Octordle
8
Summary card: "Solved 7/8 boards". Expandable to show individual words.
-
Sedecordle
16
Summary card only. "Solved 14/16". Click to expand word list.
-
Duotrigordle
32
Summary card only. "Solved 28/32". Click to expand. No tile rendering — just word names + solved/failed.
-
-
-
-
- Principle: archive cards get progressively more summarized as board count increases.
- 1–2 words: show everything. 4 words: mini grid. 8+: summary with expand.
- The card never tries to render 32 sets of tiles — that's unreadable.
-
-
-
-
-
-
-
12 / System
-
Word Pages
-
Cross-mode daily history on every word detail page
-
-
- Each word detail page (/en/word/chase) already shows the word's definition,
- part of speech, and AI illustration. With multi-mode dailies, a word can appear as
- the daily target in multiple modes on different days.
-
-
- New section: "Appeared in" — shows which daily modes featured this word
- and on which day. This creates internal links between word pages and the archive,
- and gives players context ("I solved CHASE in Classic on Day #1742, but it was also
- in Dordle #98").
-
- Each entry links to the archive page filtered to that mode + day.
- Words that haven't appeared in any daily mode don't show this section.
- This is SEO-valuable: creates a web of internal links between word pages,
- archive pages, and mode pages.
-
-
-
-
-
-
-
-
13 / System
-
Routing & URLs
-
One URL per mode. Play type is a query param.
-
-
-
URL
Result
-
-
/en/dordle
Daily Dordle (default)
-
/en/dordle?play=unlimited
Unlimited Dordle
-
/en/semantic
Daily Semantic (default)
-
/en/semantic?play=unlimited
Unlimited Semantic
-
/en/speed
Daily Speed (default)
-
/en/speed?play=unlimited
Unlimited Speed
-
/en
Daily Classic (no change)
-
/en/unlimited
Unlimited Classic (no change)
-
/en/archive
Archive (301 from /en/words)
-
-
-
-
- ?play=unlimited triggers a soft reset. Last choice saved to localStorage
- per mode. Classic keeps /unlimited for backward compat and SEO.
-
-
-
-
-
-
-
-
14
-
All Entry Points
-
Every path to a game
-
-
-
Sidebar mode → daily/unlim
-
→
-
Game
-
-
-
Stats Modal CTA "play unlimited" / "try daily"
-
→
-
Game
-
-
-
Keyboard Flip discovery panel after game over
-
→
-
Game
-
-
-
Scroll-Down Section "More Game Modes" below fold
-
→
-
Game
-
-
-
Homepage Card click → daily default
-
→
-
Game
-
-
-
Header Subtitle Tap hidden — opens sidebar expanded
-
→
-
Sidebar
-
→
-
Game
-
-
-
Board Picker Modal from "Multi-Board" sidebar item
-
→
-
Game
-
-
-
Direct URL / Share Link /en/dordle?play=unlimited
-
→
-
Game
-
-
-
-
-
-
-
-
15
-
What We're NOT Doing
-
Explicitly rejected approaches
-
-
No config bar.
- F3 had persistent Daily/Unlimited/Custom
- pills. Rejected — too much UI for a binary choice that's made once per session.
-
-
No in-game toggle.
- Option B toggled play type mid-game.
- Confusing. You pick before you play.
-
-
No separate sidebar items for daily/unlimited per mode.
- Doubles the sidebar. Multi-Board uses a picker modal instead.
-
-
No per-mode streaks.
- Product-wide streak is simpler, more motivating, and easier to maintain.
- Per-mode stats (win rate, games played) still exist.
-
-
No full-screen mode picker modal for all modes.
- Screen 09 was redundant with the sidebar.
- The board picker modal only opens for Multi-Board (5 sub-modes need space).
Start playing instantly with defaults (Classic · Daily · Solo · your language). Change any dimension from within the game via the header bar. No pre-game configuration.
-
-
-
-
Pros
-
-
Zero friction — tap and play immediately
-
Discoverable through exploration
-
Feels lightweight, not overwhelming
-
-
-
-
Cons
-
-
Dimensions hidden behind icons — less discoverable
-
Switching mid-game could be confusing
-
Hard to represent Party mode as a toggle
-
-
-
-
-
-
-
- ℹ️
- ☰
-
- Wordle
-
- 📊
- ⚙
-
-
-
- Classic
- Dordle
- Tridle
- Quordle
- +
-
-
- Daily
- Unlimited
- 🇬🇧 EN
- Party
-
-
-
-
-
-
Board type tabs + play type pills + language/party in sub-bar
-
-
-
-
-
-
Option C
-
Mode-First Sidebar
-
The sidebar is the primary navigation hub. Board types are top-level items. Play type, social, and language are settings within each mode. Like Discord's server/channel hierarchy.
-
-
-
-
Pros
-
-
Familiar pattern (Discord, Slack, Notion)
-
Each mode feels like its own "space"
-
Room for per-mode stats and streaks
-
-
-
-
Cons
-
-
Sidebar takes screen real estate on mobile
-
Deep nesting: Mode → Play Type → Social
-
Switching language requires digging
-
-
-
-
-
-
Wordle Global
-
-
-
Play
-
Classic
-
Daily · Solo
-
Unlimited
-
Party ●
-
Dordle
-
Tridle
-
Quordle
-
Semantic
-
Speed Streak
-
-
Language
-
🇬🇧 English
-
Change →
-
-
-
CLASSIC · DAILY · SOLO · ENGLISH
-
[game board here]
-
-
-
-
-
-
-
Option D
-
Homepage Hub + Quick Actions
-
The homepage shows your active games, streaks, and quick-launch cards. Each card is a pre-configured combo. "Continue" buttons for in-progress games. Party and Custom are actions, not modes.
-
-
-
-
Pros
-
-
Personalized — shows what matters to you
-
Quick resume for daily players
-
Party/Custom as actions feels natural
-
-
-
-
Cons
-
-
Complex homepage to build
-
Empty state problem for new users
-
May hide modes you haven't tried
-
-
-
-
-
-
Wordle Global⚙
-
-
Today's Puzzles
-
-
-
■
-
English Daily
Classic · 5 letters
-
Solved ✓
-
-
-
■
-
Français Daily
Classic · 5 letters
-
Play →
-
-
-
◧
-
English Quordle
4 boards · Daily
-
Play →
-
-
-
-
-
Quick Play
-
-
-
Unlimited
-
English · Classic
-
-
-
Speed Streak
-
5 min challenge
-
-
-
-
-
-
Social
-
- Create Party
- Custom Word
-
-
-
-
-
-
-
Option E
-
Two-Step: Pick Board → Configure
-
Step 1: Choose your board type (the primary decision). Step 2: A compact config bar for play type, social, and language. Separates the "what" from the "how".
-
-
-
-
Pros
-
-
Progressive disclosure — one choice at a time
-
Board type is the hero — most visual impact
-
Config bar is reusable across all modes
-
Party/Custom are first-class but not overwhelming
-
-
-
-
Cons
-
-
Two steps to start playing (vs one tap)
-
Config bar adds a row of UI to every game
-
-
-
-
-
-
-
Step 1: Pick board type
-
-
Wordle Global
-
-
-
■
-
Classic
1 board · 6 guesses
-
→
-
-
-
▦
-
Dordle
2 boards · 7 guesses
-
→
-
-
-
▤
-
Tridle
3 boards · 8 guesses
-
→
-
-
-
◧
-
Quordle
4 boards · 9 guesses
-
→
-
-
-
◎
-
Semantic
Meaning space · 10 guesses
-
→
-
-
-
⚡
-
Speed Streak
5 min timer
-
→
-
-
-
-
-
-
-
Step 2: Configure and play
-
-
- ← Classic
- 📊 ⚙
-
-
- Daily
- Unlimited
- Custom
-
- Solo
- Party
-
- EN
-
-
-
-
-
-
Config bar stays visible during play Tap Party to create lobby from here
-
-
-
-
-
-
-
-
Party Mode Screens
-
Party: Lobby → In-Game → Results
-
These screens work regardless of which navigation option is chosen. Party is an overlay that wraps any game mode.
-
-
-
-
-
-
1. Create & share lobby
-
-
Party Lobby✕
-
-
-
Classic · English
-
Daily puzzle · 5 letters
-
-
-
-
Players (2/6)
-
-
-
H
- Hugo
- Ready ●
-
-
-
S
- Sanna
- Ready ●
-
-
-
?
- Waiting for players...
-
-
-
-
-
-
-
Copy
-
-
- Start Game
-
Everyone plays the same word simultaneously
-
-
-
-
-
-
-
2. Play with live progress
-
-
Party · Classic2 players
-
-
-
H
-
-
-
-
-
-
-
-
-
-
-
S
-
-
-
-
-
-
-
-
-
-
-
-
-
C
R
A
N
E
-
C
L
A
S
S
-
C
H
-
-
Progress strip shows guess # per player No letter spoilers — just dots
-
-
-
-
-
-
-
3. Results & rematch
-
-
Party Results✕
-
-
-
CHASE
-
/tʃeɪs/ · verb
-
-
-
-
-
1
-
Sanna
Solved in 2 · 0:34
-
2 guesses
-
🏆
-
-
-
2
-
Hugo
Solved in 3 · 1:12
-
3 guesses
-
-
-
-
-
- Rematch
- Share
-
-
Rematch keeps the same lobby, new word
-
-
-
-
-
-
-
-
-
Refined: The Hybrid
-
Combining the Homepage Hub (D) with Two-Step Flow (E). The recommended architecture.
-
-
-
-
F1 / Returning User Homepage
-
Homepage Hub
-
Returning users see their active games, languages, and social actions. One tap to continue. "New Game" for exploration.
-
-
-
9:41●●● WiFi 🔋
-
-
Wordle.Global
-
-
-
-
-
-
-
Continue Playing
-
-
-
-
English Daily
-
Classic · 5 letters · #247
-
-
Solved ✓
-
-
-
-
-
Français Daily
-
Classic · 5 letters · 🔥 21
-
-
Play →
-
-
-
-
-
English Quordle
-
4 boards · Daily · #42
-
-
Play →
-
-
-
- New Game
-
-
-
Your Languages
-
-
-
-
-
-
+
-
-
-
Social
-
-
Create Party
-
Custom Word
-
-
-
- 247 won
- 14 streak
- 12 langs
-
-
-
-
-
-
-
F2 / New Game → Board Type Picker
-
Pick Your Board
-
The hero choice. Each board type is visually distinct. Tap to proceed to the game with the config bar.
-
-
-
9:41●●● WiFi 🔋
-
- ← Back
- New Game
-
-
-
-
-
-
-
Classic
-
1 board · 6 guesses · The original
-
- →
-
-
-
-
-
Dordle
-
2 boards · 7 guesses
-
- →
-
-
-
-
-
Tridle
-
3 boards · 8 guesses
-
- →
-
-
-
-
-
Quordle
-
4 boards · 9 guesses
-
- →
-
-
-
-
-
Semantic Explorer
-
Navigate meaning · 10 guesses
-
- →
-
-
-
-
-
Speed Streak
-
5 minutes · Solve as many as you can
-
- →
-
-
More modes coming soon
-
-
-
-
-
-
F3 / In-Game with Config Bar
-
Play with Persistent Config
-
The config bar stays visible during gameplay. Switch between Daily/Unlimited, toggle Party, or change language without leaving the game.
-
-
-
9:41●●● WiFi 🔋
-
- ←
- Classic
-
-
-
-
-
-
- Daily
- Unlimited
- Custom
-
- Solo
- Party
-
- EN
-
-
-
#247 · March 20, 2026
-
-
C
R
A
N
E
-
C
L
A
S
S
-
C
H
-
-
-
-
-
-
Q
W
E
R
T
Y
U
I
O
P
-
A
S
D
F
G
H
J
K
L
-
ENTER
Z
X
C
V
B
N
M
⌫
-
-
-
-
-
-
-
F4 / Party — Triggered from Config Bar
-
Party as an Overlay
-
Tapping "Party" in the config bar opens a bottom sheet. Share the link, wait for friends, start when ready. Works for any board type.
-
-
-
9:41●●● WiFi 🔋
-
- ←
- Classic
-
-
-
-
-
-
- Daily
- Unlimited
- Custom
-
- Solo
- Party ●
-
- EN
-
- The flame sits to the left of the stats icon — visible but not loud.
- The badge changes color and intensity with streak length. Inspired by Duolingo but adapted to our editorial restraint.
-
-
-
- No streak
- 3-day
- 12-day
- 47-day
- Frozen
-
-
-
-
No active streak
-
-
-
-
-
-
-
-
-
English
-
#1737
-
-
-
-
-
-
-
-
-
-
-
-
-
Gray flame, no count — invites the user to start a streak.
-
-
-
-
Dev / Product Notes
-
-
Data source:statsStore.stats.current_streak (already computed from localStorage game results)
-
Placement: Between info (i) and stats (bar chart) icons in GameHeader.vue
-
States: No streak (gray, 0.5 opacity) / Active 1-6 (orange, static) / Hot 7+ (orange, gentle pulse animation) / Frozen (blue, snowflake accent)
-
Animation — "Catch fire": When the user completes a game and their streak increments, the flame should briefly ignite — scale up with a glow burst (600ms), then settle. CSS keyframe: scale(1) -> scale(1.6) + drop-shadow glow -> scale(1). Only on the transition from streak N to N+1, not on page load.
-
Animation — "Freeze": When a freeze is consumed, the flame transitions from orange to blue with a brief frost particle effect (CSS radial gradient burst). The badge gets a subtle crystalline shimmer.
-
Click target: Opens the streak dropdown (see next section). On mobile, opens as a bottom sheet instead of dropdown.
-
Speed mode: Streak badge hidden — speed is session-based, not daily.
-
-
-
-
-
-
-
- Streak / Dropdown
-
-
Click the Flame
-
- Tapping the streak badge opens a dropdown: hero number, calendar heatmap of the past month, streak stats, and freeze status.
-
-
-
-
-
-
Active — 12 days
-
-
-
-
12
-
Day streak
-
You're on fire! Keep it going.
-
-
-
March 2026
-
MTWTFSS
-
-
1
-
2
3
4
5
6
7
8
-
9
10
11
12
13
14
15
-
16
17
18
19
20
21
22
-
-
-
-
-
12
Current
-
34
Longest
-
-
-
-
1 Streak Freeze
Protects your streak if you miss a day
-
-
-
-
-
-
-
-
Frozen — used a freeze
-
-
-
-
12
-
Day streak — frozen
-
Your streak was saved! Play today to keep going.
-
-
-
-
No Freezes Left
Get more with Wordle Global+
- Get more
-
-
-
-
-
-
-
-
Dev / Product Notes
-
-
Trigger: Click/tap the streak badge in the navbar. Opens as dropdown (desktop) or bottom sheet (mobile).
-
Calendar data: Built from gameResults in localStorage — each entry has a date. Map dates to played/missed/frozen grid cells.
-
Frozen days: Detected when a day has no game result but the streak didn't break. Requires storing freeze usage timestamps.
-
"View full stats" link: Navigates to /stats page (already exists).
-
Freeze count: Free users: 1 freeze, replenishes every Monday. Premium: unlimited. Stored in localStorage, validated server-side for premium.
-
Auto-dismiss: Dropdown closes on outside click or Escape key (same as BaseModal pattern).
-
-
-
-
-
-
-
- Streak / Milestones
-
-
Streak Milestones
-
- Celebrations at 7, 30, 100, and 365 days. Each progressively more dramatic. Click to preview.
-
-
-
-
-
-
7
Days
"First Week"
-
-
-
-
30
Days
"Monthly Master"
-
-
-
-
100
Days
"Century Club"
-
-
-
-
365
Days
"Year of Words"
-
-
-
Click a milestone to preview the celebration
-
-
-
Dev / Product Notes
-
-
When it appears: Celebration overlay triggers immediately after the stats modal auto-shows, IF the user just crossed a milestone threshold. Sequence: tiles reveal -> bounce -> keyboard flip -> stats modal -> milestone celebration on top.
-
Haptic feedback:haptic.success() pattern on milestone hit. Sound: ascending 4-note chime (reuse win sound but extended).
-
Confetti: The dotted confetti pattern in the card is CSS-only (background radial gradients). For the overlay entrance, consider a lightweight canvas confetti burst (200ms, 30 particles, gravity fall).
-
Share prompt: After dismissing the celebration, optionally prompt "Share your streak?" with a pre-formatted share text: "I just hit a 30-day streak on Wordle Global! wordle.global"
-
Persistence: Track which milestones the user has seen in localStorage to avoid re-showing on refresh. Key: streak_milestones_seen: [7, 30]
-
Future milestones: Could extend to 500, 1000. Consider language-specific milestones ("Play 7 languages in a week").
-
-
-
-
-
-
-
- Streak / Freeze & Premium
-
-
Streak Freeze
-
- Miss a day, your freeze auto-activates to save the streak.
- Free users get 1 freeze (replenishes weekly). Premium gets unlimited + streak recovery.
-
-
-
-
-
-
- Wordle Global+
-
-
Never Lose Your Streak
-
Keep your momentum with unlimited freezes and more
-
-
-
-
-
Unlimited Streak Freezes
Miss a day without losing your streak. Free users get 1 per week.
-
-
-
-
Streak Recovery
Broke your streak? Restore it within 48 hours.
-
-
-
-
No Ads, Ever
Clean, distraction-free gameplay.
-
-
-
- Start Free Trial
-
7 days free, then $2.99/month
-
-
-
-
Dev / Product Notes
-
-
Freeze mechanic: If the user doesn't play on a given day AND has a freeze available, the freeze auto-consumes at midnight UTC. The next time they open the app, the flame shows blue (frozen state) with "Your streak was saved!" messaging.
-
Free tier: 1 freeze, replenishes every Monday at 00:00 UTC. Stored in localStorage: { freezes_available: 1, last_replenish: "2026-03-17" }
-
Premium tier: Unlimited freezes + streak recovery (restore a broken streak within 48 hours). Revenue model: $2.99/month or $24.99/year.
-
Premium gate UX: When free user runs out of freezes, the dropdown shows "Get more" button -> opens premium upsell modal. Also accessible from Settings.
-
Server validation: Premium status and freeze usage should be validated server-side to prevent localStorage tampering. But streak itself stays client-side (privacy, offline play).
-
Implementation order: 1) Streak badge in navbar (free, no backend). 2) Streak dropdown with calendar (free, no backend). 3) Milestone celebrations (free, no backend). 4) Streak freeze (needs freeze storage). 5) Premium (needs auth + payments).
- Interactive prototypes of every streak effect. Each panel is a working demo — click the buttons to trigger.
- Dev notes explain implementation. All effects respect prefers-reduced-motion.
-
- When this appears
- Same Lucide flame icon throughout. At 30+ days it flickers; at 50+ embers detach from the tip.
- Win Sequence shows the full chain: gray dormant icon -> warms to orange -> catches ablaze with embers -> settles to gentle flicker.
-
-
-
-
-
-
Glow Escalation
-
CSS only — zero JS
-
-
-
- 3
-
-
-
- 3 days
- 12 days
- 47 days
- 100 days
-
-
- When this appears
- Always visible in header when streak >= 1. Glow tier set on page load — no entrance animation.
-
-
-
-
-
-
Fire Particles
-
Canvas2D — ~30 particles
-
-
-
-
- 47
-
-
-
- Simmer
- Catch Ablaze
- Off
-
-
- When this appears
- Simmer: ambient at 30+ day streaks. Catch ablaze: ~2s burst on game win, then settles back to simmer.
-
-
-
-
-
-
Heat Distortion
-
SVG feTurbulence filter
-
-
-
-
- 100
-
-
-
- Start
- Stop
-
-
- When this appears
- 100+ day streaks only. Stacks with fire particles for the full hot streak look.
-
-
-
-
-
-
Frost & Snowfall
-
CSS + Canvas2D
-
-
-
-
- 12
-
-
-
-
- Freeze
- Thaw
-
-
- When this appears
- Freeze auto-activates at midnight if user missed today. Next visit: frozen badge + snowfall. Playing thaws it back to warm.
-
-
-
-
-
-
Tile Ember Sweep
-
CSS keyframes — staggered per tile
-
-
F
-
L
-
A
-
M
-
E
-
- Trigger Sweep
-
- When this appears
- After final tile-flip on win. Sweeps the winning row, then badge does catch ablaze. Chain: tiles catch fire -> badge ignites.
-
-
-
-
-
-
-
Milestone Confetti
-
Canvas2D — gravity physics
-
-
-
7
-
-
First Week Complete
-
- 7 days
- 30 days
- 100 days
- 365 days
-
-
- When this appears
- Fires once when milestone modal opens. Scales with milestone: 7-day = 50 pieces, 365-day = 150 piece eruption.
-
-
-
-
-
-
-
-
Effect Escalation Map
-
-
-
-
Streak
-
Badge Effect
-
Win Moment
-
Tech
-
-
-
-
1-6 days
Warm glow (level 1)
Tile bounce only
CSS
-
7-29 days
Glow pulse (level 2-3)
Ember sweep + bounce
CSS
-
30-99 days
Fire particles (simmer)
Particles blaze + sweep
Canvas
-
100+ days
Particles + heat distortion
Full blaze + sweep + distortion
Canvas + SVG
-
Frozen
Frost overlay + snowfall
Thaw transition on win
CSS + Canvas
-
Milestone
--
Confetti burst (scaled)
Canvas
-
-
-
- All effects skip when prefers-reduced-motion is set or .reduce-animations is active. Falls back to static glow/tint only.
-
-
-
-
-
-
-
-
- ×
-
-
7
-
Day Streak!
-
First Week Complete
-
You've played Wordle every day for a week. The habit is forming.
- 16 boards, 21 guesses. The fundamental problem: 42 tile rows in ~480px = 11px tiles.
- Three approaches to making this playable. All interactive — click to explore.
-
- Show recent guesses at full size. Collapse earlier rows into a tappable bar.
- Tap "+N earlier" to expand and review, tap again to collapse.
-
-
-
-
Mobile — 2 boards, 21 guesses, rows collapsed
-
-
-
-
Board 1
-
-
-
- +12 earlier
-
-
-
-
-
S
T
A
R
E
-
C
L
O
U
D
-
M
I
G
H
T
-
B
R
I
N
T
-
W
A
G
E
T
-
-
B
U
G
-
-
-
-
3 guesses left
-
-
-
-
-
Board 2
-
-
- +14 earlier
-
-
-
-
P
L
A
N
E
-
P
O
I
N
T
-
P
A
I
N
T
-
-
P
I
-
-
1 guess left
-
-
-
-
-
- How expand/collapse works
- Tap "+N earlier" to smoothly expand all hidden rows (max-height transition). The board scrolls to keep the active row visible. Tap again to re-collapse. On desktop, hover shows a subtle preview. The collapsed bar doubles as a guess counter. The grid sizes for ~8 visible rows, giving 28px tiles on mobile.
-
-
-
-
-
-
B. Scrollable Board Area
-
- All rows rendered. Board area scrolls vertically. Keyboard stays fixed at the bottom.
- This is what sedecordle.com and duotrigordle.com do.
-
-
-
-
Desktop — 4x2 grid, scrollable, keyboard fixed
-
-
-
-
-
-
-
-
-
-
-
Q
W
E
R
T
Y
U
I
O
P
-
-
-
A
S
D
F
G
H
J
K
L
-
-
-
ENT
Z
X
C
V
B
N
M
DEL
-
-
-
-
-
- How scrolling works
- The board grid scrolls freely. Scroll-snap aligns to rows of boards. The keyboard is position: sticky at the bottom, always accessible. On mobile, the virtual keyboard replaces the on-screen one. After submitting a guess, auto-scroll to the first unsolved board that isn't visible.
-
-
-
-
-
-
E. Focused Board + Thumbnails
-
- One board at full readable size. The rest as tiny colored thumbnails.
- Click a thumbnail to swap the active board.
-
-
-
-
Mobile — 1 focused board, 16 thumbnails
-
-
-
-
-
-
-
-
-
-
-
Board 3 — Guess 6 of 21
-
-
- How focus swap works
- Tap any thumbnail to swap it into the focus slot with a crossfade (opacity 0 -> swap content -> opacity 1, 200ms). The thumbnail strip scrolls horizontally. Solved boards get a green border and dimmed opacity. After a guess, if the focused board is solved, auto-advance to the next unsolved board. Keyboard colors show state for the focused board only.
-
-
-
-
-
-
Comparison
-
-
-
-
Criterion
-
A. Row Collapse
-
B. Scrollable
-
E. Focus + Thumbs
-
-
-
-
-
Tile readability
-
28px, readable
-
16px, small but visible
-
40px, best
-
-
-
Multi-board overview
-
2-4 boards visible
-
8 boards visible
-
1 board + thumbnails
-
-
-
Guess history access
-
Tap to expand
-
Always visible (scroll)
-
Visible on focused board
-
-
-
Screen space usage
-
Good
-
Excellent
-
Good
-
-
-
Mobile UX
-
Good (2 boards, big tiles)
-
Scroll can conflict with keyboard
-
Good (1 board, huge tiles)
-
-
-
Desktop UX
-
Good
-
Best (all boards, scroll)
-
Wastes space (1 board)
-
-
-
Implementation complexity
-
Medium
-
Low (just CSS overflow)
-
High (swap animation, state)
-
-
-
Recommendation
-
Best for mobile
-
Best overall for desktop
-
Best for single-board focus
-
-
-
-
- Recommended hybrid: B (scrollable) on desktop + A (row collapse) on mobile. Use E (focused) as an optional "zoom" interaction on any board.
-
- Daily and unlimited aren't settings you toggle mid-game — they're
- entry points. You choose which one before you start playing.
- Once you're in, the game header tells you which type you're in, and that's it.
- The design problem isn't "how to switch" — it's how players discover
- and navigate to both play types. Three surfaces matter:
- 1. Sidebar, 2. Post-game CTAs, 3. Homepage mode cards.
-
-
Prior Art
-
- Screen 02 — sidebar (daily-only),
- Screen 03 — post-game "Play Unlimited" CTA,
- Screen 09 — mode picker list,
- F1 — hub with "Continue Playing" cards,
- F3 — config bar with daily/unlimited pills (rejected — too heavy).
-
-
Mode Matrix
-
-
Mode
Today
Proposed
Notes
-
-
Classic
Daily
Daily
Stays daily. "Unlimited" is its own sidebar item.
-
Unlimited
Unlim
Unlim
Classic's unlimited variant. No change.
-
Speed
Unlim
Unlim
Arcade survival. No daily variant.
-
Dordle
Unlim
Both
Daily: 2 deterministic words/day.
-
Quordle
Unlim
Both
Daily: 4 deterministic words.
-
Octordle+
Unlim
Both
Same for 8/16/32 boards.
-
Semantic
Daily
Both
Unlimited: random target each game.
-
-
-
-
-
-
-
- Daily / Unlimited — In-Game Label
-
-
In-Game: Just a Label
-
- The subtitle tells you which type you're in. "Dordle #142" = daily (the number says it).
- "Dordle · Unlimited" = unlimited (accent color, no number).
- Nothing interactive. No toggle. The subtitle is a label, not a control.
-
-
-
-
9:41••• WiFi 🔋
-
-
English
-
Dordle #142
-
-
- 2 boards · game area
-
-
Daily — the day number implies it
-
-
-
9:41••• WiFi 🔋
-
-
English
-
Dordle · Unlimited
-
-
- 2 boards · game area
-
-
Unlimited — accent color signals the difference
-
-
-
-
-
-
-
- Daily / Unlimited — Sidebar
-
-
Sidebar: Expandable Sub-Items
-
- Modes with both play types show a red dot on the right. Tapping expands to show
- "Daily" and "Unlimited" as sub-items. Single-type modes (Classic, Unlimited, Speed)
- navigate directly. Avoids doubling the sidebar length
- (compare to Screen 02 which had no daily/unlimited distinction).
-
-
-
-
-
Play
-
■
Daily Puzzle
🔥 14
-
∞
Unlimited
-
▦▦
Dordle
-
▦×4
Quordle
-
☉
Semantic
-
⚡
Speed Streak
-
Collapsed — dot = has unlimited
-
-
-
-
Play
-
■
Daily Puzzle
🔥 14
-
∞
Unlimited
-
▦▦
Dordle
▼
-
📅 Daily #142 🔥 3
-
∞ Unlimited
-
▦×4
Quordle
-
☉
Semantic
-
⚡
Speed Streak
-
Expanded — sub-items with streak
-
-
-
-
-
-
-
- Daily / Unlimited — Post-Game CTAs
-
-
Post-Game: Cross-Pollination
-
- The stats modal is the highest-conversion moment. After daily, nudge into unlimited.
- After unlimited, nudge into daily. Compare to Screen 03 which
- already had a "Play Unlimited" button — this version is more prominent and contextual.
-
-
-
-
-
9:41••• WiFi 🔋
-
-
Dordle #142
-
Solved 5/7
-
-
-
-
CHASE & PLANT
-
Two down, infinite to go
-
-
-
42
Played
-
88
Win %
-
7
Streak
-
12
Best
-
- Share Result
- Play Unlimited Dordle →
-
- Next daily Dordle
- 07:42:15
-
-
-
After daily: unlimited CTA in accent
-
-
-
-
9:41••• WiFi 🔋
-
-
Dordle · Unlimited
-
Solved 6/7
-
-
-
-
RIVER & GHOST
-
Nice one. Another round?
-
-
-
18
Played
-
72
Win %
-
—
Streak
-
—
Best
-
- Play Again
- Share Result
- Try Today's Daily Dordle #142 →
-
-
After unlimited: daily nudge in green
-
-
-
- After daily, "Play Unlimited" is accent-colored — "you're done for today, but there's more."
- After unlimited, "Play Again" is primary; "Try Daily" is a green nudge that only appears if today's daily is unplayed.
-
-
-
-
-
-
- Daily / Unlimited — Homepage
-
-
Homepage: Contextual Mode Cards
-
- Each card shows which play types are available via small labels. The card adapts
- to the player's state — compare to F1's "Continue Playing" hub,
- but applied to the mode grid instead of a separate section.
-
Mode cards with daily/unlimited labels, game state, and a featured Semantic card
-
-
-
- The homepage adapts to context. New visitors see mode cards with
- "Daily · Unlimited" labels — click goes to daily by default.
- Returning players see their active game state: in-progress games
- get a "Continue" CTA, solved dailies nudge toward unlimited, and the streak badge
- shows their product-wide streak.
-
-
- Semantic Explorer gets a featured card with a larger icon, dark
- background, and "NEW" badge to drive discovery of the newest mode.
-
-
-
-
-
-
-
New Visitor
-
-
-
-
9:41••• WiFi
-
-
Wordle.Global
-
The world's word game — 80 languages
-
-
-
Game Modes
-
-
-
-
■
-
-
Wordle
-
One word per day. 6 guesses. The classic.
-
-
-
Daily·Unlim
-
-
-
-
-
-
NEW
-
☉
-
-
Semantic Explorer
-
Find words by meaning. Navigate a map of language.
-
-
-
Daily·Unlim
-
-
-
-
-
-
⚡
-
-
Speed Streak
-
Race the clock. Solve as many as you can.
-
-
-
Daily·Unlim
-
-
-
-
-
-
▦▦
-
-
Multi-Board
-
2–32 boards at once. Dordle, Quordle, and more.
-
-
-
Daily·Unlim
-
-
-
-
Languages
-
-
English
-
Espanol
-
Deutsch
-
Francais
-
Italiano
-
Turkce
-
Suomi
-
+ 73 more
-
-
-
New visitor — click any card → daily by default
-
-
-
-
-
-
-
Returning Player
-
-
-
-
9:41••• WiFi
-
-
Wordle.Global
-
The world's word game — 80 languages
-
🔥 14 day streak
-
-
-
Your Games
-
-
-
-
■
-
-
Wordle #1756
-
Solved ✓ · 3/6
-
-
-
Play Unlimited →
-
-
-
-
-
-
▦▦
-
-
Dordle #142
-
In progress · 3/7
-
-
-
Continue →
-
-
-
-
-
-
NEW
-
☉
-
-
Semantic Explorer
-
Find words by meaning. Try it!
-
-
-
Daily·Unlim
-
-
-
-
-
-
⚡
-
-
Speed Streak
-
Race the clock
-
-
-
Daily·Unlim
-
-
-
-
-
-
▦▦
-
-
Multi-Board
-
Quordle, Octordle, and more
-
-
-
Daily·Unlim
-
-
-
-
Your Languages
-
-
-
-
-
+
-
-
-
Returning — streak badge, solved/in-progress states, featured Semantic
-
-
-
-
-
-
-
Design Notes
-
-
4 mode cards, not 5. "Unlimited" is gone as a standalone card — it's Classic's unlimited variant, accessible via the sidebar or post-game CTA.
-
Semantic is featured. Larger card, dark icon bg, "NEW" badge. Positioned second after Classic to maximize discovery. Once it's no longer new, it becomes a regular card.
-
Multi-Board is one card. Clicking it opens the sidebar's Multi-Board section (or a picker modal) where you choose Dordle/Quordle/etc.
-
Streak badge on returning user. Product-wide streak (any daily mode, any language). Shows in the masthead area.
-
Solved dailies nudge unlimited. "Wordle #1756 — Solved" shows "Play Unlimited →" as the CTA, not a dead card.
-
Language flags as circles. Returning users see their played languages as flag circles with a "+" to add more. New visitors see the full grid.
- The stats modal and stats page show both play types for each mode.
- A tab bar switches between Daily and Unlimited views. The product-wide streak
- lives at the top — it's the one number that represents overall engagement.
-
-
- Daily tab: games played, win %, guess distribution, streak (product-wide),
- best streak. Unlimited tab: rounds played, win %, guess distribution.
- No streak — unlimited is on-demand, not calendar-based.
-
-
-
-
-
-
-
Stats Modal — After Daily Win
-
-
-
-
9:41•••
-
-
-
Wordle
-
English · #1756 · 3/6
-
- ×
-
-
-
-
-
THETA
-
An angle or direction
-
-
-
-
-
14
-
Day Streak
-
Classic EN, Dordle EN, Semantic EN today
-
-
-
-
- Daily
- Unlimited
-
-
-
-
-
247
Played
-
94
Win %
-
14
Streak
-
21
Best
-
-
-
-
-
Guess Distribution
-
1
2
-
2
14
-
3
42
-
4
31
-
5
11
-
6
3
-
-
-
-
- Share Result
- Unlimited →
-
-
-
- Next Wordle
- 07:42:15
-
-
-
Daily tab active — streak + distribution + CTAs
-
-
-
-
-
9:41•••
-
-
-
Wordle
-
English · #1756 · 3/6
-
- ×
-
-
-
-
THETA
-
An angle or direction
-
-
-
-
14
-
Day Streak
-
Streak is daily-only — any mode counts
-
-
-
-
- Daily
- Unlimited
-
-
-
-
83
Rounds
-
78
Win %
-
—
Streak
-
—
Best
-
-
-
-
Guess Distribution (Unlimited)
-
1
1
-
2
8
-
3
24
-
4
32
-
5
13
-
6
5
-
-
-
- Share Result
- Unlimited →
-
-
-
Unlimited tab — rounds (not games), no streak, separate distribution
Streak is always visible. The product-wide streak hero sits above the tabs — it's the same regardless of which tab is active. It's the one motivating number.
No streak for unlimited. The streak/best cells show "—" on the unlimited tab. Unlimited has rounds played and win %, but no consecutive-day concept.
-
Calendar heatmap. Shows which days you played (any daily mode). Green = 1 mode played. Dark green = 2+ modes. Red = missed. Expanding a day could show which modes/languages were played.
-
Per-mode breakdown. Each mode shows a one-line summary: "Daily: N played · X% | Unlimited: N rounds · X%". Clicking expands to full stats + distribution for that mode.
-
Stats modal vs stats page. The modal shows the current mode's stats (with tabs). The full stats page shows all modes with the calendar heatmap. Same tab pattern, different scope.
-
-
-
-
-
diff --git a/public/llms.txt b/public/llms.txt
index 8292216c..0c01be19 100644
--- a/public/llms.txt
+++ b/public/llms.txt
@@ -14,7 +14,7 @@ Wordle Global is a word-guessing game at https://wordle.global. Players guess a
- [Octordle](https://wordle.global/en/octordle): 8 boards, 13 guesses.
- [Sedecordle](https://wordle.global/en/sedecordle): 16 boards, 21 guesses.
- [Duotrigordle](https://wordle.global/en/duotrigordle): 32 boards, 37 guesses.
-- [Semantic Explorer](https://wordle.global/en/semantic): Find a word by meaning similarity with an interactive map. English only.
+- [Semantic Explorer](https://wordle.global/en/semantic): A Contexto/Semantle-style semantic word game. Guess a hidden word by meaning — each guess gets a rank (1–50,001) showing semantic closeness. Features compass hints, an interactive meaning map, and 275,000+ accepted words. Like Contexto and Semantle but with richer hints and visualization. English only.
## Languages
diff --git a/scripts/benchmark-semantic-db.ts b/scripts/benchmark-semantic-db.ts
new file mode 100644
index 00000000..54bf2cad
--- /dev/null
+++ b/scripts/benchmark-semantic-db.ts
@@ -0,0 +1,150 @@
+/**
+ * Benchmark pgvector semantic operations against the live database.
+ *
+ * Reports p50/p95/p99 latencies for the hot-path operations:
+ * - computeGuessRank (btree lookup on target_neighbors)
+ * - knnNearest (pgvector HNSW index)
+ * - getEmbedding (single vector fetch)
+ * - get2dPosition (coordinate lookup)
+ *
+ * Usage: DATABASE_URL="..." npx tsx scripts/benchmark-semantic-db.ts
+ */
+
+import pg from 'pg';
+import Prisma from '@prisma/client';
+import { PrismaPg } from '@prisma/adapter-pg';
+
+const { PrismaClient } = Prisma; // NOTE(review): PrismaClient and PrismaPg are not referenced anywhere else in this script — confirm they are needed
+
+const DATABASE_URL = process.env.DATABASE_URL; // Postgres connection string (required)
+if (!DATABASE_URL) {
+ console.error('DATABASE_URL not set');
+ process.exit(1); // nothing below can run without a connection string
+}
+
+const pool = new pg.Pool({
+ connectionString: DATABASE_URL,
+ ssl: { rejectUnauthorized: false }, // NOTE(review): TLS certificate is not verified — confirm this is acceptable for the target database
+ max: 5, // at most 5 pooled connections
+});
+/** Awaits `fn` 5 times untimed, then times `iterations` awaited runs and logs avg/p50/p95/p99 latency (ms) under `name`. */
+async function benchmark(name: string, fn: () => Promise<unknown>, iterations: number = 100) {
+  if (!(iterations >= 1)) throw new RangeError('iterations must be >= 1'); // percentiles need at least one sample
+  for (let i = 0; i < 5; i++) await fn(); // warm up: these calls are not timed
+
+  const times: number[] = [];
+  for (let i = 0; i < iterations; i++) {
+    const t0 = performance.now();
+    await fn();
+    times.push(performance.now() - t0);
+  }
+
+  times.sort((a, b) => a - b); // ascending, so an index into the array is a percentile
+  const p50 = times[Math.floor(times.length * 0.5)]!;
+  const p95 = times[Math.floor(times.length * 0.95)]!;
+  const p99 = times[Math.floor(times.length * 0.99)]!;
+  const avg = times.reduce((s, t) => s + t, 0) / times.length;
+
+  console.log(
+    ` ${name.padEnd(25)} avg=${avg.toFixed(1)}ms p50=${p50.toFixed(1)}ms p95=${p95.toFixed(1)}ms p99=${p99.toFixed(1)}ms`
+  );
+}
+/** Runs each benchmark in turn (mostly on one client checked out from the pool), then releases the client and closes the pool. */
+async function main() {
+  const client = await pool.connect();
+
+  console.log('=== Semantic DB Benchmark ===\n');
+
+  // 1. computeGuessRank — btree lookup
+  await benchmark('computeGuessRank (hit)', async () => {
+    await client.query(
+      'SELECT rank FROM wordle.target_neighbors WHERE lang = $1 AND target_word = $2 AND word = $3',
+      ['en', 'bread', 'loaf']
+    );
+  });
+
+  await benchmark('computeGuessRank (miss)', async () => {
+    await client.query(
+      'SELECT rank FROM wordle.target_neighbors WHERE lang = $1 AND target_word = $2 AND word = $3',
+      ['en', 'bread', 'quantum']
+    );
+  });
+
+  // 2. getEmbedding — single vector fetch
+  await benchmark('getEmbedding', async () => {
+    await client.query(
+      'SELECT embedding::text FROM wordle.word_embeddings WHERE lang = $1 AND word = $2',
+      ['en', 'bread']
+    );
+  });
+
+  // 3. get2dPosition — coordinate lookup
+  await benchmark('get2dPosition', async () => {
+    await client.query(
+      'SELECT umap_x, umap_y FROM wordle.word_embeddings WHERE lang = $1 AND word = $2',
+      ['en', 'bread']
+    );
+  });
+
+  // 4. knnNearest — pgvector HNSW
+  // First get bread's embedding for the kNN query (the kNN benchmarks are skipped when no row comes back)
+  const embResult = await client.query(
+    'SELECT embedding::text FROM wordle.word_embeddings WHERE lang = $1 AND word = $2',
+    ['en', 'bread']
+  );
+  const breadVec = embResult.rows[0]?.embedding;
+  if (breadVec) {
+    await benchmark('knnNearest (k=8)', async () => {
+      await client.query(
+        `SELECT word, 1 - (embedding <=> $1::vector) as similarity
+ FROM wordle.word_embeddings
+ WHERE lang = 'en' AND is_vocab = true
+ ORDER BY embedding <=> $1::vector
+ LIMIT 8`,
+        [breadVec]
+      );
+    }, 50);
+
+    await benchmark('knnNearest (k=20)', async () => {
+      await client.query(
+        `SELECT word, 1 - (embedding <=> $1::vector) as similarity
+ FROM wordle.word_embeddings
+ WHERE lang = 'en' AND is_vocab = true
+ ORDER BY embedding <=> $1::vector
+ LIMIT 20`,
+        [breadVec]
+      );
+    }, 50);
+  }
+
+  // 5. COUNT(*) for totalRanked
+  await benchmark('totalRanked COUNT', async () => {
+    await client.query(
+      'SELECT COUNT(*) FROM wordle.word_embeddings WHERE lang = $1 AND is_vocab = true',
+      ['en']
+    );
+  }, 20);
+
+  // 6. Combined guess simulation (rank + umap, in parallel). Goes through pool.query: one pg client runs its queued queries one at a time.
+  await benchmark('full guess (parallel)', async () => {
+    await Promise.all([
+      pool.query(
+        'SELECT rank FROM wordle.target_neighbors WHERE lang = $1 AND target_word = $2 AND word = $3',
+        ['en', 'bread', 'loaf']
+      ),
+      pool.query(
+        'SELECT umap_x, umap_y FROM wordle.word_embeddings WHERE lang = $1 AND word = $2',
+        ['en', 'loaf']
+      ),
+    ]);
+  });
+
+  console.log('\n=== Benchmark complete ===');
+  client.release();
+  await pool.end();
+}
+
+main().catch((err) => { // any failure in main: report it and exit non-zero
+  console.error('Benchmark failed:', err);
+  process.exit(1);
+});
diff --git a/scripts/migrate-caches-to-db.mjs b/scripts/migrate-caches-to-db.mjs
new file mode 100644
index 00000000..1972098a
--- /dev/null
+++ b/scripts/migrate-caches-to-db.mjs
@@ -0,0 +1,295 @@
+/**
+ * Migrate disk-based cache files to Postgres tables.
+ *
+ * Plain JS version — runs with just `node` (no tsx/esbuild needed).
+ * Only dependency: `pg` (production dep, always available).
+ *
+ * Safe to run multiple times — uses ON CONFLICT DO NOTHING.
+ *
+ * Usage on Render shell:
+ * node scripts/migrate-caches-to-db.mjs
+ * node scripts/migrate-caches-to-db.mjs --definitions-only   (or --stats-only)
+ * node scripts/migrate-caches-to-db.mjs --wiktionary-only    (or --hints-only; flags can be combined)
+ *
+ * Env vars (auto-set on Render):
+ * DATABASE_URL — Postgres connection string (required)
+ * BASE_DIR — Persistent data dir (default: /data)
+ */
+
+import { existsSync, readdirSync, readFileSync } from 'fs';
+import { join } from 'path';
+import pg from 'pg';
+
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) { // fail fast: nothing below can run without a connection string
+ console.error('DATABASE_URL not set');
+ process.exit(1);
+}
+
+const BASE_DIR = process.env.BASE_DIR || '/data'; // root of the on-disk caches
+const WORD_DEFS_DIR = join(BASE_DIR, 'word-defs'); // definitions, plus the wiktionary-exists and semantic-hints subtrees
+const WORD_STATS_DIR = join(BASE_DIR, 'word-stats'); // one subdirectory per language, one <dayIdx>.json per day
+const BATCH_SIZE = 200; // rows per multi-row INSERT statement
+
+function sanitize(v) { return v == null ? null : String(v).replace(/\0/g, ''); } // string form without NUL characters; null for null/undefined
+
+const pool = new pg.Pool({
+ connectionString: DATABASE_URL,
+ ssl: { rejectUnauthorized: false }, // TLS is used, but the server certificate is NOT verified
+ max: 5, // upper bound on pooled connections
+});
+
+// ---------------------------------------------------------------------------
+// Definitions
+// ---------------------------------------------------------------------------
+
+async function insertDefinitionBatch(batch) {
+  // Writes `batch` with one multi-row INSERT; rows whose (lang, word) already exists are left untouched.
+  if (batch.length === 0) return;
+  const WIDTH = 10; // columns per row, so row i owns placeholders $(i*10+1) .. $(i*10+10)
+  const tuples = batch.map((_, i) => {
+    const slots = Array.from({ length: WIDTH }, (_unused, k) => `$${i * WIDTH + k + 1}`);
+    return `(${slots.join(',')})`;
+  });
+  const params = batch.flatMap(r =>
+    [r.lang, r.word, r.definition, r.definition_native, r.definition_en, r.part_of_speech, r.confidence, r.source, r.url, r.is_negative]);
+  await pool.query(
+    `INSERT INTO wordle.definitions (lang, word, definition, definition_native, definition_en, part_of_speech, confidence, source, url, is_negative)
+ VALUES ${tuples.join(',')} ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+async function migrateDefinitions() {
+  if (!existsSync(WORD_DEFS_DIR)) { console.log('[definitions] No word-defs dir, skipping'); return; }
+
+  // Subdirectories of word-defs that hold other caches rather than per-language definitions.
+  const SKIP = new Set(['wiktionary-exists', 'semantic-embeddings', 'semantic-hints', '.cache']);
+  const langs = [];
+  for (const entry of readdirSync(WORD_DEFS_DIR, { withFileTypes: true })) {
+    if (entry.isDirectory() && !entry.name.startsWith('.') && !SKIP.has(entry.name)) langs.push(entry.name);
+  }
+
+  let total = 0, skipped = 0;
+  for (const lang of langs) {
+    const langDir = join(WORD_DEFS_DIR, lang);
+    const jsonFiles = readdirSync(langDir).filter(name => name.endsWith('.json'));
+    let langCount = 0;
+    const pending = [];
+    // Sends the pending rows; a rejected batch is logged and counted as skipped, never fatal.
+    const flush = async () => {
+      try { await insertDefinitionBatch(pending); langCount += pending.length; }
+      catch (e) { console.warn(` [batch-err] ${lang}: ${e.message?.slice(0,80)}`); skipped += pending.length; }
+      pending.length = 0;
+    };
+
+    for (const file of jsonFiles) {
+      try {
+        // NUL characters are removed from the raw text and again from every string field.
+        const data = JSON.parse(readFileSync(join(langDir, file), 'utf-8').replace(/\0/g, ''));
+        pending.push({
+          lang, word: file.replace('.json', ''),
+          definition: sanitize(data.definition), definition_native: sanitize(data.definition_native),
+          definition_en: sanitize(data.definition_en), part_of_speech: sanitize(data.part_of_speech),
+          confidence: data.confidence ?? null, source: sanitize(data.source), url: sanitize(data.url),
+          is_negative: !!data.not_found,
+        });
+        if (pending.length >= BATCH_SIZE) await flush();
+      } catch { skipped++; } // unreadable or malformed file
+    }
+    if (pending.length) await flush();
+    total += langCount;
+    if (langCount > 0) console.log(` [definitions] ${lang}: ${langCount}`);
+  }
+  console.log(`[definitions] Total: ${total} migrated, ${skipped} skipped`);
+}
+
+// ---------------------------------------------------------------------------
+// Word Stats
+// ---------------------------------------------------------------------------
+
+async function insertStatsBatch(batch) {
+  // Writes per-day stats with one multi-row INSERT; an existing (lang, day_idx) row is never overwritten.
+  if (batch.length === 0) return;
+  const WIDTH = 11; // columns per row
+  const tuples = batch.map((_, i) => {
+    const slots = Array.from({ length: WIDTH }, (_unused, k) => `$${i * WIDTH + k + 1}`);
+    return `(${slots.join(',')})`;
+  });
+  const params = batch.flatMap(r =>
+    [r.lang, r.day_idx, r.total, r.wins, r.losses, r.dist_1, r.dist_2, r.dist_3, r.dist_4, r.dist_5, r.dist_6]);
+  await pool.query(
+    `INSERT INTO wordle.word_stats (lang, day_idx, total, wins, losses, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6)
+ VALUES ${tuples.join(',')} ON CONFLICT (lang, day_idx) DO NOTHING`,
+    params
+  );
+}
+
+async function migrateWordStats() { // word-stats/{lang}/{dayIdx}.json → wordle.word_stats
+  if (!existsSync(WORD_STATS_DIR)) { console.log('[word-stats] No word-stats dir, skipping'); return; }
+
+  const langs = readdirSync(WORD_STATS_DIR, { withFileTypes: true }).filter(d => d.isDirectory()).map(d => d.name);
+  let total = 0, skipped = 0;
+  for (const lang of langs) {
+    const langDir = join(WORD_STATS_DIR, lang);
+    const files = readdirSync(langDir).filter(f => f.endsWith('.json'));
+    const batch = [];
+    // Logs a rejected INSERT, counts its rows as skipped, and always empties the batch so it is never re-sent.
+    const flush = async () => {
+      try { await insertStatsBatch(batch); total += batch.length; }
+      catch (e) { console.warn(` [batch-err] ${lang}: ${e.message?.slice(0,80)}`); skipped += batch.length; }
+      batch.length = 0;
+    };
+    for (const file of files) {
+      const dayIdx = parseInt(file.replace('.json', ''), 10); // the file name is the day index
+      if (Number.isNaN(dayIdx)) continue;
+      let data; // only reading/parsing is guarded here; database errors are handled by flush()
+      try { data = JSON.parse(readFileSync(join(langDir, file), 'utf-8')); if (!data.total) continue; } catch { skipped++; continue; }
+      const dist = data.distribution ?? {};
+      batch.push({
+        lang, day_idx: dayIdx, total: data.total, wins: data.wins ?? 0, losses: data.losses ?? 0,
+        dist_1: dist['1'] ?? 0, dist_2: dist['2'] ?? 0, dist_3: dist['3'] ?? 0,
+        dist_4: dist['4'] ?? 0, dist_5: dist['5'] ?? 0, dist_6: dist['6'] ?? 0,
+      });
+      if (batch.length >= BATCH_SIZE) await flush();
+    }
+    if (batch.length) await flush();
+  }
+  console.log(`[word-stats] Total: ${total} migrated, ${skipped} skipped`);
+}
+
+// ---------------------------------------------------------------------------
+// Wiktionary
+// ---------------------------------------------------------------------------
+
+async function insertWiktionaryBatch(batch) {
+  // Stores Wiktionary existence flags with one multi-row INSERT; an existing (lang, word) row is kept as-is.
+  if (batch.length === 0) return;
+  const tuples = [];
+  const params = [];
+  batch.forEach((row, i) => {
+    const base = 3 * i; // three parameters per row
+    tuples.push(`($${base + 1},$${base + 2},$${base + 3})`);
+    params.push(row.lang, row.word, row.exists);
+  });
+  await pool.query(
+    `INSERT INTO wordle.wiktionary_cache (lang, word, exists)
+ VALUES ${tuples.join(',')} ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+async function migrateWiktionary() { // word-defs/wiktionary-exists/{lang}/{word}.json → wordle.wiktionary_cache
+  const wiktDir = join(WORD_DEFS_DIR, 'wiktionary-exists');
+  if (!existsSync(wiktDir)) { console.log('[wiktionary] No wiktionary-exists dir, skipping'); return; }
+
+  const langs = readdirSync(wiktDir, { withFileTypes: true }).filter(d => d.isDirectory()).map(d => d.name);
+
+  let total = 0, skipped = 0;
+  for (const lang of langs) {
+    const langDir = join(wiktDir, lang);
+    const files = readdirSync(langDir).filter(f => f.endsWith('.json'));
+    const batch = [];
+    // Logs a rejected INSERT, counts its rows as skipped, and always empties the batch so it is never re-sent.
+    const flush = async () => {
+      try { await insertWiktionaryBatch(batch); total += batch.length; }
+      catch (e) { console.warn(` [batch-err] ${lang}: ${e.message?.slice(0,80)}`); skipped += batch.length; }
+      batch.length = 0;
+    };
+    for (const file of files) {
+      try { const data = JSON.parse(readFileSync(join(langDir, file), 'utf-8')); batch.push({ lang, word: file.replace('.json', ''), exists: !!data.exists }); } catch { skipped++; continue; }
+      if (batch.length >= BATCH_SIZE) await flush(); // outside the per-file try: a DB error is not a bad file
+    }
+    if (batch.length) await flush();
+  }
+  console.log(`[wiktionary] Total: ${total} migrated, ${skipped} skipped`);
+}
+
+// ---------------------------------------------------------------------------
+// Semantic Hints
+// ---------------------------------------------------------------------------
+
+async function insertHintsBatch(batch) {
+  // Stores hints with one multi-row INSERT; a (lang, word) that already has a row keeps it.
+  if (batch.length === 0) return;
+  const tuples = [];
+  const params = [];
+  batch.forEach((row, i) => {
+    const base = 4 * i; // four parameters per row
+    tuples.push(`($${base + 1},$${base + 2},$${base + 3},$${base + 4})`);
+    params.push(row.lang, row.word, row.hint, row.model);
+  });
+  await pool.query(
+    `INSERT INTO wordle.semantic_hints (lang, word, hint, model)
+ VALUES ${tuples.join(',')} ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+async function migrateSemanticHints() { // word-defs/semantic-hints/{word}.json → wordle.semantic_hints (always lang 'en')
+  const hintsDir = join(WORD_DEFS_DIR, 'semantic-hints');
+  if (!existsSync(hintsDir)) { console.log('[semantic-hints] No hints dir, skipping'); return; }
+  const files = readdirSync(hintsDir).filter(f => f.endsWith('.json'));
+  let total = 0, skipped = 0;
+  const batch = [];
+  const flush = async () => { // logs a rejected INSERT, counts its rows as skipped, and always empties the batch
+    try { await insertHintsBatch(batch); total += batch.length; }
+    catch (e) { console.warn(` [batch-err] semantic-hints: ${e.message?.slice(0,80)}`); skipped += batch.length; }
+    batch.length = 0;
+  };
+  for (const file of files) {
+    let data; // only reading/parsing is guarded here; database errors are handled by flush()
+    try { data = JSON.parse(readFileSync(join(hintsDir, file), 'utf-8')); if (!data.hint) continue; } catch { skipped++; continue; }
+    batch.push({ lang: 'en', word: file.replace('.json', ''), hint: data.hint, model: data.model ?? null });
+    if (batch.length >= BATCH_SIZE) await flush();
+  }
+  if (batch.length) await flush();
+  console.log(`[semantic-hints] Total: ${total} migrated, ${skipped} skipped`);
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+/**
+ * Entry point: verifies the database connection, then runs the selected migrations one after another.
+ * With no CLI arguments every migration runs; otherwise each `--*-only` flag enables its own step.
+ */
+async function main() {
+  const args = process.argv.slice(2);
+  const runAll = args.length === 0;
+
+  console.log('=== Migrating disk caches to Postgres ===');
+  console.log(`BASE_DIR: ${BASE_DIR}`);
+  console.log(`BATCH_SIZE: ${BATCH_SIZE}\n`);
+
+  // Probe the connection up front so a bad DATABASE_URL fails before any file is read.
+  try {
+    await pool.query('SELECT 1');
+    console.log('[db] Connected\n');
+  } catch (err) {
+    console.error('[db] Connection failed:', err);
+    process.exit(1);
+  }
+
+  // Each step is [CLI flag that selects it, migration to run]; steps execute sequentially in this order.
+  const steps = [
+    ['--definitions-only', migrateDefinitions],
+    ['--stats-only', migrateWordStats],
+    ['--wiktionary-only', migrateWiktionary],
+    ['--hints-only', migrateSemanticHints],
+  ];
+
+  for (const [flag, migrate] of steps) {
+    if (!runAll && !args.includes(flag)) continue;
+    const startedAt = Date.now();
+    await migrate();
+    const seconds = (Date.now() - startedAt) / 1000;
+    console.log(` (${seconds.toFixed(1)}s)\n`);
+  }
+
+  console.log('=== Migration complete ===');
+  await pool.end();
+}
+
+main().catch((err) => { console.error('Migration failed:', err); process.exit(1); }); // any unhandled migration error ends the process with status 1
diff --git a/scripts/migrate-caches-to-db.ts b/scripts/migrate-caches-to-db.ts
new file mode 100644
index 00000000..a6936c81
--- /dev/null
+++ b/scripts/migrate-caches-to-db.ts
@@ -0,0 +1,493 @@
+/**
+ * Migrate disk-based cache files to Postgres tables.
+ *
+ * Reads JSON cache files (definitions, word stats, wiktionary, hints)
+ * from the persistent disk and batch-inserts them into Postgres.
+ *
+ * Safe to run multiple times — uses ON CONFLICT DO NOTHING.
+ *
+ * Usage:
+ * BASE_DIR=/data npx tsx scripts/migrate-caches-to-db.ts
+ * BASE_DIR=/data npx tsx scripts/migrate-caches-to-db.ts --definitions-only   (or --stats-only)
+ * BASE_DIR=/data npx tsx scripts/migrate-caches-to-db.ts --wiktionary-only    (or --hints-only; flags can be combined)
+ *
+ * Env vars:
+ * DATABASE_URL — Postgres connection string (required)
+ * BASE_DIR — Path to persistent data directory (default: /data)
+ */
+
+import { existsSync, readdirSync, readFileSync } from 'fs';
+import { join } from 'path';
+import pg from 'pg';
+
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) { // fail fast: nothing below can run without a connection string
+ console.error('DATABASE_URL not set');
+ process.exit(1);
+}
+
+const BASE_DIR = process.env.BASE_DIR || '/data'; // root of the on-disk caches
+const WORD_DEFS_DIR = join(BASE_DIR, 'word-defs'); // definitions, plus the wiktionary-exists and semantic-hints subtrees
+const WORD_STATS_DIR = join(BASE_DIR, 'word-stats'); // one subdirectory per language, one <dayIdx>.json per day
+
+const BATCH_SIZE = 200; // rows per multi-row INSERT statement
+
+const pool = new pg.Pool({
+ connectionString: DATABASE_URL,
+ ssl: { rejectUnauthorized: false }, // TLS is used, but the server certificate is NOT verified
+ max: 5, // upper bound on pooled connections
+});
+
+// ---------------------------------------------------------------------------
+// Definitions: word-defs/{lang}/{word}.json → definitions table
+// ---------------------------------------------------------------------------
+
+/** Loads every word-defs/{lang}/{word}.json file and batch-inserts the definitions into wordle.definitions. */
+async function migrateDefinitions() {
+  if (!existsSync(WORD_DEFS_DIR)) {
+    console.log('[definitions] No word-defs directory found, skipping');
+    return;
+  }
+
+  // Subdirectories of word-defs that hold other caches rather than per-language definitions.
+  const SKIP_DIRS = new Set([
+    'wiktionary-exists',
+    'semantic-embeddings',
+    'semantic-hints',
+    '.cache',
+  ]);
+
+  const langs = readdirSync(WORD_DEFS_DIR, { withFileTypes: true })
+    .filter((d) => d.isDirectory() && !d.name.startsWith('.') && !SKIP_DIRS.has(d.name))
+    .map((d) => d.name);
+
+  // Row shape is derived from the insert helper so the two cannot drift apart.
+  type DefinitionRow = Parameters<typeof insertDefinitionBatch>[0][number];
+
+  // Text fields: missing values become null, and NUL characters are removed because
+  // PostgreSQL text values cannot contain them.
+  const clean = (v: unknown): string | null => (v == null ? null : String(v).replace(/\0/g, ''));
+
+  let total = 0;
+  let skipped = 0;
+
+  for (const lang of langs) {
+    const langDir = join(WORD_DEFS_DIR, lang);
+    const files = readdirSync(langDir).filter((f) => f.endsWith('.json'));
+    let langCount = 0;
+    const batch: DefinitionRow[] = [];
+
+    // Sends the pending rows. This must run outside the per-file try/catch: otherwise a
+    // database error would be counted as a single skipped file, the rows would stay
+    // queued, and the same failing INSERT would be retried for every later file.
+    const flush = async (): Promise<void> => {
+      try {
+        await insertDefinitionBatch(batch);
+        langCount += batch.length;
+      } catch (e: unknown) {
+        const reason = e instanceof Error ? e.message : String(e);
+        console.warn(` [batch-err] ${lang}: ${reason.slice(0, 80)}`);
+        skipped += batch.length;
+      }
+      batch.length = 0;
+    };
+
+    for (const file of files) {
+      try {
+        const raw = readFileSync(join(langDir, file), 'utf-8').replace(/\0/g, '');
+        const data = JSON.parse(raw);
+        batch.push({
+          lang,
+          word: file.replace('.json', ''),
+          definition: clean(data.definition),
+          definition_native: clean(data.definition_native),
+          definition_en: clean(data.definition_en),
+          part_of_speech: clean(data.part_of_speech),
+          confidence: data.confidence ?? null,
+          source: clean(data.source),
+          url: clean(data.url),
+          is_negative: !!data.not_found,
+        });
+      } catch {
+        // Unreadable or malformed file: count it and move on.
+        skipped++;
+        continue;
+      }
+      if (batch.length >= BATCH_SIZE) await flush();
+    }
+
+    if (batch.length > 0) await flush();
+
+    total += langCount;
+    if (langCount > 0) console.log(` [definitions] ${lang}: ${langCount} entries`);
+  }
+
+  console.log(`[definitions] Total: ${total} migrated, ${skipped} skipped\n`);
+}
+
+async function insertDefinitionBatch(
+  batch: Array<{
+    lang: string;
+    word: string;
+    definition: string | null;
+    definition_native: string | null;
+    definition_en: string | null;
+    part_of_speech: string | null;
+    confidence: number | null;
+    source: string | null;
+    url: string | null;
+    is_negative: boolean;
+  }>
+) {
+  // Inserts `batch` with a single multi-row INSERT; nothing is sent for an empty batch.
+  if (!batch.length) return;
+
+  // One "($a, $b, …)" group per row: row i owns parameters i*10+1 through i*10+10.
+  const COLUMNS = 10;
+  const groups = batch.map((_row, rowIdx) => {
+    const first = rowIdx * COLUMNS + 1;
+    const slots = Array.from({ length: COLUMNS }, (_v, col) => `$${first + col}`);
+    return `(${slots.join(', ')})`;
+  });
+
+  // Flattened parameter list, in the same column order as the INSERT below.
+  const params = batch.flatMap((row) => [
+    row.lang,
+    row.word,
+    row.definition,
+    row.definition_native,
+    row.definition_en,
+    row.part_of_speech,
+    row.confidence,
+    row.source,
+    row.url,
+    row.is_negative,
+  ]);
+
+  // Rows whose (lang, word) already exists are left as they are.
+  await pool.query(
+    `INSERT INTO wordle.definitions (lang, word, definition, definition_native, definition_en, part_of_speech, confidence, source, url, is_negative)
+ VALUES ${groups.join(', ')}
+ ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Word Stats: word-stats/{lang}/{dayIdx}.json → word_stats table
+// ---------------------------------------------------------------------------
+
+/** Loads every word-stats/{lang}/{dayIdx}.json file and batch-inserts the totals into wordle.word_stats. */
+async function migrateWordStats() {
+  if (!existsSync(WORD_STATS_DIR)) {
+    console.log('[word-stats] No word-stats directory found, skipping');
+    return;
+  }
+
+  const langs = readdirSync(WORD_STATS_DIR, { withFileTypes: true })
+    .filter((d) => d.isDirectory())
+    .map((d) => d.name);
+
+  // Row shape is derived from the insert helper so the two cannot drift apart.
+  type StatsRow = Parameters<typeof insertStatsBatch>[0][number];
+
+  let total = 0;
+  let skipped = 0;
+
+  for (const lang of langs) {
+    const langDir = join(WORD_STATS_DIR, lang);
+    const files = readdirSync(langDir).filter((f) => f.endsWith('.json'));
+    const batch: StatsRow[] = [];
+
+    // Sends the pending rows. This must run outside the per-file try/catch: otherwise a
+    // database error would be counted as a single skipped file, the rows would stay
+    // queued, and the same failing INSERT would be retried for every later file.
+    const flush = async (): Promise<void> => {
+      try {
+        await insertStatsBatch(batch);
+        total += batch.length;
+      } catch (e: unknown) {
+        const reason = e instanceof Error ? e.message : String(e);
+        console.warn(` [batch-err] ${lang}: ${reason.slice(0, 80)}`);
+        skipped += batch.length;
+      }
+      batch.length = 0;
+    };
+
+    for (const file of files) {
+      // The file name (minus .json) is the day index; names parseInt cannot read are ignored.
+      const dayIdx = parseInt(file.replace('.json', ''), 10);
+      if (Number.isNaN(dayIdx)) continue;
+
+      try {
+        const data = JSON.parse(readFileSync(join(langDir, file), 'utf-8'));
+        if (!data.total) continue; // entries with a zero or missing total are not migrated
+
+        batch.push({
+          lang,
+          day_idx: dayIdx,
+          total: data.total,
+          wins: data.wins ?? 0,
+          losses: data.losses ?? 0,
+          dist_1: data.distribution?.['1'] ?? 0,
+          dist_2: data.distribution?.['2'] ?? 0,
+          dist_3: data.distribution?.['3'] ?? 0,
+          dist_4: data.distribution?.['4'] ?? 0,
+          dist_5: data.distribution?.['5'] ?? 0,
+          dist_6: data.distribution?.['6'] ?? 0,
+        });
+      } catch {
+        // Unreadable or malformed file: count it and move on.
+        skipped++;
+        continue;
+      }
+      if (batch.length >= BATCH_SIZE) await flush();
+    }
+
+    if (batch.length > 0) await flush();
+  }
+
+  console.log(`[word-stats] Total: ${total} migrated, ${skipped} skipped\n`);
+}
+
+async function insertStatsBatch(
+  batch: Array<{
+    lang: string;
+    day_idx: number;
+    total: number;
+    wins: number;
+    losses: number;
+    dist_1: number;
+    dist_2: number;
+    dist_3: number;
+    dist_4: number;
+    dist_5: number;
+    dist_6: number;
+  }>
+) {
+  // Inserts `batch` with a single multi-row INSERT; nothing is sent for an empty batch.
+  if (!batch.length) return;
+
+  // One "($a, $b, …)" group per row: row i owns parameters i*11+1 through i*11+11.
+  const COLUMNS = 11;
+  const groups = batch.map((_row, rowIdx) => {
+    const first = rowIdx * COLUMNS + 1;
+    const slots = Array.from({ length: COLUMNS }, (_v, col) => `$${first + col}`);
+    return `(${slots.join(', ')})`;
+  });
+
+  const params = batch.flatMap((row) => [
+    row.lang,
+    row.day_idx,
+    row.total,
+    row.wins,
+    row.losses,
+    row.dist_1,
+    row.dist_2,
+    row.dist_3,
+    row.dist_4,
+    row.dist_5,
+    row.dist_6,
+  ]);
+
+  // An existing (lang, day_idx) row is never overwritten.
+  await pool.query(
+    `INSERT INTO wordle.word_stats (lang, day_idx, total, wins, losses, dist_1, dist_2, dist_3, dist_4, dist_5, dist_6)
+ VALUES ${groups.join(', ')}
+ ON CONFLICT (lang, day_idx) DO NOTHING`,
+    params
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Wiktionary: word-defs/wiktionary-exists/{lang}/{word}.json → wiktionary_cache
+// ---------------------------------------------------------------------------
+
+/** Loads word-defs/wiktionary-exists/{lang}/{word}.json files into wordle.wiktionary_cache. */
+async function migrateWiktionary() {
+  const wiktDir = join(WORD_DEFS_DIR, 'wiktionary-exists');
+  if (!existsSync(wiktDir)) {
+    console.log('[wiktionary] No wiktionary-exists directory found, skipping');
+    return;
+  }
+
+  const langs = readdirSync(wiktDir, { withFileTypes: true })
+    .filter((d) => d.isDirectory())
+    .map((d) => d.name);
+
+  let total = 0;
+  let skipped = 0;
+  for (const lang of langs) {
+    const langDir = join(wiktDir, lang);
+    const files = readdirSync(langDir).filter((f) => f.endsWith('.json'));
+    const batch: Array<{ lang: string; word: string; exists: boolean }> = [];
+    // Runs outside the per-file try/catch so a database error is reported as a failed batch and never re-sent.
+    const flush = async (): Promise<void> => {
+      try {
+        await insertWiktionaryBatch(batch);
+        total += batch.length;
+      } catch (e: unknown) {
+        const reason = e instanceof Error ? e.message : String(e);
+        console.warn(` [batch-err] ${lang}: ${reason.slice(0, 80)}`);
+        skipped += batch.length;
+      }
+      batch.length = 0;
+    };
+    for (const file of files) {
+      try {
+        const data = JSON.parse(readFileSync(join(langDir, file), 'utf-8'));
+        batch.push({ lang, word: file.replace('.json', ''), exists: !!data.exists });
+      } catch {
+        skipped++; // unreadable or malformed file
+        continue;
+      }
+      if (batch.length >= BATCH_SIZE) await flush();
+    }
+    if (batch.length > 0) await flush();
+  }
+
+  console.log(`[wiktionary] Total: ${total} migrated, ${skipped} skipped\n`);
+}
+
+async function insertWiktionaryBatch(
+  batch: Array<{ lang: string; word: string; exists: boolean }>
+) {
+  // Inserts `batch` with a single multi-row INSERT; nothing is sent for an empty batch.
+  if (!batch.length) return;
+
+  const groups: string[] = [];
+  const params: Array<string | boolean> = [];
+  batch.forEach(({ lang, word, exists }, rowIdx) => {
+    const base = rowIdx * 3; // three parameters per row
+    groups.push(`($${base + 1}, $${base + 2}, $${base + 3})`);
+    params.push(lang, word, exists);
+  });
+
+  // An existing (lang, word) row is never overwritten.
+  await pool.query(
+    `INSERT INTO wordle.wiktionary_cache (lang, word, exists)
+ VALUES ${groups.join(', ')}
+ ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Semantic Hints: word-defs/semantic-hints/{word}.json → semantic_hints
+// ---------------------------------------------------------------------------
+
+/** Loads word-defs/semantic-hints/{word}.json files into wordle.semantic_hints (always as lang 'en'). */
+async function migrateSemanticHints() {
+  const hintsDir = join(WORD_DEFS_DIR, 'semantic-hints');
+  if (!existsSync(hintsDir)) {
+    console.log('[semantic-hints] No hints directory found, skipping');
+    return;
+  }
+
+  const files = readdirSync(hintsDir).filter((f) => f.endsWith('.json'));
+  let total = 0;
+  let skipped = 0;
+  const batch: Array<{ lang: string; word: string; hint: string; model: string | null }> = [];
+  // Runs outside the per-file try/catch so a database error is reported as a failed batch and never re-sent.
+  const flush = async (): Promise<void> => {
+    try {
+      await insertHintsBatch(batch);
+      total += batch.length;
+    } catch (e: unknown) {
+      const reason = e instanceof Error ? e.message : String(e);
+      console.warn(` [batch-err] semantic-hints: ${reason.slice(0, 80)}`);
+      skipped += batch.length;
+    }
+    batch.length = 0;
+  };
+  for (const file of files) {
+    try {
+      const data = JSON.parse(readFileSync(join(hintsDir, file), 'utf-8'));
+      if (!data.hint) continue; // nothing to store
+      batch.push({ lang: 'en', word: file.replace('.json', ''), hint: data.hint, model: data.model ?? null });
+    } catch {
+      skipped++; // unreadable or malformed file
+      continue;
+    }
+    if (batch.length >= BATCH_SIZE) await flush();
+  }
+  if (batch.length > 0) await flush();
+  console.log(`[semantic-hints] Total: ${total} migrated, ${skipped} skipped\n`);
+}
+
+async function insertHintsBatch(
+  batch: Array<{ lang: string; word: string; hint: string; model: string | null }>
+) {
+  // Inserts `batch` with a single multi-row INSERT; nothing is sent for an empty batch.
+  if (!batch.length) return;
+
+  const groups: string[] = [];
+  const params: Array<string | null> = [];
+  batch.forEach(({ lang, word, hint, model }, rowIdx) => {
+    const base = rowIdx * 4; // four parameters per row
+    groups.push(`($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4})`);
+    params.push(lang, word, hint, model);
+  });
+
+  // A (lang, word) that already has a row keeps it.
+  await pool.query(
+    `INSERT INTO wordle.semantic_hints (lang, word, hint, model)
+ VALUES ${groups.join(', ')}
+ ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+/**
+ * Entry point: checks the database connection, then runs the selected migrations in order.
+ * With no CLI arguments every migration runs; otherwise each `--*-only` flag enables its own step.
+ * Exits with status 1 when the initial connection check fails.
+ */
+async function main() {
+  const args = process.argv.slice(2);
+  const runAll = args.length === 0;
+
+  console.log(`=== Migrating disk caches to Postgres ===`);
+  console.log(`BASE_DIR: ${BASE_DIR}`);
+  console.log(`BATCH_SIZE: ${BATCH_SIZE}\n`);
+
+  // Verify DB connection before touching any files.
+  try {
+    await pool.query('SELECT 1');
+    console.log('[db] Connected\n');
+  } catch (e) {
+    console.error('[db] Connection failed:', e);
+    process.exit(1);
+  }
+
+  // Each step is [CLI flag that selects it, migration to run]; steps execute sequentially in this order.
+  const steps: Array<[string, () => Promise<void>]> = [
+    ['--definitions-only', migrateDefinitions],
+    ['--stats-only', migrateWordStats],
+    ['--wiktionary-only', migrateWiktionary],
+    ['--hints-only', migrateSemanticHints],
+  ];
+
+  for (const [flag, run] of steps) {
+    // Skip steps that were not requested on the command line.
+    if (!runAll && !args.includes(flag)) continue;
+
+    const t0 = Date.now();
+    await run();
+    // Wall-clock duration of this step, in seconds.
+    console.log(` (${((Date.now() - t0) / 1000).toFixed(1)}s)\n`);
+  }
+
+  console.log('=== Migration complete ===');
+  // Close the pool's connections now that all work is done.
+  await pool.end();
+}
+
+main().catch((err) => {
+  console.error('Migration failed:', err);
+  process.exit(1); // non-zero status marks the migration as failed
+});
diff --git a/scripts/seed-coordinates.mjs b/scripts/seed-coordinates.mjs
new file mode 100644
index 00000000..de53c26f
--- /dev/null
+++ b/scripts/seed-coordinates.mjs
@@ -0,0 +1,90 @@
+/**
+ * Seed UMAP and PCA2D coordinates into word_embeddings table.
+ * Reads from data/semantic/umap.json and pca2d.json.
+ *
+ * Usage: node scripts/seed-coordinates.mjs
+ * Env: DATABASE_URL, SEMANTIC_DIR (default: data/semantic)
+ */
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import pg from 'pg';
+
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) { console.error('DATABASE_URL not set'); process.exit(1); } // fail fast without a connection string
+
+const SEMANTIC_DIR = process.env.SEMANTIC_DIR || join(process.cwd(), 'data', 'semantic'); // directory holding umap.json and pca2d.json
+const BATCH_SIZE = 500; // words per UPDATE statement
+
+const pool = new pg.Pool({ connectionString: DATABASE_URL, ssl: { rejectUnauthorized: false }, max: 5 }); // NOTE: rejectUnauthorized=false skips server-certificate verification
+
+async function main() {
+  // Connectivity check; a failure rejects and is reported by the caller's catch handler.
+  await pool.query('SELECT 1');
+  console.log('[db] Connected');
+
+  // Each file is read for its `coordinates` property, which is indexed by word below;
+  // element 0 of an entry is used as x and element 1 as y.
+  const readCoords = (name) => JSON.parse(readFileSync(join(SEMANTIC_DIR, name), 'utf-8')).coordinates;
+  const umap = readCoords('umap.json');
+  const pca = readCoords('pca2d.json');
+  console.log(`[umap] ${Object.keys(umap).length} words`);
+  console.log(`[pca2d] ${Object.keys(pca).length} words`);
+
+  // Union of both key sets: a word present in only one file gets NULL for the other projection.
+  const words = [...new Set([...Object.keys(umap), ...Object.keys(pca)])];
+  console.log(`[total] ${words.length} unique words to update\n`);
+
+  // Builds the parameter row for one word.
+  const toRow = (word) => {
+    const u = umap[word];
+    const p = pca[word];
+    return {
+      word,
+      umap_x: u ? u[0] : null,
+      umap_y: u ? u[1] : null,
+      pca2d_x: p ? p[0] : null,
+      pca2d_y: p ? p[1] : null,
+    };
+  };
+
+  let updated = 0, skipped = 0;
+  for (let start = 0; start < words.length; start += BATCH_SIZE) {
+    const chunk = words.slice(start, start + BATCH_SIZE).map(toRow);
+    const n = await flushBatch(chunk);
+    updated += n;
+    skipped += chunk.length - n; // rows of the chunk that matched nothing in the table
+
+    // Progress line roughly every 10,000 updates. Only full chunks report;
+    // a trailing partial chunk is followed directly by the summary line.
+    if (chunk.length === BATCH_SIZE && updated % 10000 < BATCH_SIZE) process.stdout.write(` ${updated} updated...\r`);
+  }
+
+  console.log(`\n[done] ${updated} updated, ${skipped} skipped (not in DB)`);
+  await pool.end();
+}
+
+async function flushBatch(batch) {
+  // Applies one batch via UPDATE ... FROM (VALUES ...) joined on `word` (lang 'en' only); returns the number of rows updated.
+  if (!batch.length) return 0; // an empty VALUES list would be invalid SQL
+  const values = [];
+  const rows = [];
+  let idx = 1;
+  for (const r of batch) {
+    rows.push(`($${idx},$${idx+1},$${idx+2},$${idx+3},$${idx+4})`);
+    values.push(r.word, r.umap_x, r.umap_y, r.pca2d_x, r.pca2d_y);
+    idx += 5;
+  }
+  const result = await pool.query(`
+ UPDATE wordle.word_embeddings AS we SET
+ umap_x = v.umap_x::double precision,
+ umap_y = v.umap_y::double precision,
+ pca2d_x = v.pca2d_x::double precision,
+ pca2d_y = v.pca2d_y::double precision
+ FROM (VALUES ${rows.join(',')}) AS v(word, umap_x, umap_y, pca2d_x, pca2d_y)
+ WHERE we.lang = 'en' AND we.word = v.word
+ `, values);
+
+  return result.rowCount ?? 0; // rowCount may be null; report that as zero rows
+}
+
+main().catch((err) => { console.error('FAILED:', err); process.exit(1); }); // any unhandled error ends the process with status 1
diff --git a/scripts/seed-kaikki-to-db.mjs b/scripts/seed-kaikki-to-db.mjs
new file mode 100644
index 00000000..404a5761
--- /dev/null
+++ b/scripts/seed-kaikki-to-db.mjs
@@ -0,0 +1,101 @@
+/**
+ * Seed kaikki (offline Wiktionary) definitions into the definitions table.
+ *
+ * These are static, pre-built definitions that serve as the Tier 3 fallback
+ * when LLM definitions fail. Seeding them into the DB eliminates the need
+ * for a separate in-memory cache that grows to 100MB.
+ *
+ * Files: data/definitions/{lang}.json (native) and {lang}_en.json (English)
+ * Format: { "word": "definition string", ... }
+ *
+ * Inserts with source='kaikki' or 'kaikki-en'. Uses ON CONFLICT DO NOTHING
+ * so LLM definitions (higher quality) aren't overwritten.
+ *
+ * Usage: node scripts/seed-kaikki-to-db.mjs
+ * Env: DATABASE_URL
+ */
+
+import { readdirSync, readFileSync } from 'fs';
+import { join } from 'path';
+import pg from 'pg';
+
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) { console.error('DATABASE_URL not set'); process.exit(1); } // fail fast without a connection string
+
+const DEFS_DIR = process.env.DEFS_DIR || join(process.cwd(), 'data', 'definitions'); // directory scanned for *.json definition files
+const BATCH_SIZE = 500; // rows per multi-row INSERT statement
+
+const pool = new pg.Pool({ connectionString: DATABASE_URL, ssl: { rejectUnauthorized: false }, max: 5 }); // NOTE: rejectUnauthorized=false skips server-certificate verification
+
+async function insertBatch(batch) {
+  // One multi-row INSERT per batch; DO NOTHING keeps whatever row already exists for a (lang, word).
+  if (batch.length === 0) return;
+  const tuples = [];
+  const params = [];
+  batch.forEach((row, i) => {
+    const base = 6 * i; // six parameters per row
+    tuples.push(`($${base + 1},$${base + 2},$${base + 3},$${base + 4},$${base + 5},$${base + 6})`);
+    params.push(row.lang, row.word, row.definition, row.definition_en, row.source, row.url);
+  });
+  await pool.query(
+    `INSERT INTO wordle.definitions (lang, word, definition, definition_en, source, url)
+ VALUES ${tuples.join(',')}
+ ON CONFLICT (lang, word) DO NOTHING`,
+    params
+  );
+}
+
+function wiktionaryUrl(word, langCode) {
+  // Builds the Wiktionary page URL for `word`; language codes without a mapping fall back to the English site.
+  const wiktLangs = { en: 'en', fi: 'fi', de: 'de', fr: 'fr', es: 'es', it: 'it', pt: 'pt', nl: 'nl', sv: 'sv', nb: 'no', nn: 'nn', da: 'da', pl: 'pl', ru: 'ru', uk: 'uk', bg: 'bg', hr: 'hr', sr: 'sr', sl: 'sl', cs: 'cs', sk: 'sk', ro: 'ro', hu: 'hu', tr: 'tr', az: 'az', et: 'et', lt: 'lt', lv: 'lv', el: 'el', ka: 'ka', hy: 'hy', he: 'he', ar: 'ar', fa: 'fa', vi: 'vi', id: 'id', ms: 'ms', ca: 'ca', gl: 'gl', eu: 'eu', br: 'br', oc: 'oc', la: 'la', ko: 'ko', sq: 'sq', mk: 'mk', is: 'is', ga: 'ga', cy: 'cy', mt: 'mt', eo: 'eo', ja: 'ja' };
+  const wl = Object.prototype.hasOwnProperty.call(wiktLangs, langCode) ? wiktLangs[langCode] : 'en'; // own keys only, so 'constructor' etc. never match
+  return `https://${wl}.wiktionary.org/wiki/${encodeURIComponent(word)}`;
+}
+
+async function main() {
+  await pool.query('SELECT 1');
+  console.log('[db] Connected\n');
+
+  const totals = { native: 0, en: 0 };
+  const files = readdirSync(DEFS_DIR).filter(f => f.endsWith('.json'));
+
+  for (const file of files) {
+    // NOTE(review): both files of a language insert under the same (lang, word) key, so with ON CONFLICT DO NOTHING only the first one processed is stored.
+    const isEn = file.endsWith('_en.json');
+    const langCode = file.replace(isEn ? '_en.json' : '.json', '');
+    const source = isEn ? 'kaikki-en' : 'kaikki';
+    const bucket = isEn ? 'en' : 'native';
+
+    const data = JSON.parse(readFileSync(join(DEFS_DIR, file), 'utf-8'));
+    const entries = Object.entries(data);
+    const pending = [];
+    // Totals count rows handed to insertBatch, not rows actually stored (conflicting rows are kept as they were).
+    const send = async () => {
+      await insertBatch(pending);
+      totals[bucket] += pending.length;
+      pending.length = 0;
+    };
+
+    for (const [word, value] of entries) {
+      const def = (value || '').replace(/\0/g, '');
+      if (!def) continue;
+      pending.push({
+        lang: langCode,
+        word: word.toLowerCase(),
+        definition: isEn ? null : def,
+        definition_en: isEn ? def : null,
+        source,
+        url: wiktionaryUrl(word, langCode),
+      });
+      if (pending.length >= BATCH_SIZE) await send();
+    }
+    if (pending.length) await send();
+    // The number logged here is the count of keys in the file, including entries skipped for an empty definition.
+    console.log(` [${source}] ${langCode}: ${entries.length}`);
+  }
+
+  console.log(`\n[done] ${totals.native} native + ${totals.en} English = ${totals.native + totals.en} total`);
+  await pool.end();
+}
+
+main().catch((err) => { console.error('FAILED:', err); process.exit(1); }); // any unhandled error ends the process with status 1
diff --git a/scripts/seed-semantic-db.ts b/scripts/seed-semantic-db.ts
new file mode 100644
index 00000000..d59c80b5
--- /dev/null
+++ b/scripts/seed-semantic-db.ts
@@ -0,0 +1,331 @@
+/**
+ * Seed Postgres with semantic embeddings + precompute target neighbors.
+ *
+ * This is a heavy script (~30 minutes for target_neighbors) that should
+ * be run locally, NOT on the production server.
+ *
+ * Steps:
+ * 1. Read embeddings from .f32 binary (or .json fallback)
+ * 2. Read metadata: umap, pca2d, targets, vocabulary, axes
+ * 3. Batch-insert into word_embeddings table
+ * 4. Insert axes into semantic_axes table
+ * 5. Precompute target_neighbors (879 targets × top 5k vocab)
+ *
+ * Usage: npx tsx scripts/seed-semantic-db.ts
+ *
+ * Requires: DATABASE_URL env var, pgvector extension enabled
+ */
+
+import { existsSync, readFileSync } from 'fs';
+import { join } from 'path';
+import pg from 'pg';
+
+// Connection string for the target database; the script exits immediately without it.
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) {
+ console.error('DATABASE_URL not set');
+ process.exit(1);
+}
+
+// Shared connection pool, capped at 5 clients.
+// NOTE(review): `rejectUnauthorized: false` disables TLS certificate verification — confirm this is intended for every host this script is pointed at.
+const pool = new pg.Pool({
+ connectionString: DATABASE_URL,
+ ssl: { rejectUnauthorized: false },
+ max: 5,
+});
+
+// Data files are looked up in RUNTIME_DIR first and SEMANTIC_DIR second (see loadEmbeddings / loadJson).
+const SEMANTIC_DIR = join(process.cwd(), 'data', 'semantic');
+const RUNTIME_DIR = join(process.cwd(), 'semantic-runtime');
+// Language code written to the `lang` column of every seeded row.
+const LANG = 'en';
+// Number of floats per embedding vector.
+const DIMS = 512;
+// Neighbors stored per target word by seedTargetNeighbors.
+const TOP_K_NEIGHBORS = 5000;
+// Rows per INSERT statement in seedWordEmbeddings.
+const BATCH_SIZE = 500;
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Data loading (same logic as server/utils/semantic.ts)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/**
+ * Load the word list and its embedding matrix.
+ *
+ * Prefers the binary pair `embeddings.f32` + `embeddings.meta.json` and falls
+ * back to `embeddings.json`. Each file is looked up in RUNTIME_DIR first,
+ * then in SEMANTIC_DIR.
+ *
+ * @returns `words` plus a flat, row-major Float32Array in which word `i`
+ *   occupies indices `[i * DIMS, (i + 1) * DIMS)`.
+ * @throws Error when the data holds fewer than DIMS floats per word, so that
+ *   truncated files fail here instead of seeding short or NaN vectors.
+ */
+function loadEmbeddings(): { words: string[]; embeddings: Float32Array } {
+  const locate = (name: string): string => {
+    const runtimePath = join(RUNTIME_DIR, name);
+    return existsSync(runtimePath) ? runtimePath : join(SEMANTIC_DIR, name);
+  };
+
+  // Try binary .f32 first
+  const f32Path = locate('embeddings.f32');
+  const metaPath = locate('embeddings.meta.json');
+
+  if (existsSync(f32Path) && existsSync(metaPath)) {
+    console.log('[load] Using binary .f32 format');
+    const meta = JSON.parse(readFileSync(metaPath, 'utf-8'));
+    const words: string[] = meta.words;
+    const buf = readFileSync(f32Path);
+
+    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
+    const requiredBytes = words.length * DIMS * bytesPerFloat;
+    if (buf.byteLength % bytesPerFloat !== 0 || buf.byteLength < requiredBytes) {
+      throw new Error(
+        `embeddings.f32 is ${buf.byteLength} bytes; need at least ${requiredBytes} ` +
+          `(${words.length} words x ${DIMS} dims) and a multiple of ${bytesPerFloat}`
+      );
+    }
+
+    // A Float32Array view requires a 4-byte-aligned offset. A Buffer may be a
+    // slice of a larger allocation, so copy into a fresh buffer if it is not.
+    const bytes = buf.byteOffset % bytesPerFloat === 0 ? buf : new Uint8Array(buf);
+    const embeddings = new Float32Array(
+      bytes.buffer,
+      bytes.byteOffset,
+      bytes.byteLength / bytesPerFloat
+    );
+    return { words, embeddings };
+  }
+
+  // Fallback to JSON
+  const jsonPath = locate('embeddings.json');
+
+  console.log('[load] Using JSON format (slow)');
+  const data = JSON.parse(readFileSync(jsonPath, 'utf-8'));
+  const words: string[] = data.words;
+  const N = words.length;
+  const embeddings = new Float32Array(N * DIMS);
+  for (let i = 0; i < N; i++) {
+    const vec: number[] | undefined = data.vectors?.[i];
+    if (!vec || vec.length < DIMS) {
+      throw new Error(
+        `embeddings.json: vector ${i} has ${vec?.length ?? 0} values; expected ${DIMS}`
+      );
+    }
+    // Only the first DIMS components are used, as before.
+    for (let j = 0; j < DIMS; j++) {
+      embeddings[i * DIMS + j] = vec[j]!;
+    }
+  }
+  return { words, embeddings };
+}
+
+/**
+ * Read and parse a JSON data file, preferring the copy in RUNTIME_DIR and
+ * falling back to SEMANTIC_DIR.
+ *
+ * The parsed value is cast to `T` without any runtime validation; callers
+ * choose `T` to describe the shape they expect.
+ *
+ * @param filename File name relative to the data directories.
+ * @returns The parsed JSON value, typed as `T`.
+ */
+function loadJson<T>(filename: string): T {
+  const runtimePath = join(RUNTIME_DIR, filename);
+  const staticPath = join(SEMANTIC_DIR, filename);
+  const p = existsSync(runtimePath) ? runtimePath : staticPath;
+  return JSON.parse(readFileSync(p, 'utf-8')) as T;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Seeding
+// ═══════════════════════════════════════════════════════════════════════════
+
+/**
+ * Replace all `word_embeddings` rows for LANG with the loaded embeddings.
+ *
+ * Each row carries the vector, optional 2-D UMAP and PCA coordinates, and
+ * flags telling whether the word appears in targets.json / vocabulary.json.
+ * Existing rows for LANG are deleted first; inserts run in batches of
+ * BATCH_SIZE rows on a single pooled client.
+ */
+async function seedWordEmbeddings() {
+  const { words, embeddings } = loadEmbeddings();
+  const targetsData = loadJson<{ targets?: string[] } | string[]>('targets.json');
+  const targets = new Set(Array.isArray(targetsData) ? targetsData : targetsData.targets ?? []);
+  // Coordinate files are indexed by word and read as [x, y] pairs below.
+  const umap = loadJson<Record<string, number[]>>('umap.json');
+  const pca2d = loadJson<Record<string, number[]>>('pca2d.json');
+  const vocabData = loadJson<{ words?: string[] } | string[]>('vocabulary.json');
+  const vocabulary = new Set(Array.isArray(vocabData) ? vocabData : vocabData.words ?? []);
+
+  // Sanitize coordinates — missing, NaN and Infinity all become NULL.
+  // Defined once here rather than re-created for every row.
+  const safeFloat = (v: number | undefined | null): number | null =>
+    v != null && Number.isFinite(v) ? v : null;
+
+  const N = words.length;
+  console.log(`[seed] Inserting ${N} word embeddings...`);
+
+  const client = await pool.connect();
+  try {
+    // Clear existing data for this language
+    await client.query('DELETE FROM wordle.word_embeddings WHERE lang = $1', [LANG]);
+
+    // Batch insert
+    for (let batch = 0; batch < N; batch += BATCH_SIZE) {
+      const end = Math.min(batch + BATCH_SIZE, N);
+      const values: string[] = [];
+      const params: unknown[] = [];
+      let paramIdx = 1;
+
+      for (let i = batch; i < end; i++) {
+        const word = words[i]!;
+        // pgvector text literal: "[v1,v2,...]"
+        const vecStr = `[${Array.from(embeddings.subarray(i * DIMS, (i + 1) * DIMS)).join(',')}]`;
+        const umapCoords = umap[word];
+        const pca2dCoords = pca2d[word];
+
+        values.push(
+          `($${paramIdx++}, $${paramIdx++}, $${paramIdx++}::vector, $${paramIdx++}, $${paramIdx++}, $${paramIdx++}, $${paramIdx++}, $${paramIdx++}, $${paramIdx++})`
+        );
+        params.push(
+          LANG,
+          word,
+          vecStr,
+          safeFloat(umapCoords?.[0]),
+          safeFloat(umapCoords?.[1]),
+          safeFloat(pca2dCoords?.[0]),
+          safeFloat(pca2dCoords?.[1]),
+          targets.has(word),
+          vocabulary.has(word)
+        );
+      }
+
+      await client.query(
+        `INSERT INTO wordle.word_embeddings (lang, word, embedding, umap_x, umap_y, pca2d_x, pca2d_y, is_target, is_vocab)
+         VALUES ${values.join(', ')}
+         ON CONFLICT (lang, word) DO UPDATE SET
+           embedding = EXCLUDED.embedding,
+           umap_x = EXCLUDED.umap_x,
+           umap_y = EXCLUDED.umap_y,
+           pca2d_x = EXCLUDED.pca2d_x,
+           pca2d_y = EXCLUDED.pca2d_y,
+           is_target = EXCLUDED.is_target,
+           is_vocab = EXCLUDED.is_vocab`,
+        params
+      );
+
+      // Progress line every 10th batch.
+      if ((batch / BATCH_SIZE) % 10 === 0) {
+        console.log(` ${end}/${N} words inserted`);
+      }
+    }
+
+    console.log(`[seed] Inserted ${N} word embeddings`);
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Replace all `semantic_axes` rows for LANG with the axes in axes.json.
+ *
+ * The file may be flat (axis names at the top level, with optional `_auc` and
+ * `_ranges` entries) or wrapped as `{ version, axes, coherence_auc, ranges }`.
+ * Axes without a `vector` are skipped and are not counted in the final log.
+ */
+async function seedAxes() {
+  // Loosely structured file, hence the `any` values.
+  const rawAxes = loadJson<Record<string, any>>('axes.json');
+  // Axes file may be wrapped: { version, axes: {...}, coherence_auc, ranges }
+  const axesData = rawAxes.axes ?? rawAxes;
+  const aucData = rawAxes.coherence_auc ?? rawAxes._auc ?? {};
+  const rangesData = rawAxes.ranges ?? rawAxes._ranges ?? {};
+  // In the flat layout the metadata keys sit beside the axes; filter them out.
+  const axisNames = Object.keys(axesData).filter(
+    (k) => !['version', '_model', '_dims', '_auc', '_ranges', 'coherence_auc', 'ranges'].includes(k)
+  );
+
+  console.log(`[seed] Inserting ${axisNames.length} semantic axes...`);
+
+  let inserted = 0;
+  const client = await pool.connect();
+  try {
+    await client.query('DELETE FROM wordle.semantic_axes WHERE lang = $1', [LANG]);
+
+    for (const name of axisNames) {
+      const axis = axesData[name];
+      if (!axis?.vector) continue;
+
+      const vecStr = `[${axis.vector.join(',')}]`;
+      const auc = aucData[name] ?? 0;
+      const ranges = rangesData[name];
+
+      await client.query(
+        `INSERT INTO wordle.semantic_axes (lang, name, low_anchor, high_anchor, vector, auc, range_p5, range_p95)
+         VALUES ($1, $2, $3, $4, $5::vector, $6, $7, $8)
+         ON CONFLICT (lang, name) DO UPDATE SET
+           low_anchor = EXCLUDED.low_anchor,
+           high_anchor = EXCLUDED.high_anchor,
+           vector = EXCLUDED.vector,
+           auc = EXCLUDED.auc,
+           range_p5 = EXCLUDED.range_p5,
+           range_p95 = EXCLUDED.range_p95`,
+        [
+          LANG,
+          name,
+          axis.low_anchor,
+          axis.high_anchor,
+          vecStr,
+          auc,
+          ranges?.p5 ?? null,
+          ranges?.p95 ?? null,
+        ]
+      );
+      inserted++;
+    }
+
+    // Report what was actually written, not the number of candidate names.
+    console.log(`[seed] Inserted ${inserted} axes`);
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Precompute and store the TOP_K_NEIGHBORS most similar words per target.
+ *
+ * Similarity is the raw dot product of the two embedding rows.
+ * NOTE(review): it is stored as `cosine`, which presumably relies on the
+ * embeddings being unit-normalized — confirm against the export pipeline.
+ * The target itself is part of the ranking. Existing rows for LANG are
+ * deleted first; targets missing from the embedding word list are skipped.
+ */
+async function seedTargetNeighbors() {
+  const { words, embeddings } = loadEmbeddings();
+  const targetsRaw = loadJson<{ targets?: string[] } | string[]>('targets.json');
+  const targets = Array.isArray(targetsRaw) ? targetsRaw : targetsRaw.targets ?? [];
+  const N = words.length;
+  const k = Math.min(TOP_K_NEIGHBORS, N);
+
+  // Build word → index map
+  const wordIndex = new Map<string, number>();
+  for (let i = 0; i < N; i++) wordIndex.set(words[i]!, i);
+
+  console.log(
+    `[seed] Computing target neighbors: ${targets.length} targets × top ${TOP_K_NEIGHBORS}...`
+  );
+  console.log(' (This takes ~30 minutes for 879 targets × 50k vocab)');
+
+  let insertedRows = 0;
+  const client = await pool.connect();
+  try {
+    await client.query('DELETE FROM wordle.target_neighbors WHERE lang = $1', [LANG]);
+
+    for (let t = 0; t < targets.length; t++) {
+      const target = targets[t]!;
+      const targetIdx = wordIndex.get(target);
+      if (targetIdx === undefined) {
+        console.warn(` [skip] target "${target}" not in vocab`);
+        continue;
+      }
+
+      // Compute the dot product against every word. The target row is sliced
+      // once so the inner loop does not recompute its offset.
+      const targetVec = embeddings.subarray(targetIdx * DIMS, (targetIdx + 1) * DIMS);
+      const cosines = new Float32Array(N);
+      for (let i = 0; i < N; i++) {
+        const base = i * DIMS;
+        let dot = 0;
+        for (let j = 0; j < DIMS; j++) {
+          dot += targetVec[j]! * embeddings[base + j]!;
+        }
+        cosines[i] = dot;
+      }
+
+      // Get top K indices by cosine (descending)
+      const indices = Array.from({ length: N }, (_, i) => i);
+      indices.sort((a, b) => cosines[b]! - cosines[a]!);
+
+      // Batch insert top K neighbors
+      const batchValues: string[] = [];
+      const batchParams: unknown[] = [];
+      let pIdx = 1;
+
+      for (let rank = 0; rank < k; rank++) {
+        const idx = indices[rank]!;
+        batchValues.push(
+          `($${pIdx++}, $${pIdx++}, $${pIdx++}, $${pIdx++}, $${pIdx++})`
+        );
+        batchParams.push(LANG, target, words[idx]!, rank + 1, cosines[idx]!);
+
+        // Flush every 1000 rows, and once more for the final partial batch.
+        if (batchValues.length >= 1000 || rank === k - 1) {
+          const result = await client.query(
+            `INSERT INTO wordle.target_neighbors (lang, target_word, word, rank, cosine)
+             VALUES ${batchValues.join(', ')}
+             ON CONFLICT (lang, target_word, word) DO NOTHING`,
+            batchParams
+          );
+          // rowCount excludes rows dropped by ON CONFLICT DO NOTHING.
+          insertedRows += result.rowCount ?? 0;
+          batchValues.length = 0;
+          batchParams.length = 0;
+          pIdx = 1;
+        }
+      }
+
+      if ((t + 1) % 50 === 0 || t === targets.length - 1) {
+        console.log(
+          ` ${t + 1}/${targets.length} targets processed`
+        );
+      }
+    }
+
+    // Actual rows written: skipped targets, a vocabulary smaller than
+    // TOP_K_NEIGHBORS and ignored conflicts all reduce the total.
+    console.log(
+      `[seed] Inserted ${insertedRows} target neighbor rows`
+    );
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Run the three seeding steps in order (word embeddings, axes, target
+ * neighbors), report the elapsed wall-clock time, then close the pool.
+ */
+async function main() {
+  console.log('=== Seeding semantic data into Postgres (pgvector) ===\n');
+
+  const startedAt = Date.now();
+  const steps = [seedWordEmbeddings, seedAxes, seedTargetNeighbors];
+  for (const step of steps) {
+    await step();
+  }
+
+  const minutes = (Date.now() - startedAt) / 1000 / 60;
+  console.log(`\n=== Seeding complete in ${minutes.toFixed(1)} minutes ===`);
+  await pool.end();
+}
+
+main().catch((e) => {
+ console.error('Seeding failed:', e);
+ process.exit(1);
+});
diff --git a/scripts/validate_i18n.py b/scripts/validate_i18n.py
new file mode 100644
index 00000000..faac34bc
--- /dev/null
+++ b/scripts/validate_i18n.py
@@ -0,0 +1,346 @@
+"""
+Validate i18n translation files against the default language config.
+
+Checks:
+ 1. Missing keys — language overrides a section but omits keys present in default
+ 2. Untranslated keys — value identical to English default (likely never translated)
+ 3. String length warnings — translations >2x English length that may overflow UI
+ 4. JSON validity — every language_config.json must parse
+
+Usage:
+ uv run python scripts/validate_i18n.py
+ uv run python scripts/validate_i18n.py --verbose
+ uv run python scripts/validate_i18n.py --strict # also fail on missing and untranslated keys
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+DATA_DIR = Path(__file__).resolve().parent.parent / "data"
+LANGUAGES_DIR = DATA_DIR / "languages"
+DEFAULT_CONFIG_PATH = DATA_DIR / "default_language_config.json"
+
+# Sections whose string keys are compared against the default
+CHECKED_SECTIONS = ("text", "ui", "help")
+
+# Languages exempt from untranslated-key checks (English-based or conlangs)
+UNTRANSLATED_EXEMPT_LANGS = {"en", "tlh", "qya", "pau"}
+
+# UI keys used in tight spaces — warn if translation exceeds this char count
+BUTTON_KEYS = {"text.share", "text.copied", "text.shared"}
+BUTTON_MAX_CHARS = 15
+
+STAT_LABEL_KEYS = {
+ "ui.games",
+ "ui.win_percent",
+ "ui.streak",
+ "ui.best",
+ "ui.wins",
+ "ui.solved",
+ "ui.combo",
+ "ui.avg_guesses",
+ "ui.failed",
+ "ui.points",
+}
+STAT_LABEL_MAX_CHARS = 12
+
+# General max length before warning (for non-description strings)
+GENERAL_MAX_CHARS = 80
+
+# Minimum string length to consider for untranslated check — very short strings
+# (1-2 chars) are often legitimately identical across languages.
+MIN_UNTRANSLATED_CHECK_LEN = 3
+
+
+# ---------------------------------------------------------------------------
+# Core validation logic (importable from tests)
+# ---------------------------------------------------------------------------
+
+
+def load_default_config() -> dict:
+    """Parse and return the default language config stored at DEFAULT_CONFIG_PATH."""
+    return json.loads(DEFAULT_CONFIG_PATH.read_text(encoding="utf-8"))
+
+
+def get_language_dirs() -> list[Path]:
+    """List, in sorted order, the subdirectories of LANGUAGES_DIR that hold a language_config.json.
+
+    Returns an empty list when LANGUAGES_DIR does not exist.
+    """
+    if not LANGUAGES_DIR.exists():
+        return []
+    found = [
+        entry
+        for entry in LANGUAGES_DIR.iterdir()
+        if entry.is_dir() and (entry / "language_config.json").exists()
+    ]
+    found.sort()
+    return found
+
+
+def load_lang_config(lang_dir: Path) -> dict:
+    """Parse and return `language_config.json` from the given language directory."""
+    config_path = lang_dir / "language_config.json"
+    return json.loads(config_path.read_text(encoding="utf-8"))
+
+
+def _flat_string_keys(section: dict, prefix: str = "") -> dict[str, str]:
+    """Return the string-valued entries of `section`, keyed as "<prefix>.<key>".
+
+    Non-string values (nested dicts, lists, numbers, ...) are dropped rather
+    than recursed into. With an empty prefix the keys are returned unchanged.
+    """
+    return {
+        (f"{prefix}.{name}" if prefix else name): value
+        for name, value in section.items()
+        if isinstance(value, str)
+    }
+
+
+def check_missing_keys(lang: str, lang_config: dict, default_config: dict) -> dict[str, list[str]]:
+    """Map each overridden section to the sorted default keys it lacks.
+
+    Sections the language does not define at all are ignored, because they
+    fall back to the default config wholesale. Sections with nothing missing
+    are left out of the result. `lang` is accepted but not used.
+    """
+    missing: dict[str, list[str]] = {}
+    for section in CHECKED_SECTIONS:
+        if section in lang_config:
+            expected = default_config.get(section, {}).keys()
+            absent = sorted(expected - lang_config[section].keys())
+            if absent:
+                missing[section] = absent
+    return missing
+
+
+def check_untranslated_keys(lang: str, lang_config: dict, default_config: dict) -> list[str]:
+    """List the "section.key" names whose text equals the default text.
+
+    Languages in UNTRANSLATED_EXEMPT_LANGS always yield an empty list. Only
+    sections the language overrides are inspected, and defaults shorter than
+    MIN_UNTRANSLATED_CHECK_LEN are ignored.
+    """
+    if lang in UNTRANSLATED_EXEMPT_LANGS:
+        return []
+
+    untranslated: list[str] = []
+    for section in CHECKED_SECTIONS:
+        if section not in lang_config:
+            continue
+        defaults = _flat_string_keys(default_config.get(section, {}), section)
+        translations = _flat_string_keys(lang_config.get(section, {}), section)
+        untranslated.extend(
+            key
+            for key, text in translations.items()
+            if key in defaults
+            and len(defaults[key]) >= MIN_UNTRANSLATED_CHECK_LEN
+            and text == defaults[key]
+        )
+    return untranslated
+
+
+def check_string_lengths(
+    lang: str, lang_config: dict, default_config: dict
+) -> list[tuple[str, int, int]]:
+    """Collect (key, translated_len, default_len) for translations that look too long.
+
+    A string is reported once if any of these holds:
+      * its key is in BUTTON_KEYS and it exceeds BUTTON_MAX_CHARS,
+      * its key is in STAT_LABEL_KEYS and it exceeds STAT_LABEL_MAX_CHARS,
+      * it is longer than both twice the default text and GENERAL_MAX_CHARS.
+
+    Keys with no default text, or an empty one, are skipped. `lang` is
+    accepted but not used.
+    """
+    warnings: list[tuple[str, int, int]] = []
+    for section in CHECKED_SECTIONS:
+        if section not in lang_config:
+            continue
+        defaults = _flat_string_keys(default_config.get(section, {}), section)
+        translations = _flat_string_keys(lang_config.get(section, {}), section)
+
+        for key, text in translations.items():
+            reference = defaults.get(key)
+            if not reference:
+                continue
+
+            text_len, reference_len = len(text), len(reference)
+            over_button_cap = key in BUTTON_KEYS and text_len > BUTTON_MAX_CHARS
+            over_stat_cap = key in STAT_LABEL_KEYS and text_len > STAT_LABEL_MAX_CHARS
+            # The absolute floor keeps short strings from tripping the 2x rule.
+            over_general = text_len > 2 * reference_len and text_len > GENERAL_MAX_CHARS
+
+            if over_button_cap or over_stat_cap or over_general:
+                warnings.append((key, text_len, reference_len))
+
+    return warnings
+
+
+def check_json_validity(lang_dir: Path) -> str | None:
+    """Try to parse `language_config.json` in `lang_dir`.
+
+    Returns the error text for a JSON syntax or text-decoding failure, or
+    None when the file parses. Other errors (e.g. a missing file) propagate.
+    """
+    config_path = lang_dir / "language_config.json"
+    try:
+        with open(config_path, encoding="utf-8") as handle:
+            json.load(handle)
+    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
+        return str(exc)
+    return None
+
+
+def validate_all() -> dict:
+    """Run every check against every language directory.
+
+    Returns a dict with `total_languages` plus one mapping per check
+    (`json_errors`, `missing_keys`, `untranslated`, `length_warnings`), each
+    keyed by language code and holding only the languages with findings.
+    A language whose config is not valid JSON gets no further checks.
+    """
+    default_config = load_default_config()
+    lang_dirs = get_language_dirs()
+
+    results = {
+        "total_languages": len(lang_dirs),
+        "json_errors": {},  # lang -> error string
+        "missing_keys": {},  # lang -> {section: [keys]}
+        "untranslated": {},  # lang -> [keys]
+        "length_warnings": {},  # lang -> [(key, lang_len, default_len)]
+    }
+
+    # Content checks, run in this order for each language that parses.
+    content_checks = (
+        ("missing_keys", check_missing_keys),
+        ("untranslated", check_untranslated_keys),
+        ("length_warnings", check_string_lengths),
+    )
+
+    for lang_dir in lang_dirs:
+        lang = lang_dir.name
+
+        err = check_json_validity(lang_dir)
+        if err:
+            results["json_errors"][lang] = err
+            continue  # can't check further if JSON is broken
+
+        lang_config = load_lang_config(lang_dir)
+        for bucket, check in content_checks:
+            findings = check(lang, lang_config, default_config)
+            if findings:
+                results[bucket][lang] = findings
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# CLI output
+# ---------------------------------------------------------------------------
+
+
+def print_results(results: dict, verbose: bool = False, strict: bool = False) -> int:
+    """Print a report for `results` and return the exit code (0 = pass, 1 = fail).
+
+    Default mode: only JSON errors are critical (exit 1).
+    --strict mode: missing keys and untranslated keys also cause exit 1.
+
+    Missing keys fall back to English defaults at runtime, so they don't break
+    the app — but they indicate incomplete translations.
+
+    Args:
+        results: The dict produced by validate_all().
+        verbose: List every affected key instead of per-language counts.
+        strict: Treat missing and untranslated keys as failures.
+
+    The "N/M languages OK" figure counts every language that triggers a
+    failure in the current mode, so it always agrees with PASS/FAIL.
+    """
+    total_languages = results["total_languages"]
+
+    # Languages that fail in the current mode. Under --strict this includes
+    # languages whose only problem is untranslated keys.
+    failing = set(results["json_errors"])
+    if strict:
+        failing |= set(results["missing_keys"]) | set(results["untranslated"])
+    has_critical = bool(failing)
+    soft_label = "FAIL" if strict else "WARN"
+
+    print(f"\n{'=' * 60}")
+    print(f"i18n Validation — {total_languages} languages checked")
+    print(f"{'=' * 60}\n")
+
+    # JSON errors (always critical)
+    if results["json_errors"]:
+        print("FAIL: Invalid JSON files")
+        for lang, err in sorted(results["json_errors"].items()):
+            print(f" {lang}: {err}")
+        print()
+
+    # Missing keys (critical only with --strict)
+    if results["missing_keys"]:
+        print(f"{soft_label}: Missing keys in {len(results['missing_keys'])} languages")
+        for lang, sections in sorted(results["missing_keys"].items()):
+            total = sum(len(keys) for keys in sections.values())
+            if verbose:
+                for section, keys in sorted(sections.items()):
+                    for key in keys:
+                        print(f" {lang}: {section}.{key}")
+            else:
+                section_summary = ", ".join(f"{s} ({len(k)})" for s, k in sorted(sections.items()))
+                print(f" {lang}: {total} missing — {section_summary}")
+        print()
+
+    # Untranslated keys (critical only with --strict)
+    if results["untranslated"]:
+        # Sort by count descending, show top 10 unless verbose
+        by_count = sorted(results["untranslated"].items(), key=lambda x: -len(x[1]))
+        print(f"{soft_label}: Untranslated keys (identical to English default)")
+        for shown, (lang, keys) in enumerate(by_count):
+            if not verbose and shown >= 10:
+                remaining = len(by_count) - 10
+                print(f" ... and {remaining} more languages")
+                break
+            if verbose:
+                print(f" {lang} ({len(keys)} keys):")
+                for key in keys:
+                    print(f" {key}")
+            else:
+                print(f" {lang}: {len(keys)} untranslated keys")
+        print()
+
+    # Length warnings (never critical, informational)
+    if results["length_warnings"]:
+        print(f"WARN: String length issues in {len(results['length_warnings'])} languages")
+        for lang, warns in sorted(results["length_warnings"].items()):
+            if verbose:
+                for key, lang_len, default_len in warns:
+                    print(f" {lang}: {key} — {lang_len} chars (default: {default_len})")
+            else:
+                print(f" {lang}: {len(warns)} strings may overflow UI")
+        print()
+
+    # Summary
+    ok_count = total_languages - len(failing)
+    print(f"{'=' * 60}")
+    if has_critical:
+        print(f"RESULT: FAIL — {ok_count}/{total_languages} languages OK")
+    else:
+        print(f"RESULT: PASS — {ok_count}/{total_languages} languages OK")
+        warnings = []
+        if results["missing_keys"]:
+            total_missing = sum(
+                sum(len(keys) for keys in sections.values())
+                for sections in results["missing_keys"].values()
+            )
+            warnings.append(
+                f"{total_missing} missing keys in {len(results['missing_keys'])} languages"
+            )
+        if results["untranslated"]:
+            total_untranslated = sum(len(v) for v in results["untranslated"].values())
+            warnings.append(
+                f"{total_untranslated} untranslated keys in "
+                f"{len(results['untranslated'])} languages"
+            )
+        if warnings:
+            print(f" Warnings: {'; '.join(warnings)}")
+            print(" Run with --strict to enforce")
+    print(f"{'=' * 60}\n")
+
+    return 1 if has_critical else 0
+
+
+def main():
+    """CLI entry point: parse flags, validate every language, print the report, and exit.
+
+    The process exit code is whatever print_results returns (0 = pass, 1 = fail).
+    """
+    parser = argparse.ArgumentParser(description="Validate i18n translation files")
+    parser.add_argument("--verbose", action="store_true", help="Show detailed per-language output")
+    parser.add_argument(
+        "--strict",
+        action="store_true",
+        # print_results fails on both kinds of finding under --strict.
+        help="Fail on missing and untranslated keys (not just warn)",
+    )
+    args = parser.parse_args()
+
+    results = validate_all()
+    exit_code = print_results(results, verbose=args.verbose, strict=args.strict)
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/server/api/[lang]/data.get.ts b/server/api/[lang]/data.get.ts
index 473d2ba7..4b581aff 100644
--- a/server/api/[lang]/data.get.ts
+++ b/server/api/[lang]/data.get.ts
@@ -55,12 +55,17 @@ export default defineEventHandler((event) => {
// Multi-board modes: N distinct daily words
const modeConfig = GAME_MODE_CONFIG[mode as GameMode];
if (modeConfig && modeConfig.boardCount > 1) {
- response.todays_words = getWordsForDay(lang, session.todaysIdx, modeConfig.boardCount);
+ response.todays_words = getWordsForDay(
+ lang,
+ session.todaysIdx,
+ modeConfig.boardCount,
+ mode
+ );
}
// Daily speed: deterministic sequence of 50 words (same for everyone)
if (mode === 'speed') {
- response.speed_daily_words = getWordsForDay(lang, session.todaysIdx, 50);
+ response.speed_daily_words = getWordsForDay(lang, session.todaysIdx, 50, mode);
}
}
diff --git a/server/api/[lang]/definition/[word].get.ts b/server/api/[lang]/definition/[word].get.ts
index da93261f..18d5844e 100644
--- a/server/api/[lang]/definition/[word].get.ts
+++ b/server/api/[lang]/definition/[word].get.ts
@@ -3,8 +3,10 @@
*/
import { loadAllData } from '../../../utils/data-loader';
import { fetchDefinition } from '../../../utils/definitions';
+import { rateLimit } from '../../../utils/rate-limit';
export default defineEventHandler(async (event) => {
+ rateLimit(event, 'llm:definition', 20, 60 * 1000);
const lang = getRouterParam(event, 'lang')!;
const word = getRouterParam(event, 'word')!.normalize('NFC');
const data = loadAllData();
diff --git a/server/api/[lang]/leaderboard.get.ts b/server/api/[lang]/leaderboard.get.ts
new file mode 100644
index 00000000..b9a9c1e2
--- /dev/null
+++ b/server/api/[lang]/leaderboard.get.ts
@@ -0,0 +1,678 @@
+/**
+ * GET /api/[lang]/leaderboard — Daily leaderboard rankings.
+ *
+ * Query params:
+ * mode — game mode (default: "classic")
+ * period — "today" (default), "week", "month"
+ * day — day index for "today" period (default: today)
+ * offset — pagination offset (default: 0)
+ * limit — page size (default: 50, max 100)
+ *
+ * Returns: { entries, total, you?, period, day_idx }
+ *
+ * Privacy: only exposes username + avatarUrl. Never email or displayName.
+ */
+import { prisma } from '~/server/utils/prisma';
+import { requireLang, langResponseFields } from '~/server/utils/data-loader';
+import { getTodaysIdx, idxToDate } from '~/server/lib/day-index';
+import { GAME_MODE_CONFIG } from '~/utils/game-modes';
+
+// In-memory cache with max size eviction (see cacheSet).
+// Entries hold the cached payload and the epoch-ms time after which readers
+// must treat it as stale.
+const MAX_CACHE_SIZE = 500;
+const cache = new Map<string, { data: any; expiresAt: number }>();
+const CACHE_TTL_TODAY = 60_000; // 60s
+const CACHE_TTL_AGG = 300_000; // 5min for week/month
+const CACHE_TTL_GLOBAL = 600_000; // 10min for global streaks/records
+
+/**
+ * Store `data` under `key` for `ttl` milliseconds.
+ *
+ * When the cache already holds MAX_CACHE_SIZE entries, expired entries are
+ * purged first; if that is not enough, the first half of the remaining keys
+ * (in Map iteration order) is dropped as well.
+ */
+function cacheSet(key: string, data: any, ttl: number) {
+  if (cache.size >= MAX_CACHE_SIZE) {
+    const now = Date.now();
+    [...cache.entries()]
+      .filter(([, entry]) => entry.expiresAt <= now)
+      .forEach(([staleKey]) => cache.delete(staleKey));
+
+    if (cache.size >= MAX_CACHE_SIZE) {
+      const dropCount = Math.ceil(cache.size / 2);
+      for (const oldKey of [...cache.keys()].slice(0, dropCount)) {
+        cache.delete(oldKey);
+      }
+    }
+  }
+  cache.set(key, { data, expiresAt: Date.now() + ttl });
+}
+
+/**
+ * One ranked row of a leaderboard response (also the base shape of `you`).
+ * Carries only public profile fields: username and avatar URL.
+ */
+interface LeaderboardEntry {
+ rank: number;
+ username: string;
+ avatarUrl: string | null;
+ // NOTE: for speed mode fetchToday fills this with the score (same value as `score`).
+ attempts: number; // for today: actual attempts; for week/month: avg
+ score?: number; // speed mode: points
+ wordsSolved?: number; // speed mode: words completed
+ daysPlayed?: number; // week/month only
+ // ISO-8601 timestamp (fetchToday uses Date.toISOString()).
+ playedAt: string;
+}
+
+// Leaderboard views selectable through the `period` query parameter.
+type Period = 'today' | 'week' | 'month' | 'streaks' | 'records';
+const VALID_PERIODS: Period[] = ['today', 'week', 'month', 'streaks', 'records'];
+// Per-period threshold echoed to clients as `min_days` and passed to the
+// aggregate (week/month) queries as their minimum-days argument.
+const MIN_DAYS: Record<Period, number> = { today: 1, week: 3, month: 10, streaks: 1, records: 1 };
+
+/**
+ * One row of the `records` list returned for `period=records`
+ * (produced by fetchRecords). `value` is already formatted as a string.
+ */
+interface RecordEntry {
+ label: string;
+ value: string;
+ username: string;
+ avatarUrl: string | null;
+}
+
+// Request handler: validates query params, serves the public ranking from the
+// in-memory cache when possible, and adds the caller's own rank if signed in.
+export default defineEventHandler(async (event) => {
+ const { lang, config } = requireLang(event);
+ const query = getQuery(event);
+
+ // Unknown modes and periods are rejected with 400.
+ const mode = (query.mode as string) || 'classic';
+ if (!(mode in GAME_MODE_CONFIG)) {
+ throw createError({ statusCode: 400, message: `Invalid mode: ${mode}` });
+ }
+
+ const period = ((query.period as string) || 'today') as Period;
+ if (!VALID_PERIODS.includes(period)) {
+ throw createError({ statusCode: 400, message: `Invalid period: ${period}` });
+ }
+
+ // "Today" is computed in the language's configured timezone (UTC if unset).
+ const tz = config.timezone || 'UTC';
+ const todaysIdx = getTodaysIdx(tz);
+ // A missing, empty or literal "null" `day` means today; future days are rejected.
+ const rawDay = query.day as string | undefined;
+ const dayIdx = rawDay && rawDay !== 'null' && rawDay !== '' ? parseInt(rawDay, 10) : todaysIdx;
+ if (isNaN(dayIdx) || dayIdx < 0 || dayIdx > todaysIdx) {
+ throw createError({ statusCode: 400, message: 'Invalid day index' });
+ }
+
+ // Pagination: offset >= 0, limit clamped to 1..100 (default 50).
+ const offset = Math.max(0, parseInt(query.offset as string, 10) || 0);
+ const limit = Math.min(100, Math.max(1, parseInt(query.limit as string, 10) || 50));
+
+ // Compute day range for week/month
+ const { startIdx, endIdx } = getDayRange(period, dayIdx, todaysIdx);
+
+ // Records: global (all languages, all modes)
+ // Cached under one shared key; the response carries no ranked entries.
+ if (period === 'records') {
+ const recordsCacheKey = 'global:records';
+ const now = Date.now();
+ const cachedRecords = cache.get(recordsCacheKey);
+ let records: RecordEntry[];
+ if (cachedRecords && cachedRecords.expiresAt > now) {
+ records = cachedRecords.data;
+ } else {
+ records = await fetchRecords(todaysIdx);
+ cacheSet(recordsCacheKey, records, CACHE_TTL_GLOBAL);
+ }
+ return {
+ ...langResponseFields(lang, config),
+ day_idx: dayIdx,
+ todays_idx: todaysIdx,
+ mode,
+ period,
+ min_days: 1,
+ entries: [],
+ total: 0,
+ you: null,
+ records,
+ };
+ }
+
+ // Streaks are global too, so their cache key omits lang and mode; every
+ // other period is keyed by lang, mode, day range and page.
+ const isStreaksPeriod = period === 'streaks';
+ const cacheTtl = isStreaksPeriod
+ ? CACHE_TTL_GLOBAL
+ : period === 'today'
+ ? CACHE_TTL_TODAY
+ : CACHE_TTL_AGG;
+ const cacheKey = isStreaksPeriod
+ ? `global:streaks:${offset}:${limit}`
+ : `${lang}:${mode}:${period}:${startIdx}-${endIdx}:${offset}:${limit}`;
+ const now = Date.now();
+ const cached = cache.get(cacheKey);
+ let publicData: { entries: LeaderboardEntry[]; total: number };
+
+ if (cached && cached.expiresAt > now) {
+ publicData = cached.data;
+ } else {
+ publicData =
+ period === 'today'
+ ? await fetchToday(lang, mode, dayIdx, offset, limit)
+ : isStreaksPeriod
+ ? await fetchStreaks(todaysIdx, offset, limit)
+ : await fetchAggregate(
+ lang,
+ mode,
+ startIdx,
+ endIdx,
+ MIN_DAYS[period],
+ offset,
+ limit
+ );
+ cacheSet(cacheKey, publicData, cacheTtl);
+ }
+
+ // Optional auth: include caller's rank
+ // The per-user part is never cached. `publicData.total` may come from the
+ // cache and so can lag behind the live rank queries.
+ let you: (LeaderboardEntry & { percentile: number }) | null = null;
+ try {
+ const session = await getUserSession(event);
+ const userId = (session?.user as any)?.id;
+ if (userId) {
+ if (period === 'streaks') {
+ you = await fetchYourStreak(userId, todaysIdx, publicData.total);
+ } else if (period === 'today') {
+ you = await fetchYourRankToday(userId, lang, mode, dayIdx, publicData.total);
+ } else {
+ you = await fetchYourRankAggregate(
+ userId,
+ lang,
+ mode,
+ startIdx,
+ endIdx,
+ MIN_DAYS[period],
+ publicData.total
+ );
+ }
+ }
+ } catch {
+ // Not logged in
+ // NOTE(review): this also swallows errors thrown by the rank queries above, leaving `you` null.
+ }
+
+ return {
+ ...langResponseFields(lang, config),
+ day_idx: dayIdx,
+ todays_idx: todaysIdx,
+ mode,
+ period,
+ min_days: MIN_DAYS[period],
+ entries: publicData.entries,
+ total: publicData.total,
+ you,
+ };
+});
+
+// ─── Helpers ────────────────────────────────────────────────────────────────
+
+/**
+ * Inclusive day-index window covered by a leaderboard period.
+ *
+ * - `today`: the single requested day (`dayIdx`).
+ * - `week`: the 7 days ending on `todaysIdx` (a rolling window, not a
+ *   calendar week).
+ * - anything else: the 30 days ending on `todaysIdx`.
+ *
+ * Only `today` uses `dayIdx`. The start is clamped so it never goes below 0.
+ */
+function getDayRange(period: Period, dayIdx: number, todaysIdx: number) {
+  switch (period) {
+    case 'today':
+      return { startIdx: dayIdx, endIdx: dayIdx };
+    case 'week':
+      return { startIdx: Math.max(0, todaysIdx - 6), endIdx: todaysIdx };
+    default:
+      return { startIdx: Math.max(0, todaysIdx - 29), endIdx: todaysIdx };
+  }
+}
+
+// ─── Today queries ──────────────────────────────────────────────────────────
+
+// Speed results are stored differently: playType='unlimited', won=null, no dayIdx.
+// They are therefore selected by `playedAt` instead: the 24-hour window that
+// starts at the instant idxToDate returns for the day index.
+function speedWhereForDay(lang: string, dayIdx: number) {
+  const MS_PER_DAY = 86400000;
+  const startMs = idxToDate(dayIdx).getTime();
+  return {
+    lang,
+    mode: 'speed' as const,
+    playedAt: { gte: new Date(startMs), lt: new Date(startMs + MS_PER_DAY) },
+  };
+}
+
+// Fetch one page of the single-day leaderboard plus the total row count.
+// Non-speed modes: won daily results for `dayIdx`, fewest attempts first,
+// earlier finish breaking ties. Speed mode: results played inside the day's
+// time window, ordered by score, then words solved, then max combo.
+async function fetchToday(
+ lang: string,
+ mode: string,
+ dayIdx: number,
+ offset: number,
+ limit: number
+): Promise<{ entries: LeaderboardEntry[]; total: number }> {
+ const isSpeed = mode === 'speed';
+ const where = isSpeed
+ ? speedWhereForDay(lang, dayIdx)
+ : { lang, mode, playType: 'daily' as const, dayIdx, won: true };
+
+ // Count and page are independent queries, so they run concurrently.
+ const [total, results] = await Promise.all([
+ prisma.result.count({ where }),
+ prisma.result.findMany({
+ where,
+ orderBy: isSpeed
+ ? [{ score: 'desc' }, { wordsSolved: 'desc' }, { maxCombo: 'desc' }]
+ : [{ attempts: 'asc' }, { playedAt: 'asc' }],
+ skip: offset,
+ take: limit,
+ select: {
+ attempts: true,
+ score: true,
+ wordsSolved: true,
+ maxCombo: true,
+ playedAt: true,
+ user: { select: { username: true, avatarUrl: true } },
+ },
+ }),
+ ]);
+
+ // Rank is positional within the page, offset by the page start.
+ // In speed mode `attempts` carries the score; `score`/`wordsSolved` are set only then.
+ const entries: LeaderboardEntry[] = results.map((r: (typeof results)[number], i: number) => ({
+ rank: offset + i + 1,
+ username: r.user.username,
+ avatarUrl: r.user.avatarUrl,
+ attempts: isSpeed ? (r.score ?? 0) : (r.attempts ?? 0),
+ score: isSpeed ? (r.score ?? 0) : undefined,
+ wordsSolved: isSpeed ? (r.wordsSolved ?? 0) : undefined,
+ playedAt: r.playedAt.toISOString(),
+ }));
+
+ return { entries, total };
+}
+
+/**
+ * Compute the caller's own entry on the single-day leaderboard.
+ *
+ * The rank is 1 + the number of results that sort strictly ahead of the
+ * caller's, using the same ordering as fetchToday. In speed mode the caller's
+ * highest-scoring run of the day is used.
+ *
+ * @param total Size of the leaderboard, used for the percentile. The caller
+ *   may pass a cached and therefore stale count.
+ * @returns The caller's entry with `percentile` (rank as a percentage of
+ *   `total`, capped at 100), or null when they have no qualifying result.
+ */
+async function fetchYourRankToday(
+  userId: string,
+  lang: string,
+  mode: string,
+  dayIdx: number,
+  total: number
+): Promise<(LeaderboardEntry & { percentile: number }) | null> {
+  const isSpeed = mode === 'speed';
+  const myWhere = isSpeed
+    ? { userId, ...speedWhereForDay(lang, dayIdx) }
+    : { userId, lang, mode, playType: 'daily' as const, dayIdx, won: true };
+  const myResult = await prisma.result.findFirst({
+    where: myWhere,
+    ...(isSpeed ? { orderBy: { score: 'desc' as const } } : {}),
+    select: {
+      attempts: true,
+      score: true,
+      wordsSolved: true,
+      maxCombo: true,
+      totalGuesses: true,
+      playedAt: true,
+      user: { select: { username: true, avatarUrl: true } },
+    },
+  });
+  if (!myResult) return null;
+
+  let rankAbove: number;
+  if (isSpeed) {
+    // Ahead of me: higher score, then more words solved, then higher combo.
+    const sc = myResult.score ?? 0;
+    const ws = myResult.wordsSolved ?? 0;
+    const mc = myResult.maxCombo ?? 0;
+    rankAbove = await prisma.result.count({
+      where: {
+        ...speedWhereForDay(lang, dayIdx),
+        OR: [
+          { score: { gt: sc } },
+          { score: sc, wordsSolved: { gt: ws } },
+          { score: sc, wordsSolved: ws, maxCombo: { gt: mc } },
+        ],
+      },
+    });
+  } else {
+    // Ahead of me: fewer attempts, or the same attempts finished earlier.
+    const att = myResult.attempts ?? 999;
+    const pat = myResult.playedAt;
+    rankAbove = await prisma.result.count({
+      where: {
+        lang,
+        mode,
+        playType: 'daily',
+        dayIdx,
+        won: true,
+        OR: [{ attempts: { lt: att } }, { attempts: att, playedAt: { lt: pat } }],
+      },
+    });
+  }
+
+  const rank = rankAbove + 1;
+  // The rank is live but `total` may be stale, so rank can briefly exceed
+  // total; cap the percentile so it never goes above 100.
+  const percentile = total > 0 ? Math.min(100, Math.round((rank / total) * 100)) : 0;
+
+  return {
+    rank,
+    username: myResult.user.username,
+    avatarUrl: myResult.user.avatarUrl,
+    attempts: isSpeed ? (myResult.score ?? 0) : (myResult.attempts ?? 0),
+    score: isSpeed ? (myResult.score ?? 0) : undefined,
+    wordsSolved: isSpeed ? (myResult.wordsSolved ?? 0) : undefined,
+    playedAt: myResult.playedAt.toISOString(),
+    percentile,
+  };
+}
+
+// ─── Week/Month aggregate queries ───────────────────────────────────────────
+
+/**
+ * Row shape returned by the week/month aggregate SQL queries.
+ * Field names mirror the column aliases used in those queries.
+ */
+interface AggRow {
+ user_id: string;
+ username: string;
+ avatar_url: string | null;
+ // AVG(r.score) in speed mode, AVG(r.attempts) otherwise
+ avg_attempts: number;
+ // Only selected in speed mode: AVG(r.words_solved)
+ avg_words_solved?: number;
+ // COUNT(*) of the user's matching result rows in the day range
+ days_played: number;
+}
+
+/**
+ * Fetch one page of the week/month leaderboard, ranked by per-user average.
+ *
+ * Only winning daily results (`play_type = 'daily'`, `won = true`) whose
+ * `day_idx` lies in the inclusive range `[startIdx, endIdx]` are considered,
+ * and a user must have at least `minDays` such rows to appear.
+ * Speed mode ranks by average score (highest first); every other mode ranks
+ * by average attempts (lowest first).
+ *
+ * NOTE(review): speed mode is filtered on `won = true` here, while the
+ * per-day speed queries in this file use `speedWhereForDay()` — confirm both
+ * select the same set of rows.
+ *
+ * @param lang - language code
+ * @param mode - game mode (`'speed'` switches the ranking column and direction)
+ * @param startIdx - first day index of the window (inclusive)
+ * @param endIdx - last day index of the window (inclusive)
+ * @param minDays - minimum number of qualifying results per user
+ * @param offset - number of ranked users to skip
+ * @param limit - page size
+ * @returns the page of entries (averages rounded to one decimal) and the
+ *   total number of qualifying users
+ */
+async function fetchAggregate(
+  lang: string,
+  mode: string,
+  startIdx: number,
+  endIdx: number,
+  minDays: number,
+  offset: number,
+  limit: number
+): Promise<{ entries: LeaderboardEntry[]; total: number }> {
+  const isSpeed = mode === 'speed';
+
+  // SQL fragments are picked from fixed literals by `isSpeed`; no caller input
+  // is ever interpolated into the query text.
+  const avgExpr = isSpeed ? 'AVG(r.score)' : 'AVG(r.attempts)';
+  const orderDir = isSpeed ? 'DESC' : 'ASC';
+  const wordsSolvedCol = isSpeed ? ', AVG(r.words_solved) as avg_words_solved' : '';
+
+  // The count and the page are independent, so run them concurrently.
+  const [countResult, rows] = await Promise.all([
+    // Count qualifying players
+    prisma.$queryRawUnsafe<[{ count: bigint }]>(
+      `SELECT COUNT(*) as count FROM (
+        SELECT r.user_id FROM wordle.results r
+        WHERE r.lang = $1 AND r.mode = $2 AND r.play_type = 'daily'
+          AND r.day_idx >= $3 AND r.day_idx <= $4 AND r.won = true
+        GROUP BY r.user_id HAVING COUNT(*) >= $5
+      ) sub`,
+      lang,
+      mode,
+      startIdx,
+      endIdx,
+      minDays
+    ),
+    // `r.user_id` is the final sort key so OFFSET/LIMIT pages stay stable
+    // when two users tie on both the average and the first play time.
+    prisma.$queryRawUnsafe<AggRow[]>(
+      `SELECT r.user_id, u.username, u.avatar_url,
+        ${avgExpr} as avg_attempts
+        ${wordsSolvedCol},
+        COUNT(*)::int as days_played
+      FROM wordle.results r
+      JOIN wordle.users u ON r.user_id = u.id
+      WHERE r.lang = $1 AND r.mode = $2 AND r.play_type = 'daily'
+        AND r.day_idx >= $3 AND r.day_idx <= $4 AND r.won = true
+      GROUP BY r.user_id, u.username, u.avatar_url
+      HAVING COUNT(*) >= $5
+      ORDER BY ${avgExpr} ${orderDir}, MIN(r.played_at) ASC, r.user_id ASC
+      OFFSET $6 LIMIT $7`,
+      lang,
+      mode,
+      startIdx,
+      endIdx,
+      minDays,
+      offset,
+      limit
+    ),
+  ]);
+  const total = Number(countResult[0]?.count ?? 0);
+
+  const entries: LeaderboardEntry[] = rows.map((r: AggRow, i: number) => ({
+    rank: offset + i + 1,
+    username: r.username,
+    avatarUrl: r.avatar_url,
+    attempts: Math.round(Number(r.avg_attempts) * 10) / 10, // 1 decimal place
+    wordsSolved:
+      isSpeed && r.avg_words_solved != null
+        ? Math.round(Number(r.avg_words_solved) * 10) / 10
+        : undefined,
+    daysPlayed: Number(r.days_played),
+    playedAt: '',
+  }));
+
+  return { entries, total };
+}
+
+/**
+ * Compute the given user's position on the week/month leaderboard.
+ *
+ * First aggregates the user's own winning daily results in the inclusive
+ * range `[startIdx, endIdx]`; if the user has fewer than `minDays` of them,
+ * returns `null`. Otherwise counts the qualifying users whose average is
+ * strictly better (higher score in speed mode, fewer attempts otherwise) and
+ * reports `rank = count + 1`. Users with an equal average share a rank.
+ *
+ * @param userId - user whose aggregate is looked up
+ * @param lang - language code
+ * @param mode - game mode (`'speed'` averages score instead of attempts)
+ * @param startIdx - first day index of the window (inclusive)
+ * @param endIdx - last day index of the window (inclusive)
+ * @param minDays - minimum number of qualifying results per user
+ * @param total - leaderboard size, used as the percentile denominator
+ * @returns the user's entry with `percentile`, or `null` if they do not qualify
+ */
+async function fetchYourRankAggregate(
+  userId: string,
+  lang: string,
+  mode: string,
+  startIdx: number,
+  endIdx: number,
+  minDays: number,
+  total: number
+): Promise<(LeaderboardEntry & { percentile: number }) | null> {
+  const isSpeed = mode === 'speed';
+  // Fixed literal chosen by `isSpeed`; never built from caller input.
+  const avgCol = isSpeed ? 'AVG(r.score)' : 'AVG(r.attempts)';
+
+  // Get my aggregate
+  const myRows = await prisma.$queryRawUnsafe<AggRow[]>(
+    `SELECT r.user_id, u.username, u.avatar_url,
+      ${avgCol} as avg_attempts
+      ${isSpeed ? ', AVG(r.words_solved) as avg_words_solved' : ''},
+      COUNT(*)::int as days_played
+    FROM wordle.results r
+    JOIN wordle.users u ON r.user_id = u.id
+    WHERE r.user_id = $1 AND r.lang = $2 AND r.mode = $3 AND r.play_type = 'daily'
+      AND r.day_idx >= $4 AND r.day_idx <= $5 AND r.won = true
+    GROUP BY r.user_id, u.username, u.avatar_url
+    HAVING COUNT(*) >= $6`,
+    userId,
+    lang,
+    mode,
+    startIdx,
+    endIdx,
+    minDays
+  );
+
+  // No row means the user has not reached `minDays` in this window.
+  const my = myRows[0];
+  if (!my) return null;
+  const myAvg = Number(my.avg_attempts);
+
+  // Count players who rank above me
+  const orderOp = isSpeed ? '>' : '<';
+  const rankResult = await prisma.$queryRawUnsafe<[{ count: bigint }]>(
+    `SELECT COUNT(*) as count FROM (
+      SELECT r.user_id, ${avgCol} as avg_att
+      FROM wordle.results r
+      WHERE r.lang = $1 AND r.mode = $2 AND r.play_type = 'daily'
+        AND r.day_idx >= $3 AND r.day_idx <= $4 AND r.won = true
+      GROUP BY r.user_id HAVING COUNT(*) >= $5
+    ) sub WHERE sub.avg_att ${orderOp} $6`,
+    lang,
+    mode,
+    startIdx,
+    endIdx,
+    minDays,
+    myAvg
+  );
+
+  const rank = Number(rankResult[0]?.count ?? 0) + 1;
+  const percentile = total > 0 ? Math.round((rank / total) * 100) : 0;
+
+  return {
+    rank,
+    username: my.username,
+    avatarUrl: my.avatar_url,
+    attempts: Math.round(myAvg * 10) / 10, // 1 decimal place
+    wordsSolved:
+      isSpeed && my.avg_words_solved != null
+        ? Math.round(Number(my.avg_words_solved) * 10) / 10
+        : undefined,
+    daysPlayed: Number(my.days_played),
+    playedAt: '',
+    percentile,
+  };
+}
+
+// ─── Streaks (global — all languages, all modes) ────────────────────────────
+
+// Global streaks CTE: any daily win across any lang/mode counts toward the streak.
+//
+// How it works:
+//   daily_plays - one row per (user, day) with at least one winning daily result
+//   gaps        - day_idx minus the per-user row number; this difference stays
+//                 constant across a run of consecutive days, so it labels each run
+//   streaks     - one row per run: its length and the last day it covers
+//
+// The string ends after the CTE definitions; callers append their own SELECT.
+const GLOBAL_STREAKS_CTE = `WITH daily_plays AS (
+ SELECT DISTINCT user_id, day_idx FROM wordle.results
+ WHERE play_type = 'daily' AND won = true
+), gaps AS (
+ SELECT user_id, day_idx,
+ day_idx - ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY day_idx) AS grp
+ FROM daily_plays
+), streaks AS (
+ SELECT user_id, COUNT(*)::int AS streak_len, MAX(day_idx) AS last_day
+ FROM gaps GROUP BY user_id, grp
+)`;
+
+/**
+ * Row shape returned by the streak SQL queries (a `streaks` CTE row joined
+ * with the user's display fields).
+ */
+interface StreakRow {
+ user_id: string;
+ username: string;
+ avatar_url: string | null;
+ // Number of consecutive winning days in this run (COUNT(*) per run)
+ streak_len: number;
+ // Highest day_idx covered by this run (MAX(day_idx))
+ last_day: number;
+}
+
+/**
+ * Fetch one page of the global "current streak" leaderboard.
+ *
+ * A streak is listed only if its last day is `todaysIdx - 1` or later, i.e.
+ * the run reached at least yesterday. Entries are ordered by streak length
+ * (longest first), then by most recent last day.
+ *
+ * @param todaysIdx - today's day index
+ * @param offset - number of ranked streaks to skip
+ * @param limit - page size
+ * @returns the page of entries (streak length is exposed as both `attempts`
+ *   and `daysPlayed`) and the total number of listed streaks
+ */
+async function fetchStreaks(
+  todaysIdx: number,
+  offset: number,
+  limit: number
+): Promise<{ entries: LeaderboardEntry[]; total: number }> {
+  const [countResult, rows] = await Promise.all([
+    prisma.$queryRawUnsafe<[{ count: bigint }]>(
+      `${GLOBAL_STREAKS_CTE} SELECT COUNT(*)::bigint as count FROM streaks WHERE last_day >= $1 - 1`,
+      todaysIdx
+    ),
+    // Many users share the same (streak_len, last_day); `s.user_id` is the
+    // final sort key so OFFSET/LIMIT pages neither repeat nor skip rows.
+    prisma.$queryRawUnsafe<StreakRow[]>(
+      `${GLOBAL_STREAKS_CTE} SELECT s.user_id, u.username, u.avatar_url, s.streak_len, s.last_day
+      FROM streaks s JOIN wordle.users u ON s.user_id = u.id
+      WHERE s.last_day >= $1 - 1
+      ORDER BY s.streak_len DESC, s.last_day DESC, s.user_id ASC
+      OFFSET $2 LIMIT $3`,
+      todaysIdx,
+      offset,
+      limit
+    ),
+  ]);
+
+  const total = Number(countResult[0]?.count ?? 0);
+  const entries: LeaderboardEntry[] = rows.map((r: StreakRow, i: number) => ({
+    rank: offset + i + 1,
+    username: r.username,
+    avatarUrl: r.avatar_url,
+    attempts: Number(r.streak_len),
+    daysPlayed: Number(r.streak_len),
+    playedAt: '',
+  }));
+
+  return { entries, total };
+}
+
+/**
+ * Compute the given user's position on the global streak leaderboard.
+ *
+ * Finds the user's streak whose last day is `todaysIdx - 1` or later, then
+ * counts the listed streaks that are strictly longer; `rank` is that count
+ * plus one, so equal-length streaks share a rank.
+ *
+ * @param userId - user whose streak is looked up
+ * @param todaysIdx - today's day index
+ * @param total - leaderboard size, used as the percentile denominator
+ * @returns the user's entry with `percentile`, or `null` when the user has
+ *   no streak reaching yesterday or today
+ */
+async function fetchYourStreak(
+  userId: string,
+  todaysIdx: number,
+  total: number
+): Promise<(LeaderboardEntry & { percentile: number }) | null> {
+  // Same shape as GLOBAL_STREAKS_CTE, but `daily_plays` is restricted to this
+  // user so only their rows are scanned.
+  const rows = await prisma.$queryRawUnsafe<StreakRow[]>(
+    `WITH daily_plays AS (
+      SELECT DISTINCT user_id, day_idx FROM wordle.results
+      WHERE user_id = $1 AND play_type = 'daily' AND won = true
+    ), gaps AS (
+      SELECT user_id, day_idx, day_idx - ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY day_idx) AS grp
+      FROM daily_plays
+    ), streaks AS (
+      SELECT user_id, COUNT(*)::int AS streak_len, MAX(day_idx) AS last_day
+      FROM gaps GROUP BY user_id, grp
+    )
+    SELECT s.user_id, u.username, u.avatar_url, s.streak_len, s.last_day
+    FROM streaks s JOIN wordle.users u ON s.user_id = u.id
+    WHERE s.last_day >= $2 - 1
+    ORDER BY s.streak_len DESC LIMIT 1`,
+    userId,
+    todaysIdx
+  );
+
+  const my = rows[0];
+  if (!my) return null;
+  const myStreak = Number(my.streak_len);
+
+  const rankResult = await prisma.$queryRawUnsafe<[{ count: bigint }]>(
+    `${GLOBAL_STREAKS_CTE} SELECT COUNT(*)::bigint as count FROM streaks
+    WHERE last_day >= $1 - 1 AND streak_len > $2`,
+    todaysIdx,
+    myStreak
+  );
+
+  const rank = Number(rankResult[0]?.count ?? 0) + 1;
+  const percentile = total > 0 ? Math.round((rank / total) * 100) : 0;
+
+  return {
+    rank,
+    username: my.username,
+    avatarUrl: my.avatar_url,
+    attempts: myStreak,
+    daysPlayed: myStreak,
+    playedAt: '',
+    percentile,
+  };
+}
+
+// ─── Records (Hall of Fame) ─────────────────────────────────────────────────
+
+/**
+ * Build the "Hall of Fame" list: one all-time record holder per category.
+ *
+ * Runs four independent queries concurrently and emits a record for each one
+ * that returns a row, in this order: longest streak, most daily wins, most
+ * languages with a daily win, best speed score.
+ *
+ * @param _todaysIdx - unused; kept so the signature matches existing callers
+ * @returns zero to four record entries
+ */
+async function fetchRecords(_todaysIdx: number): Promise<RecordEntry[]> {
+  // All records are global — across all languages and modes
+  const [streakRows, gamesRows, langsRows, speedRows] = await Promise.all([
+    // 1. Longest streak ever (global)
+    prisma.$queryRawUnsafe<StreakRow[]>(
+      `${GLOBAL_STREAKS_CTE} SELECT s.user_id, u.username, u.avatar_url, s.streak_len, s.last_day
+      FROM streaks s JOIN wordle.users u ON s.user_id = u.id
+      ORDER BY s.streak_len DESC LIMIT 1`
+    ),
+    // 2. Most daily games won (global, all modes)
+    prisma.$queryRawUnsafe<
+      { username: string; avatar_url: string | null; game_count: number }[]
+    >(
+      `SELECT u.username, u.avatar_url, COUNT(*)::int AS game_count
+      FROM wordle.results r JOIN wordle.users u ON r.user_id = u.id
+      WHERE r.play_type = 'daily' AND r.won = true
+      GROUP BY u.id, u.username, u.avatar_url
+      ORDER BY game_count DESC LIMIT 1`
+    ),
+    // 3. Most languages with a daily win (global)
+    prisma.$queryRawUnsafe<{ username: string; avatar_url: string | null; val: number }[]>(
+      `SELECT u.username, u.avatar_url, COUNT(DISTINCT r.lang)::int AS val
+      FROM wordle.results r JOIN wordle.users u ON r.user_id = u.id
+      WHERE r.play_type = 'daily' AND r.won = true
+      GROUP BY u.id, u.username, u.avatar_url
+      ORDER BY val DESC LIMIT 1`
+    ),
+    // 4. Best speed score ever (global)
+    prisma.$queryRawUnsafe<
+      { username: string; avatar_url: string | null; best_score: number }[]
+    >(
+      `SELECT u.username, u.avatar_url, MAX(r.score)::int AS best_score
+      FROM wordle.results r JOIN wordle.users u ON r.user_id = u.id
+      WHERE r.mode = 'speed' AND r.score IS NOT NULL
+      GROUP BY u.id, u.username, u.avatar_url
+      ORDER BY best_score DESC LIMIT 1`
+    ),
+  ]);
+
+  const records: RecordEntry[] = [];
+
+  const topStreak = streakRows[0];
+  if (topStreak) {
+    records.push({
+      label: 'Longest Streak',
+      value: `${topStreak.streak_len} days`,
+      username: topStreak.username,
+      avatarUrl: topStreak.avatar_url,
+    });
+  }
+
+  const topWins = gamesRows[0];
+  if (topWins) {
+    records.push({
+      label: 'Most Games Won',
+      value: `${topWins.game_count} wins`,
+      username: topWins.username,
+      avatarUrl: topWins.avatar_url,
+    });
+  }
+
+  const topLangs = langsRows[0];
+  if (topLangs) {
+    records.push({
+      label: 'Most Languages',
+      value: `${topLangs.val} languages`,
+      username: topLangs.username,
+      avatarUrl: topLangs.avatar_url,
+    });
+  }
+
+  const topSpeed = speedRows[0];
+  if (topSpeed) {
+    records.push({
+      label: 'Best Speed Score',
+      value: `${Number(topSpeed.best_score).toLocaleString()} pts`,
+      username: topSpeed.username,
+      avatarUrl: topSpeed.avatar_url,
+    });
+  }
+
+  return records;
+}
diff --git a/server/api/[lang]/semantic/guess.post.ts b/server/api/[lang]/semantic/guess.post.ts
index 5d051f36..ed5629a3 100644
--- a/server/api/[lang]/semantic/guess.post.ts
+++ b/server/api/[lang]/semantic/guess.post.ts
@@ -1,37 +1,22 @@
/**
* Semantic Explorer — submit a guess.
*
- * The canonical "how close" signal is `rank` — the guess's position in the
- * target's vocab-sorted neighbour list (1 = target itself). No cosine
- * stretches, no magic constants. The client derives a log-% display from
- * (rank, totalRanked) for the proximity bar and color gradient.
- *
- * Returns:
- * - rank: 1-indexed position in target's cosine-sorted neighbour list
- * - totalRanked: vocab size for log-scaled display
- * - display: 1 - log(rank)/log(N), capped at 0.99 for non-winning guesses
- * - similarity: raw cosine (kept for debugging, not for display)
- * - umapPosition: guess's UMAP coordinates (for map angle)
- * - allProjectionsNormalized: 20 axis projections for axis-slice view
- * - compass: top-5 axis deltas with prose + intensity
- * - won: true when rank === 1 (guess === target)
+ * Rank lookup via precomputed target_neighbors table (pgvector).
+ * Embeddings, coordinates, and neighbors all live in Postgres.
+ * Only axes (140KB) and valid words (2MB) are in memory.
*/
import {
- computeCompass,
- computeGuessRank,
cosineSimilarity,
- fetchEmbeddingOnDemand,
- get2dPosition,
- getEmbedding,
getSessionTarget,
- loadSemanticDataSafe,
- normalizeProjection,
- projectAllAxes,
rankToDisplay,
+ computeCompass,
} from '~/server/utils/semantic';
+import * as semanticDb from '~/server/utils/_semantic-db';
+import { getValidWords } from '~/server/plugins/semantic-warmup';
export default defineEventHandler(async (event) => {
+ const lang = getRouterParam(event, 'lang') ?? 'en';
const body = await readBody(event);
const targetId = body?.targetId as string | undefined;
const word = (body?.word as string | undefined)?.toLowerCase().trim();
@@ -46,72 +31,64 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Unknown or expired targetId' });
}
- const data = loadSemanticDataSafe();
- const targetVec = getEmbedding(data, target);
+ const targetVec = await semanticDb.getEmbedding(lang, target);
if (!targetVec) {
throw createError({ statusCode: 500, message: 'Target embedding missing' });
}
- let guessVec = getEmbedding(data, word);
+ let guessVec = await semanticDb.getEmbedding(lang, word);
if (!guessVec) {
if (!/^[a-z][a-z\-']{0,24}$/.test(word)) {
return { valid: false, word, reason: 'bad_format' };
}
- // Spellcheck: reject words that aren't in the validator dictionary.
- // Stops misspellings like "girafe" from getting an OpenAI embedding.
- if (data.validWords.size > 0 && !data.validWords.has(word)) {
+ const validWords = getValidWords();
+ if (validWords.size > 0 && !validWords.has(word)) {
return { valid: false, word, reason: 'not_a_word' };
}
- guessVec = await fetchEmbeddingOnDemand(data, word);
+ guessVec = await semanticDb.fetchOnDemandEmbedding(lang, word);
if (!guessVec) {
return { valid: false, word, reason: 'embedding_failed' };
}
}
const rawSimilarity = cosineSimilarity(guessVec, targetVec);
- const rank = computeGuessRank(data, target, word, guessVec) ?? data.words.length;
- const totalRanked = data.words.length;
+
+ const [rankResult, totalRanked, umapPosition] = await Promise.all([
+ semanticDb.computeGuessRank(lang, target, word, guessVec, targetVec),
+ semanticDb.getTotalRanked(lang),
+ semanticDb.get2dPosition(lang, word),
+ ]);
+ const rank = rankResult ?? totalRanked;
const won = rank === 1;
- // Display % — log-scaled rank, capped at 0.99 for non-wins so only the
- // target itself can show 100%.
const display = won ? 1 : Math.min(0.99, rankToDisplay(rank, totalRanked));
- const umapPosition = get2dPosition(data, word);
+ const allProjectionsNormalized = semanticDb.projectAxes(guessVec);
- // Per-axis projections (normalized to [0,1]), cached client-side for
- // axis slice view transitions.
- const guessProjections = projectAllAxes(data, guessVec);
- const normalizedGuessProjections: Record = {};
- for (const axis of data.axesNames) {
- normalizedGuessProjections[axis] = normalizeProjection(data, axis, guessProjections[axis]!);
+ let compassResult = { hints: [] as any[], status: 'close' as const, totalExplained: 0 };
+ const cachedAxes = semanticDb.getCachedAxes();
+ if (cachedAxes) {
+ try {
+ compassResult = computeCompass(cachedAxes, guessVec, targetVec, 5, []);
+ } catch {
+ /* skip */
+ }
}
- // Compass hints: iterative Gram-Schmidt matching pursuit. Return up to 5
- // axes so the client can filter out whichever are currently displayed on
- // a slice view and still have 2 to render. Top 2 are guaranteed orthogonal;
- // positions 3-5 are fallbacks for the slice-view exclude filter. Returns
- // status='close' when the pair is too near for meaningful hints.
- const compassResult = computeCompass(data, guessVec, targetVec, 5, []);
-
const response: Record = {
valid: true,
word,
rank,
totalRanked,
display,
- similarity: rawSimilarity, // raw cosine, for debugging only
+ similarity: rawSimilarity,
umapPosition,
- allProjectionsNormalized: normalizedGuessProjections,
+ allProjectionsNormalized,
compass: compassResult.hints,
compassStatus: compassResult.status,
compassExplained: compassResult.totalExplained,
won,
guessNumber,
};
-
- if (won) {
- response.targetWord = target;
- }
-
+ if (won) response.targetWord = target;
return response;
});
diff --git a/server/api/[lang]/semantic/hint.post.ts b/server/api/[lang]/semantic/hint.post.ts
index 70e39ad8..49bf2de7 100644
--- a/server/api/[lang]/semantic/hint.post.ts
+++ b/server/api/[lang]/semantic/hint.post.ts
@@ -2,13 +2,11 @@
// Cache-keyed on target word only — same hint for all players on the same daily.
// Includes a validator loop: if the hint is too easy to reverse, regenerate.
-import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
-import { join } from 'node:path';
-
+import { dedup } from '~/server/utils/inflight';
+import { rateLimit } from '~/server/utils/rate-limit';
import { getSessionTarget } from '~/server/utils/semantic';
const LLM_MODEL = 'gpt-5.2';
-const CACHE_DIR = join(process.cwd(), 'word-defs', 'semantic-hints');
const MAX_ATTEMPTS = 3;
async function callLlm(
@@ -131,6 +129,7 @@ async function generateValidatedHint(target: string): Promise {
}
export default defineEventHandler(async (event) => {
+ rateLimit(event, 'llm:hint', 10, 60 * 1000);
const body = await readBody(event);
const targetId = body?.targetId as string | undefined;
@@ -142,26 +141,34 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Unknown or expired targetId' });
}
- mkdirSync(CACHE_DIR, { recursive: true });
- const cacheFile = join(CACHE_DIR, `${target}.json`);
+ const lang = getRouterParam(event, 'lang') ?? 'en';
+
+ // Tier 0: DB cache
+ try {
+ const { getSemanticHint, setSemanticHint } = await import('~/server/utils/db-cache');
+ const dbHint = await getSemanticHint(lang, target);
+ if (dbHint) return { hint: dbHint, cached: true };
+ } catch {
+ /* fall through */
+ }
+
+ // Generate via LLM (deduplicated — only one generation per word)
+ const result = await dedup('hint', `${lang}:${target}`, async () => {
+ const generated = await generateValidatedHint(target);
+ if (!generated) return null;
- // Serve from cache — same hint for all players on the same word
- if (existsSync(cacheFile)) {
try {
- const cached = JSON.parse(readFileSync(cacheFile, 'utf-8'));
- if (cached.hint) {
- return { hint: cached.hint, cached: true };
- }
- } catch {
- // fall through to regenerate
+ const { setSemanticHint } = await import('~/server/utils/db-cache');
+ await setSemanticHint(lang, target, generated, LLM_MODEL);
+ } catch (e) {
+ console.warn(`[hint] DB write failed for ${lang}/${target}:`, e);
}
- }
- const hint = await generateValidatedHint(target);
- if (!hint) {
+ return generated;
+ });
+
+ if (!result) {
return { hint: null, cached: false, error: 'llm_unavailable' };
}
-
- writeFileSync(cacheFile, JSON.stringify({ hint, createdAt: Date.now() }));
- return { hint, cached: false };
+ return { hint: result, cached: false };
});
diff --git a/server/api/[lang]/semantic/reveal.post.ts b/server/api/[lang]/semantic/reveal.post.ts
index 5270df51..11b32472 100644
--- a/server/api/[lang]/semantic/reveal.post.ts
+++ b/server/api/[lang]/semantic/reveal.post.ts
@@ -1,28 +1,15 @@
/**
* Semantic Explorer — post-game reveal.
*
- * Returns the target word + 8 nearest neighbours (in full 512D space),
- * excluding any words the player already guessed. Used to decorate the
- * end-of-game map with labeled neighbours as a learning moment.
- *
- * Each neighbour ships its UMAP position AND all-20-axis normalized projections
- * (so the client can render them in any axis-slice view consistently with
- * how guesses are rendered).
+ * Returns the target word + k nearest neighbours via pgvector HNSW.
+ * Each neighbour ships UMAP position for map rendering.
*/
-import {
- computeGuessRank,
- get2dPosition,
- getEmbedding,
- getSessionTarget,
- knnNearest,
- loadSemanticDataSafe,
- normalizeProjection,
- projectAllAxes,
- rankToDisplay,
-} from '~/server/utils/semantic';
+import { getSessionTarget, rankToDisplay } from '~/server/utils/semantic';
+import * as semanticDb from '~/server/utils/_semantic-db';
export default defineEventHandler(async (event) => {
+ const lang = getRouterParam(event, 'lang') ?? 'en';
const body = await readBody(event);
const targetId = body?.targetId as string | undefined;
const exclude = (body?.exclude as string[] | undefined) ?? [];
@@ -36,41 +23,29 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Unknown or expired targetId' });
}
- const data = loadSemanticDataSafe();
- const targetVec = getEmbedding(data, target);
- if (!targetVec) {
- throw createError({ statusCode: 500, message: 'Target embedding missing' });
- }
+ const excludeList = [target, ...exclude.map((w) => w.toLowerCase().trim())];
+
+ const [targetUmap, neighbours, totalRanked] = await Promise.all([
+ semanticDb.get2dPosition(lang, target),
+ semanticDb.knnNearest(lang, target, k, excludeList),
+ semanticDb.getTotalRanked(lang),
+ ]);
- const excludeSet = new Set([target, ...exclude.map((w) => w.toLowerCase().trim())]);
- const neighbours = knnNearest(data, targetVec, k, excludeSet);
+ const neighborWords = neighbours.map((n) => n.word);
+ const rankMap = await semanticDb.batchGetRanks(lang, target, neighborWords);
- const totalRanked = data.words.length;
- const enriched = neighbours
- .map((n) => {
- const v = getEmbedding(data, n.word);
- if (!v) return null;
- const projs = projectAllAxes(data, v);
- const normProjs: Record = {};
- for (const axis of data.axesNames) {
- normProjs[axis] = normalizeProjection(data, axis, projs[axis]!);
- }
- const rank = computeGuessRank(data, target, n.word, v) ?? totalRanked;
- return {
- word: n.word,
- rank,
- totalRanked,
- display: rankToDisplay(rank, totalRanked),
- similarity: n.similarity,
- umapPosition: get2dPosition(data, n.word),
- allProjectionsNormalized: normProjs,
- };
- })
- .filter(Boolean);
+ const enriched = neighbours.map((n) => ({
+ word: n.word,
+ rank: rankMap.get(n.word) ?? totalRanked,
+ totalRanked,
+ display: rankToDisplay(rankMap.get(n.word) ?? totalRanked, totalRanked),
+ similarity: n.similarity,
+ umapPosition: n.umapX != null ? [n.umapX, n.umapY] : null,
+ }));
return {
targetWord: target,
- targetUmapPosition: get2dPosition(data, target),
+ targetUmapPosition: targetUmap,
neighbours: enriched,
};
});
diff --git a/server/api/[lang]/semantic/start.post.ts b/server/api/[lang]/semantic/start.post.ts
index dc3b0b95..b2ee542e 100644
--- a/server/api/[lang]/semantic/start.post.ts
+++ b/server/api/[lang]/semantic/start.post.ts
@@ -8,7 +8,10 @@
* (NOT the target word itself), so the client can center the map on it.
*/
-import { createSession, get2dPosition, loadSemanticDataSafe } from '~/server/utils/semantic';
+import { createSession } from '~/server/utils/semantic';
+import * as semanticDb from '~/server/utils/_semantic-db';
+import { EMBEDDING_MODEL } from '~/server/utils/_semantic-db';
+import { GAME_MODE_CONFIG } from '~/utils/game-modes';
import { getTodaysIdx, toModeDayIdx } from '~/server/lib/day-index';
function pickDailyTarget(targets: readonly string[], lang: string, dayIdx: number): string {
@@ -25,9 +28,6 @@ function pickDailyTarget(targets: readonly string[], lang: string, dayIdx: numbe
export default defineEventHandler(async (event) => {
const lang = getRouterParam(event, 'lang') ?? 'en';
- // Semantic Explorer is English-only for v1. The embeddings, targets, axes,
- // and UMAP data are all generated from English corpora. Serving them for
- // other languages would silently produce meaningless results.
if (lang !== 'en') {
throw createError({
statusCode: 404,
@@ -40,50 +40,67 @@ export default defineEventHandler(async (event) => {
const debug = Boolean(body?.debug);
const play = (body?.play as string | undefined) ?? 'daily';
- const data = loadSemanticDataSafe();
// TZ-aware day index, 1-based from April 11 2026
const classicIdx = getTodaysIdx();
const dayIdx = toModeDayIdx(classicIdx) ?? 1;
+ // Load targets from DB
+ const targets = await semanticDb.getTargets(lang);
+ if (!targets.length) {
+ throw createError({
+ statusCode: 503,
+ message: 'Semantic Explorer is temporarily unavailable.',
+ });
+ }
+
// Daily pick, unlimited random, or override via debug
let target: string;
- if (override && data.wordIndex.has(override)) {
+ if (override && (await semanticDb.wordExists(lang, override))) {
target = override;
} else if (play === 'unlimited') {
- target = data.targets[Math.floor(Math.random() * data.targets.length)]!;
+ target = targets[Math.floor(Math.random() * targets.length)]!;
} else {
- target = pickDailyTarget(data.targets, lang, dayIdx);
+ target = pickDailyTarget(targets, lang, dayIdx);
}
const targetId = createSession(target);
- // Anchor words for compass hint labels (no target word leak)
+ // Axis metadata from DB-cached axes (loaded at startup, 140KB)
+ const cachedAxes = semanticDb.getCachedAxes();
+ const axesNames = semanticDb.getCachedAxesNames();
const axisAnchors: Record = {};
- for (const name of data.axesNames) {
- const axis = data.axes[name];
- if (axis) axisAnchors[name] = { low: axis.low_anchor, high: axis.high_anchor };
+ const axesCoherence: Record = {};
+ if (cachedAxes) {
+ for (const axis of cachedAxes) {
+ axisAnchors[axis.name] = { low: axis.lowAnchor, high: axis.highAnchor };
+ axesCoherence[axis.name] = axis.auc;
+ }
}
- const targetUmapPosition = get2dPosition(data, target);
+ // Target position + vocab size from DB
+ const [targetUmapPosition, totalRanked] = await Promise.all([
+ semanticDb.get2dPosition(lang, target),
+ semanticDb.getTotalRanked(lang),
+ ]);
const response: Record = {
targetId,
lang,
dayIdx,
- vocabularySize: data.vocabulary.length,
- axes: data.axesNames,
- axesCoherence: data.axesAuc,
+ vocabularySize: totalRanked,
+ axes: axesNames,
+ axesCoherence,
axisAnchors,
- modelName: data.modelName,
+ modelName: EMBEDDING_MODEL,
targetUmapPosition,
- maxGuesses: 15,
- totalRanked: data.words.length,
+ maxGuesses: GAME_MODE_CONFIG.semantic.maxGuesses,
+ totalRanked,
};
- if (debug) {
+ if (debug && process.env.NODE_ENV !== 'production') {
response.debug = {
targetWord: target,
- targetPool: data.targets,
+ targetPool: targets,
};
}
diff --git a/server/api/[lang]/semantic/vocab.get.ts b/server/api/[lang]/semantic/vocab.get.ts
index f082069b..402fdde8 100644
--- a/server/api/[lang]/semantic/vocab.get.ts
+++ b/server/api/[lang]/semantic/vocab.get.ts
@@ -1,9 +1,15 @@
-import { loadSemanticDataSafe } from '~/server/utils/semantic';
+import { getValidWords } from '~/server/plugins/semantic-warmup';
+
+// Cache the array form — Set never changes after startup
+let _cachedArray: string[] | null = null;
export default defineEventHandler(() => {
- const data = loadSemanticDataSafe();
+ const words = getValidWords();
+ if (!_cachedArray && words.size > 0) {
+ _cachedArray = Array.from(words);
+ }
return {
- words: data.vocabulary,
- count: data.vocabulary.length,
+ words: _cachedArray ?? [],
+ count: words.size,
};
});
diff --git a/server/api/[lang]/word-explore/[slug].get.ts b/server/api/[lang]/word-explore/[slug].get.ts
index f2c8d176..a505424a 100644
--- a/server/api/[lang]/word-explore/[slug].get.ts
+++ b/server/api/[lang]/word-explore/[slug].get.ts
@@ -2,24 +2,23 @@
* GET /api/[lang]/word-explore/[slug]
*
* Semantic exploration data for a word: normalized axis projections,
- * nearest + farthest neighbors, UMAP coordinates. For out-of-vocab words,
- * fetches an embedding on-demand via OpenAI (cached to disk). Non-English
- * languages return `available: false` — the semantic data is English-only.
+ * nearest neighbors with UMAP + projections, cosine similarity.
+ *
+ * The top FOREGROUND_COUNT neighbors include axis projections so the
+ * client can render foreground dots AND lens/slice views from a single
+ * request — no per-word follow-up fetches needed.
*/
-import {
- cosineSimilarity,
- fetchEmbeddingOnDemand,
- getEmbedding,
- getTargetDistribution,
- loadSemanticData,
- normalizeProjection,
-} from '../../../utils/semantic';
+import { cosineSimilarity } from '../../../utils/semantic';
+import * as semanticDb from '~/server/utils/_semantic-db';
import { resolveWordSlug } from '../../../utils/word-selection';
import { loadAllData } from '../../../utils/data-loader';
+const NEIGHBOR_COUNT = 80;
+const FOREGROUND_COUNT = 15;
+
const EMPTY_RESPONSE = {
projections: [] as Array,
- nearest: [] as Array<{ word: string; similarity: number }>,
+ nearest: [] as Array,
umap: null as [number, number] | null,
similarityTo: null as number | null,
available: false,
@@ -45,77 +44,54 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Word not found' });
}
- // Semantic data is English-only for now; other languages get a graceful
- // empty response so the page can render the fallback section.
if (lang !== 'en') {
return { word, inVocab: false, ...EMPTY_RESPONSE };
}
- const sem = loadSemanticData();
- let vec = getEmbedding(sem, word);
+ let vec = await semanticDb.getEmbedding(lang, word);
const inVocab = vec !== null;
- if (!vec) vec = await fetchEmbeddingOnDemand(sem, word);
+ if (!vec) {
+ vec = await semanticDb.fetchOnDemandEmbedding(lang, word);
+ }
if (!vec) {
return { word, inVocab: false, ...EMPTY_RESPONSE };
}
- // Per-axis projections — compute inline (not projectAllAxes) so we can
- // skip low-AUC axes in one pass rather than projecting then filtering.
- const D = sem.dims;
- const projections = [];
- for (let a = 0; a < sem.axesNames.length; a++) {
- const name = sem.axesNames[a]!;
- if ((sem.axesAuc[name] ?? 0) < 0.8) continue;
- let raw = 0;
- const rowOffset = a * D;
- for (let j = 0; j < D; j++) {
- raw += vec[j]! * sem.axesVectors[rowOffset + j]!;
- }
- const rec = sem.axes[name]!;
- projections.push({
- axis: name,
- lowAnchor: rec.low_anchor,
- highAnchor: rec.high_anchor,
- normalized: normalizeProjection(sem, name, raw),
- rawProjection: raw,
- });
- }
+ const projections = semanticDb.projectAxesDetailed(vec, 0.8);
- // Top-80 nearest neighbors. 80 lets the Word Explorer show a small
- // prominent foreground (~12 top dots) + a muted "extended neighborhood"
- // background (~60 faded dots) in the same polar coordinate system.
- // UMAP coords come along so the client can compute real angular
- // directions via polarProject — without them, every muted dot would
- // stack at (0.5, 0.5).
- const dist = getTargetDistribution(sem, word);
- type NeighborOut = {
- word: string;
- similarity: number;
- umap: [number, number] | null;
- };
- const nearest: NeighborOut[] = [];
- if (dist) {
- const N = dist.words.length;
- for (let i = 1; i <= 80 && i < N; i++) {
- const w = dist.words[i]!;
- nearest.push({
- word: w,
- similarity: dist.cosines[i]!,
- umap: sem.umap[w] ?? null,
- });
+ const [neighbors, umap] = await Promise.all([
+ semanticDb.knnNearestByVector(lang, vec, NEIGHBOR_COUNT, [word]),
+ semanticDb.get2dPosition(lang, word),
+ ]);
+
+ // Batch-fetch embeddings for foreground neighbors to compute their projections.
+ // Pure math on cached axis vectors — no extra DB round-trips beyond the batch fetch.
+ const foregroundWords = neighbors.slice(0, FOREGROUND_COUNT).map((n) => n.word);
+ const foregroundVecs = await semanticDb.getEmbeddings(lang, foregroundWords);
+
+ const nearest = neighbors.map((n, i) => {
+ const entry: Record = {
+ word: n.word,
+ similarity: n.similarity,
+ umap: n.umapX != null ? [n.umapX, n.umapY] : null,
+ };
+ // Include projections for foreground candidates
+ if (i < FOREGROUND_COUNT) {
+ const nVec = foregroundVecs.get(n.word);
+ if (nVec) {
+ entry.projections = semanticDb.projectAxes(nVec);
+ }
}
- }
+ return entry;
+ });
- // When `?relativeTo=X` is passed, compute cosine similarity to X so
- // the client can lay out user-added context words at a radius that
- // reflects their distance from the primary word. Same-word => 1.
let similarityTo: number | null = null;
if (relativeTo) {
if (relativeTo === word) {
similarityTo = 1;
} else {
- let otherVec = getEmbedding(sem, relativeTo);
- if (!otherVec) otherVec = await fetchEmbeddingOnDemand(sem, relativeTo);
+ let otherVec = await semanticDb.getEmbedding(lang, relativeTo);
+ if (!otherVec) otherVec = await semanticDb.fetchOnDemandEmbedding(lang, relativeTo);
if (otherVec) similarityTo = cosineSimilarity(vec, otherVec);
}
}
@@ -125,7 +101,7 @@ export default defineEventHandler(async (event) => {
inVocab,
projections,
nearest,
- umap: sem.umap[word] ?? null,
+ umap,
similarityTo,
available: true,
};
diff --git a/server/api/[lang]/word-image/[word].get.ts b/server/api/[lang]/word-image/[word].get.ts
index a0323eb4..bb90bcd6 100644
--- a/server/api/[lang]/word-image/[word].get.ts
+++ b/server/api/[lang]/word-image/[word].get.ts
@@ -3,9 +3,11 @@
*
* Serves cached WebP images. Generates via DALL-E on demand for current daily words.
*/
-import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
import { join } from 'path';
import { loadAllData, WORD_IMAGES_DIR } from '../../../utils/data-loader';
+import { dedup } from '../../../utils/inflight';
+import { rateLimit } from '../../../utils/rate-limit';
import { getTodaysIdx, getWordForDay } from '../../../utils/word-selection';
import { fetchDefinition } from '../../../utils/definitions';
@@ -55,6 +57,11 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Not found' });
}
+ // Path traversal protection
+ if (word.includes('/') || word.includes('\\') || word.includes('..')) {
+ throw createError({ statusCode: 400, message: 'Invalid word' });
+ }
+
const openaiKey = process.env.OPENAI_API_KEY;
if (!openaiKey) {
throw createError({ statusCode: 404, message: 'Not available' });
@@ -70,6 +77,9 @@ export default defineEventHandler(async (event) => {
return readFileSync(cachePath);
}
+ // Rate limit DALL-E generation (cached images bypass this)
+ rateLimit(event, 'dalle:image', 5, 3600 * 1000);
+
// For non-top languages, only serve from cache
if (!IMAGE_LANGUAGES.has(lang)) {
throw createError({ statusCode: 404, message: 'Image not available for this language' });
@@ -95,23 +105,9 @@ export default defineEventHandler(async (event) => {
throw createError({ statusCode: 404, message: 'Image not available for historical words' });
}
- // Pending check
- const pendingPath = cachePath + '.pending';
- if (existsSync(pendingPath)) {
- setResponseStatus(event, 202);
- return 'Image being generated';
- }
-
- // Mark as pending
- try {
- mkdirSync(cacheDir, { recursive: true });
- writeFileSync(pendingPath, '', { flag: 'wx' });
- } catch {
- setResponseStatus(event, 202);
- return 'Image being generated';
- }
-
- try {
+ // Deduplicate concurrent requests — only one DALL-E call per word.
+ // All concurrent requesters get the same Promise (and the same image).
+ const webpBuffer = await dedup('image', `${lang}:${word.toLowerCase()}`, async () => {
// Get definition hint for DALL-E prompt
let definitionHint = '';
const defn = await fetchDefinition(word, lang);
@@ -138,7 +134,7 @@ export default defineEventHandler(async (event) => {
const imageUrl = response.data[0]?.url;
if (!imageUrl?.startsWith('https://')) {
- throw createError({ statusCode: 404, message: 'Image generation failed' });
+ throw new Error('Image generation returned no URL');
}
// Download and convert to WebP
@@ -146,22 +142,14 @@ export default defineEventHandler(async (event) => {
const imageBuffer = Buffer.from(await imageResponse.arrayBuffer());
const sharp = (await import('sharp')).default;
- const webpBuffer = await sharp(imageBuffer).webp({ quality: 80 }).toBuffer();
+ const buf = await sharp(imageBuffer).webp({ quality: 80 }).toBuffer();
mkdirSync(cacheDir, { recursive: true });
- writeFileSync(cachePath, webpBuffer);
+ writeFileSync(cachePath, buf);
+ return buf;
+ });
- setResponseHeader(event, 'Content-Type', 'image/webp');
- setResponseHeader(event, 'Cache-Control', 'public, max-age=31536000');
- return webpBuffer;
- } catch (e: any) {
- console.error(`[word-image] Failed for ${lang}/${word}: ${e.message}`);
- throw createError({ statusCode: 404, message: 'Image generation failed' });
- } finally {
- if (existsSync(pendingPath)) {
- try {
- unlinkSync(pendingPath);
- } catch {}
- }
- }
+ setResponseHeader(event, 'Content-Type', 'image/webp');
+ setResponseHeader(event, 'Cache-Control', 'public, max-age=31536000');
+ return webpBuffer;
});
diff --git a/server/api/[lang]/word-stats.post.ts b/server/api/[lang]/word-stats.post.ts
index 922acc6b..56f5c322 100644
--- a/server/api/[lang]/word-stats.post.ts
+++ b/server/api/[lang]/word-stats.post.ts
@@ -38,7 +38,7 @@ export default defineEventHandler(async (event) => {
'unknown';
if (isDuplicateSubmission(lang, day_idx, clientId, todaysIdx)) {
- const existing = loadWordStats(lang, day_idx);
+ const existing = await loadWordStats(lang, day_idx);
return existing || {};
}
@@ -48,6 +48,6 @@ export default defineEventHandler(async (event) => {
console.warn(`[word-stats] Disk write failed for ${lang}`);
}
- const updated = loadWordStats(lang, day_idx);
+ const updated = await loadWordStats(lang, day_idx);
return updated || {};
});
diff --git a/server/api/[lang]/word/[slug].get.ts b/server/api/[lang]/word/[slug].get.ts
index b69a0951..ae88bd5b 100644
--- a/server/api/[lang]/word/[slug].get.ts
+++ b/server/api/[lang]/word/[slug].get.ts
@@ -15,7 +15,8 @@ import {
import { loadWordStats } from '../../../utils/word-stats';
import { fetchDefinition } from '../../../utils/definitions';
import { checkWiktionaryExists } from '../../../utils/wiktionary';
-import { getEmbedding, knnNearest, loadSemanticData } from '../../../utils/semantic';
+import * as semanticDb from '~/server/utils/_semantic-db';
+import { getValidWords } from '~/server/plugins/semantic-warmup';
import type { WordStats } from '~/utils/types';
// Module-level Set cache so we don't rebuild on every request. wordLists
@@ -47,14 +48,10 @@ function wordIsRecognized(
data: ReturnType
): boolean {
if (getWordSet(lang, data).has(word)) return true;
- // English gets the much larger semantic validator dictionary
+ // English gets the much larger semantic valid words set (loaded at startup, 2MB)
if (lang === 'en') {
- try {
- const sem = loadSemanticData();
- if (sem.validWords.has(word)) return true;
- } catch {
- // semantic data missing — fall through
- }
+ const validWords = getValidWords();
+ if (validWords.size > 0 && validWords.has(word)) return true;
}
return false;
}
@@ -98,14 +95,16 @@ export default defineEventHandler(async (event) => {
if (dayIdx != null && !isFuture) {
wordDate = idxToDate(dayIdx).toISOString().slice(0, 10);
- wordStats = loadWordStats(lang, dayIdx);
+ wordStats = await loadWordStats(lang, dayIdx);
}
- // cacheOnly=1: skip LLM definition generation — only return disk-cached
- // or kaikki definitions. Used by the hover-prefetch so browsing neighbors
- // doesn't burn AI credits on obscure words.
const query = getQuery(event);
- const cacheOnly = query.cacheOnly === '1' || query.cacheOnly === 'true';
+ const clientCacheOnly = query.cacheOnly === '1' || query.cacheOnly === 'true';
+ // Only generate LLM definitions for words in the game's word list (5-letter
+ // daily candidates). Other words (semantic vocab, neighbor links) use cacheOnly
+ // to avoid burning LLM credits on the 75K-word tail that bots crawl.
+ const isGameWord = getWordSet(lang, data).has(word ?? '');
+ const cacheOnly = clientCacheOnly || !isGameWord;
let wiktionaryExists = false;
if (word) {
@@ -123,19 +122,15 @@ export default defineEventHandler(async (event) => {
wiktionaryExists = wiktResult === true;
}
- // Nearest words for SSR internal link juice. Lightweight k-NN on the
- // in-memory semantic embeddings — ~5ms, zero LLM cost. Only for English
- // (where semantic data exists) and only the word names (no full explore).
+ // Nearest words for SSR internal link juice via pgvector HNSW — ~10ms.
+ // Only for English (where semantic data exists).
let nearestWords: string[] = [];
if (word && lang === 'en') {
try {
- const semData = loadSemanticData();
- const vec = getEmbedding(semData, word);
- if (vec) {
- nearestWords = knnNearest(semData, vec, 8, new Set([word])).map((n) => n.word);
- }
+ const neighbors = await semanticDb.knnNearest(lang, word, 8, [word]);
+ nearestWords = neighbors.map((n) => n.word);
} catch {
- // Semantic data not loaded — skip (non-English or cold start)
+ // DB unavailable — skip
}
}
diff --git a/server/api/[lang]/words.get.ts b/server/api/[lang]/words.get.ts
index 9590e5f3..730ca6f8 100644
--- a/server/api/[lang]/words.get.ts
+++ b/server/api/[lang]/words.get.ts
@@ -87,9 +87,8 @@ let _semanticTargets: string[] | null = null;
async function loadSemanticTargets(): Promise {
if (_semanticTargets) return _semanticTargets;
try {
- const { loadSemanticDataSafe } = await import('../../utils/semantic');
- const data = loadSemanticDataSafe();
- _semanticTargets = data.targets;
+ const semanticDb = await import('../../utils/_semantic-db');
+ _semanticTargets = await semanticDb.getTargets('en');
return _semanticTargets!;
} catch {
return [];
@@ -178,7 +177,7 @@ export default defineEventHandler(async (event) => {
const targets = await loadSemanticTargets();
word = targets.length > 0 ? pickSemanticDailyTarget(targets, lang, idx) : null;
} else if (boardCount > 1) {
- wordList = getWordsForDay(lang, classicIdx, boardCount);
+ wordList = getWordsForDay(lang, classicIdx, boardCount, mode);
word = wordList[0] ?? null;
} else {
word = getWordForDay(lang, classicIdx);
diff --git a/server/api/auth/register.post.ts b/server/api/auth/register.post.ts
index a0acf5bb..e96cfc07 100644
--- a/server/api/auth/register.post.ts
+++ b/server/api/auth/register.post.ts
@@ -31,6 +31,9 @@ export default defineEventHandler(async (event) => {
message: `Password must be at least ${MIN_PASSWORD_LENGTH} characters`,
});
}
+ if (body.password.length > 128) {
+ throw createError({ statusCode: 400, message: 'Password too long' });
+ }
const existing = await prisma.user.findUnique({ where: { email: body.email.toLowerCase() } });
if (existing) {
@@ -48,7 +51,7 @@ export default defineEventHandler(async (event) => {
username,
email: body.email.toLowerCase(),
passwordHash,
- displayName: body.displayName || body.email.split('@')[0],
+ displayName: (body.displayName || body.email.split('@')[0] || '').trim().slice(0, 30),
emailVerified: false,
},
});
diff --git a/server/plugins/semantic-warmup.ts b/server/plugins/semantic-warmup.ts
index 875c0ded..e71fa46c 100644
--- a/server/plugins/semantic-warmup.ts
+++ b/server/plugins/semantic-warmup.ts
@@ -2,61 +2,55 @@ import { consola } from 'consola';
/**
* Semantic Explorer — Nitro startup plugin.
*
- * Two responsibilities:
+ * Loads only lightweight data at startup:
+ * - Axis vectors from Postgres (70 × 512 = 140KB)
+ * - Valid words for spellcheck from disk (~2MB)
*
- * 1. Warm the in-memory semantic data cache BEFORE the first request, so
- * the multi-megabyte `embeddings.json` parse doesn't stall the event
- * loop while a player is waiting.
- *
- * 2. On first boot (or whenever `embeddings.json` is missing from the
- * persistent disk), regenerate it via OpenAI by calling the
- * TypeScript-native generator. No Python runtime needed in production.
- *
- * The generated files land in SEMANTIC_RUNTIME_DIR (persistent disk on
- * Render). Delete them to force regeneration on the next boot.
+ * Embeddings (98-230MB) live in Postgres via pgvector — NOT loaded into memory.
*/
-import { loadSemanticData, SemanticDataMissingError } from '~/server/utils/semantic';
-import {
- generateSemanticRuntimeData,
- semanticRuntimeCacheExists,
-} from '~/server/utils/semanticGenerate';
+import { existsSync, readFileSync } from 'fs';
+import { join } from 'path';
-export default defineNitroPlugin(async () => {
- // Fast path: files already on disk → just warm the in-memory cache.
- if (semanticRuntimeCacheExists()) {
- try {
- const t0 = Date.now();
- const data = loadSemanticData();
- const ms = Date.now() - t0;
- consola.info(
- `[semantic warmup] loaded ${data.words.length} embeddings, ` +
- `${data.targets.length} targets, ${data.validWords.size} validator words in ${ms}ms`
- );
- } catch (e) {
- consola.warn('[semantic warmup] load failed:', e);
+// Valid words set — loaded for spellcheck, shared with guess endpoint.
+// Stays null until the first getValidWords() call; afterwards every caller
+// receives the same memoized Set.
+let _validWords: Set<string> | null = null;
+
+/**
+ * Return the spellcheck vocabulary, reading it from disk on first use.
+ *
+ * Looks for `valid_words.json` under `<cwd>/semantic-runtime` and then
+ * `<cwd>/data/semantic`. The file may be a bare array or an object with a
+ * `words` array. A file that is missing, fails to parse, or does not contain
+ * an array is skipped so the next directory still gets a chance.
+ *
+ * @returns The memoized word Set; empty when no candidate file yielded a list.
+ */
+export function getValidWords(): Set<string> {
+  if (_validWords) return _validWords;
+
+  // Try runtime dir first, then static dir
+  const dirs = [join(process.cwd(), 'semantic-runtime'), join(process.cwd(), 'data', 'semantic')];
+
+  for (const dir of dirs) {
+    const path = join(dir, 'valid_words.json');
+    if (existsSync(path)) {
+      try {
+        const data = JSON.parse(readFileSync(path, 'utf-8'));
+        const words = data.words ?? data;
+        if (Array.isArray(words)) {
+          _validWords = new Set<string>(words);
+          return _validWords;
+        }
+        // Parsed, but not a word list — fall through to the next directory.
+      } catch {
+        // try next
+      }
     }
-    return;
   }
-  // Missing runtime files → regenerate before the first request lands.
-  consola.info('[semantic warmup] runtime cache missing — regenerating from OpenAI');
+  _validWords = new Set<string>();
+  return _validWords;
+}
+
+/**
+ * Nitro startup hook: loads the semantic axes and the spellcheck word set.
+ *
+ * Both steps are best-effort. A failure while importing or loading the axes
+ * is logged with consola.warn and startup continues; getValidWords() returns
+ * an empty Set when no word file can be read.
+ */
+export default defineNitroPlugin(async () => {
+ // 1. Load axes from Postgres (140KB, for compass computation)
try {
- const ok = await generateSemanticRuntimeData();
- if (!ok) {
- consola.warn('[semantic warmup] regeneration skipped (missing key or static data)');
- return;
- }
- const data = loadSemanticData();
- consola.info(
- `[semantic warmup] regenerated + loaded ${data.words.length} embeddings, ` +
- `${data.targets.length} targets`
- );
+ const { loadAxes } = await import('~/server/utils/_semantic-db');
+ const t0 = Date.now();
+ const axes = await loadAxes('en');
+ consola.info(`[semantic warmup] loaded ${axes.length} axes in ${Date.now() - t0}ms`);
} catch (e) {
- if (e instanceof SemanticDataMissingError) {
- consola.warn('[semantic warmup] post-generation load still missing files:', e.missing);
- } else {
- consola.warn('[semantic warmup] regeneration failed:', e);
- }
+ consola.warn('[semantic warmup] axis load failed:', e);
}
+
+ // 2. Load valid words for spellcheck (2MB, from disk)
+ const validWords = getValidWords();
+ consola.info(`[semantic warmup] loaded ${validWords.size} valid words for spellcheck`);
});
diff --git a/server/routes/auth/google.get.ts b/server/routes/auth/google.get.ts
index 4153e2ad..bb7064e3 100644
--- a/server/routes/auth/google.get.ts
+++ b/server/routes/auth/google.get.ts
@@ -56,8 +56,10 @@ export default defineOAuthGoogleEventHandler({
await setSessionForUser(event, user, 'google');
- const redirect = getCookie(event, 'auth-redirect') || '/';
+ const raw = getCookie(event, 'auth-redirect') || '/';
deleteCookie(event, 'auth-redirect');
+ // Prevent open redirect — only allow relative paths
+ const redirect = raw.startsWith('/') && !raw.startsWith('//') ? raw : '/';
return sendRedirect(event, redirect);
},
onError(event, error) {
diff --git a/server/utils/_semantic-db.ts b/server/utils/_semantic-db.ts
new file mode 100644
index 00000000..5e5f7828
--- /dev/null
+++ b/server/utils/_semantic-db.ts
@@ -0,0 +1,500 @@
+/**
+ * semantic-db — pgvector-backed semantic operations.
+ *
+ * All embedding lookups, rank computation, kNN, and axis projections
+ * go through Postgres. Only axes (140KB) and targets (~10KB) are
+ * cached in memory after first load.
+ */
+
+import { prisma } from './prisma';
+import { cosineSimilarity } from './semantic';
+import { dedup } from './inflight';
+
+export { type AxisData };
+
+export const EMBEDDING_MODEL = 'text-embedding-3-large';
+export const EMBEDDING_DIMS = 512;
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Axis data (loaded once at startup, cached in memory — 140KB)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/**
+ * One semantic axis as held in the in-memory axis cache.
+ */
+interface AxisData {
+ // Axis identifier; used as the key in projectAxes() results.
+ name: string;
+ // Labels for the low and high ends of the axis (low_anchor / high_anchor columns).
+ lowAnchor: string;
+ highAnchor: string;
+ // Axis direction, parsed from the row's vector::text value.
+ vector: Float32Array;
+ // Score compared against the minAuc filter; 0 when the column is NULL.
+ auc: number;
+ // Raw projection values that map to 0 and 1 when normalizing; 0 when the column is NULL.
+ rangeP5: number;
+ rangeP95: number;
+}
+
+// Single-slot cache written by loadAxes(): it holds the axes of the most
+// recently loaded language only. axesVectors stores every axis vector
+// back-to-back in one Float32Array (axis a starts at a * dims).
+let _axesCache: {
+ lang: string;
+ axes: AxisData[];
+ axesNames: string[];
+ axesVectors: Float32Array;
+} | null = null;
+
+/**
+ * Load every semantic axis for `lang` from Postgres and cache the result.
+ *
+ * The cache holds one language at a time: a call for the cached language
+ * returns without querying, while a call for a different language re-queries
+ * and replaces the cached entry.
+ *
+ * @param lang Language code used to filter `wordle.semantic_axes` (default 'en').
+ * @returns The axes for `lang`, ordered by name.
+ */
+export async function loadAxes(lang: string = 'en'): Promise<AxisData[]> {
+  if (_axesCache?.lang === lang) return _axesCache.axes;
+
+  const rows = await prisma.$queryRaw<
+    Array<{
+      name: string;
+      low_anchor: string;
+      high_anchor: string;
+      vector: string;
+      auc: number | null;
+      range_p5: number | null;
+      range_p95: number | null;
+    }>
+  >`SELECT name, low_anchor, high_anchor, vector::text, auc, range_p5, range_p95
+ FROM wordle.semantic_axes WHERE lang = ${lang} ORDER BY name`;
+
+  // NULL numeric columns fall back to 0 so downstream math never sees null.
+  const axes: AxisData[] = rows.map((r) => ({
+    name: r.name,
+    lowAnchor: r.low_anchor,
+    highAnchor: r.high_anchor,
+    vector: parseVector(r.vector),
+    auc: r.auc ?? 0,
+    rangeP5: r.range_p5 ?? 0,
+    rangeP95: r.range_p95 ?? 0,
+  }));
+
+  // Pack all axis vectors into one contiguous buffer; the stride is the
+  // length of the first axis vector (EMBEDDING_DIMS when there are no axes).
+  const dims = axes[0]?.vector.length ?? EMBEDDING_DIMS;
+  const axesVectors = new Float32Array(axes.length * dims);
+  for (let a = 0; a < axes.length; a++) {
+    axesVectors.set(axes[a]!.vector, a * dims);
+  }
+
+  const axesNames = axes.map((a) => a.name);
+  _axesCache = { lang, axes, axesNames, axesVectors };
+  return axes;
+}
+
+/** Packed axis vectors from the cache, or null while the cache is empty. */
+export function getCachedAxesVectors(): Float32Array | null {
+  const cache = _axesCache;
+  return cache ? cache.axesVectors : null;
+}
+
+/** Names of the cached axes, or an empty array while the cache is empty. */
+export function getCachedAxesNames(): string[] {
+  const cache = _axesCache;
+  return cache ? cache.axesNames : [];
+}
+
+/** The cached axis records, or null while the cache is empty. */
+export function getCachedAxes(): AxisData[] | null {
+  const cache = _axesCache;
+  return cache ? cache.axes : null;
+}
+
+/**
+ * Compute normalized [0,1] axis projections for a word vector.
+ *
+ * Each projection is the dot product of `vec` with a cached axis vector,
+ * rescaled so that rangeP5 maps to 0 and rangeP95 maps to 1, then clamped to
+ * [0, 1]. An axis whose two range bounds are equal reports 0.5.
+ *
+ * @param vec Word embedding; must have the same length as the cached axis vectors.
+ * @param opts.minAuc Skip axes whose `auc` is below this value (default 0).
+ * @param opts.includeRaw Accepted in the signature; not read by this function.
+ * @returns Map of axis name to normalized projection. Empty when no axes are
+ *   cached or when `vec` does not match the axis dimensionality.
+ */
+export function projectAxes(
+  vec: Float32Array,
+  opts?: { minAuc?: number; includeRaw?: boolean }
+): Record<string, number> {
+  const axes = _axesCache?.axes;
+  const axesVectors = _axesCache?.axesVectors;
+  if (!axes || !axesVectors || axes.length === 0) return {};
+
+  // The packed buffer has a fixed per-axis stride. Offsets must use that
+  // stride, not the caller's vector length, or a mismatched vector would read
+  // the wrong axis (or past the end of the buffer, yielding NaN).
+  const stride = axesVectors.length / axes.length;
+  if (vec.length !== stride) return {};
+
+  const minAuc = opts?.minAuc ?? 0;
+  const result: Record<string, number> = {};
+
+  for (let a = 0; a < axes.length; a++) {
+    const axis = axes[a]!;
+    if (axis.auc < minAuc) continue;
+    let dot = 0;
+    const offset = a * stride;
+    for (let i = 0; i < stride; i++) dot += vec[i]! * axesVectors[offset + i]!;
+
+    if (axis.rangeP95 !== axis.rangeP5) {
+      result[axis.name] = Math.max(
+        0,
+        Math.min(1, (dot - axis.rangeP5) / (axis.rangeP95 - axis.rangeP5))
+      );
+    } else {
+      // Degenerate range: no scale to normalize against, report the midpoint.
+      result[axis.name] = 0.5;
+    }
+  }
+  return result;
+}
+
+/**
+ * Detailed axis projections with anchor labels (for word-explore endpoint).
+ *
+ * For every cached axis whose `auc` is at least `minAuc`, reports the raw dot
+ * product of `vec` with the axis vector and the same value rescaled so that
+ * rangeP5 maps to 0 and rangeP95 maps to 1, clamped to [0, 1]. An axis whose
+ * two range bounds are equal reports a normalized value of 0.5.
+ *
+ * @param vec Word embedding; must have the same length as the cached axis vectors.
+ * @param minAuc Minimum `auc` an axis needs to be included (default 0.8).
+ * @returns One entry per included axis, in cache order. Empty when no axes
+ *   are cached or when `vec` does not match the axis dimensionality.
+ */
+export function projectAxesDetailed(
+  vec: Float32Array,
+  minAuc: number = 0.8
+): Array<{
+  axis: string;
+  lowAnchor: string;
+  highAnchor: string;
+  normalized: number;
+  rawProjection: number;
+}> {
+  const axes = _axesCache?.axes;
+  const axesVectors = _axesCache?.axesVectors;
+  if (!axes || !axesVectors || axes.length === 0) return [];
+
+  // The packed buffer has a fixed per-axis stride. Offsets must use that
+  // stride, not the caller's vector length, or a mismatched vector would read
+  // the wrong axis (or past the end of the buffer, yielding NaN).
+  const stride = axesVectors.length / axes.length;
+  if (vec.length !== stride) return [];
+
+  const result: ReturnType<typeof projectAxesDetailed> = [];
+
+  for (let a = 0; a < axes.length; a++) {
+    const axis = axes[a]!;
+    if (axis.auc < minAuc) continue;
+    let dot = 0;
+    const offset = a * stride;
+    for (let i = 0; i < stride; i++) dot += vec[i]! * axesVectors[offset + i]!;
+
+    // Midpoint is the fallback for a degenerate (zero-width) range.
+    let normalized = 0.5;
+    if (axis.rangeP95 !== axis.rangeP5) {
+      normalized = Math.max(
+        0,
+        Math.min(1, (dot - axis.rangeP5) / (axis.rangeP95 - axis.rangeP5))
+      );
+    }
+    result.push({
+      axis: axis.name,
+      lowAnchor: axis.lowAnchor,
+      highAnchor: axis.highAnchor,
+      normalized,
+      rawProjection: dot,
+    });
+  }
+  return result;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Embedding lookups
+// ═══════════════════════════════════════════════════════════════════════════
+
+/** Batch-fetch embeddings for multiple words (1 query). */
+export async function getEmbeddings(
+ lang: string,
+ words: string[]
+): Promise