diff --git a/backend/src/cache/REGION_REPLICATION.md b/backend/src/cache/REGION_REPLICATION.md new file mode 100644 index 00000000..b8e777db --- /dev/null +++ b/backend/src/cache/REGION_REPLICATION.md @@ -0,0 +1,76 @@ +# Multi-Region Cache Replication + +Synchronizes Redis keys across multiple geographic regions so worldwide students +read from a nearby region while writes propagate everywhere. Implements an +application-layer **multi-master** model with **region-based fallback**. + +## How it works + +``` + write("course:1") + │ + ┌─────────▼─────────┐ active region first (low latency) + │ us-east (origin) │ + └─────────┬─────────┘ + fan-out (best-effort, parallel) + ┌─────────┼─────────┐ + ┌────▼───┐ ┌───▼────┐ ┌──▼─────┐ + │ eu-west│ │ ap-south│ │ ... │ replica regions stay in sync + └────────┘ └────────┘ └────────┘ +``` + +- **Writes** (`set`) go to the active region first, then fan out to every other + healthy region in parallel. A replica failure is logged, not fatal. +- **Reads** (`get`) try the active region, then fall back to other healthy + regions — a regional outage degrades latency, not availability. +- **Deletes** (`del`) apply to every region for consistency. +- **Health**: a region is skipped only when its connection is dead + (`end`/`close`); transient states still get attempted with per-call fallback. + +## Configuration + +| Env var | Example | Meaning | +|---------|---------|---------| +| `REDIS_REGIONS` | `us-east@redis://cache-us:6379,eu-west@cache-eu:6379` | Comma-separated `name@connection` (URL or `host:port`). | +| `REDIS_ACTIVE_REGION` | `eu-west` | This process's local region (defaults to the first listed). | + +When `REDIS_REGIONS` is unset, `createRegionReplicator()` returns `null` and the +app continues with the existing single-instance cache — multi-region is opt-in. + +## Files + +| File | Responsibility | +|------|----------------| +| `../config/region.config.ts` | Pure parsing + active-region resolution + fallback ordering. 
Unit tested. | +| `RegionReplicator.ts` | `RegionReplicator` (set/get/del across regions) + ioredis client builder + `createRegionReplicator()`. | + +The replicator depends only on a minimal `RedisLike` interface, so it runs on +real ioredis clients in production and on in-memory fakes in tests. + +## Usage + +```ts +import { createRegionReplicator } from './cache/RegionReplicator.js'; + +const replicator = createRegionReplicator(); // null if not configured +if (replicator) { + await replicator.set('course:1', JSON.stringify(course), 900); // synced to all regions + const cached = await replicator.get('course:1'); // nearest region, with fallback +} +``` + +> Production note: for native Redis-level geo-replication you would pair this +> with Redis Enterprise Active-Active (CRDT) or per-region replicas. This module +> provides the application-layer coordination and fallback that works on top of +> either, and keeps the behaviour testable. + +## Tests + +```bash +cd backend +npm test -- region-replication +``` + +Proves the acceptance criterion: a key modified in one region is present in every +replica region, plus fallback reads, replica-failure tolerance, and cross-region +deletes. diff --git a/backend/src/cache/RegionReplicator.ts b/backend/src/cache/RegionReplicator.ts new file mode 100644 index 00000000..57bf04e2 --- /dev/null +++ b/backend/src/cache/RegionReplicator.ts @@ -0,0 +1,205 @@ +import Redis from 'ioredis'; +import logger from '../utils/logger.js'; +import { + orderRegionsByPreference, + parseRegions, + resolveActiveRegionName, + type RegionConfig, +} from '../config/region.config.js'; + +/** + * Multi-region cache replication. + * + * Implements a multi-master replication model at the application layer: a write + * is applied to the active (local) region first for low latency, then fanned out + * to every other healthy region so the same keys exist everywhere. 
/**
 * The minimal Redis surface the replicator needs. Real ioredis clients and
 * in-memory fakes both satisfy it.
 *
 * NOTE(review): the generic type arguments in this section (`Promise<…>`,
 * `Map<…>`) were missing and have been reconstructed from how each value is
 * used — confirm against the intended signatures.
 */
export interface RedisLike {
  /** Resolves to the stored value, or null when the key is absent. */
  get(key: string): Promise<string | null>;
  /** Stores a value; the replicator passes `'EX', seconds` when a TTL is requested. */
  set(key: string, value: string, mode?: string, ttlSeconds?: number): Promise<unknown>;
  /** Removes a key. */
  del(key: string): Promise<number>;
  /** ioredis connection status; absent on fakes (treated as healthy). */
  readonly status?: string;
}

/** A named region paired with its client. */
export interface RegionClient {
  name: string;
  client: RedisLike;
}

/** Outcome of a replicated write. */
export interface ReplicationResult {
  /**
   * First region that accepted the write: the preferred (active) region when
   * its write succeeded, otherwise the first replica that did. Null when no
   * region accepted it.
   */
  origin: string | null;
  /** Regions the key was successfully written/replicated to. */
  replicated: string[];
  /** Regions that failed to accept the write. */
  failed: string[];
}

// ioredis statuses that mean the connection is unusable.
const DEAD_STATUSES = new Set<string>(['end', 'close']);

/**
 * Application-layer multi-region cache replication.
 *
 * Writes go to the active region first and are then fanned out to every other
 * healthy region. Reads use region-based fallback — the active region first,
 * then other healthy regions — so a single region outage degrades latency, not
 * availability. Deletes are attempted on every configured region.
 */
export class RegionReplicator {
  private readonly regionNames: string[];
  private readonly clientsByName: Map<string, RedisLike>;

  /**
   * @param regions      Every region to replicate across, with its client.
   * @param activeRegion Name of the region preferred for reads and writes.
   */
  constructor(
    regions: RegionClient[],
    private readonly activeRegion: string
  ) {
    this.regionNames = regions.map((r) => r.name);
    // Explicit tuple return type so the entries are typed as [key, value] pairs.
    this.clientsByName = new Map(
      regions.map((r): [string, RedisLike] => [r.name, r.client])
    );
  }

  /** A region is healthy unless its client reports a dead connection status. */
  private isHealthy = (name: string): boolean => {
    const client = this.clientsByName.get(name);
    if (!client) return false;
    // A missing status (fakes) counts as 'ready'.
    return !DEAD_STATUSES.has(client.status ?? 'ready');
  };

  /** Healthy regions in fallback preference order (active first). */
  private preferenceOrder(): string[] {
    return orderRegionsByPreference(this.regionNames, this.activeRegion, this.isHealthy);
  }

  /**
   * Look up the client for a region.
   * @throws Error when the region name is not configured.
   */
  private clientFor(name: string): RedisLike {
    const client = this.clientsByName.get(name);
    if (!client) throw new Error(`Unknown region: ${name}`);
    return client;
  }

  /** Write a key to a single region, with an `EX` expiry when a positive TTL is given. */
  private async writeOne(
    name: string,
    key: string,
    value: string,
    ttlSeconds?: number
  ): Promise<void> {
    const client = this.clientFor(name);
    if (ttlSeconds && ttlSeconds > 0) {
      await client.set(key, value, 'EX', ttlSeconds);
    } else {
      await client.set(key, value);
    }
  }

  /**
   * Write a key to the preferred healthy region, then replicate it to every
   * other healthy region. Replication runs in parallel and is best-effort:
   * failures are logged and reported in the result, never thrown.
   *
   * @param key        Cache key.
   * @param value      Serialized value.
   * @param ttlSeconds Optional expiry; ignored unless greater than zero.
   * @returns Which regions accepted the write and which did not.
   */
  async set(key: string, value: string, ttlSeconds?: number): Promise<ReplicationResult> {
    const [origin, ...replicas] = this.preferenceOrder();
    if (origin === undefined) {
      logger.error('RegionReplicator: no healthy regions available for write');
      return { origin: null, replicated: [], failed: [...this.regionNames] };
    }

    const replicated: string[] = [];
    const failed: string[] = [];

    // The origin write is awaited on its own before the fan-out starts.
    try {
      await this.writeOne(origin, key, value, ttlSeconds);
      replicated.push(origin);
    } catch (error) {
      logger.error(`RegionReplicator: origin write to ${origin} failed:`, error);
      failed.push(origin);
    }

    const settled = await Promise.allSettled(
      replicas.map((name) => this.writeOne(name, key, value, ttlSeconds))
    );
    // `settled` is index-aligned with `replicas`.
    replicas.forEach((name, i) => {
      const result = settled[i];
      if (result?.status === 'fulfilled') {
        replicated.push(name);
      } else {
        failed.push(name);
        logger.warn(`RegionReplicator: replication to ${name} failed:`, result?.reason);
      }
    });

    return { origin: replicated[0] ?? null, replicated, failed };
  }

  /**
   * Read a key using region-based fallback: try the active region first, then
   * other healthy regions until a value is found. A miss or an error in one
   * region moves on to the next.
   *
   * @returns The value, or null if it is absent everywhere or every attempted
   *          region errored.
   */
  async get(key: string): Promise<string | null> {
    for (const name of this.preferenceOrder()) {
      try {
        const value = await this.clientFor(name).get(key);
        if (value !== null && value !== undefined) return value;
      } catch (error) {
        logger.warn(`RegionReplicator: read from ${name} failed, falling back:`, error);
      }
    }
    return null;
  }

  /**
   * Delete a key from every configured region (health is not consulted) so it
   * stays consistent across regions. Per-region failures are logged and
   * reported, never thrown.
   */
  async del(key: string): Promise<{ deleted: string[]; failed: string[] }> {
    const deleted: string[] = [];
    const failed: string[] = [];
    await Promise.allSettled(
      this.regionNames.map(async (name) => {
        try {
          await this.clientFor(name).del(key);
          deleted.push(name);
        } catch (error) {
          failed.push(name);
          logger.warn(`RegionReplicator: delete in ${name} failed:`, error);
        }
      })
    );
    return { deleted, failed };
  }

  /** Name of the region this replicator prefers. */
  getActiveRegion(): string {
    return this.activeRegion;
  }

  /** Copy of the configured region names, in configuration order. */
  getRegions(): string[] {
    return [...this.regionNames];
  }
}

/** Create an ioredis client from a URL or `host:port` connection string. */
function createIoRedisClient(connection: string): RedisLike {
  const options = { maxRetriesPerRequest: 3, enableOfflineQueue: false };
  if (connection.includes('://')) {
    return new Redis(connection, options) as unknown as RedisLike;
  }
  // Bare `host[:port]`; a missing or empty port falls back to 6379.
  const [host, port] = connection.split(':');
  return new Redis({ host, port: parseInt(port || '6379', 10), ...options }) as unknown as RedisLike;
}

/** Build region clients from parsed config (opens real connections). */
export function buildRegionClients(regions: RegionConfig[]): RegionClient[] {
  return regions.map((region) => ({
    name: region.name,
    client: createIoRedisClient(region.connection),
  }));
}
/**
 * Build a {@link RegionReplicator} from environment configuration.
 *
 * Returns null when multi-region replication is not configured (no usable
 * `REDIS_REGIONS` entries) or no active region can be resolved. Connections are
 * opened only when this function is called — nothing happens at import time —
 * so call it once at startup.
 *
 * @param env Environment to read; defaults to `process.env`.
 */
export function createRegionReplicator(
  env: NodeJS.ProcessEnv = process.env
): RegionReplicator | null {
  const configured = parseRegions(env);
  if (configured.length === 0) {
    return null;
  }

  const active = resolveActiveRegionName(configured, env);
  if (!active) {
    return null;
  }

  const summary = `RegionReplicator: ${configured.length} region(s) configured, active=${active}`;
  logger.info(summary);

  return new RegionReplicator(buildRegionClients(configured), active);
}

// ---- backend/src/config/region.config.ts ----

/**
 * Multi-region Redis replication configuration.
 *
 * Describes which Redis regions the cache replicates across and which one the
 * current process treats as "local" (active). Parsing and region selection are
 * pure, so they can be unit-tested without any Redis connection.
 *
 * Environment:
 *   REDIS_REGIONS        comma-separated `name@connection` pairs, where
 *                        `connection` is a redis URL or `host:port`, e.g.
 *                        "us-east@redis://cache-us:6379,eu-west@cache-eu:6379"
 *   REDIS_ACTIVE_REGION  name of this process's local region (defaults to the
 *                        first configured region).
 */

/** One replication region and how to reach it. */
export interface RegionConfig {
  /** Logical region name, e.g. "us-east". */
  name: string;
  /** Redis URL or `host:port` connection string. */
  connection: string;
}
// A region name is a plain label; ':' or '/' means the entry is really a bare
// connection string whose name was omitted (e.g. "redis://user:pw@host:6379").
const CONNECTION_LIKE_NAME = /[:/]/;

/**
 * Parse the `REDIS_REGIONS` env value into an ordered list of regions.
 *
 * Entries are comma-separated `name@connection` pairs. Malformed entries are
 * skipped: missing name or connection, a name that looks like a connection
 * string (contains `:` or `/`), or a name already seen (first occurrence wins).
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns Regions in declaration order; an empty list when unset, so callers
 *          can detect "multi-region not configured".
 */
export function parseRegions(env: NodeJS.ProcessEnv = process.env): RegionConfig[] {
  const raw = env.REDIS_REGIONS?.trim();
  if (!raw) return [];

  const regions: RegionConfig[] = [];
  const seen = new Set<string>();

  for (const entry of raw.split(',')) {
    const trimmed = entry.trim();

    // Split on the FIRST '@' only — connection strings may contain '@'
    // (e.g. redis://user:pass@host:6379). `at <= 0` also covers empty entries
    // and entries with no '@' at all.
    const at = trimmed.indexOf('@');
    if (at <= 0) continue;

    const name = trimmed.slice(0, at).trim();
    const connection = trimmed.slice(at + 1).trim();
    if (!name || !connection) continue;

    // Without this, "redis://user:pw@host" would yield a region *named*
    // "redis://user:pw", putting credentials into a region name.
    if (CONNECTION_LIKE_NAME.test(name)) continue;

    if (seen.has(name)) continue;
    seen.add(name);
    regions.push({ name, connection });
  }

  return regions;
}

/**
 * Determine the active (local) region name. Honours `REDIS_ACTIVE_REGION` when
 * it matches a configured region, otherwise falls back to the first region.
 *
 * @param regions Configured regions, in preference order.
 * @param env     Environment to read; defaults to `process.env`.
 * @returns The active region name, or undefined when no regions are configured.
 */
export function resolveActiveRegionName(
  regions: RegionConfig[],
  env: NodeJS.ProcessEnv = process.env
): string | undefined {
  if (regions.length === 0) return undefined;

  const requested = env.REDIS_ACTIVE_REGION?.trim();
  if (requested && regions.some((r) => r.name === requested)) {
    return requested;
  }
  return regions[0]?.name;
}
/**
 * Produce the region fallback order: the active region leads (if healthy),
 * followed by the remaining healthy regions in their configured order.
 * Unhealthy regions are left out entirely, so every returned name is safe to
 * talk to. Pure — the caller supplies the health predicate.
 *
 * @param regionNames All configured region names.
 * @param activeName  Name of the preferred (local) region.
 * @param isHealthy   Predicate deciding whether a region may be used.
 * @returns Healthy region names, best first.
 */
export function orderRegionsByPreference(
  regionNames: string[],
  activeName: string,
  isHealthy: (name: string) => boolean
): string[] {
  // One pass per group, active group first, so the predicate is consulted in
  // the same order as the result is built.
  const healthyWhere = (wantActive: boolean): string[] => {
    const picked: string[] = [];
    for (const candidate of regionNames) {
      if ((candidate === activeName) === wantActive && isHealthy(candidate)) {
        picked.push(candidate);
      }
    }
    return picked;
  };

  return healthyWhere(true).concat(healthyWhere(false));
}
// ---- backend/tests/region-replication.test.ts ----
import {
  orderRegionsByPreference,
  parseRegions,
  resolveActiveRegionName,
} from '../src/config/region.config.js';
import { RegionReplicator, type RedisLike, type RegionClient } from '../src/cache/RegionReplicator.js';

/**
 * Minimal in-memory Redis fake so replication is observable per region.
 *
 * @param options.status  Connection status to report (omit for "healthy").
 * @param options.failGet Make every `get` reject.
 * @param options.failSet Make every `set` reject.
 */
function fakeClient(options: { status?: string; failGet?: boolean; failSet?: boolean } = {}) {
  const store = new Map<string, string>();
  return {
    // Exposed so tests can seed and inspect a region's contents directly.
    store,
    status: options.status,
    async get(key: string): Promise<string | null> {
      if (options.failGet) throw new Error('region read down');
      return store.get(key) ?? null;
    },
    // TTL arguments are accepted by the caller's interface but ignored here.
    async set(key: string, value: string): Promise<string> {
      if (options.failSet) throw new Error('region write down');
      store.set(key, value);
      return 'OK';
    },
    // Like Redis DEL: resolves to the number of keys actually removed.
    async del(key: string): Promise<number> {
      return store.delete(key) ? 1 : 0;
    },
  };
}

/** Type-checks a list of fakes as `RegionClient[]` for the replicator constructor. */
function regions(...clients: Array<{ name: string; client: RedisLike }>): RegionClient[] {
  return clients;
}

describe('region.config (pure)', () => {
  it('parses name@connection pairs, keeping @ inside connection strings', () => {
    const parsed = parseRegions({
      REDIS_REGIONS: 'us-east@redis://user:pw@cache-us:6379, eu-west@cache-eu:6380',
    } as NodeJS.ProcessEnv);
    expect(parsed).toEqual([
      { name: 'us-east', connection: 'redis://user:pw@cache-us:6379' },
      { name: 'eu-west', connection: 'cache-eu:6380' },
    ]);
  });

  it('returns [] when unset and skips malformed/duplicate entries', () => {
    expect(parseRegions({} as NodeJS.ProcessEnv)).toEqual([]);
    const parsed = parseRegions({
      REDIS_REGIONS: 'broken,@nohost,name@,us@h:1,us@h:2',
    } as NodeJS.ProcessEnv);
    expect(parsed).toEqual([{ name: 'us', connection: 'h:1' }]);
  });

  it('resolves the active region from env, else the first region', () => {
    const list = parseRegions({ REDIS_REGIONS: 'us@h:1,eu@h:2' } as NodeJS.ProcessEnv);
    expect(resolveActiveRegionName(list, { REDIS_ACTIVE_REGION: 'eu' } as NodeJS.ProcessEnv)).toBe('eu');
    expect(resolveActiveRegionName(list, { REDIS_ACTIVE_REGION: 'xx' } as NodeJS.ProcessEnv)).toBe('us');
    expect(resolveActiveRegionName([], {} as NodeJS.ProcessEnv)).toBeUndefined();
  });

  it('orders healthy regions with the active first', () => {
    const names = ['us', 'eu', 'ap'];
    expect(orderRegionsByPreference(names, 'eu', () => true)).toEqual(['eu', 'us', 'ap']);
    // active unhealthy -> excluded, first healthy replica leads
    expect(orderRegionsByPreference(names, 'eu', (n) => n !== 'eu')).toEqual(['us', 'ap']);
  });
});

describe('RegionReplicator', () => {
  it('synchronizes a key modified in one region across all replica regions', async () => {
    const us = fakeClient();
    const eu = fakeClient();
    const ap = fakeClient();
    const replicator = new RegionReplicator(
      regions({ name: 'us', client: us }, { name: 'eu', client: eu }, { name: 'ap', client: ap }),
      'us'
    );

    const result = await replicator.set('course:1', 'cached-value', 900);

    // Acceptance: keys modified in one region appear in every replica region.
    expect(us.store.get('course:1')).toBe('cached-value');
    expect(eu.store.get('course:1')).toBe('cached-value');
    expect(ap.store.get('course:1')).toBe('cached-value');
    expect(result.origin).toBe('us');
    // Sort a copy so the assertion does not mutate the returned result.
    expect([...result.replicated].sort()).toEqual(['ap', 'eu', 'us']);
    expect(result.failed).toEqual([]);
  });

  it('still replicates to healthy regions when one replica is down', async () => {
    const us = fakeClient();
    const eu = fakeClient({ failSet: true });
    const replicator = new RegionReplicator(
      regions({ name: 'us', client: us }, { name: 'eu', client: eu }),
      'us'
    );

    const result = await replicator.set('k', 'v');
    expect(us.store.get('k')).toBe('v');
    expect(result.replicated).toContain('us');
    expect(result.failed).toContain('eu');
  });

  it('reads from the active region, falling back to a replica on miss/error', async () => {
    const us = fakeClient({ failGet: true }); // active region read fails
    const eu = fakeClient();
    eu.store.set('k', 'from-eu');
    const replicator = new RegionReplicator(
      regions({ name: 'us', client: us }, { name: 'eu', client: eu }),
      'us'
    );

    expect(await replicator.get('k')).toBe('from-eu');
    expect(await replicator.get('missing')).toBeNull();
  });

  it('skips regions whose connection is dead', async () => {
    const us = fakeClient({ status: 'end' }); // dead active region
    const eu = fakeClient();
    const replicator = new RegionReplicator(
      regions({ name: 'us', client: us }, { name: 'eu', client: eu }),
      'us'
    );

    const result = await replicator.set('k', 'v');
    expect(result.origin).toBe('eu'); // fell back to the healthy region
    expect(us.store.has('k')).toBe(false);
    expect(eu.store.get('k')).toBe('v');
  });

  it('deletes a key from every region', async () => {
    const us = fakeClient();
    const eu = fakeClient();
    us.store.set('k', 'v');
    eu.store.set('k', 'v');
    const replicator = new RegionReplicator(
      regions({ name: 'us', client: us }, { name: 'eu', client: eu }),
      'us'
    );

    const result = await replicator.del('k');
    expect(us.store.has('k')).toBe(false);
    expect(eu.store.has('k')).toBe(false);
    expect([...result.deleted].sort()).toEqual(['eu', 'us']);
  });
});