diff --git a/babel.config.js b/babel.config.js index 9c1732c2..671d4c8e 100644 --- a/babel.config.js +++ b/babel.config.js @@ -17,6 +17,11 @@ module.exports = function (api) { } return { + // `babel-preset-expo` already lowers dynamic `import()` to the async + // require form Metro needs for on-demand screen chunks (see AppNavigator + // and metro.config.js inlineRequires). Lazy module *evaluation* is handled + // by Metro's inlineRequires transform rather than a Babel plugin here, so + // the preset configuration is intentionally minimal. presets: [['babel-preset-expo', { unstable_transformImportMeta: true }]], plugins, }; diff --git a/backend/services/shared/__tests__/tracing.test.ts b/backend/services/shared/__tests__/tracing.test.ts new file mode 100644 index 00000000..85ca9330 --- /dev/null +++ b/backend/services/shared/__tests__/tracing.test.ts @@ -0,0 +1,137 @@ +import { + Sampler, + Tracer, + InMemorySpanExporter, + parseTraceparent, + formatTraceparent, + extractContext, + injectContext, + scrubAttributes, + toOtlpPayload, +} from '../tracing'; + +describe('W3C trace context', () => { + it('round-trips a traceparent', () => { + const value = '00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01'; + const ctx = parseTraceparent(value); + expect(ctx).not.toBeNull(); + expect(ctx?.traceId).toBe('4bf92f3577b34da6a3ce929d0e0e4736'); + expect(ctx?.spanId).toBe('00f067aa0ba902b7'); + expect(ctx?.sampled).toBe(true); + expect(formatTraceparent(ctx!)).toBe(value); + }); + + it('rejects malformed and all-zero ids', () => { + expect(parseTraceparent('garbage')).toBeNull(); + expect(parseTraceparent('00-' + '0'.repeat(32) + '-00f067aa0ba902b7-01')).toBeNull(); + expect(parseTraceparent(undefined)).toBeNull(); + }); + + it('extracts from case-insensitive headers and injects back', () => { + const ctx = extractContext({ + TraceParent: '00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01', + }); + expect(ctx?.traceId).toBe('4bf92f3577b34da6a3ce929d0e0e4736'); + 
const headers = injectContext(ctx!); + expect(headers.traceparent).toContain('4bf92f3577b34da6a3ce929d0e0e4736'); + }); +}); + +describe('Sampler', () => { + it('honors a parent decision over the ratio', () => { + const sampler = new Sampler({ defaultRatio: 0 }); + expect(sampler.shouldSample({ traceId: 'f'.repeat(32), parentSampled: true })).toBe(true); + }); + + it('is deterministic for the same traceId', () => { + const sampler = new Sampler({ defaultRatio: 0.5 }); + const id = '4bf92f3577b34da6a3ce929d0e0e4736'; + expect(sampler.shouldSample({ traceId: id })).toBe(sampler.shouldSample({ traceId: id })); + }); + + it('applies endpoint overrides', () => { + const sampler = new Sampler({ defaultRatio: 0, endpointRatios: { 'POST /charges': 1 } }); + expect(sampler.shouldSample({ traceId: 'a'.repeat(32), endpoint: 'POST /charges' })).toBe(true); + expect(sampler.shouldSample({ traceId: 'a'.repeat(32), endpoint: 'GET /other' })).toBe(false); + }); +}); + +describe('Tracer', () => { + it('exports sampled spans with parent linkage and timing', async () => { + const exporter = new InMemorySpanExporter(); + const tracer = new Tracer({ + serviceName: 'test', + exporter, + sampler: new Sampler({ defaultRatio: 1 }), + }); + + await tracer.withSpan('parent', async (parent) => { + await tracer.withSpan('child', async () => undefined, { parent: parent.context }); + }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(2); + const parent = spans.find((s) => s.name === 'parent')!; + const child = spans.find((s) => s.name === 'child')!; + expect(child.traceId).toBe(parent.traceId); + expect(child.parentSpanId).toBe(parent.spanId); + expect(parent.status.code).toBe('ok'); + expect(typeof parent.durationMs).toBe('number'); + }); + + it('force-keeps errored spans even when sampling would drop them', async () => { + const exporter = new InMemorySpanExporter(); + const tracer = new Tracer({ + serviceName: 'test', + exporter, + sampler: new Sampler({ 
defaultRatio: 0, alwaysSampleErrors: true }), + }); + + await expect( + tracer.withSpan('boom', async () => { + throw new Error('kaboom'); + }) + ).rejects.toThrow('kaboom'); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + expect(spans[0].status.code).toBe('error'); + }); + + it('does not export unsampled, successful spans', async () => { + const exporter = new InMemorySpanExporter(); + const tracer = new Tracer({ + serviceName: 'test', + exporter, + sampler: new Sampler({ defaultRatio: 0, alwaysSampleErrors: false }), + }); + await tracer.withSpan('quiet', async () => undefined); + expect(exporter.getFinishedSpans()).toHaveLength(0); + }); +}); + +describe('PII scrubbing + OTLP', () => { + it('redacts sensitive attribute keys', () => { + const scrubbed = scrubAttributes({ 'user.email': 'a@b.com', 'http.method': 'GET' }); + expect(scrubbed['user.email']).toBe('[redacted]'); + expect(scrubbed['http.method']).toBe('GET'); + }); + + it('produces an OTLP ResourceSpans payload', () => { + const payload = toOtlpPayload([ + { + traceId: 'a'.repeat(32), + spanId: 'b'.repeat(16), + name: 'op', + kind: 'server', + startTime: 1, + endTime: 2, + attributes: { 'http.status_code': 200 }, + events: [], + status: { code: 'ok' }, + service: 'svc', + }, + ]) as { resourceSpans: unknown[] }; + expect(payload.resourceSpans).toHaveLength(1); + }); +}); diff --git a/backend/services/shared/tracing.ts b/backend/services/shared/tracing.ts new file mode 100644 index 00000000..cfdcc898 --- /dev/null +++ b/backend/services/shared/tracing.ts @@ -0,0 +1,448 @@ +/** + * Distributed tracing core — W3C Trace Context propagation + a minimal, + * dependency-free tracer that is OpenTelemetry-shaped (spans, kinds, status, + * attributes, events) and exports OTLP-style payloads. 
/**
 * We deliberately avoid pulling the full OpenTelemetry SDK into the shared
 * backend layer: the wire formats (W3C `traceparent`/`tracestate`, OTLP/HTTP)
 * are small and stable, and a self-contained implementation keeps the hot path
 * cheap (the <2% p95 overhead budget) and the dependency surface minimal. The
 * exporter interface is compatible with an OTLP collector, so swapping in the
 * real SDK later is a drop-in.
 *
 * @see https://www.w3.org/TR/trace-context/
 */

// Named import: works with or without `esModuleInterop`.
import { randomBytes } from 'crypto';

// ── Wire types ───────────────────────────────────────────────────────────────

export type SpanKind = 'server' | 'client' | 'producer' | 'consumer' | 'internal';
export type SpanStatusCode = 'unset' | 'ok' | 'error';

export interface SpanContext {
  traceId: string; // 32 hex chars
  spanId: string; // 16 hex chars
  /** Low bit = sampled, per W3C trace-flags. */
  sampled: boolean;
  /** Opaque vendor state, propagated verbatim. */
  traceState?: string;
}

export interface SpanEvent {
  name: string;
  timestamp: number;
  attributes?: Record<string, AttributeValue>;
}

export type AttributeValue = string | number | boolean;

export interface SpanData {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
  name: string;
  kind: SpanKind;
  startTime: number;
  endTime?: number;
  durationMs?: number;
  attributes: Record<string, AttributeValue>;
  events: SpanEvent[];
  status: { code: SpanStatusCode; message?: string };
  /** Logical service that produced the span — set by the exporter/tracer. */
  service: string;
}

// ── ID + clock seams (overridable for deterministic tests) ────────────────────

export interface TracingClock {
  now(): number;
}

const defaultClock: TracingClock = { now: () => Date.now() };

const randomHex = (bytes: number): string => randomBytes(bytes).toString('hex');

export const generateTraceId = (): string => randomHex(16); // 128-bit
export const generateSpanId = (): string => randomHex(8); // 64-bit

const INVALID_TRACE_ID = '0'.repeat(32);
const INVALID_SPAN_ID = '0'.repeat(16);

// ── W3C Trace Context (de)serialization ──────────────────────────────────────

// The optional 5th group captures trailing "-…" fields, which only versions
// newer than 00 are allowed to carry.
const TRACEPARENT_RE =
  /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})(-.*)?$/;

/**
 * Parse a `traceparent` (+ optional `tracestate`) into a SpanContext.
 *
 * @param traceparent Raw header value; surrounding whitespace is ignored.
 * @param tracestate  Raw `tracestate` value, stored verbatim on the result.
 * @returns The parsed context, or `null` when the header is missing, malformed,
 *          uses the reserved version `ff`, is a version-00 header with trailing
 *          fields, or carries an all-zero trace/span id.
 */
export const parseTraceparent = (
  traceparent: string | undefined | null,
  tracestate?: string | null
): SpanContext | null => {
  if (!traceparent) return null;
  const match = TRACEPARENT_RE.exec(traceparent.trim());
  if (!match) return null;

  const [, version, traceId, spanId, flags, extra] = match;
  // Version ff is reserved as invalid.
  if (version === 'ff') return null;
  // Version 00 is exactly four fields; only newer versions may append more, and
  // those are parsed by reading the first four fields.
  if (version === '00' && extra !== undefined) return null;
  if (traceId === INVALID_TRACE_ID || spanId === INVALID_SPAN_ID) return null;

  return {
    traceId,
    spanId,
    sampled: (parseInt(flags, 16) & 0x01) === 0x01,
    traceState: tracestate ?? undefined,
  };
};

/** Serialize a SpanContext into a W3C `traceparent` header value (always version 00). */
export const formatTraceparent = (ctx: SpanContext): string =>
  `00-${ctx.traceId}-${ctx.spanId}-${ctx.sampled ? '01' : '00'}`;

const HEADER_TRACEPARENT = 'traceparent';
const HEADER_TRACESTATE = 'tracestate';

type HeaderBag = Record<string, string | string[] | undefined>;

/**
 * Look up a header by name, ignoring the case of the keys in `headers`.
 * `name` must already be lower-case. For multi-valued headers the first value wins.
 */
const headerValue = (headers: HeaderBag, name: string): string | undefined => {
  // HTTP headers are case-insensitive.
  const key = Object.keys(headers).find((k) => k.toLowerCase() === name);
  const raw = key ? headers[key] : undefined;
  return Array.isArray(raw) ? raw[0] : raw;
};

/** Extract a parent SpanContext from an incoming request's headers. */
export const extractContext = (headers: HeaderBag): SpanContext | null =>
  parseTraceparent(headerValue(headers, HEADER_TRACEPARENT), headerValue(headers, HEADER_TRACESTATE));

/**
 * Inject a SpanContext into outgoing headers for downstream propagation.
 * Mutates and returns `headers`; `tracestate` is only written when the context
 * carries a non-empty trace state.
 */
export const injectContext = (
  ctx: SpanContext,
  headers: Record<string, string> = {}
): Record<string, string> => {
  headers[HEADER_TRACEPARENT] = formatTraceparent(ctx);
  if (ctx.traceState) headers[HEADER_TRACESTATE] = ctx.traceState;
  return headers;
};

// ── Sampling ─────────────────────────────────────────────────────────────────

export interface SamplerConfig {
  /** Base probability [0,1] applied when no endpoint rule matches. */
  defaultRatio: number;
  /** Per-endpoint overrides, keyed by route name (e.g. "POST /v1/charges"). */
  endpointRatios?: Record<string, number>;
  /** Always sample traces that end in error, regardless of ratio. */
  alwaysSampleErrors?: boolean;
}

export interface SampleInput {
  traceId: string;
  endpoint?: string;
  /** A parent decision (from an upstream service) takes precedence when present. */
  parentSampled?: boolean;
}

// 2^32: dividing a 32-bit value by this yields a bucket in [0, 1).
const BUCKET_SPACE = 0x100000000;

/**
 * Deterministic, consistent sampler. The decision is derived from the traceId so
 * every service in a trace makes the *same* choice (no partial traces), and a
 * parent's decision is always honored to keep traces whole across hops.
 */
export class Sampler {
  constructor(private readonly config: SamplerConfig) {}

  /**
   * Decide whether a span should be sampled.
   *
   * Precedence: parent decision → endpoint override → `defaultRatio`. Ratios
   * `>= 1` always sample and `<= 0` never do; in between, the first 8 hex chars
   * of the trace id are compared against the ratio.
   */
  shouldSample(input: SampleInput): boolean {
    if (input.parentSampled !== undefined) return input.parentSampled;

    const endpointRatio = input.endpoint
      ? this.config.endpointRatios?.[input.endpoint]
      : undefined;
    const ratio = endpointRatio ?? this.config.defaultRatio;
    if (ratio >= 1) return true;
    if (ratio <= 0) return false;

    // Map the high 32 bits of the traceId to [0,1) — consistent across services.
    // Dividing by 2^32 (not 2^32 - 1) keeps the top bucket strictly below 1.
    const bucket = parseInt(input.traceId.slice(0, 8), 16) / BUCKET_SPACE;
    return bucket < ratio;
  }

  /** Error-based sampling: force-keep a trace that errored (defaults to true). */
  forceOnError(): boolean {
    return this.config.alwaysSampleErrors ?? true;
  }
}

// ── PII scrubbing ─────────────────────────────────────────────────────────────

const DEFAULT_REDACT_KEYS = [
  'authorization',
  'cookie',
  'password',
  'token',
  'secret',
  'apikey',
  'api_key',
  'email',
  'phone',
  'ssn',
  'card',
  'wallet',
];

/**
 * Strip likely-PII attribute values before a span leaves the process.
 *
 * An attribute is redacted when its key contains any of `redactKeys` as a
 * substring, compared case-insensitively on both sides. Returns a new object;
 * the input is not mutated.
 */
export const scrubAttributes = (
  attributes: Record<string, AttributeValue>,
  redactKeys: string[] = DEFAULT_REDACT_KEYS
): Record<string, AttributeValue> => {
  // Normalise once so caller-supplied keys such as "Email" still match.
  const needles = redactKeys.map((needle) => needle.toLowerCase());
  const result: Record<string, AttributeValue> = {};
  for (const [key, value] of Object.entries(attributes)) {
    const lower = key.toLowerCase();
    result[key] = needles.some((needle) => lower.includes(needle)) ? '[redacted]' : value;
  }
  return result;
};

// ── Exporters ─────────────────────────────────────────────────────────────────

export interface SpanExporter {
  export(spans: SpanData[]): void | Promise<void>;
}

/** Buffers spans in memory — used by tests and the dashboard endpoint. */
export class InMemorySpanExporter implements SpanExporter {
  private spans: SpanData[] = [];
  export(spans: SpanData[]): void {
    this.spans.push(...spans);
  }
  /** Returns a shallow copy of the buffered spans. */
  getFinishedSpans(): SpanData[] {
    return [...this.spans];
  }
  reset(): void {
    this.spans = [];
  }
}

/**
 * Posts spans to an OpenTelemetry collector over OTLP/HTTP-JSON. Fire-and-forget
 * and best-effort: tracing must never break or slow the request path, so export
 * failures are swallowed (and surfaced via the optional onError hook).
 */
export class OtlpHttpSpanExporter implements SpanExporter {
  constructor(
    private readonly options: {
      endpoint: string; // e.g. http://otel-collector:4318/v1/traces
      fetchImpl?: typeof fetch;
      onError?: (err: unknown) => void;
    }
  ) {}

  /**
   * POST the batch to the collector. Transport errors and non-2xx responses are
   * both reported through `onError`; an empty batch is a no-op.
   */
  async export(spans: SpanData[]): Promise<void> {
    if (spans.length === 0) return;
    const fetchImpl = this.options.fetchImpl ?? fetch;

    let failed = false;
    let failure: unknown;
    try {
      const response = await fetchImpl(this.options.endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(toOtlpPayload(spans)),
      });
      // fetch only rejects on transport failure; a 4xx/5xx from the collector
      // still means the spans were dropped.
      if (!response.ok) {
        failed = true;
        failure = new Error(`OTLP export failed with HTTP ${response.status}`);
      }
    } catch (err) {
      failed = true;
      failure = err;
    }
    if (failed) this.options.onError?.(failure);
  }
}

// OTLP/JSON carries enums as their integer values, not their names.
const OTLP_SPAN_KIND: Record<SpanKind, number> = {
  internal: 1,
  server: 2,
  client: 3,
  producer: 4,
  consumer: 5,
};

const OTLP_STATUS_CODE: Record<SpanStatusCode, number> = {
  unset: 0,
  ok: 1,
  error: 2,
};

/**
 * Convert epoch milliseconds to a decimal string of epoch nanoseconds.
 * Built by string concatenation because `ms * 1e6` exceeds 2^53 for current
 * dates and would lose precision as a JS number.
 */
const msToUnixNano = (ms: number): string => {
  if (!Number.isFinite(ms) || ms <= 0) return '0';
  let whole = Math.trunc(ms);
  let nanos = Math.round((ms - whole) * 1e6);
  if (nanos >= 1e6) {
    whole += 1;
    nanos -= 1e6;
  }
  return whole === 0 ? String(nanos) : `${whole}${String(nanos).padStart(6, '0')}`;
};

/** Encode one attribute value as an OTLP `AnyValue`. */
const attributeToOtlp = (value: AttributeValue) => {
  if (typeof value === 'number') {
    // 64-bit integers are string-encoded in OTLP/JSON.
    return Number.isSafeInteger(value) ? { intValue: String(value) } : { doubleValue: value };
  }
  if (typeof value === 'boolean') return { boolValue: value };
  return { stringValue: value };
};

/** Encode an attribute map as an OTLP `KeyValue` list. */
const attributesToOtlp = (attributes: Record<string, AttributeValue> = {}) =>
  Object.entries(attributes).map(([key, value]) => ({
    key,
    value: attributeToOtlp(value),
  }));

/**
 * Convert internal spans to a minimal OTLP/JSON ResourceSpans payload.
 *
 * All spans are placed under a single resource whose `service.name` is taken
 * from the first span (`'unknown'` for an empty batch).
 */
export const toOtlpPayload = (spans: SpanData[]): unknown => ({
  resourceSpans: [
    {
      resource: {
        attributes: [{ key: 'service.name', value: { stringValue: spans[0]?.service ?? 'unknown' } }],
      },
      scopeSpans: [
        {
          scope: { name: 'subtrackr-tracing' },
          spans: spans.map((s) => ({
            traceId: s.traceId,
            spanId: s.spanId,
            parentSpanId: s.parentSpanId,
            name: s.name,
            kind: OTLP_SPAN_KIND[s.kind],
            startTimeUnixNano: msToUnixNano(s.startTime),
            // An un-ended span is exported as zero-length.
            endTimeUnixNano: msToUnixNano(s.endTime ?? s.startTime),
            attributes: attributesToOtlp(s.attributes),
            events: s.events.map((event) => ({
              timeUnixNano: msToUnixNano(event.timestamp),
              name: event.name,
              attributes: attributesToOtlp(event.attributes),
            })),
            status: { code: OTLP_STATUS_CODE[s.status.code], message: s.status.message },
          })),
        },
      ],
    },
  ],
});

// ── Span + Tracer ──────────────────────────────────────────────────────────────

export class Span {
  readonly context: SpanContext;
  readonly data: SpanData;
  private ended = false;

  /**
   * @param data       Mutable span record; attributes/events/status are written to it.
   * @param sampled    Sampling decision exposed on `context`.
   * @param clock      Time source for events and the end timestamp.
   * @param onEnd      Invoked exactly once, on the first `end()` call.
   * @param traceState Optional W3C trace state inherited from the parent, so it
   *                   keeps flowing to downstream hops via `injectContext`.
   */
  constructor(
    data: SpanData,
    sampled: boolean,
    private readonly clock: TracingClock,
    private readonly onEnd: (span: Span) => void,
    traceState?: string
  ) {
    this.data = data;
    this.context = {
      traceId: data.traceId,
      spanId: data.spanId,
      sampled,
      ...(traceState ? { traceState } : {}),
    };
  }

  setAttribute(key: string, value: AttributeValue): this {
    this.data.attributes[key] = value;
    return this;
  }

  setAttributes(attributes: Record<string, AttributeValue>): this {
    Object.assign(this.data.attributes, attributes);
    return this;
  }

  addEvent(name: string, attributes?: Record<string, AttributeValue>): this {
    this.data.events.push({ name, timestamp: this.clock.now(), attributes });
    return this;
  }

  setStatus(code: SpanStatusCode, message?: string): this {
    this.data.status = { code, message };
    return this;
  }

  /** Record an `exception` event and mark the span as errored. */
  recordException(error: unknown): this {
    const message = error instanceof Error ? error.message : String(error);
    this.addEvent('exception', { 'exception.message': message });
    return this.setStatus('error', message);
  }

  /** Stamp end time/duration and notify the tracer. Subsequent calls are no-ops. */
  end(): void {
    if (this.ended) return;
    this.ended = true;
    this.data.endTime = this.clock.now();
    this.data.durationMs = this.data.endTime - this.data.startTime;
    this.onEnd(this);
  }
}

export interface TracerOptions {
  serviceName: string;
  exporter: SpanExporter;
  sampler: Sampler;
  clock?: TracingClock;
  redactKeys?: string[];
}

export interface StartSpanOptions {
  kind?: SpanKind;
  parent?: SpanContext | null;
  attributes?: Record<string, AttributeValue>;
  /** Route name used for endpoint-based sampling. */
  endpoint?: string;
}

export class Tracer {
  private readonly clock: TracingClock;

  constructor(private readonly options: TracerOptions) {
    this.clock = options.clock ?? defaultClock;
  }

  /**
   * Start a span. With a parent, the trace id, sampling decision and trace
   * state are inherited; without one a new root trace id is generated.
   * The caller is responsible for calling `end()`.
   */
  startSpan(name: string, opts: StartSpanOptions = {}): Span {
    const parent = opts.parent ?? null;
    const traceId = parent?.traceId ?? generateTraceId();
    const sampled = this.options.sampler.shouldSample({
      traceId,
      endpoint: opts.endpoint,
      parentSampled: parent?.sampled,
    });

    const data: SpanData = {
      traceId,
      spanId: generateSpanId(),
      parentSpanId: parent?.spanId,
      name,
      kind: opts.kind ?? 'internal',
      startTime: this.clock.now(),
      // Copy so later setAttribute calls never mutate the caller's object.
      attributes: opts.attributes ? { ...opts.attributes } : {},
      events: [],
      status: { code: 'unset' },
      service: this.options.serviceName,
    };

    return new Span(data, sampled, this.clock, (span) => this.onSpanEnd(span), parent?.traceState);
  }

  /**
   * Wrap an async unit of work in a span, recording timing, errors and status.
   * The span is always ended; a thrown error is recorded and then re-thrown.
   * Status is set to `ok` only if `fn` left it `unset`.
   */
  async withSpan<T>(
    name: string,
    fn: (span: Span) => Promise<T>,
    opts: StartSpanOptions = {}
  ): Promise<T> {
    const span = this.startSpan(name, opts);
    try {
      const result = await fn(span);
      if (span.data.status.code === 'unset') span.setStatus('ok');
      return result;
    } catch (err) {
      span.recordException(err);
      throw err;
    } finally {
      span.end();
    }
  }

  private onSpanEnd(span: Span): void {
    const errored = span.data.status.code === 'error';
    // Error-based sampling: keep an errored trace even if probabilistic
    // sampling would have dropped it.
    const keep = span.context.sampled || (errored && this.options.sampler.forceOnError());
    if (!keep) return;

    span.data.attributes = scrubAttributes(span.data.attributes, this.options.redactKeys);
    // Export is best-effort: a throwing or rejecting exporter must not surface
    // in the traced code path or become an unhandled rejection.
    try {
      const pending = this.options.exporter.export([span.data]);
      void Promise.resolve(pending).catch(() => undefined);
    } catch {
      // Intentionally ignored — see comment above.
    }
  }
}

// ── Default process tracer ─────────────────────────────────────────────────────

/**
 * Parse a numeric env value. Unset, blank and non-finite values yield `fallback`.
 * Blank is checked explicitly because `Number('')` is 0, which would otherwise
 * turn an empty variable into "sample nothing".
 */
const num = (value: string | undefined, fallback: number): number => {
  if (value === undefined || value.trim() === '') return fallback;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
};

/**
 * Build a tracer from environment configuration. The exporter is OTLP/HTTP when
 * OTEL_EXPORTER_OTLP_ENDPOINT is set, otherwise an in-memory buffer (tests/dev).
 *
 * Reads OTEL_TRACES_SAMPLER_RATIO (default 0.1) and OTEL_TRACES_SAMPLE_ERRORS
 * (anything other than the string "false" enables error sampling).
 */
export const createTracerFromEnv = (
  serviceName: string,
  env: NodeJS.ProcessEnv = process.env
): Tracer => {
  const endpoint = env.OTEL_EXPORTER_OTLP_ENDPOINT;
  const exporter: SpanExporter = endpoint
    ? new OtlpHttpSpanExporter({ endpoint: `${endpoint.replace(/\/+$/, '')}/v1/traces` })
    : new InMemorySpanExporter();

  const sampler = new Sampler({
    defaultRatio: num(env.OTEL_TRACES_SAMPLER_RATIO, 0.1),
    alwaysSampleErrors: env.OTEL_TRACES_SAMPLE_ERRORS !== 'false',
  });

  return new Tracer({ serviceName, exporter, sampler });
};

// ── Patch content sharing this source line (start of docs/distributed-tracing.md),
// ── kept verbatim and commented out so the module above stays parseable. ────────
// diff --git a/docs/distributed-tracing.md b/docs/distributed-tracing.md
// new file mode 100644
// index 00000000..9d4c788c
// --- /dev/null
// +++ b/docs/distributed-tracing.md
// @@ -0,0 +1,115 @@
// +# Distributed Tracing
// +
// +SubTrackr spans mobile, backend, ML, webhooks and smart contracts. End-to-end
// +tracing stitches a single user action into one trace so latency and errors can be
// +attributed to a specific service hop instead of correlated by hand across logs.
// +
// +## Architecture
// +
// +```
// +Mobile app ──traceparent──▶ Backend API ──traceparent──▶ ML service
// + │ │
// + │ apiClient.ts │ shared/monitoring.ts ml-service/main.py
// + │ (client span) │ (server/db/external spans) (server/inference spans)
// + │ │
// + │ └──traceparent──▶ Webhook receiver
// + │ webhook.ts (producer span)
// + ▼
// + OTLP/HTTP ─────────────────▶ OTel Collector ──▶ Tempo ──▶ Grafana (flame graphs)
// +```
// +
// +Every hop propagates **W3C Trace Context** (`traceparent` / `tracestate`) so the
// +trace id is shared and parent/child span linkage is preserved.
// +
// +## Propagation contract
// +
// +- Header: `traceparent: 00-<32-hex trace-id>-<16-hex span-id>-<2-hex flags>`.
// +- The low bit of flags is the **sampled** flag.
// +- A receiver adopts the incoming context as the parent of its server span; if no
// + header is present it starts a new root trace.
// +- Decisions are **consistent across services**: sampling is derived from the
// + trace id and a parent's decision is always honored, so traces are never partial.
+ +## Per-language usage + +### Backend (TypeScript) — `backend/services/shared` + +```ts +import { startServerSpan, traceDbQuery, traceExternalCall } from './shared/monitoring'; + +async function handleCharge(req) { + const { span, downstreamHeaders } = startServerSpan('POST /v1/charges', req.headers); + try { + const sub = await traceDbQuery('select subscription', span.context, () => db.query(...)); + await traceExternalCall('ml-service', span.context, (_s, headers) => + fetch(ML_URL, { headers }) // headers already carry traceparent + ); + span.setStatus('ok'); + } catch (e) { + span.recordException(e); + throw e; + } finally { + span.end(); + } +} +``` + +### Mobile (TypeScript) — `src/services/network/apiClient.ts` + +```ts +import { apiClient } from './services/network/apiClient'; +const res = await apiClient.post('/v1/charges', body); // injects traceparent, spans the call +``` + +### ML service (Python) — `ml-service/main.py` + +Spans are emitted for `ml.model.load`, `ml.feature.compute` and `ml.inference`, +all children of a server span rooted in the incoming context. + +### Webhooks — `backend/services/webhook.ts` + +`deliverEvent(input, parentContext)` opens a producer span and injects +`traceparent` into the delivery headers so receivers can correlate. + +## Sampling strategy + +Configurable via env, consistent across JS and Python services: + +| Variable | Meaning | Default | +| ----------------------------- | ---------------------------------------- | ------- | +| `OTEL_TRACES_SAMPLER_RATIO` | head sampling probability [0,1] | `0.1` | +| `OTEL_TRACES_SAMPLE_ERRORS` | always keep errored traces (`false` off) | `true` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | collector base URL | — | +| `OTEL_SERVICE_NAME` | logical service name on spans | per svc | + +Three strategies are supported and compose: + +- **Rate-based** — `defaultRatio` / `OTEL_TRACES_SAMPLER_RATIO`. +- **Endpoint-based** — `endpointRatios` per route (e.g. always sample `POST /v1/charges`). 
+- **Error-based** — head-dropped traces that error are force-kept; the collector + additionally tail-samples errors and slow (>1s) traces. + +## Collector + visualization + +Bring up the local stack and point services at it: + +```bash +docker compose -f infra/docker-compose.observability.yml up +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +``` + +Open Grafana (`http://localhost:3000`) → Explore → Tempo → search by trace id or +service to see the flame graph. The collector config +(`infra/otel-collector-config.yaml`) redacts PII attributes and applies tail +sampling before export. + +## Privacy / overhead + +- **PII** — span attributes are scrubbed of likely-sensitive keys + (`authorization`, `email`, `wallet`, …) before export, both in-process + (`scrubAttributes`) and again at the collector. +- **Header size** — only `traceparent` (+ optional `tracestate`) are propagated. +- **Overhead** — spans are plain objects; export is async and best-effort + (failures are swallowed), keeping the instrumentation within the <2% p95 budget. +- **Retries** — propagation is per-attempt, so a retried request still carries a + valid context. diff --git a/docs/e2e-deterministic-testing.md b/docs/e2e-deterministic-testing.md new file mode 100644 index 00000000..bd04b8da --- /dev/null +++ b/docs/e2e-deterministic-testing.md @@ -0,0 +1,116 @@ +# Writing Deterministic E2E Tests + +Detox tests fail in CI for reasons that have nothing to do with real regressions: +timing, live network, and state leaking between cases. This guide describes the +infrastructure that removes those failure modes and the rules for keeping new +tests deterministic. 
+ +## The four pillars + +| Concern | Mechanism | Where | +| ------------------- | ------------------------------------------- | --------------------------------------- | +| Isolated state | wipe storage + hermetic seed per test | `e2e/helpers/launchArgs.ts`, `testData.ts` | +| Explicit waits | poll a condition, never sleep | `e2e/helpers/waits.ts` | +| Deterministic network | in-app `fetch` interceptor + scenarios | `e2e/helpers/mockServer.ts`, `src/utils/e2e/` | +| Stable screenshots | pixel-diff with tolerance, not hashing | `e2e/helpers/visualRegression.ts` | + +## 1. Hermetic, isolated state + +Every test launches a fresh app with storage wiped (`delete: true`) and a frozen +clock/locale/timezone. Use `launchCleanApp()` for an empty app or +`launchSeededApp(fixture)` to start with known data: + +```ts +import { launchSeededApp } from './helpers/subscriptionFlows'; +import { fixtures } from './helpers/testData'; + +beforeEach(async () => { + await launchSeededApp(fixtures.portfolio); +}); +``` + +Seeds are defined in `e2e/helpers/testData.ts` with **fixed** IDs and **absolute** +ISO dates (relative to the frozen clock `FIXED_NOW_MS = 2024-01-15T12:00:00Z`). +Never use `Date.now()` or random data in a fixture — it reintroduces drift. + +The app reads the seed at startup in `src/utils/e2e/e2eBootstrap.ts`, writes it to +the zustand persist key, and rehydrates the store before the first frame. This is +a strict no-op outside E2E (`isE2E()` is false), so production is unaffected. + +## 2. Explicit waits — never `sleep` + +**Banned:** `device.sleep(ms)`, `setTimeout`-based waits, or `withTimeout` on a +fixed delay. They are simultaneously too slow (wastes CI time) and too short +(flaky on cold machines). 
+ +**Required:** wait on the condition you actually care about, via `helpers/waits.ts`: + +```ts +import { waitForVisible, tapWhenReady, waitForGone } from './helpers/waits'; + +await tapWhenReady(by.id('save-subscription-button')); // waits, then taps +await waitForVisible(by.id('subscription-detail-screen')); +await waitForGone(by.text('Deleting…')); +``` + +Detox already idles on the bridge and animations, so these resolve the instant +the app settles. + +## 3. Deterministic network + +Live HTTP is the single biggest flake source. When launched with +`e2eMockNetwork=true` (the default), the app installs a `fetch` interceptor that +answers from a **named scenario**. Pick one per test: + +```ts +await launchSeededApp(fixtures.empty, { scenario: 'charge-failure' }); +``` + +Scenarios live in `e2e/helpers/mockServer.ts` (test-facing names) and are mirrored +in `src/utils/e2e/mockScenarios.ts` (the in-app responder). Add routes to **both**. +An unmapped request in a mocked run returns `501 unmocked_request` — fail loudly +rather than leak to the network. + +Available scenarios: `happy-path` (default), `charge-failure`, `degraded-network` +(fixed latency to exercise loading states without real jitter). + +## 4. Visual regression with tolerance + +Screenshots are compared pixel-by-pixel with `pixelmatch`, not by exact hash. A +test passes when the fraction of differing pixels is within tolerance: + +```ts +assertVisualSnapshot('home-screen', shot, { maxDiffRatio: 0.02 }); +``` + +Defaults are env-overridable: + +- `VISUAL_PIXEL_THRESHOLD` — per-pixel color sensitivity (0 strict … 1 loose, default `0.1`) +- `VISUAL_MAX_DIFF_RATIO` — max fraction of differing pixels (default `0.01` = 1%) + +Baselines are PNGs in `e2e/fixtures/baselines/`, with per-snapshot tolerances in +`e2e/fixtures/visual-baselines.json`. 
Record/update them intentionally: + +```bash +UPDATE_VISUAL_BASELINE=true npm run e2e:visual:update-ios +``` + +When a comparison fails, a diff image is written to `artifacts/visual-diffs/`. + +## Flaky detection and the zero-flaky gate + +- Failed tests auto-retry up to `E2E_RETRIES` (default 2) via `jest.retryTimes`. +- `e2e/helpers/flakyReporter.js` records any test that only passed **after** a + retry into `artifacts/flaky-report.json`. +- With `E2E_FAIL_ON_FLAKY=true` (used by `npm run e2e:stability-*`) the build + fails if any flake is detected. +- The `stability` CI job (`workflow_dispatch`) runs the suite **5 consecutive + times** with the flaky gate on, enforcing "zero flaky failures across 5 runs". + +## Checklist for a new test + +- [ ] Launches via `launchCleanApp` / `launchSeededApp` (no raw `device.launchApp`). +- [ ] Uses `helpers/waits.ts`; contains no `sleep`/fixed timers. +- [ ] Any network dependency is covered by a mock scenario. +- [ ] Visual assertions pass a sensible `maxDiffRatio`, never an exact hash. +- [ ] Fixtures use fixed IDs and absolute dates. diff --git a/e2e/fixtures/baselines/README.md b/e2e/fixtures/baselines/README.md new file mode 100644 index 00000000..deec6b21 --- /dev/null +++ b/e2e/fixtures/baselines/README.md @@ -0,0 +1,2 @@ +# Visual regression baseline PNGs are stored here. +# Record/update with UPDATE_VISUAL_BASELINE=true. diff --git a/e2e/helpers/flakyReporter.js b/e2e/helpers/flakyReporter.js new file mode 100644 index 00000000..e772280b --- /dev/null +++ b/e2e/helpers/flakyReporter.js @@ -0,0 +1,69 @@ +/* eslint-disable @typescript-eslint/no-var-requires */ +const fs = require('fs'); +const path = require('path'); + +/** + * Jest reporter that surfaces flaky E2E tests. + * + * A test is "flaky" when it required more than one invocation to pass — i.e. it + * failed at least once and only succeeded on a `jest.retryTimes` retry. These + * are exactly the tests that erode confidence: green overall, but non-determ. 
+ * + * The reporter writes a machine-readable report to `artifacts/flaky-report.json` + * (uploaded as a CI artifact) and prints a summary. With `E2E_FAIL_ON_FLAKY=true` + * the process exits non-zero when any flake is detected, enforcing the + * "zero flaky failures" acceptance criterion in CI. + */ +class FlakyReporter { + constructor(globalConfig, options) { + this._globalConfig = globalConfig; + this._options = options || {}; + this._flaky = []; + } + + onTestResult(_test, testResult) { + for (const result of testResult.testResults) { + // `invocations` counts every attempt; >1 with a pass means it flaked. + const invocations = result.invocations || 1; + if (invocations > 1 && result.status === 'passed') { + this._flaky.push({ + title: result.fullName || result.title, + file: testResult.testFilePath, + attempts: invocations, + }); + } + } + } + + onRunComplete(_contexts, results) { + const outDir = this._options.outputDir || path.resolve(process.cwd(), 'artifacts'); + fs.mkdirSync(outDir, { recursive: true }); + const reportPath = path.join(outDir, 'flaky-report.json'); + + const report = { + generatedAt: new Date().toISOString(), + totalTests: results.numTotalTests, + failedTests: results.numFailedTests, + flakyCount: this._flaky.length, + flaky: this._flaky, + }; + fs.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}\n`); + + if (this._flaky.length > 0) { + // eslint-disable-next-line no-console + console.warn(`\n⚠️ ${this._flaky.length} flaky test(s) detected (passed only after retry):`); + for (const f of this._flaky) { + // eslint-disable-next-line no-console + console.warn(` • ${f.title} (${f.attempts} attempts)`); + } + // eslint-disable-next-line no-console + console.warn(` Report: ${reportPath}\n`); + + if (process.env.E2E_FAIL_ON_FLAKY === 'true') { + process.exitCode = 1; + } + } + } +} + +module.exports = FlakyReporter; diff --git a/e2e/helpers/launchArgs.ts b/e2e/helpers/launchArgs.ts new file mode 100644 index 00000000..84d0dba8 --- 
// Patch header sharing this source line, kept verbatim:
// /dev/null +++ b/e2e/helpers/launchArgs.ts @@ -0,0 +1,84 @@
import { device } from 'detox';
import { defaultMockScenario, MockNetworkScenarioName } from './mockServer';
import { SeededSubscription } from './testData';

/**
 * Deterministic launch configuration shared by every E2E test.
 *
 * The goal is that two runs of the same test — locally or in CI — start the app
 * in byte-identical state: same data, same clock, same locale, no animations and
 * a mocked network layer. All non-determinism (wall clock, RNG, live HTTP, OS
 * animation timing) is pinned through launch arguments that the app reads on boot
 * via `src/utils/e2e/e2eBootstrap.ts`.
 */
export interface E2ELaunchConfig {
  /** Subscriptions to hydrate the store with before the first frame renders. */
  seed?: SeededSubscription[];
  /** Named mock-network scenario; controls deterministic API responses. */
  scenario?: MockNetworkScenarioName;
  /** Fixed epoch millis used as the app clock (defaults to a stable instant). */
  now?: number;
  /** BCP-47 locale; pinned so date/number formatting is reproducible. */
  locale?: string;
  /** IANA timezone; pinned so "today"/billing math is reproducible. */
  timezone?: string;
  /** Disable UI animations to remove frame-timing flakiness. Default: true. */
  disableAnimations?: boolean;
  /** Wipe persisted storage before launch (fully isolated state). Default: true. */
  clean?: boolean;
}

/**
 * A fixed instant used as the default app clock during E2E runs:
 * 2024-01-15T12:00:00.000Z. Billing-date math and "next charge" calculations
 * become deterministic because they no longer depend on the real wall clock.
 */
export const FIXED_NOW_MS = 1705320000000;

/** Every option that has a default, with the default filled in. */
type ResolvedLaunchOptions = Required<Omit<E2ELaunchConfig, 'seed' | 'scenario'>>;

const DEFAULTS: ResolvedLaunchOptions = {
  now: FIXED_NOW_MS,
  locale: 'en-US',
  timezone: 'UTC',
  disableAnimations: true,
  clean: true,
};

/**
 * Fill in defaults field by field. `??` is used instead of object spread so a
 * caller passing `{ locale: undefined }` still gets the default rather than
 * overriding it with `undefined` (which would serialise as the string
 * "undefined").
 */
const resolveLaunchOptions = (config: E2ELaunchConfig): ResolvedLaunchOptions => ({
  now: config.now ?? DEFAULTS.now,
  locale: config.locale ?? DEFAULTS.locale,
  timezone: config.timezone ?? DEFAULTS.timezone,
  disableAnimations: config.disableAnimations ?? DEFAULTS.disableAnimations,
  clean: config.clean ?? DEFAULTS.clean,
});

/**
 * Serialize an {@link E2ELaunchConfig} into Detox `launchArgs`. Complex values
 * are JSON-encoded because Detox only forwards string-ish scalars to the app.
 *
 * `e2eSeed` is only included for a non-empty seed; `e2eMockNetwork` is always
 * `'true'`. `clean` is not a launch argument — it is consumed by `launchApp`.
 */
export const toLaunchArgs = (config: E2ELaunchConfig = {}): Record<string, string> => {
  const resolved = resolveLaunchOptions(config);
  const args: Record<string, string> = {
    e2e: 'true',
    e2eNow: String(resolved.now),
    e2eLocale: resolved.locale,
    e2eTimezone: resolved.timezone,
    e2eDisableAnimations: String(resolved.disableAnimations),
    e2eScenario: config.scenario ?? defaultMockScenario,
    e2eMockNetwork: 'true',
  };
  if (config.seed && config.seed.length > 0) {
    args.e2eSeed = JSON.stringify(config.seed);
  }
  return args;
};

/**
 * Launch the app with a deterministic, hermetic configuration. Replaces ad-hoc
 * `device.launchApp` calls so every test gets identical, isolated startup state.
 *
 * Always starts a new app instance; storage is deleted first unless
 * `config.clean` is `false`.
 */
export const launchApp = async (config: E2ELaunchConfig = {}): Promise<void> => {
  const { clean, locale } = resolveLaunchOptions(config);
  await device.launchApp({
    newInstance: true,
    delete: clean,
    launchArgs: toLaunchArgs(config),
    // Grant permissions up front so no OS dialog can interrupt a test mid-flow.
    permissions: { notifications: 'YES' },
    languageAndLocale: {
      // Language is the primary subtag of the locale, e.g. "en" from "en-US".
      language: locale.split('-')[0],
      locale,
    },
  });
};

// Patch content sharing this source line (start of e2e/helpers/mockServer.ts),
// kept verbatim and commented out:
// diff --git a/e2e/helpers/mockServer.ts b/e2e/helpers/mockServer.ts
// new file mode 100644
// index 00000000..20dacea6
// --- /dev/null
// +++ b/e2e/helpers/mockServer.ts
// @@ -0,0 +1,95 @@
// +/**
// + * Mock network layer contract for E2E tests.
// + *
// + * Live HTTP is the single biggest source of E2E flakiness: rate limits, latency,
// + * and changing upstream data all produce non-reproducible failures. Instead the
// + * app ships an interceptor (`src/services/network/apiClient.ts` +
// + * `src/utils/e2e/e2eBootstrap.ts`) that, when launched with `e2eMockNetwork=true`,
// + * serves responses from a named scenario defined here.
// + *
// + * A "scenario" is a deterministic map of endpoint → canned response.
Tests pick a + * scenario by name through the launch config; the app never touches the network. + */ + +export interface MockResponse { + status: number; + /** JSON body returned verbatim — must be fully deterministic. */ + body: unknown; + /** Optional fixed latency (ms) to exercise loading states without real I/O. */ + delayMs?: number; +} + +export interface MockNetworkScenario { + name: string; + description: string; + /** Keyed by `" "`, e.g. `"GET /v1/exchange-rates"`. */ + routes: Record; +} + +const EXCHANGE_RATES: MockResponse = { + status: 200, + body: { + base: 'USD', + // Frozen rates → currency conversions render identically every run. + rates: { USD: 1, EUR: 0.92, GBP: 0.79, NGN: 1550, JPY: 148.5 }, + asOf: '2024-01-15T12:00:00.000Z', + }, +}; + +const GAS_PRICE_OK: MockResponse = { + status: 200, + body: { chainId: 1, gwei: 21, asOf: '2024-01-15T12:00:00.000Z' }, +}; + +/** Baseline: everything healthy and fast. The default for most tests. */ +const happyPath: MockNetworkScenario = { + name: 'happy-path', + description: 'All upstream services return successful, frozen responses.', + routes: { + 'GET /v1/exchange-rates': EXCHANGE_RATES, + 'GET /v1/gas-price': GAS_PRICE_OK, + 'POST /v1/charges': { status: 201, body: { id: 'chg_seed_1', status: 'succeeded' } }, + }, +}; + +/** Charge endpoint fails deterministically — drives failed-billing UI assertions. */ +const chargeFailure: MockNetworkScenario = { + name: 'charge-failure', + description: 'Charge endpoint returns a deterministic 402 to test failure UI.', + routes: { + 'GET /v1/exchange-rates': EXCHANGE_RATES, + 'GET /v1/gas-price': GAS_PRICE_OK, + 'POST /v1/charges': { + status: 402, + body: { id: 'chg_seed_2', status: 'failed', error: 'insufficient_funds' }, + }, + }, +}; + +/** Slow-but-successful responses — exercises spinners without real latency jitter. 
*/ +const degradedNetwork: MockNetworkScenario = { + name: 'degraded-network', + description: 'Successful responses with a fixed delay to test loading states.', + routes: { + 'GET /v1/exchange-rates': { ...EXCHANGE_RATES, delayMs: 800 }, + 'GET /v1/gas-price': { ...GAS_PRICE_OK, delayMs: 800 }, + 'POST /v1/charges': { + status: 201, + body: { id: 'chg_seed_3', status: 'succeeded' }, + delayMs: 800, + }, + }, +}; + +export const mockScenarios = { + 'happy-path': happyPath, + 'charge-failure': chargeFailure, + 'degraded-network': degradedNetwork, +} as const; + +export type MockNetworkScenarioName = keyof typeof mockScenarios; + +export const defaultMockScenario: MockNetworkScenarioName = 'happy-path'; + +export const getScenario = (name: MockNetworkScenarioName): MockNetworkScenario => + mockScenarios[name]; diff --git a/e2e/helpers/subscriptionFlows.ts b/e2e/helpers/subscriptionFlows.ts index ad2720de..cdad5b14 100644 --- a/e2e/helpers/subscriptionFlows.ts +++ b/e2e/helpers/subscriptionFlows.ts @@ -1,4 +1,6 @@ -import { by, device, element, expect, waitFor } from 'detox'; +import { by, element, expect, waitFor } from 'detox'; +import { E2ELaunchConfig, launchApp } from './launchArgs'; +import { SeededSubscription } from './testData'; const BILLING_LABELS: Record<'monthly' | 'yearly' | 'weekly', string> = { monthly: 'Monthly', @@ -6,8 +8,13 @@ const BILLING_LABELS: Record<'monthly' | 'yearly' | 'weekly', string> = { weekly: 'Weekly', }; -export const launchCleanApp = async () => { - await device.launchApp({ newInstance: true, delete: true }); +/** + * Launch a fully isolated, empty app. Every test calls this in `beforeEach` so + * no state leaks between cases — storage is wiped, the clock/locale are pinned, + * animations are off and the network is mocked. 
+ */ +export const launchCleanApp = async (config: E2ELaunchConfig = {}) => { + await launchApp(config); await waitFor(element(by.id('app-root'))) .toExist() .withTimeout(30000); @@ -16,6 +23,14 @@ export const launchCleanApp = async () => { .withTimeout(30000); }; +/** + * Launch with hermetic seed data already loaded. Faster and more deterministic + * than driving the UI to create fixtures, and keeps each test self-contained. + */ +export const launchSeededApp = async (seed: SeededSubscription[], config: E2ELaunchConfig = {}) => { + await launchCleanApp({ ...config, seed }); +}; + export const createSubscription = async ( name: string, price: string, diff --git a/e2e/helpers/testData.ts b/e2e/helpers/testData.ts new file mode 100644 index 00000000..f4c723f2 --- /dev/null +++ b/e2e/helpers/testData.ts @@ -0,0 +1,66 @@ +/** + * Hermetic test data. + * + * Every field is fixed — IDs, prices, dates — so seeding the same fixture twice + * produces an identical app state. Dates are expressed as absolute ISO strings + * relative to {@link FIXED_NOW_MS} (2024-01-15T12:00:00Z) rather than `Date.now()` + * so they never drift between runs. + */ + +/** Minimal, serializable subscription shape understood by the app's E2E seeder. */ +export interface SeededSubscription { + id: string; + name: string; + price: number; + currency: string; + billingCycle: 'monthly' | 'yearly' | 'weekly'; + category: string; + nextBillingDate: string; // ISO 8601 + isActive: boolean; +} + +/** A single, stable subscription used as the canonical "one item" fixture. */ +export const NETFLIX_FIXTURE: SeededSubscription = { + id: 'seed-netflix', + name: 'Netflix', + price: 15.49, + currency: 'USD', + billingCycle: 'monthly', + category: 'streaming', + nextBillingDate: '2024-02-01T00:00:00.000Z', + isActive: true, +}; + +/** A small, deterministic portfolio for list / analytics screens. 
*/ +export const PORTFOLIO_FIXTURE: SeededSubscription[] = [ + NETFLIX_FIXTURE, + { + id: 'seed-spotify', + name: 'Spotify', + price: 9.99, + currency: 'USD', + billingCycle: 'monthly', + category: 'streaming', + nextBillingDate: '2024-01-20T00:00:00.000Z', + isActive: true, + }, + { + id: 'seed-github', + name: 'GitHub Pro', + price: 48.0, + currency: 'USD', + billingCycle: 'yearly', + category: 'software', + nextBillingDate: '2024-06-01T00:00:00.000Z', + isActive: true, + }, +]; + +/** Named fixtures so tests reference data by intent, not by literal arrays. */ +export const fixtures = { + empty: [] as SeededSubscription[], + single: [NETFLIX_FIXTURE], + portfolio: PORTFOLIO_FIXTURE, +} as const; + +export type FixtureName = keyof typeof fixtures; diff --git a/e2e/helpers/waits.ts b/e2e/helpers/waits.ts new file mode 100644 index 00000000..0a6ef961 --- /dev/null +++ b/e2e/helpers/waits.ts @@ -0,0 +1,59 @@ +import { element, expect, waitFor } from 'detox'; + +/** + * Explicit, expectation-based wait helpers. + * + * RULE: E2E tests must never call `device.sleep(...)` or any fixed timer to + * "give the UI a moment". Fixed sleeps are simultaneously too long (slow CI) and + * too short (flaky on cold machines). Instead we poll an explicit condition until + * it holds or a generous timeout elapses. Detox's synchronization already idles + * on the bridge/animations, so these waits resolve as soon as the app is settled. + */ + +/** Generous default ceiling — reached only on genuine hangs, not normal latency. */ +export const DEFAULT_TIMEOUT = 15000; + +type Matcher = Detox.NativeMatcher; + +const el = (matcher: Matcher) => element(matcher); + +/** Wait until an element is visible (rendered and on-screen). */ +export const waitForVisible = async ( + matcher: Matcher, + timeout = DEFAULT_TIMEOUT +): Promise => { + await waitFor(el(matcher)).toBeVisible().withTimeout(timeout); +}; + +/** Wait until an element exists in the hierarchy (may be off-screen). 
*/ +export const waitForExists = async (matcher: Matcher, timeout = DEFAULT_TIMEOUT): Promise => { + await waitFor(el(matcher)).toExist().withTimeout(timeout); +}; + +/** Wait until an element is gone from the hierarchy (e.g. after navigation). */ +export const waitForGone = async (matcher: Matcher, timeout = DEFAULT_TIMEOUT): Promise => { + await waitFor(el(matcher)).not.toExist().withTimeout(timeout); +}; + +/** Wait until an element carries the expected text — avoids reading stale labels. */ +export const waitForText = async ( + matcher: Matcher, + text: string, + timeout = DEFAULT_TIMEOUT +): Promise => { + await waitFor(el(matcher)).toHaveText(text).withTimeout(timeout); +}; + +/** + * Wait for an element then tap it. Tapping without first waiting is a classic + * race: the node may not yet be hittable. This pairs the wait + action atomically. + */ +export const tapWhenReady = async (matcher: Matcher, timeout = DEFAULT_TIMEOUT): Promise => { + await waitForVisible(matcher, timeout); + await el(matcher).tap(); +}; + +/** Assert visible immediately (no polling) — for post-condition checks. */ +export const expectVisible = async (matcher: Matcher): Promise => { + await expect(el(matcher)).toBeVisible(); +}; diff --git a/e2e/jest.config.js b/e2e/jest.config.js index f860a221..87f97389 100644 --- a/e2e/jest.config.js +++ b/e2e/jest.config.js @@ -6,7 +6,7 @@ module.exports = { maxWorkers: process.env.E2E_MAX_WORKERS ? 
Number(process.env.E2E_MAX_WORKERS) : 2, globalSetup: 'detox/runners/jest/globalSetup', globalTeardown: 'detox/runners/jest/globalTeardown', - reporters: ['detox/runners/jest/reporter'], + reporters: ['detox/runners/jest/reporter', '/e2e/helpers/flakyReporter.js'], testEnvironment: 'detox/runners/jest/testEnvironment', setupFilesAfterEnv: ['/e2e/setup.ts'], verbose: true, diff --git a/e2e/payment.test.ts b/e2e/payment.test.ts index 25367a2d..7aee457d 100644 --- a/e2e/payment.test.ts +++ b/e2e/payment.test.ts @@ -1,17 +1,17 @@ -import { by, element, expect, waitFor } from 'detox'; +import { by } from 'detox'; import { createSubscription, - launchCleanApp, + launchSeededApp, openSubscriptionByName, } from './helpers/subscriptionFlows'; +import { expectVisible, tapWhenReady } from './helpers/waits'; +import { fixtures } from './helpers/testData'; describe('Subscription Charging Flow E2E', () => { - beforeAll(async () => { - await launchCleanApp(); - }); - beforeEach(async () => { - await launchCleanApp(); + // Deterministic charge responses: success then a controlled failure, served + // by the mock network layer rather than a live billing backend. + await launchSeededApp(fixtures.empty, { scenario: 'charge-failure' }); }); it('simulates successful and failed billing events', async () => { @@ -19,16 +19,11 @@ describe('Subscription Charging Flow E2E', () => { await createSubscription(subName, '11.99'); await openSubscriptionByName(subName); - await expect(element(by.id('simulate-charge-success-button'))).toBeVisible(); - await element(by.id('simulate-charge-success-button')).tap(); - - await waitFor(element(by.id('simulate-charge-failed-button'))) - .toBeVisible() - .withTimeout(5000); - await element(by.id('simulate-charge-failed-button')).tap(); + await tapWhenReady(by.id('simulate-charge-success-button')); + await tapWhenReady(by.id('simulate-charge-failed-button')); // Validate action controls still available after charging operations. 
- await expect(element(by.id('cancel-subscription-button'))).toBeVisible(); - await expect(element(by.id('pause-resume-subscription-button'))).toBeVisible(); + await expectVisible(by.id('cancel-subscription-button')); + await expectVisible(by.id('pause-resume-subscription-button')); }); }); diff --git a/e2e/setup.ts b/e2e/setup.ts index ee310b25..08333b2d 100644 --- a/e2e/setup.ts +++ b/e2e/setup.ts @@ -1 +1,17 @@ jest.setTimeout(180000); + +/** + * Flaky-test mitigation: automatically re-run a failed E2E test before declaring + * a failure. A test that only passes on retry is recorded as "flaky" by + * `flakyReporter.js` so flakiness is surfaced and tracked rather than silently + * masked. Retry count is configurable via E2E_RETRIES (default 2). + * + * Note: retries are a safety net, not a substitute for determinism — the helpers + * in this suite (hermetic seeding, explicit waits, mocked network) are what keep + * the retry count at zero in practice. + */ +const retries = process.env.E2E_RETRIES ? Number(process.env.E2E_RETRIES) : 2; + +if (typeof jest.retryTimes === 'function') { + jest.retryTimes(retries, { logErrorsBeforeRetry: true }); +} diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 00000000..96874040 --- /dev/null +++ b/infra/README.md @@ -0,0 +1,29 @@ +# Observability Infrastructure + +Local OpenTelemetry stack for SubTrackr distributed tracing. + +## Components + +- `otel-collector-config.yaml` — OTLP receiver → PII redaction → tail sampling → + Tempo exporter. +- `tempo.yaml` — Grafana Tempo trace storage. +- `docker-compose.observability.yml` — collector + Tempo + Grafana. 
+ +## Usage + +```bash +docker compose -f docker-compose.observability.yml up +``` + +Point every service at the collector: + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +``` + +- OTLP HTTP: `:4318`, gRPC: `:4317` +- Collector health: `:13133` +- Grafana (flame graphs): `http://localhost:3000` → Explore → Tempo + +See [../docs/distributed-tracing.md](../docs/distributed-tracing.md) for the full +propagation contract and per-language usage. diff --git a/infra/docker-compose.observability.yml b/infra/docker-compose.observability.yml new file mode 100644 index 00000000..82cca71d --- /dev/null +++ b/infra/docker-compose.observability.yml @@ -0,0 +1,39 @@ +# Local observability stack for SubTrackr distributed tracing. +# +# docker compose -f infra/docker-compose.observability.yml up +# +# Then point every service at the collector: +# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +# and open Grafana at http://localhost:3000 (Explore → Tempo) for flame graphs. + +services: + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + command: ['--config=/etc/otel-collector-config.yaml'] + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + ports: + - '4318:4318' # OTLP HTTP + - '4317:4317' # OTLP gRPC + - '13133:13133' # health check + depends_on: + - tempo + + tempo: + image: grafana/tempo:latest + command: ['-config.file=/etc/tempo.yaml'] + volumes: + - ./tempo.yaml:/etc/tempo.yaml:ro + ports: + - '3200:3200' # Tempo query + + grafana: + image: grafana/grafana:latest + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor + ports: + - '3000:3000' + depends_on: + - tempo diff --git a/infra/otel-collector-config.yaml b/infra/otel-collector-config.yaml new file mode 100644 index 00000000..57114b91 --- /dev/null +++ b/infra/otel-collector-config.yaml @@ -0,0 +1,68 @@ +# OpenTelemetry Collector configuration for SubTrackr distributed 
tracing. +# +# Receives OTLP spans from every service (mobile app, backend API, ML service, +# webhook producer), batches them, and exports to a trace backend (Tempo) that +# Grafana renders as flame graphs. Sampling is done at the source (head sampling +# in each service); the collector adds tail-based sampling so we always keep +# error and slow traces regardless of the head decision. + +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + timeout: 5s + send_batch_size: 512 + + # Drop/redact attributes that may carry PII before storage. + attributes/redact: + actions: + - key: http.request.header.authorization + action: delete + - key: user.email + action: delete + - key: wallet.address + action: delete + + # Tail sampling: keep all errored or slow (>1s) traces, plus 10% of the rest. + tail_sampling: + decision_wait: 10s + policies: + - name: errors + type: status_code + status_code: + status_codes: [ERROR] + - name: slow + type: latency + latency: + threshold_ms: 1000 + - name: baseline + type: probabilistic + probabilistic: + sampling_percentage: 10 + +exporters: + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + # Useful for local debugging — prints spans to the collector log. + debug: + verbosity: normal + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + +service: + extensions: [health_check] + pipelines: + traces: + receivers: [otlp] + processors: [attributes/redact, tail_sampling, batch] + exporters: [otlp/tempo, debug] diff --git a/infra/tempo.yaml b/infra/tempo.yaml new file mode 100644 index 00000000..79208682 --- /dev/null +++ b/infra/tempo.yaml @@ -0,0 +1,18 @@ +# Minimal Grafana Tempo config for local trace storage. 
+server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +storage: + trace: + backend: local + local: + path: /tmp/tempo/blocks + wal: + path: /tmp/tempo/wal diff --git a/src/services/network/apiClient.ts b/src/services/network/apiClient.ts new file mode 100644 index 00000000..dae62cb0 --- /dev/null +++ b/src/services/network/apiClient.ts @@ -0,0 +1,107 @@ +/** + * Traced HTTP client for the mobile app. + * + * Every request opens a client span and injects a W3C `traceparent` header so the + * backend can continue the same trace — giving an end-to-end view from a user tap + * through API → ML → webhook. The client is a thin wrapper over `fetch` (so the + * E2E mock-network interceptor still applies) and adds timing, status and error + * attributes to the span. Sensitive headers are never recorded. + */ + +import { formatTraceparent, mobileTracer, MobileTracer } from './trace'; + +export interface ApiClientOptions { + baseUrl?: string; + tracer?: MobileTracer; + fetchImpl?: typeof fetch; + /** Default headers merged into every request (e.g. content-type). */ + defaultHeaders?: Record; +} + +export interface ApiRequestOptions { + method?: string; + headers?: Record; + body?: unknown; + /** Logical operation name for the span; defaults to "METHOD path". */ + spanName?: string; +} + +export interface ApiResponse { + status: number; + ok: boolean; + data: T; + traceId: string; +} + +export class ApiClient { + private readonly baseUrl: string; + private readonly tracer: MobileTracer; + private readonly fetchImpl: typeof fetch; + private readonly defaultHeaders: Record; + + constructor(options: ApiClientOptions = {}) { + this.baseUrl = (options.baseUrl ?? process.env.EXPO_PUBLIC_API_BASE_URL ?? '').replace( + /\/$/, + '' + ); + this.tracer = options.tracer ?? mobileTracer; + this.fetchImpl = options.fetchImpl ?? 
fetch; + this.defaultHeaders = { 'Content-Type': 'application/json', ...options.defaultHeaders }; + } + + async request(path: string, options: ApiRequestOptions = {}): Promise> { + const method = (options.method ?? 'GET').toUpperCase(); + const url = path.startsWith('http') ? path : `${this.baseUrl}${path}`; + const span = this.tracer.startClientSpan(options.spanName ?? `${method} ${path}`, { + 'http.method': method, + 'http.url': path, // path only — avoids leaking query-string PII + }); + + // Propagate trace context downstream. + const headers: Record = { + ...this.defaultHeaders, + ...options.headers, + traceparent: formatTraceparent(span.context), + }; + + try { + const response = await this.fetchImpl(url, { + method, + headers, + body: options.body === undefined ? undefined : JSON.stringify(options.body), + }); + + const text = await response.text(); + const data = (text ? JSON.parse(text) : null) as T; + + this.tracer.endSpan(span, response.ok ? 'ok' : 'error', { + 'http.status_code': response.status, + }); + + return { status: response.status, ok: response.ok, data, traceId: span.context.traceId }; + } catch (error) { + this.tracer.endSpan(span, 'error', { + 'error.message': error instanceof Error ? error.message : String(error), + }); + throw error; + } + } + + get( + path: string, + options: Omit = {} + ): Promise> { + return this.request(path, { ...options, method: 'GET' }); + } + + post( + path: string, + body?: unknown, + options: Omit = {} + ): Promise> { + return this.request(path, { ...options, method: 'POST', body }); + } +} + +/** Shared client instance for app code. */ +export const apiClient = new ApiClient(); diff --git a/src/services/network/trace.ts b/src/services/network/trace.ts new file mode 100644 index 00000000..dfdd9f95 --- /dev/null +++ b/src/services/network/trace.ts @@ -0,0 +1,92 @@ +/** + * Lightweight mobile tracing primitives. 
+ * + * The mobile app is a leaf in the distributed trace: it *originates* traces and + * propagates W3C `traceparent` to the backend so a tap-to-response flow can be + * stitched together end-to-end. We keep this tiny and dependency-free (no OTel + * SDK on device) — just enough to generate spec-compliant ids, build the header, + * and buffer client spans for export. + * + * @see https://www.w3.org/TR/trace-context/ + */ + +export interface MobileSpanContext { + traceId: string; // 32 hex + spanId: string; // 16 hex + sampled: boolean; +} + +const hex = (length: number): string => { + const bytes = new Uint8Array(length / 2); + const cryptoObj = (globalThis as unknown as { crypto?: Crypto }).crypto; + if (cryptoObj?.getRandomValues) { + cryptoObj.getRandomValues(bytes); + } else { + // Non-crypto fallback for environments without getRandomValues (tests). + for (let i = 0; i < bytes.length; i += 1) bytes[i] = Math.floor(Math.random() * 256); + } + return Array.from(bytes, (b) => b.toString(16).padStart(2, '0')).join(''); +}; + +export const generateTraceId = (): string => hex(32); +export const generateSpanId = (): string => hex(16); + +export const formatTraceparent = (ctx: MobileSpanContext): string => + `00-${ctx.traceId}-${ctx.spanId}-${ctx.sampled ? '01' : '00'}`; + +export interface MobileSpan { + context: MobileSpanContext; + name: string; + startTime: number; + endTime?: number; + attributes: Record; + status: 'unset' | 'ok' | 'error'; +} + +type SpanSink = (span: MobileSpan) => void; + +/** + * Minimal client tracer. `sampleRatio` controls head sampling; sampled spans are + * handed to an optional sink (wire to an OTLP exporter or the dev console). 
+ */ +export class MobileTracer { + private sink: SpanSink | undefined; + + constructor(private readonly sampleRatio: number = 0.1) {} + + setSink(sink: SpanSink): void { + this.sink = sink; + } + + startClientSpan( + name: string, + attributes: Record = {} + ): MobileSpan { + const traceId = generateTraceId(); + const bucket = parseInt(traceId.slice(0, 8), 16) / 0xffffffff; + return { + context: { traceId, spanId: generateSpanId(), sampled: bucket < this.sampleRatio }, + name, + startTime: Date.now(), + attributes, + status: 'unset', + }; + } + + endSpan( + span: MobileSpan, + status: 'ok' | 'error', + attributes: Record = {} + ): void { + span.endTime = Date.now(); + span.status = status; + Object.assign(span.attributes, attributes); + if (span.context.sampled || status === 'error') { + this.sink?.(span); + } + } +} + +export const mobileTracer = new MobileTracer( + Number(process.env.EXPO_PUBLIC_OTEL_SAMPLE_RATIO ?? '0.1') || 0.1 +); diff --git a/src/utils/e2e/__tests__/launchArgs.test.ts b/src/utils/e2e/__tests__/launchArgs.test.ts new file mode 100644 index 00000000..04234088 --- /dev/null +++ b/src/utils/e2e/__tests__/launchArgs.test.ts @@ -0,0 +1,50 @@ +import { getLaunchArgs, isE2E, __resetLaunchArgsCache } from '../launchArgs'; +import { MOCK_SCENARIOS, DEFAULT_SCENARIO } from '../mockScenarios'; + +describe('e2e launchArgs', () => { + const originalE2E = process.env.E2E; + + afterEach(() => { + if (originalE2E === undefined) { + delete process.env.E2E; + } else { + process.env.E2E = originalE2E; + } + __resetLaunchArgsCache(); + }); + + it('is a no-op outside E2E (no native module, no env flag)', () => { + delete process.env.E2E; + __resetLaunchArgsCache(); + expect(isE2E()).toBe(false); + expect(getLaunchArgs()).toEqual({}); + }); + + it('activates when the E2E env flag is set', () => { + process.env.E2E = 'true'; + __resetLaunchArgsCache(); + expect(isE2E()).toBe(true); + }); + + it('memoizes the resolved args', () => { + process.env.E2E = 'true'; + 
__resetLaunchArgsCache(); + const first = getLaunchArgs(); + const second = getLaunchArgs(); + expect(second).toBe(first); + }); +}); + +describe('e2e mock scenarios', () => { + it('exposes a valid default scenario', () => { + expect(MOCK_SCENARIOS[DEFAULT_SCENARIO]).toBeDefined(); + }); + + it('keys every route as " "', () => { + for (const scenario of Object.values(MOCK_SCENARIOS)) { + for (const key of Object.keys(scenario.routes)) { + expect(key).toMatch(/^(GET|POST|PUT|PATCH|DELETE) \/.+/); + } + } + }); +}); diff --git a/src/utils/e2e/e2eBootstrap.ts b/src/utils/e2e/e2eBootstrap.ts new file mode 100644 index 00000000..2e012667 --- /dev/null +++ b/src/utils/e2e/e2eBootstrap.ts @@ -0,0 +1,122 @@ +import AsyncStorage from '@react-native-async-storage/async-storage'; +import { getLaunchArgs, isE2E } from './launchArgs'; +import { DEFAULT_SCENARIO, MOCK_SCENARIOS, MockResponse } from './mockScenarios'; + +/** + * Hermetic E2E bootstrap. Runs once at app startup *before* the first screen + * renders and is a strict no-op outside E2E. It pins the sources of + * non-determinism that make Detox tests flaky: + * + * 1. Storage — seeds the subscription store from `e2eSeed` so each test + * starts with identical, known data. + * 2. Network — replaces `global.fetch` with a deterministic interceptor that + * answers from a named mock scenario; the app never hits the wire. + * 3. Clock — exposes a fixed "now" on `globalThis.__E2E__` for app code that + * wants reproducible time without monkeypatching Date globally. 
+ */ + +const SUBSCRIPTION_STORAGE_KEY = 'subtrackr-subscriptions'; +const SUBSCRIPTION_STORE_VERSION = 1; + +export interface E2ERuntimeConfig { + now: number; + locale: string; + timezone: string; + scenario: string; + mockNetwork: boolean; + disableAnimations: boolean; +} + +declare global { + // eslint-disable-next-line no-var + var __E2E__: E2ERuntimeConfig | undefined; +} + +const buildConfig = (): E2ERuntimeConfig => { + const args = getLaunchArgs(); + return { + now: args.e2eNow ? Number(args.e2eNow) : Date.now(), + locale: args.e2eLocale ?? 'en-US', + timezone: args.e2eTimezone ?? 'UTC', + scenario: args.e2eScenario ?? DEFAULT_SCENARIO, + mockNetwork: args.e2eMockNetwork === 'true', + disableAnimations: args.e2eDisableAnimations !== 'false', + }; +}; + +const seedSubscriptions = async (rawSeed: string): Promise => { + const seed = JSON.parse(rawSeed) as unknown[]; + // Match the zustand persist envelope so a rehydrate() picks the seed up. + const envelope = JSON.stringify({ + state: { subscriptions: seed }, + version: SUBSCRIPTION_STORE_VERSION, + }); + await AsyncStorage.setItem(SUBSCRIPTION_STORAGE_KEY, envelope); + + try { + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { useSubscriptionStore } = require('../../store/subscriptionStore'); + if (useSubscriptionStore?.persist?.rehydrate) { + await useSubscriptionStore.persist.rehydrate(); + } + } catch { + // Store not available in this context — seeded storage will hydrate normally. + } +}; + +const matchRoute = (method: string, url: string): MockResponse | undefined => { + const scenario = MOCK_SCENARIOS[globalThis.__E2E__?.scenario ?? DEFAULT_SCENARIO]; + if (!scenario) return undefined; + let pathname = url; + try { + pathname = new URL(url).pathname; + } catch { + // Relative URL — keep as-is. 
+ } + return scenario.routes[`${method.toUpperCase()} ${pathname}`]; +}; + +const installFetchInterceptor = (): void => { + const realFetch = globalThis.fetch?.bind(globalThis); + const wait = (ms?: number) => (ms ? new Promise((r) => setTimeout(r, ms)) : Promise.resolve()); + + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + const url = typeof input === 'string' ? input : input.toString(); + const method = (init?.method ?? 'GET').toUpperCase(); + const mock = matchRoute(method, url); + + if (mock) { + await wait(mock.delayMs); + return new Response(JSON.stringify(mock.body), { + status: mock.status, + headers: { 'Content-Type': 'application/json' }, + }); + } + + // Unmapped request in a mocked run: fail loudly and deterministically rather + // than silently leaking to the real network (the prime source of flakiness). + if (realFetch && !globalThis.__E2E__?.mockNetwork) { + return realFetch(input as RequestInfo, init); + } + return new Response(JSON.stringify({ error: 'unmocked_request', method, url }), { + status: 501, + headers: { 'Content-Type': 'application/json' }, + }); + }) as typeof fetch; +}; + +export const applyE2EBootstrap = async (): Promise => { + if (!isE2E()) return; + + const config = buildConfig(); + globalThis.__E2E__ = config; + + if (config.mockNetwork) { + installFetchInterceptor(); + } + + const args = getLaunchArgs(); + if (args.e2eSeed) { + await seedSubscriptions(args.e2eSeed); + } +}; diff --git a/src/utils/e2e/launchArgs.ts b/src/utils/e2e/launchArgs.ts new file mode 100644 index 00000000..431e889e --- /dev/null +++ b/src/utils/e2e/launchArgs.ts @@ -0,0 +1,53 @@ +/** + * App-side reader for Detox launch arguments. + * + * The E2E suite (see `e2e/helpers/launchArgs.ts`) passes a deterministic config + * through `device.launchApp({ launchArgs })`. On a real device those arrive via + * the optional `react-native-launch-arguments` native module. 
Everything here is + * defensive and a strict no-op in production: if the module is missing or no E2E + * flag is set, `isE2E()` returns false and the rest of the app behaves normally. + */ + +export interface E2ELaunchArgs { + e2e?: string; + e2eSeed?: string; + e2eScenario?: string; + e2eNow?: string; + e2eLocale?: string; + e2eTimezone?: string; + e2eDisableAnimations?: string; + e2eMockNetwork?: string; +} + +let cached: E2ELaunchArgs | null = null; + +export const getLaunchArgs = (): E2ELaunchArgs => { + if (cached) return cached; + + let args: E2ELaunchArgs = {}; + try { + // Optional native module — absent in production builds, web and unit tests. + // eslint-disable-next-line @typescript-eslint/no-var-requires + const mod = require('react-native-launch-arguments'); + const LaunchArguments = mod.LaunchArguments ?? mod.default ?? mod; + if (LaunchArguments && typeof LaunchArguments.value === 'function') { + args = (LaunchArguments.value() as E2ELaunchArgs) ?? {}; + } + } catch { + // Module not installed / not a native context — fall through to env. + } + + if (!args.e2e && process.env.E2E === 'true') { + args = { ...args, e2e: 'true' }; + } + + cached = args; + return cached; +}; + +export const isE2E = (): boolean => getLaunchArgs().e2e === 'true'; + +/** Test-only: reset the memoized args (used by unit tests). */ +export const __resetLaunchArgsCache = (): void => { + cached = null; +}; diff --git a/src/utils/e2e/mockScenarios.ts b/src/utils/e2e/mockScenarios.ts new file mode 100644 index 00000000..8f5ba2f0 --- /dev/null +++ b/src/utils/e2e/mockScenarios.ts @@ -0,0 +1,68 @@ +/** + * App-side mirror of the E2E mock-network scenarios defined in + * `e2e/helpers/mockServer.ts`. Kept in sync intentionally: the test side selects + * a scenario *by name*, and this table is what the in-app `fetch` interceptor + * uses to answer requests deterministically. If you add a route in one file, + * add it in the other. 
+ */ + +export interface MockResponse { + status: number; + body: unknown; + delayMs?: number; +} + +export interface MockNetworkScenario { + name: string; + routes: Record; +} + +const EXCHANGE_RATES: MockResponse = { + status: 200, + body: { + base: 'USD', + rates: { USD: 1, EUR: 0.92, GBP: 0.79, NGN: 1550, JPY: 148.5 }, + asOf: '2024-01-15T12:00:00.000Z', + }, +}; + +const GAS_PRICE_OK: MockResponse = { + status: 200, + body: { chainId: 1, gwei: 21, asOf: '2024-01-15T12:00:00.000Z' }, +}; + +export const MOCK_SCENARIOS: Record = { + 'happy-path': { + name: 'happy-path', + routes: { + 'GET /v1/exchange-rates': EXCHANGE_RATES, + 'GET /v1/gas-price': GAS_PRICE_OK, + 'POST /v1/charges': { status: 201, body: { id: 'chg_seed_1', status: 'succeeded' } }, + }, + }, + 'charge-failure': { + name: 'charge-failure', + routes: { + 'GET /v1/exchange-rates': EXCHANGE_RATES, + 'GET /v1/gas-price': GAS_PRICE_OK, + 'POST /v1/charges': { + status: 402, + body: { id: 'chg_seed_2', status: 'failed', error: 'insufficient_funds' }, + }, + }, + }, + 'degraded-network': { + name: 'degraded-network', + routes: { + 'GET /v1/exchange-rates': { ...EXCHANGE_RATES, delayMs: 800 }, + 'GET /v1/gas-price': { ...GAS_PRICE_OK, delayMs: 800 }, + 'POST /v1/charges': { + status: 201, + body: { id: 'chg_seed_3', status: 'succeeded' }, + delayMs: 800, + }, + }, + }, +}; + +export const DEFAULT_SCENARIO = 'happy-path';