diff --git a/.devcontainer/00-blind-by-design_03-expert/devcontainer.json b/.devcontainer/00-blind-by-design_03-expert/devcontainer.json new file mode 100644 index 00000000..e01594be --- /dev/null +++ b/.devcontainer/00-blind-by-design_03-expert/devcontainer.json @@ -0,0 +1,37 @@ +{ + "name": "๐Ÿงช Adventure 00 | ๐Ÿ”ด Expert (Read the chart)", + "dockerComposeFile": "docker-compose.yml", + "service": "workspace", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}/adventures/planned/00-blind-by-design/expert", + "postCreateCommand": "bash /workspaces/${localWorkspaceFolderBasename}/.devcontainer/00-blind-by-design_03-expert/post-create.sh", + "postStartCommand": "bash /workspaces/${localWorkspaceFolderBasename}/.devcontainer/00-blind-by-design_03-expert/post-start.sh", + "customizations": { + "vscode": { + "extensions": [ + "vscjava.vscode-java-pack", + "redhat.vscode-yaml", + "ms-azuretools.vscode-docker" + ] + }, + "codespaces": { + "openFiles": [ + "adventures/planned/00-blind-by-design/docs/expert.md", + "adventures/planned/00-blind-by-design/expert/otel.properties", + "adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/OpenFeatureConfig.java", + "adventures/planned/00-blind-by-design/expert/flags.json" + ] + } + }, + "forwardPorts": [8080, 3000, 4317, 4318, 9090, 3200], + "portsAttributes": { + "8080": { "label": "Spring Boot lab", "onAutoForward": "notify" }, + "3000": { "label": "Grafana", "onAutoForward": "notify" }, + "4317": { "label": "OTLP gRPC", "onAutoForward": "ignore" }, + "4318": { "label": "OTLP HTTP", "onAutoForward": "ignore" }, + "9090": { "label": "Prometheus", "onAutoForward": "ignore" }, + "3200": { "label": "Tempo HTTP API", "onAutoForward": "ignore" } + }, + "otherPortsAttributes": { + "onAutoForward": "ignore" + } +} diff --git a/.devcontainer/00-blind-by-design_03-expert/docker-compose.yml b/.devcontainer/00-blind-by-design_03-expert/docker-compose.yml new file mode 100644 index 
00000000..bbe9a14e --- /dev/null +++ b/.devcontainer/00-blind-by-design_03-expert/docker-compose.yml @@ -0,0 +1,75 @@ +# Multi-container devcontainer for Expert. The lab itself runs in +# `workspace`; flagd, the Grafana LGTM stack, and the k6 loadgen run as +# sibling services. No Docker-in-Docker — the devcontainer attaches to +# `workspace` and the rest of the stack is already up. +# +# Inside `workspace`, services are reachable by service name +# (flagd:8013, lgtm:4317, etc.). FLAGD_HOST and FLAGD_PORT +# are pre-set so the participant does not have to hard-code hostnames. +# Codespaces also forwards the lab and lgtm ports to localhost on the host so verify.sh +# and curl can keep using localhost:NNNN unchanged. + +services: + workspace: + image: mcr.microsoft.com/devcontainers/java:1-21 + volumes: + - ../..:/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}:cached + command: sleep infinity + environment: + - FLAGD_HOST=flagd + - FLAGD_PORT=8013 + # OpenTelemetry Java Agent. post-create.sh downloads the jar; the + # spring-boot-maven-plugin reads OTEL_JAVAAGENT_JAR for its -javaagent JVM argument, + # so only the forked lab JVM is agent-attached (not Maven itself). + # Agent config lives in expert/otel.properties — that's the file + # players edit to toggle the metrics exporter. + - OTEL_JAVAAGENT_JAR=/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}/tools/opentelemetry-javaagent.jar + - OTEL_JAVAAGENT_CONFIGURATION_FILE=/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}/adventures/planned/00-blind-by-design/expert/otel.properties + # Trial country of registration. Read by OpenFeatureConfig via + # System.getenv("COUNTRY") and put on the global eval context. 
+ - COUNTRY=de + + flagd: + image: ghcr.io/open-feature/flagd:v0.15.4 + volumes: + - ../..:/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}:ro + command: + - start + - --uri + - file:/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}/adventures/planned/00-blind-by-design/expert/flags.json + # No `ports:` block โ€” the lab and loadgen reach flagd on the + # docker-internal network as `flagd:8013`. Forwarding the flagd + # ports onto the Codespace host is not needed to play the level. + + lgtm: + image: grafana/otel-lgtm:0.26.0 + ports: + - "3000:3000" # Grafana UI (admin / admin) + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "9090:9090" # Prometheus query API (verify.sh) + - "3200:3200" # Tempo HTTP API (verify.sh) + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + volumes: + - ../..:/workspaces/${localWorkspaceFolderBasename:-open-ecosystem-challenges}:ro + # Dashboard auto-provisioning: otel-lgtm scans /otel-lgtm/grafana/conf/provisioning/dashboards/ + # for provider YAMLs, then loads dashboard JSONs from whatever path + # the provider references. We point at .../custom for both. + - ../../adventures/planned/00-blind-by-design/expert/dashboards/provisioning.yaml:/otel-lgtm/grafana/conf/provisioning/dashboards/fun-with-flags.yaml:ro + - ../../adventures/planned/00-blind-by-design/expert/dashboards:/otel-lgtm/grafana/conf/provisioning/dashboards/custom:ro + + loadgen: + image: grafana/k6:1.7.1 + command: ["run", "--quiet", "/scripts/script.js"] + volumes: + - ../../adventures/planned/00-blind-by-design/expert/loadgen/k6:/scripts:ro + environment: + # The script idles while loadgen_active is "off". Flip it in flags.json + # to start hammering the lab. 
+ - BASE_URL=http://workspace:8080 + - FLAGD_URL=http://flagd:8013 + restart: unless-stopped + depends_on: + - flagd diff --git a/.devcontainer/00-blind-by-design_03-expert/post-create.sh b/.devcontainer/00-blind-by-design_03-expert/post-create.sh new file mode 100755 index 00000000..d2269dfe --- /dev/null +++ b/.devcontainer/00-blind-by-design_03-expert/post-create.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -e + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" + +# shellcheck disable=SC1091 +source "$REPO_ROOT/lib/scripts/tracker.sh" +set_tracking_context "00-blind-by-design" "expert" +track_codespace_created + +# gum is used by the verify.sh / output.sh helpers +"$REPO_ROOT/lib/shared/init.sh" --version v0.17.0 # https://github.com/charmbracelet/gum/releases + +# jq is needed by verify.sh; the Java devcontainer image is debian-based. +if ! command -v jq >/dev/null 2>&1; then + sudo apt-get update -y + sudo apt-get install -y --no-install-recommends jq +fi + +CHALLENGE_DIR="$REPO_ROOT/adventures/planned/00-blind-by-design/expert" + +# Make the Maven wrapper executable so the participant can just `./mvnw ...` +if [[ -f "$CHALLENGE_DIR/mvnw" ]]; then + chmod +x "$CHALLENGE_DIR/mvnw" +fi + +# Download the OpenTelemetry Java Agent. The Spring Boot Maven Plugin +# attaches it via -javaagent (see expert/pom.xml). One jar per Codespace +# โ€” skip if already present so re-runs are cheap. +OTEL_AGENT_VERSION="v2.27.0" +OTEL_AGENT_DIR="$REPO_ROOT/tools" +OTEL_AGENT_JAR="$OTEL_AGENT_DIR/opentelemetry-javaagent.jar" +mkdir -p "$OTEL_AGENT_DIR" +if [[ ! -f "$OTEL_AGENT_JAR" ]]; then + echo "โฌ‡๏ธ Downloading OpenTelemetry Java Agent $OTEL_AGENT_VERSION..." 
+ curl -fsSL \ + -o "$OTEL_AGENT_JAR" \ + "https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/$OTEL_AGENT_VERSION/opentelemetry-javaagent.jar" \ + || echo "โš ๏ธ Failed to fetch the OpenTelemetry Java Agent โ€” traces and metrics will not flow until the jar is present at $OTEL_AGENT_JAR" +fi + +echo "โœจ Pre-warming the Maven dependency cache so the first ./mvnw is fast..." +( cd "$CHALLENGE_DIR" && ./mvnw -q -DskipTests dependency:go-offline ) || \ + echo "โš ๏ธ Dependency pre-warm skipped (network or wrapper not ready yet)" + +echo "โœ… Phase 3 toolchain ready (gum + Java 21). flagd / lgtm / loadgen run as sibling devcontainer services." diff --git a/.devcontainer/00-blind-by-design_03-expert/post-start.sh b/.devcontainer/00-blind-by-design_03-expert/post-start.sh new file mode 100755 index 00000000..5641b129 --- /dev/null +++ b/.devcontainer/00-blind-by-design_03-expert/post-start.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -e + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +CHALLENGE_DIR="$REPO_ROOT/adventures/planned/00-blind-by-design/expert" + +cat </dev/null 2>&1; then + code "$REPO_ROOT/adventures/planned/00-blind-by-design/docs/expert.md" \ + "$CHALLENGE_DIR/otel.properties" \ + "$CHALLENGE_DIR/src/main/java/dev/openfeature/demo/java/demo/OpenFeatureConfig.java" \ + "$CHALLENGE_DIR/flags.json" \ + 2>/dev/null || true +fi diff --git a/.gitignore b/.gitignore index 95ad82f3..ad234894 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,11 @@ venv/ .pytest_cache/ .mypy_cache/ +# Maven build artifacts +target/ + +# OpenTelemetry Java Agent (downloaded per-Codespace by post-create.sh) +tools/ + # Custom ignores/includes .prompts diff --git a/adventures/planned/00-blind-by-design/docs/expert.md b/adventures/planned/00-blind-by-design/docs/expert.md new file mode 100644 index 00000000..b0f9f53a --- /dev/null +++ b/adventures/planned/00-blind-by-design/docs/expert.md @@ -0,0 +1,215 @@ +# ๐Ÿ”ด Expert: Read the chart + +Spans are already flowing into Tempo from the OpenFeature `TracesHook`, but the metrics half is dead โ€” the `MeterProvider` has no exporter and the `MetricsHook` was never registered. The dashboard the operator wants to triage from is empty. The k6 loadgen is idle, waiting for a flag flip to turn it on. + +## ๐Ÿช The Backstory + +The trial just went wide. Phase 3 of the new vision amplifier โ€” `vision_amplifier_v2` โ€” was approved for the full cohort yesterday morning. The promise was straightforward: subjects emerge with sharper eyesight than they walked in with. By mid-afternoon the audit log was screaming. Subjects were stabilising 200ms slower, and roughly one in ten of them was emerging **blind** โ€” containment failure recorded as an HTTP 500. The lab director pulled up the **Feature Flag Metrics** dashboard expecting to triage visually. The dashboard was dark. Someone had wired up traces but never finished the metrics half. There is no chart to read. 
The lab is studying eyesight and the lab itself cannot see. + +Your job, in order: **turn on the lights**, find the bad arm of the trial, and **halt enrolment** on the amplifier โ€” all without redeploying the lab. That last constraint is the whole point of feature flags: when a rollout starts misbehaving in production, you need an operational lever that does not take twenty minutes to pull. Save the file, watch the dose drop, watch the 5xx rate fall back to baseline, watch the next batch of subjects walk out seeing. + +## โฐ Deadline + +> ๐Ÿšง **Coming Soon** โ€” this level is in the planned bucket. Final deadline will be announced when the adventure goes live. + +## ๐Ÿ’ฌ Join the discussion + +> ๐Ÿšง **Coming Soon** โ€” community thread will be linked here at launch. + +## ๐Ÿ—๏ธ Architecture + +Four containers and one Spring Boot process, all on a shared Docker network. + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” OTLP/gRPC :4317 โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Spring Boot โ”‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถ โ”‚ grafana/otel-lgtm โ”‚ +โ”‚ fun-with-flags- โ”‚ flag eval + HTTP โ”‚ - Grafana :3000 โ”‚ +โ”‚ java-spring โ”‚ โ”‚ - Prometheus :9090 โ”‚ +โ”‚ :8080 โ”‚ โ”‚ - Tempo :3200 โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ฒโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ OpenFeature SDK :8013 โ”‚ scrape / pull + โ”‚ (RPC mode) โ”‚ +โ”Œโ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ flagd โ”‚ โ—€โ”€โ”€โ”€โ”€ poll loadgen flag โ”€โ”€โ”‚ k6 loadgen โ”‚ +โ”‚ :8013 (gRPC + HTTP โ”‚ โ”‚ HTTP GET /?userId=โ€ฆ โ”‚ +โ”‚ eval gateway)โ”‚ โ”‚ (the lab interceptor โ”‚ +โ”‚ :8014 management / โ”‚ โ”‚ sets userId as the โ”‚ +โ”‚ metrics โ”‚ โ”‚ targetingKey, which โ”‚ +โ”‚ 
:8015 sync stream โ”‚ โ”‚ is what fractional โ”‚ +โ”‚ :8016 OFREP โ”‚ โ”‚ rollouts bucket on) โ”‚ +โ”‚ flags.json mounted โ”‚ โ”‚ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +## ๐ŸŽฏ Objective + +By the end of this level, the lab hits each of these observable outcomes: + +- **Spans for `fun-with-flags-java-spring` are visible in Tempo** with `feature_flag.context.` attributes โ€” searching `feature_flag.context.dose=underdose` lights up the requests where a tech mis-dosed, with `feature_flag.variant=clouded` on the same span. +- **`feature_flag_evaluation_requests_total` is non-zero in Prometheus** โ€” flag evaluations show up as counters, not just spans. +- **The Feature Flag Metrics dashboard renders.** Variant-distribution, error rate, latency p99 โ€” all populated from the metric counters. +- **The `vision_amplifier_v2` rollout is rolled back to 100% off** โ€” without redeploying the lab. +- **HTTP 5xx rate over the last minute drops below 1%.** The bad arm is contained. 
+ +## ๐Ÿง  What You'll Learn + +- How the OpenFeature OpenTelemetry hooks (`TracesHook` and `MetricsHook`) join + flag evaluations to the rest of an application's telemetry without a + separate ingestion path +- How to **author your own `Hook`** โ€” a tiny class that copies merged-eval-context + attributes onto the active OTel span โ€” to close the loop between *why* a + flag resolved the way it did and *what* the operator sees in Tempo +- How [`fractional`](https://flagd.dev/reference/custom-operations/fractional-operation/) + rollout in flagd buckets users by `targetingKey` โ€” same key, same bucket, every + request โ€” and how to read that bucketing off a dashboard +- How a **flag flip** is a faster operational lever than a redeploy when a + rollout is misbehaving โ€” the difference between a one-line config change and + a twenty-minute deployment + +## ๐Ÿงฐ Toolbox + +Your Codespace comes pre-configured with the following tools: + +- [`curl`](https://curl.se/): HTTP client for hitting the lab, flagd, and Prometheus +- [`./mvnw`](https://maven.apache.org/wrapper/): The Maven wrapper to build and run the Spring Boot lab +- A browser pointed at [`http://localhost:3000`](http://localhost:3000) for Grafana (admin / admin) +- [`jq`](https://jqlang.github.io/jq/): Pretty-print and filter JSON from `curl` + +flagd, the Grafana LGTM stack, and the k6 loadgen are **sibling devcontainer services** โ€” they come up automatically when the Codespace boots. There is no `docker compose up` step. Inside the workspace they are reachable as `flagd`, `lgtm`, and `loadgen`. The Grafana / Prometheus / Tempo / OTLP ports on `lgtm` are also forwarded onto the Codespace host so you can click them in the Ports tab; flagd stays on the docker-internal network only. + +## โœ… How to Play + +### 1. 
Start Your Challenge + +> ๐Ÿ“– **First time?** Check out the [Getting Started Guide](../../start-a-challenge) +> for detailed instructions on forking, starting a Codespace, and waiting for +> infrastructure setup. + +Quick start: + +- Fork the repo +- Create a Codespace +- Select **"Adventure 00 | ๐Ÿ”ด Expert (Read the chart)"** +- Wait ~2-3 minutes for the sibling containers (flagd, Grafana LGTM, k6 + loadgen) to come up. They are part of the devcontainer compose, so they + start automatically โ€” no `docker compose up` step. + +### 2. Start the Lab + +The sibling containers (flagd, the LGTM stack, the k6 loadgen) are already up โ€” the Spring Boot lab itself isn't. Boot it before you click into the Ports tab so the forwarded `:8080` is actually serving. Either click **Run** on `Laboratory` in the Spring Boot Dashboard panel (or press **F5** with `Laboratory.java` open), or, from the terminal: + +```bash +./mvnw spring-boot:run +``` + +Spans start flowing into Tempo on the first request โ€” the OpenTelemetry trace pipeline is already wired. The metrics half is dead (task 4a) so the Grafana dashboard panels stay empty until you fix it. + +### 3. Access the UIs + +Open the **Ports** tab in the bottom panel and click through to: + +#### Spring Boot lab (Port `8080`) + +The application under test. Open `http://localhost:8080/` to get a vision_state reading +back. Add a `userId` query parameter (e.g. `?userId=subject-42`) to give the +fractional rollout a stable bucketing key. + +#### Grafana (Port `3000`) + +The single window into the LGTM stack. Login is `admin` / `admin` (skip the +"change your password" prompt). + +- **Dashboards โ†’ Fun With Flags โ€” Feature Flag Metrics** โ€” the dashboard the + director keeps reloading. Empty for now. +- **Explore โ†’ Tempo** โ€” search by service `fun-with-flags-java-spring` + to see flag evaluations as span events nested inside HTTP request spans. + Traces work even before you wire up metrics. 
+ +#### Prometheus (Port `9090`) + +Exposed by the LGTM container. Useful for `curl`-driven debugging: +`curl 'http://localhost:9090/api/v1/query?query=feature_flag_evaluation_requests_total'`. + +#### Tempo (Port `3200`) + +Tempo's own HTTP API. The `verify.sh` script uses +`http://localhost:3200/api/search?tags=service.name=fun-with-flags-java-spring` +to assert traces are flowing. + +#### flagd + +flagd runs on the docker-internal network only. The lab and the loadgen reach it as `flagd:8013`; you don't need to forward its ports onto the Codespace host to play this level. (`verify.sh` runs inside the workspace container so it can reach `flagd:8013` directly.) + +#### OTLP receivers (Ports `4317` / `4318`) + +The Spring Boot app exports traces (and, after you finish the wiring, metrics) +to the LGTM stack on `4317` (gRPC) and `4318` (HTTP). + +### 4. Implement the Objective + +Four sub-tasks, in order: wire the meter provider, register the matching `MetricsHook`, write your own `ContextSpanHook` to enrich spans with the flag-decision context, then turn on the loadgen so you can find and roll back the misbehaving fractional rollout. + +#### 4a. Turn on the metrics exporter + +OTel ships two parallel pipelines: **traces** (per-request spans, already flowing into Tempo) and **metrics** (aggregate counters, dead). The OpenTelemetry Java Agent attached to the lab JVM has both pipelines plumbed and pointed at the LGTM stack, but its config says `otel.metrics.exporter=none` โ€” anything the meter records goes nowhere. Flip the exporter on and the OpenFeature `MetricsHook` (next step) finds the working meter provider through `GlobalOpenTelemetry` without any further plumbing. + +`otel.properties` (next to `pom.xml`) is what the agent reads on startup. While you're there, look at the export interval โ€” the agent's default makes the next ten minutes harder than they need to be. + +#### 4b. 
Register `MetricsHook` on the OpenFeature API + +The OpenFeature OTel contrib library ships two hooks that turn flag evaluations into telemetry: **`TracesHook`** emits a span event on the active span (that's why flag evaluations show up nested inside HTTP request spans in Tempo); **`MetricsHook`** emits four counters per evaluation — `feature_flag_evaluation_requests_total` and friends — that power the dashboard panels. + +`OpenFeatureConfig.java` registers `TracesHook` but stops there. `MetricsHook` needs an `OpenTelemetry` handle to find the meter provider — the agent installs one globally at JVM start, so `GlobalOpenTelemetry.get()` is the way to reach it. Even once `MetricsHook` is registered, the **Fun With Flags — Feature Flag Metrics** dashboard stays empty until something drives traffic — that's step 4d. + +#### 4c. Author and register your own `ContextSpanHook` + +The two contrib hooks tell you *what* happened — which flag, which variant, which reason. The `AuditHook` shipped with this level (carried over from Intermediate) writes the durable archive view to disk. What's missing is the **on-call's view in Tempo**: when a span shows `feature_flag.variant=clouded`, the operator can't see *why* without a separate hop into the audit log. Write a third hook that copies the merged eval context attributes onto the active OTel span as `feature_flag.context.<key>` — same data the audit log records, but visible right next to the variant in the trace UI. + +The shape is roughly: + +```text +before(hookCtx) { + span = active OTel span + for each allowlisted key in merged eval context: + span.setAttribute("feature_flag.context." + key, value) +} +``` + +The `before` callback receives a `HookContext`, and `getCtx()` returns the **merged** evaluation context (global + transaction + invocation) — exactly what drove the flag's resolution. Span attributes go on the currently active span; the OpenFeature hook fires inside its scope. 
Register it alongside `TracesHook` / `MetricsHook` in `OpenFeatureConfig`. The verifier searches Tempo for `feature_flag.context.dose=underdose` once you're done — that's the smoke signal. + +> ⚠️ **Allowlist, don't iterate.** Use a fixed allowlist (`List.of("species", "country", "dose")`) — never iterate the whole eval context. The merged context routinely carries the OpenFeature `targetingKey`, typically a stable user id that joins to email and account data in real apps. Span attributes are retained for days in Tempo and indexed at scale; once they ship, redacting after the fact is hard. Same discipline `AuditHook` already follows for the audit log, same reason. See [OpenTelemetry's security guidance](https://opentelemetry.io/docs/security/). + +#### 4d. Turn on the loadgen, find the bad rollout, roll it back + +`fractional` is flagd's bucketing operation: given a list of `[variant, percent]` pairs, it deterministically assigns each evaluation to a variant based on a hash of the **`targetingKey`** on the eval context. Same key → same bucket → same variant. Different keys spread across the percentages. **If no targeting key is set, every evaluation hashes the same way, every request lands in the same bucket, and the percentages do nothing.** The `SpeciesInterceptor` shipped with this level reads `?userId=` and threads it through as the targetingKey — the lab is already serving fractional rollouts correctly without you touching it. + +`flags.json` in the expert directory has a `loadgen_active` flag (off) and the misbehaving `vision_amplifier_v2` flag. flagd watches the file and picks up changes within a second; the k6 loadgen polls `loadgen_active` every two seconds, so flipping it turns on five virtual users hammering the lab. When the loadgen turns on, latency p99 should climb by around 200ms and the 5xx rate to around 10% — confirmation that something is firing. The dashboard's variant-distribution panel tells you which variant is behind it. 
Roll the offender back via the flag definition, watch the dashboard recover. + +**No deploy. No rebuild. No restart of the lab.** + +#### Helpful Documentation + +- [OpenFeature OTel contrib hooks (Java)](https://github.com/open-feature/java-sdk-contrib/tree/main/hooks/open-telemetry) โ€” where `TracesHook` and `MetricsHook` live, with constructor signatures +- [OpenTelemetry Java Agent โ€” agent configuration](https://opentelemetry.io/docs/zero-code/java/agent/configuration/) โ€” every `otel.*` key the agent honors, including exporter and batch-interval knobs +- [OpenFeature Hooks concept](https://openfeature.dev/docs/reference/concepts/hooks) โ€” the `before` / `after` / `error` / `finallyAfter` lifecycle for authoring your own hook +- [flagd `fractional` operation](https://flagd.dev/reference/custom-operations/fractional-operation/) โ€” the bucketing rule and how it reads the targetingKey +- [OpenTelemetry security guidance](https://opentelemetry.io/docs/security/) โ€” why allowlists on span attributes matter at SIEM scale + +### 5. Verify Your Solution + +Once you think you've solved the challenge, run the verification script: + +```bash +./verify.sh +``` + +**If the verification fails:** + +The script will tell you which checks failed. Fix the issues and run it again. + +**If the verification passes:** + +1. The script will check if your changes are committed and pushed. +2. Follow the on-screen instructions to commit your changes if needed. +3. Once everything is ready, the script will generate a **Certificate of Completion**. +4. **Copy this certificate** and paste it into the [challenge thread](https://community.open-ecosystem.com/c/open-ecosystem-challenges/) to claim your victory! 
๐Ÿ† diff --git a/adventures/planned/00-blind-by-design/expert/.mvn/wrapper/maven-wrapper.properties b/adventures/planned/00-blind-by-design/expert/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 00000000..3ee7848f --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1 @@ +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.15/apache-maven-3.9.15-bin.zip diff --git a/adventures/planned/00-blind-by-design/expert/.vscode/launch.json b/adventures/planned/00-blind-by-design/expert/.vscode/launch.json new file mode 100644 index 00000000..f3151f97 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "java", + "name": "๐Ÿงช Run the Lab", + "request": "launch", + "mainClass": "dev.openfeature.demo.java.demo.Laboratory", + "projectName": "demo", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "vmArgs": "-javaagent:${env:OTEL_JAVAAGENT_JAR}" + } + ] +} diff --git a/adventures/planned/00-blind-by-design/expert/.vscode/tasks.json b/adventures/planned/00-blind-by-design/expert/.vscode/tasks.json new file mode 100644 index 00000000..1d483f30 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/.vscode/tasks.json @@ -0,0 +1,14 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "๐Ÿงช Verify Solution", + "type": "shell", + "command": "./verify.sh", + "options": { "cwd": "${workspaceFolder}" }, + "problemMatcher": [], + "presentation": { "reveal": "always", "panel": "dedicated" }, + "group": { "kind": "test", "isDefault": true } + } + ] +} diff --git a/adventures/planned/00-blind-by-design/expert/Makefile b/adventures/planned/00-blind-by-design/expert/Makefile new file mode 100644 index 00000000..eed6032a --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/Makefile @@ -0,0 +1,36 @@ +# 
============================================================================ +# Makefile for Blind by Design - Expert Level: Read the chart +# ============================================================================ +# This Makefile provides convenient commands for running the Spring Boot lab +# alongside the Grafana LGTM stack and verifying your solution. +# ============================================================================ + +.PHONY: help lab probe verify + +# Default target - show help +help: + @echo "Blind by Design - Expert Level: Read the chart" + @echo "" + @echo "Application:" + @echo " make lab - Start the Spring Boot lab on :8080" + @echo " make probe - Hit the lab as a sample subject and pretty-print the response" + @echo "" + @echo "Verification:" + @echo " make verify - Run verification checks (lab + flagd + LGTM + dashboard + 5xx-rate)" + +# ---------------------------------------------------------------------------- +# Application Targets +# ---------------------------------------------------------------------------- + +lab: + @./mvnw spring-boot:run + +probe: + @curl -s 'http://localhost:8080/?userId=subject-42' | jq + +# ---------------------------------------------------------------------------- +# Verification Targets +# ---------------------------------------------------------------------------- + +verify: + @./verify.sh diff --git a/adventures/planned/00-blind-by-design/expert/dashboards/feature-flags.json b/adventures/planned/00-blind-by-design/expert/dashboards/feature-flags.json new file mode 100644 index 00000000..a293ce92 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/dashboards/feature-flags.json @@ -0,0 +1,135 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": 
"palette-classic" }, "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 10 }, "unit": "ops" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (feature_flag_key) (rate(feature_flag_evaluation_requests_total[1m]))", + "legendFormat": "{{feature_flag_key}}", + "refId": "A" + } + ], + "title": "Flag evaluations per second (by flag)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "options": { "legend": { "displayMode": "table", "placement": "right" }, "pieType": "donut", "reduceOptions": { "calcs": ["lastNotNull"], "values": false } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (feature_flag_variant) (increase(feature_flag_evaluation_success_total[5m]))", + "legendFormat": "{{feature_flag_variant}}", + "refId": "A" + } + ], + "title": "Variant distribution (last 5m)", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 0.01 } ] }, "unit": "ops" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 3, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (feature_flag_key, error_type) (rate(feature_flag_evaluation_error_total[1m]))", + 
"legendFormat": "{{feature_flag_key}} ({{error_type}})", + "refId": "A" + } + ], + "title": "Evaluation errors per second", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 4, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (service_name) (rate(feature_flag_evaluation_requests_total[1m]))", + "legendFormat": "{{service_name}}", + "refId": "A" + } + ], + "title": "Evaluations per service (rate)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "description": "Step 7 โ€” HTTP request latency p99 from OTel auto-instrumentation. Watch this rise when a slow rollout cohort gets bigger.", + "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "drawStyle": "line", "lineWidth": 2, "fillOpacity": 5 }, "unit": "s" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "id": 5, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.99, sum by (le, service_name) (rate(http_server_request_duration_seconds_bucket[1m])))", + "legendFormat": "p99 {{service_name}}", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum by (le, service_name) (rate(http_server_request_duration_seconds_bucket[1m])))", + "legendFormat": "p50 {{service_name}}", + "refId": "B" + } + ], + "title": "HTTP request latency (p50, p99)", + "type": "timeseries" + }, + { + "datasource": { "type": 
"prometheus", "uid": "prometheus" }, + "description": "Step 7 โ€” HTTP 5xx rate. Watch this jump when the new code path's error injection kicks in.", + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "orange", "value": 0.1 }, { "color": "red", "value": 1 } ] }, "unit": "ops" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "id": 6, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (service_name) (rate(http_server_request_duration_seconds_count{http_response_status_code=~\"5..\"}[1m]))", + "legendFormat": "{{service_name}}", + "refId": "A" + } + ], + "title": "HTTP 5xx per second", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["openfeature", "feature-flags"], + "templating": { "list": [] }, + "time": { "from": "now-15m", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Fun With Flags โ€” Feature Flag Metrics", + "uid": "fun-with-flags-metrics", + "version": 1, + "weekStart": "" +} diff --git a/adventures/planned/00-blind-by-design/expert/dashboards/provisioning.yaml b/adventures/planned/00-blind-by-design/expert/dashboards/provisioning.yaml new file mode 100644 index 00000000..5f587efb --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/dashboards/provisioning.yaml @@ -0,0 +1,10 @@ +apiVersion: 1 +providers: + - name: 'Fun With Flags' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /otel-lgtm/grafana/conf/provisioning/dashboards/custom + foldersFromFilesStructure: false diff --git a/adventures/planned/00-blind-by-design/expert/flags.json b/adventures/planned/00-blind-by-design/expert/flags.json new file mode 100644 index 
00000000..4ccfb246 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/flags.json @@ -0,0 +1,46 @@ +{ + "flags": { + "vision_state": { + "state": "ENABLED", + "variants": { + "enhanced": "enhanced", + "sharp": "sharp", + "blurry": "blurry", + "clouded": "clouded" + }, + "defaultVariant": "blurry", + "targeting": { + "if": [ + { "===": [{ "var": "species" }, "zyklop"] }, + "enhanced", + { "in": [{ "var": "dose" }, ["underdose", "overdose"]] }, + "clouded", + { "===": [{ "var": "country" }, "de"] }, + "sharp" + ] + } + }, + "vision_amplifier_v2": { + "state": "ENABLED", + "variants": { + "off": false, + "on": true + }, + "defaultVariant": "off", + "targeting": { + "fractional": [ + ["off", 0], + ["on", 100] + ] + } + }, + "loadgen_active": { + "state": "ENABLED", + "variants": { + "off": false, + "on": true + }, + "defaultVariant": "off" + } + } +} diff --git a/adventures/planned/00-blind-by-design/expert/loadgen/k6/script.js b/adventures/planned/00-blind-by-design/expert/loadgen/k6/script.js new file mode 100644 index 00000000..bd648a77 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/loadgen/k6/script.js @@ -0,0 +1,63 @@ +// k6 script that hits the demo's GET / with random species values, but only +// when the OpenFeature flag `loadgen_active` is true. Flip the flag in the +// running flagd's flags.json (defaultVariant: "off" โ†’ "on") and the script +// starts hammering within seconds. Flip it back and it goes idle. +// +// The script targets one app instance via BASE_URL โ€” point it at :8080 of +// whichever folder you're running. FLAGD_URL is flagd's eval endpoint on +// :8013 (the gRPC port also serves HTTP/JSON via gRPC-Gateway, so a plain +// curl-style POST works against the same port the SDK uses). 
+ +import http from 'k6/http'; +import { sleep } from 'k6'; + +export const options = { + vus: 5, // five virtual users; modest load, dashboard stays readable + duration: '24h', // run forever โ€” toggle the flag to start/stop traffic +}; + +const BASE_URL = __ENV.BASE_URL || 'http://host.docker.internal:8080'; +const FLAGD_URL = __ENV.FLAGD_URL || 'http://host.docker.internal:8013'; + +// Pool of subject species. Empty string means "no query parameter" โ€” exercises +// the country-fallback or default branch. The mix is deliberately uneven so the +// variant distribution panel in Grafana looks like real traffic, not a flat split. +const SPECIES = ['zyklop', 'zyklop', 'human', 'human', 'human', 'orc', 'elf', 'goblin', '']; + +// Generate a random user id per request. The Phase 3 `vision_amplifier_v2` flag +// uses a fractional rollout that buckets on the OpenFeature targetingKey, so +// without a stable per-request id every request would land in the same bucket. +function randomUserId() { + return `user-${Math.floor(Math.random() * 100000)}`; +} + +function isLoadgenActive() { + const res = http.post( + `${FLAGD_URL}/flagd.evaluation.v1.Service/ResolveBoolean`, + JSON.stringify({ flagKey: 'loadgen_active', context: {} }), + { headers: { 'Content-Type': 'application/json' }, timeout: '2s' }, + ); + if (res.status !== 200) return false; + try { + return JSON.parse(res.body).value === true; + } catch { + return false; + } +} + +export default function () { + if (!isLoadgenActive()) { + // Flag is off โ€” idle gently. Two seconds is short enough to feel responsive + // when the flag flips on, long enough not to thrash flagd. 
+ sleep(2); + return; + } + + const species = SPECIES[Math.floor(Math.random() * SPECIES.length)]; + const userId = randomUserId(); + const params = [`userId=${userId}`]; + if (species) params.push(`species=${species}`); + const url = `${BASE_URL}/?${params.join('&')}`; + http.get(url, { tags: { species: species || 'default' } }); + sleep(0.1); +} diff --git a/adventures/planned/00-blind-by-design/expert/mvnw b/adventures/planned/00-blind-by-design/expert/mvnw new file mode 100755 index 00000000..9b14e061 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/mvnw @@ -0,0 +1,259 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.3.4 +# +# Optional ENV vars +# ----------------- +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output +# ---------------------------------------------------------------------------- + +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x + +# OS specific support. +native_path() { printf %s\\n "$1"; } +case "$(uname)" in +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; +esac + +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" + else + JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi + fi + else + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : + + if [ ! -x "${JAVACMD-}" ] || [ ! 
-x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi + fi +} + +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" + done + printf %x\\n $h +} + +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 +} + +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. + printf "%s" "${1}" | tr -d '[:space:]' +} + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh 
_MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" +} + +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" +fi + +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac + +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT +else + die "cannot create temp dir" +fi + +mkdir -p -- "${MAVEN_HOME%/*}" + +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +# select .zip or .tar.gz +if ! 
command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" +fi + +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." 
+ "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + fi + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." 
>&2 + exit 1 + fi +fi + +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" +fi +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" + +clean || : +exec_maven "$@" diff --git a/adventures/planned/00-blind-by-design/expert/mvnw.cmd b/adventures/planned/00-blind-by-design/expert/mvnw.cmd new file mode 100644 index 00000000..155e00b9 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/mvnw.cmd @@ -0,0 +1,149 @@ +<# : batch portion +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. 
+@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.3.4 +@REM +@REM Optional ENV vars +@REM MVNW_REPOURL - repo url base for downloading maven distribution +@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output +@REM ---------------------------------------------------------------------------- + +@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) +@SET __MVNW_CMD__= +@SET __MVNW_ERROR__= +@SET __MVNW_PSMODULEP_SAVE=%PSModulePath% +@SET PSModulePath= +@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( + IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) +) +@SET PSModulePath=%__MVNW_PSMODULEP_SAVE% +@SET __MVNW_PSMODULEP_SAVE= +@SET __MVNW_ARG0_NAME__= +@SET MVNW_USERNAME= +@SET MVNW_PASSWORD= +@IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) +@echo Cannot start maven from wrapper >&2 && exit /b 1 +@GOTO :EOF +: end batch / begin powershell #> + +$ErrorActionPreference = "Stop" +if ($env:MVNW_VERBOSE -eq "true") { + $VerbosePreference = "Continue" +} + +# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties +$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl +if (!$distributionUrl) { + Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" +} + +switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { + "maven-mvnd-*" { + $USE_MVND = $true + $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" + 
$MVN_CMD = "mvnd.cmd" + break + } + default { + $USE_MVND = $false + $MVN_CMD = $script -replace '^mvnw','mvn' + break + } +} + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +if ($env:MVNW_REPOURL) { + $MVNW_REPO_PATTERN = if ($USE_MVND) { "/org/apache/maven/" } else { "/maven/mvnd/" } + $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace '^.*'+$MVNW_REPO_PATTERN,'')" +} +$distributionUrlName = $distributionUrl -replace '^.*/','' +$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' +$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" +if ($env:MAVEN_USER_HOME) { + $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" +} +$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' +$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" + +if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { + Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" + Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" + exit $? +} + +if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { + Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" +} + +# prepare tmp dir +$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile +$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" +$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null +trap { + if ($TMP_DOWNLOAD_DIR.Exists) { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } + } +} + +New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null + +# Download and Install Apache Maven +Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 
+Write-Verbose "Downloading from: $distributionUrl" +Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +$webclient = New-Object System.Net.WebClient +if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { + $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) +} +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum +if ($distributionSha256Sum) { + if ($USE_MVND) { + Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." + } + Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash + if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { + Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." + } +} + +# unzip and move +Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null +Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null +try { + Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null +} catch { + if (! 
(Test-Path -Path "$MAVEN_HOME" -PathType Container)) { + Write-Error "fail to move MAVEN_HOME" + } +} finally { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } +} + +Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" diff --git a/adventures/planned/00-blind-by-design/expert/otel.properties b/adventures/planned/00-blind-by-design/expert/otel.properties new file mode 100644 index 00000000..694e36b0 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/otel.properties @@ -0,0 +1,26 @@ +# OpenTelemetry Java Agent configuration. +# +# Read by the agent on JVM startup (via OTEL_JAVAAGENT_CONFIGURATION_FILE +# in docker-compose.yml). Edit + restart the lab to apply. +# +# Reference: https://opentelemetry.io/docs/zero-code/java/agent/configuration/ + +otel.service.name=fun-with-flags-java-spring +otel.exporter.otlp.endpoint=http://lgtm:4317 +otel.exporter.otlp.protocol=grpc + +# Traces flow into Tempo. Auto-instrumentation wraps every HTTP request +# in a server span, so the OpenFeature TracesHook + ContextSpanHook +# (once you write it) have an active span to attach to. +otel.traces.exporter=otlp + +# TODO Phase 3 task: flip from "none" to "otlp" so flag-evaluation +# metrics start exporting to the LGTM stack and the Feature Flag Metrics +# dashboard renders. +otel.metrics.exporter=none + +# Once metrics are on, set a short export interval so the dashboard +# updates within ten seconds of new traffic instead of waiting a minute. 
+otel.metric.export.interval=10000 + +otel.logs.exporter=none diff --git a/adventures/planned/00-blind-by-design/expert/pom.xml b/adventures/planned/00-blind-by-design/expert/pom.xml new file mode 100644 index 00000000..67515fad --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/pom.xml @@ -0,0 +1,104 @@ + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 4.0.6 + + + dev.openfeature.demo.java + demo + 0.0.1-SNAPSHOT + demo + Blind by Design - Expert: pharma trial dispenser + + 21 + 1.48.0 + 2.14.0 + + + + + io.opentelemetry + opentelemetry-bom + ${opentelemetry.version} + pom + import + + + io.opentelemetry.instrumentation + opentelemetry-instrumentation-bom + ${opentelemetry.instrumentation.version} + pom + import + + + + + + org.springframework.boot + spring-boot-starter-actuator + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-test + test + + + + + dev.openfeature + sdk + 1.14.2 + + + dev.openfeature.contrib.providers + flagd + 0.11.8 + + + + + dev.openfeature.contrib.hooks + otel + 3.2.1 + + + + + io.opentelemetry + opentelemetry-api + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + -javaagent:${env.OTEL_JAVAAGENT_JAR} + + + + + + diff --git a/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/AuditHook.java b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/AuditHook.java new file mode 100644 index 00000000..ad1ce2a4 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/AuditHook.java @@ -0,0 +1,53 @@ +package dev.openfeature.demo.java.demo; + +import dev.openfeature.sdk.EvaluationContext; +import dev.openfeature.sdk.FlagEvaluationDetails; +import dev.openfeature.sdk.Hook; +import dev.openfeature.sdk.HookContext; +import dev.openfeature.sdk.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + 
+import java.util.List; +import java.util.Map; + +/** + * Audit-log hook carried over from the Intermediate level. Writes one line + * per evaluation tagged {@code [AUDIT]}, with the cohort attributes the lab + * director cares about. Variants of {@code clouded} log at {@code WARN} so + * the safety officer can grep for improper-dosing follow-ups. + * + *

This is the durable, weeks-from-now archive view. The Phase 3 task adds + * a {@code ContextSpanHook} for real-time correlation in Tempo — both hooks + * stay registered, they just serve different downstreams.

+ */ +public class AuditHook implements Hook { + + private static final Logger LOG = LoggerFactory.getLogger(AuditHook.class); + + /** Allowlist of context attributes safe to drop into the audit log. */ + private static final List AUDITED = List.of("species", "country", "dose"); + + @Override + public void after(HookContext ctx, FlagEvaluationDetails details, Map hints) { + StringBuilder ctxLine = new StringBuilder(); + EvaluationContext ec = ctx.getCtx(); + for (String key : AUDITED) { + Value v = ec != null ? ec.getValue(key) : null; + ctxLine.append(' ').append(key).append('=').append(v != null ? v.asString() : "(absent)"); + } + String message = String.format("[AUDIT] flag=%s variant=%s reason=%s%s", + ctx.getFlagKey(), details.getVariant(), details.getReason(), ctxLine); + + if ("clouded".equals(details.getVariant())) { + LOG.warn("{} -- improper dosing or off-protocol cohort, follow-up required", message); + } else { + LOG.info("{}", message); + } + } + + @Override + public void error(HookContext ctx, Exception err, Map hints) { + LOG.warn("[AUDIT] flag evaluation error flag={} err={}", ctx.getFlagKey(), err.toString()); + } +} diff --git a/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Laboratory.java b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Laboratory.java new file mode 100644 index 00000000..33c27c39 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Laboratory.java @@ -0,0 +1,13 @@ +package dev.openfeature.demo.java.demo; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +@SpringBootApplication +public class Laboratory { + + public static void main(String[] args) { + SpringApplication.run(Laboratory.class, args); + } + +} diff --git 
a/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/OpenFeatureConfig.java b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/OpenFeatureConfig.java new file mode 100644 index 00000000..1c341895 --- /dev/null +++ b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/OpenFeatureConfig.java @@ -0,0 +1,80 @@ +package dev.openfeature.demo.java.demo; + +import dev.openfeature.contrib.hooks.otel.TracesHook; +import dev.openfeature.contrib.providers.flagd.Config; +import dev.openfeature.contrib.providers.flagd.FlagdOptions; +import dev.openfeature.contrib.providers.flagd.FlagdProvider; +import dev.openfeature.sdk.ImmutableContext; +import dev.openfeature.sdk.OpenFeatureAPI; +import dev.openfeature.sdk.Value; +import jakarta.annotation.PostConstruct; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.InterceptorRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +import java.util.HashMap; +import java.util.Optional; + +/** + * Wires the OpenFeature client to a remote flagd container ({@code Resolver.RPC}) + * and registers the cross-cutting hooks. + * + *

OpenTelemetry SDK setup is provided by the OpenTelemetry Java Agent + * (attached via {@code -javaagent} — see {@code pom.xml} and {@code otel.properties}). + * The agent installs the global {@link io.opentelemetry.api.OpenTelemetry} instance + * before {@code main()} runs, so {@link io.opentelemetry.api.GlobalOpenTelemetry#get()} + * returns a working SDK throughout this class.

+ * + *

Half-wired on purpose: the {@link TracesHook} is registered, so flag + * evaluations show up as span events in Tempo. The matching + * {@code MetricsHook} is NOT registered — until it is, the "Fun With Flags" + * dashboard panels in Grafana stay dark.

+ */
+@Configuration
+public class OpenFeatureConfig implements WebMvcConfigurer {
+
+    @PostConstruct
+    public void initProvider() {
+        OpenFeatureAPI api = OpenFeatureAPI.getInstance();
+        FlagdOptions flagdOptions = FlagdOptions.builder()
+                .resolverType(Config.Resolver.RPC)
+                .build();
+
+        api.setProviderAndWait(new FlagdProvider(flagdOptions));
+
+        String country = Optional.ofNullable(System.getenv("COUNTRY")).orElse(""); // unset COUNTRY -> "", never null
+        HashMap<String, Value> attributes = new HashMap<>();
+        attributes.put("country", new Value(country));
+        ImmutableContext evaluationContext = new ImmutableContext(attributes);
+        api.setEvaluationContext(evaluationContext);
+
+        api.addHooks(new AuditHook());
+        api.addHooks(new TracesHook());
+        // TODO Phase 3 task #1: register the matching MetricsHook here. Grab
+        // the OTel handle the agent installed via GlobalOpenTelemetry.get()
+        // — the agent already wired the SDK and exporter before main() ran,
+        // but the metrics pipeline stays inert until you also turn on the
+        // metrics exporter in otel.properties (next to pom.xml).
+        //
+        // TODO Phase 3 task #2: write a small ContextSpanHook that copies the
+        // merged evaluation context attributes (species, country, dose) onto the
+        // active OpenTelemetry span — for example as
+        // `feature_flag.context.<key>` — and register it here. Lets you search
+        // Tempo for `feature_flag.context.dose=underdose` and see, on the same
+        // span, which `feature_flag.variant` the lab recorded. Closes the
+        // loop between why an outcome happened and what the chart knew at
+        // the time.
+        //
+        // ⚠️ Use a fixed allowlist of keys; do NOT iterate over the whole
+        // evaluation context. The merged context routinely carries the
+        // OpenFeature targetingKey (often a user id) and, in real apps, things
+        // like email or account identifiers — span attributes are retained
+        // for days in Tempo/Prometheus and are hard to redact after the fact.
+        // See https://opentelemetry.io/docs/security/ for the broader rule.
+    }
+
+    @Override
+    public void addInterceptors(InterceptorRegistry registry) {
+        registry.addInterceptor(new SpeciesInterceptor());
+    }
+}
diff --git a/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/SpeciesInterceptor.java b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/SpeciesInterceptor.java
new file mode 100644
index 00000000..a1020ebe
--- /dev/null
+++ b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/SpeciesInterceptor.java
@@ -0,0 +1,45 @@
+package dev.openfeature.demo.java.demo;
+
+import dev.openfeature.sdk.ImmutableContext;
+import dev.openfeature.sdk.OpenFeatureAPI;
+import dev.openfeature.sdk.ThreadLocalTransactionContextPropagator;
+import dev.openfeature.sdk.Value;
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.servlet.http.HttpServletResponse;
+import org.springframework.web.servlet.HandlerInterceptor;
+
+import java.util.HashMap;
+
+/**
+ * Per-request OpenFeature transaction context. Reads {@code species} (drives the
+ * species targeting branch on {@code vision_state}) and {@code userId} (used as
+ * the OpenFeature targetingKey, so the fractional rollout on
+ * {@code vision_amplifier_v2} is sticky per caller).
+ */
+public class SpeciesInterceptor implements HandlerInterceptor {
+
+    @Override
+    public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) throws Exception {
+        String species = request.getParameter("species");
+        String userId = request.getParameter("userId");
+        HashMap<String, Value> attributes = new HashMap<>();
+        if (species != null) {
+            attributes.put("species", new Value(species));
+        }
+        ImmutableContext evaluationContext = userId != null
+                ? new ImmutableContext(userId, attributes) // userId becomes the targetingKey
+                : new ImmutableContext(attributes);
+        OpenFeatureAPI.getInstance().setTransactionContext(evaluationContext);
+        return HandlerInterceptor.super.preHandle(request, response, handler);
+    }
+
+    @Override
+    public void afterCompletion(HttpServletRequest request, HttpServletResponse response, Object handler, Exception ex) throws Exception {
+        OpenFeatureAPI.getInstance().setTransactionContext(new ImmutableContext()); // replace this request's context with an empty one
+        HandlerInterceptor.super.afterCompletion(request, response, handler, ex);
+    }
+
+    static {
+        OpenFeatureAPI.getInstance().setTransactionContextPropagator(new ThreadLocalTransactionContextPropagator());
+    }
+}
diff --git a/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Trial.java b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Trial.java
new file mode 100644
index 00000000..f5c79d1d
--- /dev/null
+++ b/adventures/planned/00-blind-by-design/expert/src/main/java/dev/openfeature/demo/java/demo/Trial.java
@@ -0,0 +1,59 @@
+package dev.openfeature.demo.java.demo;
+
+import dev.openfeature.sdk.Client;
+import dev.openfeature.sdk.ImmutableContext;
+import dev.openfeature.sdk.OpenFeatureAPI;
+import dev.openfeature.sdk.Value;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.util.HashMap;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Phase 3 lab. Reads the {@code vision_amplifier_v2} flag and, when the
+ * fractional rollout puts the caller into the {@code on} bucket, executes the
+ * deliberately bad new formulation: 200ms slower, 10% chance of a 5xx. The
+ * baseline {@code vision_state} flag still drives the response body.
+ *
+ *

Each evaluation also passes a {@code dose} attribute as invocation + * context — the fraction of clinical staff who under- or over-dose + * subjects shows up here. Most subjects get {@code "standard"}, the rest get + * {@code "underdose"} or {@code "overdose"}, both of which override the cohort + * targeting and yield {@code clouded}.

+ */
+@RestController
+public class Trial {
+
+    @GetMapping("/")
+    public ResponseEntity<?> observeSubject(@RequestParam(required = false) String dose) {
+        Client client = OpenFeatureAPI.getInstance().getClient();
+        boolean newAlgo = client.getBooleanValue("vision_amplifier_v2", false);
+        if (newAlgo) {
+            try {
+                Thread.sleep(200);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep the interrupt flag set for callers
+            }
+            if (ThreadLocalRandom.current().nextDouble() < 0.1) {
+                return ResponseEntity.status(500).body("simulated failure in vision_amplifier_v2");
+            }
+        }
+
+        String resolvedDose = (dose != null) ? dose : pickDose(); // explicit ?dose= wins over the random pick
+        HashMap<String, Value> invocationCtx = new HashMap<>();
+        invocationCtx.put("dose", new Value(resolvedDose));
+
+        return ResponseEntity.ok(
+                client.getStringDetails("vision_state", "untreated", new ImmutableContext(invocationCtx)));
+    }
+
+    private static String pickDose() {
+        double r = ThreadLocalRandom.current().nextDouble();
+        if (r < 0.60) return "standard";  // 60%
+        if (r < 0.90) return "underdose"; // 30%
+        return "overdose";                // 10%
+    }
+}
diff --git a/adventures/planned/00-blind-by-design/expert/src/main/resources/application.properties b/adventures/planned/00-blind-by-design/expert/src/main/resources/application.properties
new file mode 100644
index 00000000..e46379db
--- /dev/null
+++ b/adventures/planned/00-blind-by-design/expert/src/main/resources/application.properties
@@ -0,0 +1,6 @@
+spring.application.name=demo
+
+# OpenTelemetry SDK setup is provided by the OpenTelemetry Java Agent,
+# which reads its configuration from otel.properties (next to pom.xml).
+# Spring's Environment is not on the agent's lookup path, so do NOT put
+# otel.* keys in this file — they will be silently ignored.
diff --git a/adventures/planned/00-blind-by-design/expert/verify.sh b/adventures/planned/00-blind-by-design/expert/verify.sh
new file mode 100755
index 00000000..4a45a914
--- /dev/null
+++ b/adventures/planned/00-blind-by-design/expert/verify.sh
@@ -0,0 +1,232 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Load shared libraries
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR/../../../../lib/scripts/loader.sh"
+
+OBJECTIVE="By the end of this level, the lab hits each of these observable outcomes:
+
+- Spans for 'fun-with-flags-java-spring' visible in Tempo with feature_flag.context.<key> attributes (searching 'feature_flag.context.dose=underdose' lights up the mis-dose requests)
+- 'feature_flag_evaluation_requests_total' non-zero in Prometheus — flag evaluations show up as counters, not just spans
+- The 'vision_amplifier_v2' rollout is rolled back to 100% off — without redeploying the lab
+- HTTP 5xx rate over the last minute drops below 1%"
+
+DOCS_URL="https://dynatrace-oss.github.io/open-ecosystem-challenges/00-blind-by-design/expert"
+
+print_header \
+  'Adventure 00: Blind by Design' \
+  '🔴 Expert: Read the chart' \
+  'Verification'
+
+check_prerequisites curl jq
+
+print_sub_header "Running verification checks..."
+
+TESTS_PASSED=0
+TESTS_FAILED=0
+FAILED_CHECKS=()
+
+APP_URL="http://localhost:8080"
+# verify.sh runs from inside the workspace container. The lab is in the
+# same container, so localhost:8080 works — but flagd and the LGTM stack
+# are sibling compose services, reachable only by service name on the
+# docker-internal network. Codespaces forwards the host ports onto the
+# developer's laptop (so the browser sees localhost:3000), but those
+# forwards don't loop back into the workspace container.
+FLAGD_HTTP="http://flagd:8013"
+PROMETHEUS_URL="http://lgtm:9090"
+TEMPO_URL="http://lgtm:3200"
+GRAFANA_URL="http://lgtm:3000"
+
+# ---- 1. App reachable ------------------------------------------------------
+# Lean on test_http_endpoint from lib/scripts/http.sh — handles connection
+# failure and unexpected-content cases for us.
+print_test_section "Checking lab reachability"
+if ! test_http_endpoint "$APP_URL/" "vision_state" \
+  "Start the app with: ./mvnw spring-boot:run"; then
+  FAILED_CHECKS+=("app_reachable")
+fi
+print_new_line
+
+# ---- 2. flagd reachable ---------------------------------------------------
+print_test_section "Checking flagd reachability"
+if curl -fsS --max-time 5 -X POST "$FLAGD_HTTP/flagd.evaluation.v1.Service/ResolveBoolean" \
+  -H 'Content-Type: application/json' \
+  -d '{"flagKey":"loadgen_active","context":{}}' >/dev/null 2>&1; then
+  print_info_indent "✓ flagd HTTP eval API reachable at $FLAGD_HTTP"
+  TESTS_PASSED=$((TESTS_PASSED + 1))
+else
+  print_error_indent "flagd HTTP API is not reachable at $FLAGD_HTTP"
+  print_hint "flagd is a sibling devcontainer service. Reopen the Codespace if it is not running."
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("flagd_reachable")
+fi
+print_new_line
+
+# ---- 3. LGTM stack reachable ---------------------------------------------
+print_test_section "Checking Grafana LGTM stack reachability"
+if curl -fsS --max-time 5 "$GRAFANA_URL/api/health" >/dev/null 2>&1; then
+  print_info_indent "✓ Grafana reachable at $GRAFANA_URL"
+  TESTS_PASSED=$((TESTS_PASSED + 1))
+else
+  print_error_indent "Grafana is not reachable at $GRAFANA_URL"
+  print_hint "The LGTM stack is a sibling compose service named 'lgtm'. From the workspace container use lgtm:3000 (not localhost). If it's still unreachable, the sibling container has not started — reopen the Codespace."
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("lgtm_reachable")
+fi
+print_new_line
+
+# ---- 4. vision_amplifier_v2 rolled back -----------------------------------
+print_test_section "Checking vision_amplifier_v2 rollback"
+ROLLOUT_RESPONSE=$(curl -fsS --max-time 5 -X POST \
+  "$FLAGD_HTTP/flagd.evaluation.v1.Service/ResolveBoolean" \
+  -H 'Content-Type: application/json' \
+  -d '{"flagKey":"vision_amplifier_v2","context":{"targetingKey":"verify-probe-user"}}' 2>/dev/null || echo "")
+
+if [[ -z "$ROLLOUT_RESPONSE" ]]; then
+  print_error_indent "Could not query vision_amplifier_v2 from flagd"
+  print_hint "Make sure the flagd container is running and flags.json has vision_amplifier_v2 defined."
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("vision_amplifier_v2_rollback")
+else
+  # NB: do not use `.value // empty` — `//` treats jq-false as missing,
+  # so a successfully rolled-back flag (.value=false) would print as ''.
+  ROLLOUT_VALUE=$(echo "$ROLLOUT_RESPONSE" | jq -r '.value' 2>/dev/null || echo "") # unparsable body -> '' -> failure branch, not a set -e abort
+  if [[ "$ROLLOUT_VALUE" == "false" ]]; then
+    print_info_indent "✓ vision_amplifier_v2 evaluates to false (rollout has been rolled back)"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  else
+    print_error_indent "vision_amplifier_v2 still resolves to '$ROLLOUT_VALUE' for the probe user"
+    print_hint "Edit flags.json: flip the fractional bucket so 'off' is 100 and 'on' is 0, save, and flagd will pick it up."
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_CHECKS+=("vision_amplifier_v2_rollback")
+  fi
+fi
+print_new_line
+
+# ---- 5. Prometheus has feature_flag_evaluation_requests_total ----------
+print_test_section "Checking feature_flag metrics in Prometheus"
+PROM_QUERY='feature_flag_evaluation_requests_total'
+PROM_RESPONSE=$(curl -fsS --max-time 5 -G "$PROMETHEUS_URL/api/v1/query" \
+  --data-urlencode "query=$PROM_QUERY" 2>/dev/null || echo "")
+
+if [[ -z "$PROM_RESPONSE" ]]; then
+  print_error_indent "Could not query Prometheus at $PROMETHEUS_URL"
+  print_hint "Prometheus runs inside the lgtm sibling compose service on port 9090 (reachable as lgtm:9090 from the workspace container). If it's still unreachable, the lgtm container has not started — reopen the Codespace."
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("prometheus_metrics")
+else
+  RESULT_COUNT=$(echo "$PROM_RESPONSE" | jq '.data.result | length // 0' 2>/dev/null || echo 0)
+  TOTAL=$(echo "$PROM_RESPONSE" | jq -r '[.data.result[]?.value[1] | tonumber] | add // 0' 2>/dev/null || echo 0)
+  # `add // 0` is a tiny safeguard if the array is empty; `|| echo 0` covers a jq failure.
+  if [[ "$RESULT_COUNT" -gt 0 ]] && awk -v v="$TOTAL" 'BEGIN { exit !(v+0 > 0) }'; then
+    print_info_indent "✓ feature_flag_evaluation_requests_total is non-zero (sum=$TOTAL)"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  else
+    print_error_indent "feature_flag_evaluation_requests_total is missing or zero"
+    print_hint "Wire the OpenTelemetry meter provider AND register MetricsHook in OpenFeatureConfig.initProvider(). Then drive traffic by flipping loadgen_active to 'on'."
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_CHECKS+=("prometheus_metrics")
+  fi
+fi
+print_new_line
+
+# ---- 6. Tempo has at least one trace for the service -------------------
+print_test_section "Checking traces in Tempo"
+TEMPO_RESPONSE=$(curl -fsS --max-time 5 -G "$TEMPO_URL/api/search" \
+  --data-urlencode 'tags=service.name=fun-with-flags-java-spring' \
+  --data-urlencode 'limit=20' 2>/dev/null || echo "")
+
+if [[ -z "$TEMPO_RESPONSE" ]]; then
+  print_error_indent "Could not query Tempo at $TEMPO_URL"
+  print_hint "Tempo runs inside the lgtm sibling compose service on port 3200 (reachable as lgtm:3200 from the workspace container). If it's still unreachable, the lgtm container has not started — reopen the Codespace."
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("tempo_traces")
+else
+  TRACE_COUNT=$(echo "$TEMPO_RESPONSE" | jq '.traces | length // 0' 2>/dev/null || echo 0)
+  if [[ "$TRACE_COUNT" -gt 0 ]]; then
+    print_info_indent "✓ Tempo has $TRACE_COUNT trace(s) for service 'fun-with-flags-java-spring'"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  else
+    print_error_indent "Tempo has no traces for service 'fun-with-flags-java-spring'"
+    print_hint "Send some traffic: curl http://localhost:8080/?userId=demo and wait a few seconds for the exporter to flush."
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_CHECKS+=("tempo_traces")
+  fi
+fi
+print_new_line
+
+# ---- 6b. Tempo spans carry the dose context attribute ------------------
+# Generate a deterministic underdose request, give the exporter a moment to
+# flush, then query Tempo for spans with feature_flag.context.dose. If the
+# attribute is missing the participant has not registered the
+# ContextSpanHook (or it is not reading the merged eval context).
+print_test_section "Checking flag-context attributes on Tempo spans"
+curl -s --max-time 5 "$APP_URL/?dose=underdose" >/dev/null 2>&1 || true
+sleep 6  # OTel batch span processor flush window
+DOSE_TEMPO=$(curl -fsS --max-time 5 -G "$TEMPO_URL/api/search" \
+  --data-urlencode 'tags=feature_flag.context.dose=underdose' \
+  --data-urlencode 'limit=5' 2>/dev/null || echo "")
+
+if [[ -z "$DOSE_TEMPO" ]]; then
+  print_error_indent "Could not query Tempo for context attributes"
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("tempo_context")
+else
+  DOSE_COUNT=$(echo "$DOSE_TEMPO" | jq '.traces | length // 0' 2>/dev/null || echo 0)
+  if [[ "$DOSE_COUNT" -gt 0 ]]; then
+    print_info_indent "✓ Tempo has $DOSE_COUNT span(s) tagged feature_flag.context.dose=underdose"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  else
+    print_error_indent "No spans with feature_flag.context.dose=underdose found in Tempo"
+    print_hint "Did you register the ContextSpanHook that copies merged-eval-context attrs onto Span.current()?"
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_CHECKS+=("tempo_context")
+  fi
+fi
+print_new_line
+
+# ---- 7. HTTP 5xx rate under threshold ----------------------------------
+print_test_section "Checking HTTP 5xx error rate (last 1m)"
+ERROR_QUERY='sum(rate(http_server_request_duration_seconds_count{http_response_status_code=~"5.."}[1m])) / clamp_min(sum(rate(http_server_request_duration_seconds_count[1m])), 1e-9)'
+ERROR_RESPONSE=$(curl -fsS --max-time 5 -G "$PROMETHEUS_URL/api/v1/query" \
+  --data-urlencode "query=$ERROR_QUERY" 2>/dev/null || echo "")
+
+if [[ -z "$ERROR_RESPONSE" ]] || ! jq -e '(.data.result // []) | length > 0' <<<"$ERROR_RESPONSE" >/dev/null 2>&1; then
+  # Fallback: the query failed or returned no series — try the older Spring metric name; keep the first response if this one fails
+  ERROR_QUERY_ALT='sum(rate(http_server_requests_seconds_count{status=~"5.."}[1m])) / clamp_min(sum(rate(http_server_requests_seconds_count[1m])), 1e-9)'
+  ERROR_RESPONSE=$(curl -fsS --max-time 5 -G "$PROMETHEUS_URL/api/v1/query" \
+    --data-urlencode "query=$ERROR_QUERY_ALT" 2>/dev/null || printf '%s' "$ERROR_RESPONSE")
+fi
+
+if [[ -z "$ERROR_RESPONSE" ]]; then
+  print_error_indent "Could not query Prometheus for HTTP error rate"
+  TESTS_FAILED=$((TESTS_FAILED + 1))
+  FAILED_CHECKS+=("error_rate")
+else
+  ERROR_RATE=$(echo "$ERROR_RESPONSE" | jq -r '.data.result[0].value[1] // "0"')
+  # Treat NaN (no requests at all) as a pass — there's no traffic to fail on.
+  if [[ "$ERROR_RATE" == "NaN" ]]; then
+    print_info_indent "✓ No traffic in the last minute — error rate not meaningful (treated as pass)"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  elif awk -v v="$ERROR_RATE" 'BEGIN { exit !(v+0 < 0.01) }'; then
+    PERCENT=$(awk -v v="$ERROR_RATE" 'BEGIN { printf "%.2f", v*100 }')
+    print_info_indent "✓ HTTP 5xx rate is ${PERCENT}% (< 1%)"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+  else
+    PERCENT=$(awk -v v="$ERROR_RATE" 'BEGIN { printf "%.2f", v*100 }')
+    print_error_indent "HTTP 5xx rate is ${PERCENT}% (>= 1%)"
+    print_hint "The 'on' bucket of vision_amplifier_v2 throws 5xx 10% of the time. Roll the rollout back to 100% off."
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_CHECKS+=("error_rate")
+  fi
+fi
+print_new_line
+
+print_verification_summary "Read the chart" "$DOCS_URL" "$OBJECTIVE"
+
+if [[ $TESTS_FAILED -ne 0 || ${#FAILED_CHECKS[@]} -ne 0 ]]; then  # check 1 only records into FAILED_CHECKS here
+  exit 1
+fi