Skip to content

Commit 2bf87e4

Browse files
bdchatham and claude authored
feat: OTel infrastructure — MeterProvider, exporters, attribute constants (#97)
* feat: OTel infrastructure — MeterProvider, exporters, attribute constants

  Phase 1 of the OTel telemetry migration. Sets up the foundation with no metric changes — validates the bridge works alongside controller-runtime.

  MeterProvider setup (cmd/telemetry.go):
  - Prometheus exporter registers into controller-runtime's metrics.Registry so OTel instruments will appear on the existing /metrics endpoint
  - OTLP exporter enabled when OTEL_EXPORTER_OTLP_ENDPOINT is set
  - Resource attributes: service.name, service.version, k8s.pod.name, k8s.namespace.name (via downward API env vars)
  - Proper shutdown handling via defer

  Attribute constants (observability/attributes.go):
  - 9 per-datapoint attribute keys matching existing Prometheus label names
  - No overloaded "type" — split into replica_state and condition_type

  Build changes:
  - Makefile and Dockerfile: go build ./cmd/ (package, not single file)
  - OTel SDK dependencies added to go.mod

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: use signal-cancellable context for MeterProvider init

  Create the signal handler context once via ctrl.SetupSignalHandler() and share it between the MeterProvider init and mgr.Start. The OTLP exporter's gRPC connection setup respects cancellation on SIGTERM. The shutdown defer intentionally uses context.Background() so the OTLP exporter can flush its final batch after the signal fires.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: add timeout to MeterProvider shutdown

  Use a 5-second timeout on the shutdown context so the OTLP exporter doesn't hang indefinitely if the collector is unreachable. The fresh context.Background() is still correct (signal ctx is already cancelled when defer runs), but now it's bounded.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7027b90 commit 2bf87e4

7 files changed

Lines changed: 206 additions & 80 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ RUN go mod download
99

1010
COPY . .
1111

12-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
12+
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager ./cmd/
1313

1414
FROM gcr.io/distroless/static:nonroot
1515
WORKDIR /

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ GOLANGCI_LINT ?= $(shell which golangci-lint 2>/dev/null || echo $(HOME)/go/bin/
44
.PHONY: build test lint manifests generate ci docker-build docker-push
55

66
build: ## Build manager binary.
7-
go build -o bin/manager cmd/main.go
7+
go build -o bin/manager ./cmd/
88

99
test: ## Run tests.
1010
go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out

cmd/main.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"os"
99
"sort"
10+
"time"
1011

1112
_ "k8s.io/client-go/plugin/pkg/client/auth"
1213

@@ -99,6 +100,21 @@ func main() {
99100
metricsServerOptions.KeyName = metricsCertKey
100101
}
101102

103+
ctx := ctrl.SetupSignalHandler()
104+
105+
mp, err := initMeterProvider(ctx)
106+
if err != nil {
107+
setupLog.Error(err, "Failed to initialize OTel MeterProvider")
108+
os.Exit(1)
109+
}
110+
defer func() {
111+
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
112+
defer cancel()
113+
if err := mp.Shutdown(shutdownCtx); err != nil {
114+
setupLog.Error(err, "Failed to shutdown MeterProvider")
115+
}
116+
}()
117+
102118
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
103119
Scheme: scheme,
104120
Metrics: metricsServerOptions,
@@ -230,7 +246,7 @@ func main() {
230246
}
231247

232248
setupLog.Info("Starting manager")
233-
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
249+
if err := mgr.Start(ctx); err != nil {
234250
setupLog.Error(err, "Failed to run manager")
235251
os.Exit(1)
236252
}

cmd/telemetry.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
8+
"go.opentelemetry.io/otel"
9+
"go.opentelemetry.io/otel/attribute"
10+
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
11+
promexporter "go.opentelemetry.io/otel/exporters/prometheus"
12+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
13+
"go.opentelemetry.io/otel/sdk/resource"
14+
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
15+
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
16+
)
17+
18+
// buildVersion is set via ldflags at build time.
19+
var buildVersion = "dev"
20+
21+
// initMeterProvider creates an OTel MeterProvider with two readers:
22+
// - A Prometheus exporter that registers into controller-runtime's metrics.Registry,
23+
// making OTel instruments appear on the existing /metrics endpoint alongside
24+
// controller-runtime's own workqueue/reconcile metrics.
25+
// - An OTLP exporter (when OTEL_EXPORTER_OTLP_ENDPOINT is set) that pushes to
26+
// whatever backend the user configures.
27+
func initMeterProvider(ctx context.Context) (*sdkmetric.MeterProvider, error) {
28+
res, err := resource.Merge(
29+
resource.Default(),
30+
resource.NewWithAttributes(
31+
semconv.SchemaURL,
32+
semconv.ServiceName("sei-k8s-controller"),
33+
semconv.ServiceVersion(buildVersion),
34+
attribute.String("k8s.pod.name", os.Getenv("POD_NAME")),
35+
attribute.String("k8s.namespace.name", os.Getenv("POD_NAMESPACE")),
36+
),
37+
)
38+
if err != nil {
39+
return nil, fmt.Errorf("building OTel resource: %w", err)
40+
}
41+
42+
var opts []sdkmetric.Option
43+
opts = append(opts, sdkmetric.WithResource(res))
44+
45+
// Prometheus reader: bridges OTel instruments into controller-runtime's
46+
// existing /metrics endpoint. WithoutScopeInfo and WithoutTargetInfo
47+
// suppress otel_scope_info and target_info metrics that would add noise
48+
// alongside controller-runtime's own Prometheus output.
49+
promReader, err := promexporter.New(
50+
promexporter.WithRegisterer(crmetrics.Registry),
51+
promexporter.WithoutScopeInfo(),
52+
promexporter.WithoutTargetInfo(),
53+
)
54+
if err != nil {
55+
return nil, fmt.Errorf("creating Prometheus exporter: %w", err)
56+
}
57+
opts = append(opts, sdkmetric.WithReader(promReader))
58+
59+
// OTLP reader: optional, enabled by standard OTel env var.
60+
if os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") != "" {
61+
otlpExporter, err := otlpmetricgrpc.New(ctx)
62+
if err != nil {
63+
return nil, fmt.Errorf("creating OTLP exporter: %w", err)
64+
}
65+
opts = append(opts, sdkmetric.WithReader(
66+
sdkmetric.NewPeriodicReader(otlpExporter),
67+
))
68+
}
69+
70+
mp := sdkmetric.NewMeterProvider(opts...)
71+
otel.SetMeterProvider(mp)
72+
73+
return mp, nil
74+
}

go.mod

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ require (
1111
github.com/prometheus/client_golang v1.23.2
1212
github.com/sei-protocol/sei-config v0.0.11
1313
github.com/sei-protocol/seictl v0.0.30
14+
go.opentelemetry.io/otel v1.43.0
15+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0
16+
go.opentelemetry.io/otel/exporters/prometheus v0.65.0
17+
go.opentelemetry.io/otel/sdk v1.43.0
18+
go.opentelemetry.io/otel/sdk/metric v1.43.0
1419
k8s.io/api v0.35.0
1520
k8s.io/apiextensions-apiserver v0.35.0
1621
k8s.io/apimachinery v0.35.0
@@ -20,7 +25,7 @@ require (
2025
)
2126

2227
require (
23-
cel.dev/expr v0.24.0 // indirect
28+
cel.dev/expr v0.25.1 // indirect
2429
github.com/BurntSushi/toml v1.5.0 // indirect
2530
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
2631
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
@@ -42,7 +47,7 @@ require (
4247
github.com/aws/smithy-go v1.24.2 // indirect
4348
github.com/beorn7/perks v1.0.1 // indirect
4449
github.com/blang/semver/v4 v4.0.0 // indirect
45-
github.com/cenkalti/backoff/v5 v5.0.2 // indirect
50+
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
4651
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4752
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
4853
github.com/dustin/go-humanize v1.0.1 // indirect
@@ -61,7 +66,7 @@ require (
6166
github.com/google/cel-go v0.26.0 // indirect
6267
github.com/google/gnostic-models v0.7.0 // indirect
6368
github.com/google/go-cmp v0.7.0 // indirect
64-
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect
69+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
6570
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6671
github.com/josharian/intern v1.0.0 // indirect
6772
github.com/json-iterator/go v1.1.12 // indirect
@@ -74,8 +79,9 @@ require (
7479
github.com/oapi-codegen/runtime v1.2.0 // indirect
7580
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
7681
github.com/prometheus/client_model v0.6.2 // indirect
77-
github.com/prometheus/common v0.66.1 // indirect
78-
github.com/prometheus/procfs v0.17.0 // indirect
82+
github.com/prometheus/common v0.67.5 // indirect
83+
github.com/prometheus/otlptranslator v1.0.0 // indirect
84+
github.com/prometheus/procfs v0.20.1 // indirect
7985
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
8086
github.com/sei-protocol/seilog v0.0.3 // indirect
8187
github.com/spf13/cobra v1.10.2 // indirect
@@ -84,31 +90,29 @@ require (
8490
github.com/x448/float16 v0.8.4 // indirect
8591
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
8692
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect
87-
go.opentelemetry.io/otel v1.39.0 // indirect
8893
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 // indirect
8994
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 // indirect
90-
go.opentelemetry.io/otel/metric v1.39.0 // indirect
91-
go.opentelemetry.io/otel/sdk v1.39.0 // indirect
92-
go.opentelemetry.io/otel/trace v1.39.0 // indirect
93-
go.opentelemetry.io/proto/otlp v1.7.0 // indirect
95+
go.opentelemetry.io/otel/metric v1.43.0 // indirect
96+
go.opentelemetry.io/otel/trace v1.43.0 // indirect
97+
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
9498
go.uber.org/multierr v1.11.0 // indirect
9599
go.uber.org/zap v1.27.0 // indirect
96-
go.yaml.in/yaml/v2 v2.4.3 // indirect
100+
go.yaml.in/yaml/v2 v2.4.4 // indirect
97101
go.yaml.in/yaml/v3 v3.0.4 // indirect
98102
golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect
99-
golang.org/x/mod v0.32.0 // indirect
100-
golang.org/x/net v0.49.0 // indirect
103+
golang.org/x/mod v0.33.0 // indirect
104+
golang.org/x/net v0.52.0 // indirect
101105
golang.org/x/oauth2 v0.35.0 // indirect
102-
golang.org/x/sync v0.19.0 // indirect
103-
golang.org/x/sys v0.40.0 // indirect
104-
golang.org/x/term v0.39.0 // indirect
105-
golang.org/x/text v0.34.0 // indirect
106+
golang.org/x/sync v0.20.0 // indirect
107+
golang.org/x/sys v0.42.0 // indirect
108+
golang.org/x/term v0.41.0 // indirect
109+
golang.org/x/text v0.35.0 // indirect
106110
golang.org/x/time v0.14.0 // indirect
107-
golang.org/x/tools v0.41.0 // indirect
111+
golang.org/x/tools v0.42.0 // indirect
108112
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
109-
google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 // indirect
110-
google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect
111-
google.golang.org/grpc v1.78.0 // indirect
113+
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
114+
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
115+
google.golang.org/grpc v1.80.0 // indirect
112116
google.golang.org/protobuf v1.36.11 // indirect
113117
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
114118
gopkg.in/inf.v0 v0.9.1 // indirect

0 commit comments

Comments
 (0)