Skip to content

Commit 7f0b767

Browse files
committed
feat: add slog + zap log-level metrics via MetricsSlogHandler
1 parent 0dc5d75 commit 7f0b767

4 files changed

Lines changed: 541 additions & 9 deletions

File tree

cmd/main.go

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@ import (
99
"flag"
1010
"log/slog"
1111
"net/http"
12+
13+
uberzap "go.uber.org/zap"
1214
"os"
1315
"path/filepath"
1416
"slices"
17+
"strings"
1518
"time"
1619

1720
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
@@ -149,12 +152,34 @@ func main() {
149152
opts.BindFlags(flag.CommandLine)
150153
flag.Parse()
151154

152-
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
153-
154-
// Configure slog (used across internal packages) with structured JSON output.
155-
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
156-
Level: slog.LevelInfo,
157-
})))
155+
ctrl.SetLogger(zap.New(
156+
zap.UseFlagOptions(&opts),
157+
zap.RawZapOpts(uberzap.WrapCore(monitoring.WrapCoreWithLogMetrics)),
158+
))
159+
160+
// Configure slog (used across internal packages) with JSON output and
161+
// level control via the LOG_LEVEL environment variable.
162+
// Supported values: debug, info (default), warn, error.
163+
slogLevel := new(slog.LevelVar)
164+
slogLevel.Set(slog.LevelInfo)
165+
if lvl := os.Getenv("LOG_LEVEL"); lvl != "" {
166+
switch strings.ToLower(lvl) {
167+
case "debug":
168+
slogLevel.Set(slog.LevelDebug)
169+
case "info":
170+
slogLevel.Set(slog.LevelInfo)
171+
case "warn", "warning":
172+
slogLevel.Set(slog.LevelWarn)
173+
case "error":
174+
slogLevel.Set(slog.LevelError)
175+
}
176+
}
177+
slog.SetDefault(slog.New(monitoring.NewMetricsSlogHandler(
178+
slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
179+
Level: slogLevel,
180+
}),
181+
)))
182+
slog.Info("slog configured", "level", slogLevel.Level().String())
158183

159184
// Log the main configuration
160185
setupLog.Info("loaded main configuration",
@@ -307,6 +332,7 @@ func main() {
307332
// This is useful to distinguish metrics from different deployments.
308333
metricsConfig := conf.GetConfigOrDie[monitoring.Config]()
309334
metrics.Registry = monitoring.WrapRegistry(metrics.Registry, metricsConfig)
335+
metrics.Registry.MustRegister(monitoring.LogMessagesTotal)
310336

311337
// TODO: Remove me after scheduling pipeline steps don't require DB connections anymore.
312338
metrics.Registry.MustRegister(&db.Monitor)
@@ -658,10 +684,10 @@ func main() {
658684
os.Exit(1)
659685
}
660686

661-
syncerMonitor := commitments.NewSyncerMonitor()
662-
must.Succeed(metrics.Registry.Register(syncerMonitor))
663687
if slices.Contains(mainConfig.EnabledTasks, "commitments-sync-task") {
664688
setupLog.Info("starting commitments syncer")
689+
syncerMonitor := commitments.NewSyncerMonitor()
690+
must.Succeed(metrics.Registry.Register(syncerMonitor))
665691
syncer := commitments.NewSyncer(multiclusterClient, syncerMonitor)
666692
syncerConfig := conf.GetConfigOrDie[commitments.SyncerConfig]()
667693
syncerConfig.ApplyDefaults()

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ require (
104104
go.opentelemetry.io/otel/trace v1.43.0 // indirect
105105
go.opentelemetry.io/proto/otlp v1.8.0 // indirect
106106
go.uber.org/multierr v1.11.0 // indirect
107-
go.uber.org/zap v1.27.1 // indirect
107+
go.uber.org/zap v1.27.1
108108
go.yaml.in/yaml/v2 v2.4.3 // indirect
109109
go.yaml.in/yaml/v3 v3.0.4 // indirect
110110
go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect

pkg/monitoring/log_metrics.go

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Copyright SAP SE
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package monitoring
5+
6+
import (
7+
"context"
8+
"log/slog"
9+
"path"
10+
"runtime"
11+
"sync"
12+
13+
"github.com/prometheus/client_golang/prometheus"
14+
"go.uber.org/zap/zapcore"
15+
)
16+
17+
// pcFileCache caches the resolved file path for each program counter. The set
18+
// of distinct PCs is bounded by the number of log call sites in the binary, so
19+
// this map grows to a fixed size and all subsequent lookups are lock-free reads.
20+
var pcFileCache sync.Map // uintptr -> string
21+
22+
// LogMessagesTotal counts warn and error log messages emitted by both the slog
23+
// and zap loggers. Labels: "level" (warn|error), "file" (relative source path).
24+
var LogMessagesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
25+
Namespace: "cortex",
26+
Name: "log_messages_total",
27+
Help: "Total number of log messages emitted at warn or error level.",
28+
}, []string{"level", "file"})
29+
30+
// shortFilePath returns "parent_dir/filename.go" from any absolute or
31+
// module-relative path. This is independent of the build environment (no
32+
// -trimpath needed) and keeps Prometheus label cardinality manageable.
33+
func shortFilePath(file string) string {
34+
dir, base := path.Split(file)
35+
parent := path.Base(dir)
36+
if parent == "." || parent == "/" {
37+
return base
38+
}
39+
return parent + "/" + base
40+
}
41+
42+
// --- slog handler wrapper ---
43+
44+
// MetricsSlogHandler wraps an slog.Handler and increments LogMessagesTotal for
45+
// every warn or error log record.
46+
type MetricsSlogHandler struct {
47+
next slog.Handler
48+
}
49+
50+
// NewMetricsSlogHandler returns a new handler that counts warn/error logs and
51+
// delegates all calls to next.
52+
func NewMetricsSlogHandler(next slog.Handler) *MetricsSlogHandler {
53+
return &MetricsSlogHandler{next: next}
54+
}
55+
56+
func (h *MetricsSlogHandler) Enabled(ctx context.Context, level slog.Level) bool {
57+
if h.next == nil {
58+
return false
59+
}
60+
return h.next.Enabled(ctx, level)
61+
}
62+
63+
func (h *MetricsSlogHandler) Handle(ctx context.Context, r slog.Record) error {
64+
if r.Level >= slog.LevelWarn {
65+
level := "warn"
66+
if r.Level >= slog.LevelError {
67+
level = "error"
68+
}
69+
file := "unknown"
70+
if r.PC != 0 {
71+
if cached, ok := pcFileCache.Load(r.PC); ok {
72+
file = cached.(string)
73+
} else {
74+
frames := runtime.CallersFrames([]uintptr{r.PC})
75+
f, _ := frames.Next()
76+
if f.File != "" {
77+
file = shortFilePath(f.File)
78+
}
79+
pcFileCache.Store(r.PC, file)
80+
}
81+
}
82+
LogMessagesTotal.WithLabelValues(level, file).Inc()
83+
}
84+
if h.next == nil {
85+
return nil
86+
}
87+
return h.next.Handle(ctx, r)
88+
}
89+
90+
func (h *MetricsSlogHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
91+
if h.next == nil {
92+
return &MetricsSlogHandler{}
93+
}
94+
return &MetricsSlogHandler{next: h.next.WithAttrs(attrs)}
95+
}
96+
97+
func (h *MetricsSlogHandler) WithGroup(name string) slog.Handler {
98+
if h.next == nil {
99+
return &MetricsSlogHandler{}
100+
}
101+
return &MetricsSlogHandler{next: h.next.WithGroup(name)}
102+
}
103+
104+
// --- zap core wrapper ---
105+
106+
// WrapCoreWithLogMetrics returns a zapcore.Core that hooks into every write to
107+
// increment LogMessagesTotal for warn and error entries. It uses
108+
// zapcore.RegisterHooks so no manual Check/Write plumbing is needed.
109+
func WrapCoreWithLogMetrics(core zapcore.Core) zapcore.Core {
110+
return zapcore.RegisterHooks(core, func(e zapcore.Entry) error {
111+
if e.Level >= zapcore.WarnLevel {
112+
level := "warn"
113+
if e.Level >= zapcore.ErrorLevel {
114+
level = "error"
115+
}
116+
file := "unknown"
117+
if e.Caller.Defined {
118+
file = shortFilePath(e.Caller.File)
119+
}
120+
LogMessagesTotal.WithLabelValues(level, file).Inc()
121+
}
122+
return nil
123+
})
124+
}

0 commit comments

Comments
 (0)