Skip to content

Commit 47bad84

Browse files
committed
fix(metrics): clear stale state on SetVMState, proper server lifecycle via mgr.Add, wire Metrics in main
1 parent 14e62cf commit 47bad84

3 files changed

Lines changed: 25 additions & 8 deletions

File tree

cmd/agent/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ func main() {
7373
Scheme: mgr.GetScheme(),
7474
NodeName: nodeName,
7575
Driver: driver,
76+
Metrics: agent.NewVMMetricsCollector(),
7677
}).SetupWithManager(mgr); err != nil {
7778
log.Error(err, "Unable to set up ImpVMReconciler")
7879
os.Exit(1)

internal/agent/metrics.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ func NewVMMetricsCollector() *VMMetricsCollector {
4949
}
5050

5151
// SetVMState sets the imp_vm_state gauge for a VM. key = "namespace/name".
52+
// Clears any previous state series for this VM so only one state is active at a time.
5253
func (c *VMMetricsCollector) SetVMState(key, state, node string) {
5354
ns, name := splitKey(key)
55+
// Remove stale state series before setting the new one to avoid double-counting.
56+
c.vmState.DeletePartialMatch(prometheus.Labels{"impvm": name, "namespace": ns, "node": node})
5457
c.vmState.WithLabelValues(name, ns, node, state).Set(1)
5558
}
5659

internal/agent/reconciler.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -185,17 +185,30 @@ func (r *ImpVMReconciler) clearOwnership(ctx context.Context, vm *impdevv1alpha1
185185
return ctrl.Result{}, nil
186186
}
187187

188+
// metricsServer is a controller-runtime Runnable that serves Prometheus metrics.
189+
// Registered with the manager so it shuts down cleanly when the manager stops.
190+
type metricsServer struct{ handler http.Handler }
191+
192+
func (s *metricsServer) Start(ctx context.Context) error {
193+
srv := &http.Server{Addr: metricsPort, Handler: s.handler, ReadHeaderTimeout: 10 * time.Second}
194+
go func() {
195+
<-ctx.Done()
196+
_ = srv.Shutdown(context.Background()) //nolint:errcheck
197+
}()
198+
if err := srv.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
199+
return err
200+
}
201+
return nil
202+
}
203+
188204
// SetupWithManager registers the reconciler with the controller-runtime manager.
189205
func (r *ImpVMReconciler) SetupWithManager(mgr ctrl.Manager) error {
190206
if r.Metrics != nil {
191-
go func() {
192-
mux := http.NewServeMux()
193-
mux.Handle("/metrics", NewMetricsHandlerWithCollector(r.Metrics))
194-
srv := &http.Server{Addr: metricsPort, Handler: mux, ReadHeaderTimeout: 10 * time.Second}
195-
if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
196-
logf.Log.Error(err, "metrics server failed")
197-
}
198-
}()
207+
mux := http.NewServeMux()
208+
mux.Handle("/metrics", NewMetricsHandlerWithCollector(r.Metrics))
209+
if err := mgr.Add(&metricsServer{handler: mux}); err != nil {
210+
return err
211+
}
199212
}
200213

201214
return ctrl.NewControllerManagedBy(mgr).

0 commit comments

Comments
 (0)