@@ -207,15 +207,16 @@ func (d *FirecrackerDriver) Start(ctx context.Context, vm *impdevv1alpha1.ImpVM)
207207
208208 proc := & fcProc {machine : m , pid : int64 (pid ), socket : sockPath , netInfo : netInfo }
209209
210- // Capture probe arguments before goroutine launch to avoid data race on vm.
210+ // Capture goroutine args before launch to avoid data race on vm.
211211 var probeCtx context.Context
212212 var probes * impdevv1alpha1.ProbeSpec
213- var vsockPath , vmNamespace , vmName string
214- if gaEnabled && vm . Spec . Probes != nil {
213+ var vsockPath , vmNamespace , vmName , className string
214+ if gaEnabled {
215215 vsockPath = strings .TrimSuffix (sockPath , ".sock" ) + ".vsock"
216- probes = vm .Spec .Probes
216+ probes = vm .Spec .Probes // may be nil — runProbes handles nil probes
217217 vmNamespace = vm .Namespace
218218 vmName = vm .Name
219+ className = vm .Spec .ClassRef .Name // safe: checked non-nil at top of Start
219220 var probeCancel context.CancelFunc
220221 probeCtx , probeCancel = context .WithCancel (context .Background ())
221222 proc .probeCancel = probeCancel
@@ -228,7 +229,7 @@ func (d *FirecrackerDriver) Start(ctx context.Context, vm *impdevv1alpha1.ImpVM)
228229 d .mu .Unlock ()
229230
230231 if probeCtx != nil {
231- go d .runProbes (probeCtx , probes , vsockPath , vmNamespace , vmName )
232+ go d .runProbes (probeCtx , probes , vsockPath , vmNamespace , vmName , className )
232233 }
233234
234235 return int64 (pid ), nil
@@ -476,16 +477,28 @@ func (d *FirecrackerDriver) guestAgentPath() string {
476477}
477478
478479// runProbes dials the guest VSOCK and runs probe polling until ctx is cancelled.
479- // Called in a goroutine after the VM reaches Running. probes must not be nil.
480+ // Called in a goroutine after the VM reaches Running. probes may be nil — when
481+ // nil, runProbes keeps the VSOCK connection open for metrics until ctx is done.
480482// vmNamespace and vmName identify the ImpVM to patch conditions onto.
481- func (d * FirecrackerDriver ) runProbes (ctx context.Context , probes * impdevv1alpha1.ProbeSpec , vsockPath , vmNamespace , vmName string ) {
483+ func (d * FirecrackerDriver ) runProbes (ctx context.Context , probes * impdevv1alpha1.ProbeSpec , vsockPath , vmNamespace , vmName , className string ) {
482484 conn , err := agentvsock .Dial (ctx , vsockPath , 10000 )
483485 if err != nil {
484486 logf .Log .Error (err , "runProbes: VSOCK dial failed" , "vsock" , vsockPath )
485487 return
486488 }
487489 defer conn .Close () //nolint:errcheck
488490 client := pb .NewGuestAgentClient (conn )
491+
492+ // Always poll metrics when collector is set.
493+ if d .Metrics != nil {
494+ go d .pollMetrics (ctx , client , vmNamespace + "/" + vmName , className )
495+ }
496+
497+ if probes == nil {
498+ <- ctx .Done () // keep connection open for metrics until VM stops
499+ return
500+ }
501+
489502 runner := probe .NewRunner (client , probes , func (conds []metav1.Condition ) {
490503 if d .Client == nil {
491504 logf .Log .Error (nil , "probe patcher: client is nil, skipping condition patch" , "vm" , vmNamespace + "/" + vmName )
@@ -508,6 +521,28 @@ func (d *FirecrackerDriver) runProbes(ctx context.Context, probes *impdevv1alpha
508521 runner .Run (ctx )
509522}
510523
524+ // pollMetrics calls the guest Metrics RPC on every metricsInterval tick and
525+ // forwards results to the Metrics collector. Errors are logged at V(1) and
526+ // skipped — the guest may be unavailable during startup or shutdown.
527+ // Runs until ctx is cancelled.
528+ func (d * FirecrackerDriver ) pollMetrics (ctx context.Context , client pb.GuestAgentClient , vmKey , className string ) {
529+ ticker := time .NewTicker (metricsInterval )
530+ defer ticker .Stop ()
531+ for {
532+ select {
533+ case <- ctx .Done ():
534+ return
535+ case <- ticker .C :
536+ resp , err := client .Metrics (ctx , & pb.MetricsRequest {})
537+ if err != nil {
538+ logf .Log .V (1 ).Info ("pollMetrics: guest agent unavailable" , "vm" , vmKey , "err" , err )
539+ continue
540+ }
541+ d .Metrics .SetGuestMetrics (vmKey , d .NodeName , className , resp .CpuUsageRatio , resp .MemoryUsedBytes , resp .DiskUsedBytes )
542+ }
543+ }
544+ }
545+
511546// Snapshot pauses the VM, writes state+memory files to destDir, then resumes it.
512547// The VM is always resumed before returning, even on error (enforced via defer).
513548// destDir must exist; files are named vm.state and vm.mem.
0 commit comments