66 "context"
77 "errors"
88 "net/http"
9+ "strconv"
910 "time"
1011
1112 "go.opentelemetry.io/otel"
@@ -169,12 +170,23 @@ func (r *ImpVMReconciler) handleScheduled(ctx context.Context, vm *impdevv1alpha
169170 // Register VTEP entry so the operator and other nodes know where this VM lives.
170171 if vm .Spec .NetworkRef != nil && state .IP != "" && r .NodeIP != "" {
171172 macAddr := network .MACAddr (vm .Namespace + "/" + vm .Name )
172- if err := r .registerVTEP (ctx , vm , state .IP , macAddr ); err != nil {
173- log .Error (err , "registerVTEP failed — FDB sync may be incomplete" )
174- } else {
175- // Sync local FDB now that this node has a VTEP entry.
176- if err := r .syncFDB (ctx , vm ); err != nil {
177- log .Error (err , "syncFDB after registerVTEP failed" )
173+ {
174+ vCtx , vSpan := otel .Tracer ("imp.agent" ).Start (ctx , "agent.impvm.vtep_register" ,
175+ trace .WithAttributes (
176+ attribute .String ("vm.name" , vm .Name ),
177+ attribute .String ("vm.ip" , state .IP ),
178+ ),
179+ )
180+ vtepErr := r .registerVTEP (vCtx , vm , state .IP , macAddr )
181+ tracing .RecordError (vSpan , vtepErr )
182+ vSpan .End ()
183+ if vtepErr != nil {
184+ log .Error (vtepErr , "registerVTEP failed — FDB sync may be incomplete" )
185+ } else {
186+ // Sync local FDB now that this node has a VTEP entry.
187+ if err := r .syncFDB (ctx , vm ); err != nil {
188+ log .Error (err , "syncFDB after registerVTEP failed" )
189+ }
178190 }
179191 }
180192 }
@@ -200,8 +212,18 @@ func (r *ImpVMReconciler) handleRunning(ctx context.Context, vm *impdevv1alpha1.
200212 // the Firecracker process is still alive (procs map may be empty after an
201213 // agent pod restart). If PID is alive, reattach and restore allocator state.
202214 if pid := vm .Status .RuntimePID ; pid > 0 && r .Driver .IsAlive (pid ) {
203- if err := r .Driver .Reattach (ctx , vm ); err != nil {
204- log .Error (err , "Reattach failed — treating VM as dead" )
215+ rCtx , rSpan := otel .Tracer ("imp.agent" ).Start (ctx , "agent.impvm.reattach" ,
216+ trace .WithAttributes (
217+ attribute .String ("vm.name" , vm .Name ),
218+ attribute .String ("vm.namespace" , vm .Namespace ),
219+ attribute .String ("vm.pid" , strconv .FormatInt (pid , 10 )),
220+ ),
221+ )
222+ reattachErr := r .Driver .Reattach (rCtx , vm )
223+ if reattachErr != nil {
224+ tracing .RecordError (rSpan , reattachErr )
225+ rSpan .End ()
226+ log .Error (reattachErr , "Reattach failed — treating VM as dead" )
205227 } else {
206228 // Restore in-memory IP allocation so Release works correctly later.
207229 if r .Alloc != nil && vm .Spec .NetworkRef != nil && vm .Status .IP != "" {
@@ -211,12 +233,35 @@ func (r *ImpVMReconciler) handleRunning(ctx context.Context, vm *impdevv1alpha1.
211233 // Re-publish VTEP entry and sync FDB in case they were lost.
212234 if vm .Spec .NetworkRef != nil && vm .Status .IP != "" && r .NodeIP != "" {
213235 macAddr := network .MACAddr (vm .Namespace + "/" + vm .Name )
214- if err := r .registerVTEP (ctx , vm , vm .Status .IP , macAddr ); err != nil {
215- log .Error (err , "registerVTEP after reattach failed" )
216- } else if err := r .syncFDB (ctx , vm ); err != nil {
217- log .Error (err , "syncFDB after reattach failed" )
236+ {
237+ vCtx , vSpan := otel .Tracer ("imp.agent" ).Start (rCtx , "agent.impvm.vtep_register" ,
238+ trace .WithAttributes (
239+ attribute .String ("vm.name" , vm .Name ),
240+ attribute .String ("vm.ip" , vm .Status .IP ),
241+ ),
242+ )
243+ vtepErr := r .registerVTEP (vCtx , vm , vm .Status .IP , macAddr )
244+ tracing .RecordError (vSpan , vtepErr )
245+ vSpan .End ()
246+ if vtepErr != nil {
247+ log .Error (vtepErr , "registerVTEP after reattach failed" )
248+ } else {
249+ fCtx , fSpan := otel .Tracer ("imp.agent" ).Start (rCtx , "agent.impvm.fdb_sync" ,
250+ trace .WithAttributes (
251+ attribute .String ("vm.name" , vm .Name ),
252+ attribute .String ("net.name" , vm .Spec .NetworkRef .Name ),
253+ ),
254+ )
255+ fdbErr := r .syncFDB (fCtx , vm )
256+ tracing .RecordError (fSpan , fdbErr )
257+ fSpan .End ()
258+ if fdbErr != nil {
259+ log .Error (fdbErr , "syncFDB after reattach failed" )
260+ }
261+ }
218262 }
219263 }
264+ rSpan .End ()
220265 log .Info ("VM reattached after agent restart" , "pid" , pid )
221266 return ctrl.Result {}, nil
222267 }
0 commit comments