@@ -199,15 +199,46 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs
199199 return nil , status .Errorf (codes .NotFound , "ControllerPublishVolume: GetVolume returned nil volume" )
200200 }
201201 klog .Infof ("ControllerPublishVolume: GetVolume succeeded -\n Status: %s\n Name: %s\n ID: %d\n Size:%d" , * getVolume .Status , * getVolume .Name , * getVolume .Id , * getVolume .Size )
202- if * getVolume .Status == "in-use" { //Volume is already attached
202+ if * getVolume .Status == "in-use" {
203203 klog .Infof ("ControllerPublishVolume: Volume %s is already in use" , * getVolume .Name )
204204 if len (* getVolume .Attachments ) > 0 {
205- klog .Infof ("ControllerPublishVolume: Volume %s is already attached to node %s" , * getVolume .Name , * (* getVolume .Attachments )[0 ].InstanceId )
206- return & csi.ControllerPublishVolumeResponse {
207- PublishContext : map [string ]string {
208- volNameKeyFromControllerPublishVolume : * (* getVolume .Attachments )[0 ].Device ,
209- },
210- }, nil
205+ attachedInstanceIdStr := * (* getVolume .Attachments )[0 ].InstanceId
206+ attachedInstanceId , err := strconv .Atoi (attachedInstanceIdStr )
207+ if err != nil {
208+ klog .Errorf ("ControllerPublishVolume: Failed to convert attached instance ID to int: %v" , err )
209+ return nil , status .Errorf (codes .Internal , "Failed to convert attached instance ID to int: %v" , err )
210+ }
211+ klog .Infof ("ControllerPublishVolume: Volume %s is already attached to node %d" , * getVolume .Name , attachedInstanceId )
212+ if attachedInstanceId == vmId {
213+ return & csi.ControllerPublishVolumeResponse {
214+ PublishContext : map [string ]string {
215+ volNameKeyFromControllerPublishVolume : * (* getVolume .Attachments )[0 ].Device ,
216+ },
217+ }, nil
218+ }
219+ klog .Infof ("ControllerPublishVolume: Volume attached to wrong node (attached: %d, requested: %d), detaching" , attachedInstanceId , vmId )
220+ _ , err = cloud .DetachVolumeFromNode (ctx , attachedInstanceId , volumeIDInt )
221+ if err != nil {
222+ klog .Errorf ("ControllerPublishVolume: Failed to detach from wrong node: %v" , err )
223+ return nil , status .Errorf (codes .Internal , "ControllerPublishVolume: Failed to detach from wrong node: %v" , err )
224+ }
225+ maxAttempts := 30
226+ for i := 0 ; i < maxAttempts ; i ++ {
227+ v , err := cloud .GetVolume (ctx , volumeIDInt )
228+ if err != nil || v == nil {
229+ time .Sleep (2 * time .Second )
230+ continue
231+ }
232+ if * v .Status == "available" {
233+ klog .Infof ("ControllerPublishVolume: Volume detached from wrong node, now available" )
234+ getVolume = v
235+ break
236+ }
237+ time .Sleep (2 * time .Second )
238+ }
239+ if * getVolume .Status != "available" {
240+ return nil , status .Error (codes .DeadlineExceeded , "ControllerPublishVolume: Timeout waiting for detachment from wrong node" )
241+ }
211242 }
212243 }
213244 if * getVolume .Status == "available" {
@@ -217,8 +248,42 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs
217248 return nil , status .Errorf (codes .Internal , "ControllerPublishVolume: Failed to AttachVolumeToNode: %v" , err )
218249 }
219250 klog .Infof ("ControllerPublishVolume: AttachVolumeToNode succeeded -\n ID: %v\n Instance id ID: %v\n Status: %v\n Volume ID: %v" , * attachVolume .Id , * attachVolume .InstanceId , * attachVolume .Status , * attachVolume .VolumeId )
251+
252+ maxAttempts := 30
253+ klog .Infof ("ControllerPublishVolume: Polling for attachment to complete with max attempts: %d" , maxAttempts )
254+ for i := 0 ; i < maxAttempts ; i ++ {
255+ klog .Infof ("ControllerPublishVolume: Polling for attachment %d/%d" , i + 1 , maxAttempts )
256+ v , err := cloud .GetVolume (ctx , volumeIDInt )
257+ if err != nil {
258+ klog .Errorf ("ControllerPublishVolume: Failed to GetVolume while polling: %v" , err )
259+ time .Sleep (2 * time .Second )
260+ continue
261+ }
262+ if v == nil {
263+ klog .Warningf ("ControllerPublishVolume: GetVolume attempt %d returned nil volume" , i + 1 )
264+ time .Sleep (2 * time .Second )
265+ continue
266+ }
267+ if * v .Status == "in-use" && len (* v .Attachments ) > 0 {
268+ attachment := (* v .Attachments )[0 ]
269+ attachedId , err := strconv .Atoi (* attachment .InstanceId )
270+ if err == nil && attachedId == vmId && attachment .Device != nil {
271+ klog .Infof ("ControllerPublishVolume: Volume attached to correct node with device %s" , * attachment .Device )
272+ return & csi.ControllerPublishVolumeResponse {
273+ PublishContext : map [string ]string {
274+ volNameKeyFromControllerPublishVolume : * attachment .Device ,
275+ },
276+ }, nil
277+ }
278+ }
279+ time .Sleep (2 * time .Second )
280+ }
281+ return nil , status .Error (codes .DeadlineExceeded , "ControllerPublishVolume: Timeout waiting for attachment to complete" )
220282 }
221- return & csi.ControllerPublishVolumeResponse {}, nil
283+
284+ klog .Errorf ("ControllerPublishVolume: Volume %s in unexpected state: %s" , * getVolume .Name , * getVolume .Status )
285+ return nil , status .Errorf (codes .FailedPrecondition ,
286+ "Volume in unexpected state: %s (expected 'available' or 'in-use')" , * getVolume .Status )
222287}
223288
224289func (cs * controllerServer ) ControllerUnpublishVolume (ctx context.Context , req * csi.ControllerUnpublishVolumeRequest ) (* csi.ControllerUnpublishVolumeResponse , error ) {
@@ -254,6 +319,29 @@ func (cs *controllerServer) ControllerUnpublishVolume(ctx context.Context, req *
254319 return nil , status .Errorf (codes .Internal , "ControllerUnpublishVolume: Failed to DetachVolumeFromNode: %v" , err )
255320 }
256321 klog .Infof ("ControllerUnpublishVolume: DetachVolumeFromNode succeeded -\n Message: %v\n Status: %v\n Volume Attachments: %v" , * detachVolume .Message , * detachVolume .Status , * detachVolume .VolumeAttachments )
322+
323+ maxAttempts := 30
324+ klog .Infof ("ControllerUnpublishVolume: Polling for volume to become available with max attempts: %d" , maxAttempts )
325+ for i := 0 ; i < maxAttempts ; i ++ {
326+ klog .Infof ("ControllerUnpublishVolume: Polling for volume to become available %d/%d" , i + 1 , maxAttempts )
327+ v , err := cloud .GetVolume (ctx , volumeIDInt )
328+ if err != nil {
329+ klog .Errorf ("ControllerUnpublishVolume: Failed to GetVolume while polling: %v" , err )
330+ time .Sleep (2 * time .Second )
331+ continue
332+ }
333+ if v == nil {
334+ klog .Warningf ("ControllerUnpublishVolume: GetVolume attempt %d returned nil volume" , i + 1 )
335+ time .Sleep (2 * time .Second )
336+ continue
337+ }
338+ if * v .Status == "available" {
339+ klog .Infof ("ControllerUnpublishVolume: Volume is now available after detachment" )
340+ return & csi.ControllerUnpublishVolumeResponse {}, nil
341+ }
342+ time .Sleep (2 * time .Second )
343+ }
344+ return nil , status .Error (codes .DeadlineExceeded , "ControllerUnpublishVolume: Timeout waiting for volume to detach" )
257345 }
258346 return & csi.ControllerUnpublishVolumeResponse {}, nil
259347}
0 commit comments