zhuqi-lucas commented on code in PR #890:
URL: https://github.com/apache/yunikorn-k8shim/pull/890#discussion_r1717840806


##########
pkg/cache/context.go:
##########
@@ -754,20 +755,73 @@ func (ctx *Context) bindPodVolumes(pod *v1.Pod) error {
                                // convert nil to empty array
                                volumes.DynamicProvisions = 
make([]*v1.PersistentVolumeClaim, 0)
                        }
-                       err = 
ctx.apiProvider.GetAPIs().VolumeBinder.BindPodVolumes(context.Background(), 
assumedPod, volumes)
-                       if err != nil {
-                               log.Log(log.ShimContext).Error("Failed to bind 
pod volumes",
-                                       zap.String("podName", assumedPod.Name),
-                                       zap.String("nodeName", 
assumedPod.Spec.NodeName),
-                                       zap.Int("dynamicProvisions", 
len(volumes.DynamicProvisions)),
-                                       zap.Int("staticBindings", 
len(volumes.StaticBindings)))
-                               return err
-                       }
+                       // Here we set the max retry count to 5, and use the 
default retry strategy
+                       // Details:
+                       // max sleep time is 1s, 2s, 4s, 8s and the last one 
will not sleep
+                       return ctx.bindPodVolumesWithRetry(assumedPod, volumes, 
5, &DefaultRetryStrategy{})
                }
        }
        return nil
 }
 
+type RetryStrategy interface {
+       // Sleep function used for retry delays
+       Sleep(duration time.Duration)
+}
+
+// DefaultRetryStrategy is a simple retry strategy that sleeps for a fixed 
duration
+// We can extend this to support more advanced retry strategies in the future 
and also for testing purposes
+type DefaultRetryStrategy struct{}
+
+func (r *DefaultRetryStrategy) Sleep(duration time.Duration) {
+       time.Sleep(duration)
+}
+
+func (ctx *Context) bindPodVolumesWithRetry(
+       assumedPod *v1.Pod,
+       volumes *volumebinding.PodVolumes,
+       maxRetries int,
+       retryStrategy RetryStrategy,
+) error {
+       const baseDelay = time.Second
+       const maxDelay = 8 * time.Second
+
+       var err error
+       for i := 0; i < maxRetries; i++ {
+               err = 
ctx.apiProvider.GetAPIs().VolumeBinder.BindPodVolumes(context.Background(), 
assumedPod, volumes)
+               if err == nil {
+                       return nil
+               }
+
+               log.Log(log.ShimContext).Error("Failed to bind pod volumes",
+                       zap.String("podName", assumedPod.Name),
+                       zap.String("nodeName", assumedPod.Spec.NodeName),
+                       zap.Int("dynamicProvisions", 
len(volumes.DynamicProvisions)),
+                       zap.Int("staticBindings", len(volumes.StaticBindings)),
+                       zap.Int("retryCount", i+1),
+                       zap.Error(err))
+
+               if i == maxRetries-1 {
+                       log.Log(log.ShimContext).Error("Failed to bind pod 
volumes after retry",
+                               zap.String("podName", assumedPod.Name),
+                               zap.String("nodeName", 
assumedPod.Spec.NodeName),
+                               zap.Int("dynamicProvisions", 
len(volumes.DynamicProvisions)),
+                               zap.Int("staticBindings", 
len(volumes.StaticBindings)),
+                               zap.Error(err))
+                       return err
+               }
+
+               delay := baseDelay * time.Duration(1<<uint(i))
+               if delay > maxDelay {
+                       delay = maxDelay
+               }
+
+               retryStrategy.Sleep(delay) // Use the retry strategy
+       }

Review Comment:
   I think exponential backoff on retries is enough, because each retry 
already has an internal timeout within Kubernetes itself.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to