weizhouapache commented on code in PR #8402:
URL: https://github.com/apache/cloudstack/pull/8402#discussion_r1466010679


##########
plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterUpgradeWorker.java:
##########
@@ -77,39 +77,62 @@ private Pair<Boolean, String> runInstallScriptOnVM(final 
UserVm vm, final int in
     }
 
     private void upgradeKubernetesClusterNodes() {
-        Pair<Boolean, String> result = null;
         for (int i = 0; i < clusterVMs.size(); ++i) {
             UserVm vm = clusterVMs.get(i);
             String hostName = vm.getHostName();
             if (StringUtils.isNotEmpty(hostName)) {
                 hostName = hostName.toLowerCase();
             }
-            result = null;
+            Pair<Boolean, String> result;
             if (LOGGER.isInfoEnabled()) {
                 LOGGER.info(String.format("Upgrading node on VM %s in 
Kubernetes cluster %s with Kubernetes version(%s) ID: %s",
                         vm.getDisplayName(), kubernetesCluster.getName(), 
upgradeVersion.getSemanticVersion(), upgradeVersion.getUuid()));
             }
-            try {
-                result = SshHelper.sshExecute(publicIpAddress, sshPort, 
getControlNodeLoginUser(), sshKeyFile, null,
-                        String.format("sudo /opt/bin/kubectl drain %s 
--ignore-daemonsets --delete-emptydir-data", hostName),
-                        10000, 10000, 60000);
-            } catch (Exception e) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, 
String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain 
Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), 
kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
-            }
-            if (!result.first()) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, 
String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain 
Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), 
kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+            for (int retry = 
KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; 
retry--) {
+                String errorMessage = String.format("Failed to upgrade 
Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", 
kubernetesCluster.getName(), vm.getDisplayName());
+                try {
+                    result = SshHelper.sshExecute(publicIpAddress, sshPort, 
getControlNodeLoginUser(), sshKeyFile, null,
+                            String.format("sudo /opt/bin/kubectl drain %s 
--ignore-daemonsets --delete-emptydir-data", hostName),
+                            10000, 10000, 60000);
+                    if (result.first()) {
+                        break;
+                    }
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s, retries left: %s", 
errorMessage, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, 
errorMessage, kubernetesCluster, clusterVMs, 
KubernetesCluster.Event.OperationFailed, null);
+                    }
+                } catch (Exception e) {
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s due to %s, retries 
left: %s", errorMessage, e, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, 
errorMessage, kubernetesCluster, clusterVMs, 
KubernetesCluster.Event.OperationFailed, e);
+                    }
+                }
             }
             if (System.currentTimeMillis() > upgradeTimeoutTime) {
                 logTransitStateDetachIsoAndThrow(Level.ERROR, 
String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed 
out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, 
KubernetesCluster.Event.OperationFailed, null);
             }
-            try {
-                deployProvider();
-                result = runInstallScriptOnVM(vm, i);
-            } catch (Exception e) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, 
String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade 
Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), 
kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
-            }
-            if (!result.first()) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, 
String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade 
Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), 
kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+            for (int retry = 
KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; 
retry--) {
+                String errorMessage = String.format("Failed to upgrade 
Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", 
kubernetesCluster.getName(), vm.getDisplayName());

Review Comment:
   Thanks @JoaoJandre, applied.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to