This is an automated email from the ASF dual-hosted git repository.

dahn pushed a commit to branch 4.11
in repository https://gitbox.apache.org/repos/asf/cloudstack.git


The following commit(s) were added to refs/heads/4.11 by this push:
     new 023dcec  CLOUDSTACK-10310 Fix KVM reboot on storage issue (#2722)
023dcec is described below

commit 023dcec5ef2e38091c0aacda1e0fae67fd6c4553
Author: Slair1 <[email protected]>
AuthorDate: Mon Aug 20 03:28:03 2018 -0500

    CLOUDSTACK-10310 Fix KVM reboot on storage issue (#2722)
---
 .../src/com/cloud/hypervisor/kvm/resource/KVMHABase.java   |  3 ++-
 .../com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java    | 14 +++++++++++---
 scripts/vm/hypervisor/kvm/kvmheartbeat.sh                  |  4 ++--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
index be5ab39..f180848 100644
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
@@ -34,7 +34,8 @@ public class KVMHABase {
     protected static String s_heartBeatPath;
     protected long _heartBeatUpdateTimeout = 60000;
     protected long _heartBeatUpdateFreq = 60000;
-    protected long _heartBeatUpdateMaxRetry = 3;
+    protected long _heartBeatUpdateMaxTries = 5;
+    protected long _heartBeatUpdateRetrySleep = 15000;
 
     public static enum PoolType {
         PrimaryStorage, SecondaryStorage
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
index 0cebb4c..8a11b7f 100644
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
@@ -119,7 +119,8 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
                     }
 
                     String result = null;
-                    for (int i = 0; i < 5; i++) {
+                    // Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
+                    for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
                         Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
@@ -127,14 +128,21 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
                         cmd.add("-h", _hostIP);
                         result = cmd.execute();
                         if (result != null) {
-                            s_logger.warn("write heartbeat failed: " + result + ", retry: " + i);
+                            s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
+                            try {
+                                Thread.sleep(_heartBeatUpdateRetrySleep);
+                            } catch (InterruptedException e) {
+                                s_logger.debug("[ignored] interupted between heartbeat retries.");
+                            }
                         } else {
                             break;
                         }
                     }
 
                     if (result != null) {
-                        s_logger.warn("write heartbeat failed: " + result + "; reboot the host");
+                        // Stop cloudstack-agent if can't write to heartbeat file.
+                        // This will raise an alert on the mgmt server
+                        s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent");
                         Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
diff --git a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
index 7c8ee67..30ca72a 100755
--- a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
+++ b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
@@ -155,10 +155,10 @@ then
   exit 0
 elif [ "$cflag" == "1" ]
 then
-  /usr/bin/logger -t heartbeat "kvmheartbeat.sh rebooted system because it was unable to write the heartbeat to the storage."
+  /usr/bin/logger -t heartbeat "kvmheartbeat.sh stopped cloudstack-agent because it was unable to write the heartbeat to the storage."
   sync &
   sleep 5
-  echo b > /proc/sysrq-trigger
+  service cloudstack-agent stop
   exit $?
 else
   write_hbLog 

Reply via email to