DaanHoogland closed pull request #2722: CLOUDSTACK-10310 Fix KVM reboot on 
storage issue
URL: https://github.com/apache/cloudstack/pull/2722
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (GitHub would otherwise not show it once the pull request is merged):

diff --git 
a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java 
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
index be5ab396d19..f180848a8d5 100644
--- 
a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
+++ 
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java
@@ -34,7 +34,8 @@
     protected static String s_heartBeatPath;
     protected long _heartBeatUpdateTimeout = 60000;
     protected long _heartBeatUpdateFreq = 60000;
-    protected long _heartBeatUpdateMaxRetry = 3;
+    protected long _heartBeatUpdateMaxTries = 5;
+    protected long _heartBeatUpdateRetrySleep = 15000;
 
     public static enum PoolType {
         PrimaryStorage, SecondaryStorage
diff --git 
a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
 
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
index 0cebb4c9b00..8a11b7fc962 100644
--- 
a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
+++ 
b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
@@ -119,7 +119,8 @@ protected void runInContext() {
                     }
 
                     String result = null;
-                    for (int i = 0; i < 5; i++) {
+                    // Try multiple times, but sleep in between tries to 
ensure it isn't a short lived transient error
+                    for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
                         Script cmd = new Script(s_heartBeatPath, 
_heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
@@ -127,14 +128,21 @@ protected void runInContext() {
                         cmd.add("-h", _hostIP);
                         result = cmd.execute();
                         if (result != null) {
-                            s_logger.warn("write heartbeat failed: " + result 
+ ", retry: " + i);
+                            s_logger.warn("write heartbeat failed: " + result 
+ ", try: " + i + " of " + _heartBeatUpdateMaxTries);
+                            try {
+                                Thread.sleep(_heartBeatUpdateRetrySleep);
+                            } catch (InterruptedException e) {
+                                s_logger.debug("[ignored] interupted between 
heartbeat retries.");
+                            }
                         } else {
                             break;
                         }
                     }
 
                     if (result != null) {
-                        s_logger.warn("write heartbeat failed: " + result + "; 
reboot the host");
+                        // Stop cloudstack-agent if can't write to heartbeat 
file.
+                        // This will raise an alert on the mgmt server
+                        s_logger.warn("write heartbeat failed: " + result + "; 
stopping cloudstack-agent");
                         Script cmd = new Script(s_heartBeatPath, 
_heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
diff --git a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh 
b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
index 7c8ee67f30c..30ca72a2aa9 100755
--- a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
+++ b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
@@ -155,10 +155,10 @@ then
   exit 0
 elif [ "$cflag" == "1" ]
 then
-  /usr/bin/logger -t heartbeat "kvmheartbeat.sh rebooted system because it was 
unable to write the heartbeat to the storage."
+  /usr/bin/logger -t heartbeat "kvmheartbeat.sh stopped cloudstack-agent 
because it was unable to write the heartbeat to the storage."
   sync &
   sleep 5
-  echo b > /proc/sysrq-trigger
+  service cloudstack-agent stop
   exit $?
 else
   write_hbLog 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to