Jiří Moskovčák has uploaded a new change for review.

Change subject: don't die when broker disconnects
......................................................................

don't die when broker disconnects

Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1093646
Signed-off-by: Jiri Moskovcak <[email protected]>
---
M ovirt_hosted_engine_ha/agent/agent.py
M ovirt_hosted_engine_ha/agent/constants.py.in
2 files changed, 27 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/41/30641/1

diff --git a/ovirt_hosted_engine_ha/agent/agent.py b/ovirt_hosted_engine_ha/agent/agent.py
index 0693814..142fe0f 100644
--- a/ovirt_hosted_engine_ha/agent/agent.py
+++ b/ovirt_hosted_engine_ha/agent/agent.py
@@ -29,8 +29,10 @@
 import pwd
 import signal
 import sys
+import time
 
 from ..lib import util
+from ..lib import exceptions as ex
 from . import constants
 from . import hosted_engine
 
@@ -151,4 +153,38 @@
 
     def _run_agent(self):
+        """
+        Run the hosted engine monitor in the main thread, making at most
+        constants.AGENT_START_RETRIES attempts when it fails.
+        """
         # Only one service type for now, run it in the main thread
-        hosted_engine.HostedEngine(self.shutdown_requested).start_monitoring()
+
+        retries = constants.AGENT_START_RETRIES
+        for attempt in range(1, retries + 1):
+            try:
+                hosted_engine.HostedEngine(self.shutdown_requested)\
+                    .start_monitoring()
+                # if we're here, the agent stopped gracefully,
+                # so we don't want to restart it
+                break
+            except ex.DisconnectionError as e:
+                self._log.error("Disconnected from broker '{0}'"
+                                " - reinitializing".format(str(e)))
+            except ex.BrokerInitializationError as e:
+                self._log.error("Can't initialize brokerlink '{0}'"
+                                " - reinitializing".format(str(e)))
+            except Exception as e:
+                # log the cause and the traceback instead of an empty
+                # message, otherwise the failure can't be diagnosed
+                self._log.error("Unexpected error '{0}'"
+                                " - reinitializing".format(str(e)),
+                                exc_info=True)
+
+            # after the last failed attempt there is no restart to wait for
+            if attempt < retries:
+                time.sleep(constants.AGENT_START_RETRY_WAIT)
+                self._log.warn("Restarting agent, attempt '{0}'"
+                               .format(attempt))
+        else:
+            # the loop ran out of attempts without a graceful stop
+            self._log.error("Too many errors occurred, giving up. "
+                            "Please review the log and consider filing a bug.")
diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in
index 083e046..20902f2 100644
--- a/ovirt_hosted_engine_ha/agent/constants.py.in
+++ b/ovirt_hosted_engine_ha/agent/constants.py.in
@@ -56,6 +56,8 @@
 MAX_DOMAIN_MONITOR_WAIT_SECS = 240
 METADATA_LOG_PERIOD_SECS = 600
 ENGINE_STARTING_TIMEOUT = 600
+AGENT_START_RETRIES = 10
+AGENT_START_RETRY_WAIT = 5
 
 BASE_SCORE = 2400
 GATEWAY_SCORE_PENALTY = 1600


-- 
To view, visit http://gerrit.ovirt.org/30641
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: ovirt-hosted-engine-ha-1.2
Gerrit-Owner: Jiří Moskovčák <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to