Jiří Moskovčák has uploaded a new change for review.

Change subject: don't die when broker disconnects
......................................................................

don't die when broker disconnects

Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1093646
Signed-off-by: Jiri Moskovcak <[email protected]>
---
M ovirt_hosted_engine_ha/agent/agent.py
M ovirt_hosted_engine_ha/agent/constants.py.in
M ovirt_hosted_engine_ha/agent/hosted_engine.py
M ovirt_hosted_engine_ha/lib/brokerlink.py
4 files changed, 31 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha 
refs/changes/49/33849/1

diff --git a/ovirt_hosted_engine_ha/agent/agent.py 
b/ovirt_hosted_engine_ha/agent/agent.py
index 0693814..965fd89 100644
--- a/ovirt_hosted_engine_ha/agent/agent.py
+++ b/ovirt_hosted_engine_ha/agent/agent.py
@@ -29,8 +29,10 @@
 import pwd
 import signal
 import sys
+import time
 
 from ..lib import util
+from ..lib import exceptions as ex
 from . import constants
 from . import hosted_engine
 
@@ -151,4 +153,25 @@
 
     def _run_agent(self):
         # Only one service type for now, run it in the main thread
-        hosted_engine.HostedEngine(self.shutdown_requested).start_monitoring()
+
+        for attempt in range(constants.AGENT_START_RETRIES):
+            try:
+                hosted_engine.HostedEngine(self.shutdown_requested)\
+                    .start_monitoring()
+                # if we're here, the agent stopped gracefully,
+                # so we don't want to restart it
+                break
+            except ex.DisconnectionError as e:
+                self._log.error("Disconnected from broker '{0}'"
+                                " - reinitializing".format(str(e)))
+            except ex.BrokerInitializationError as e:
+                self._log.error("Can't initialize brokerlink '{0}'"
+                                " - reinitializing".format(str(e)))
+            except Exception as e:
+                self._log.error("Error '{0}' - reinitializing".format(str(e)))
+
+            time.sleep(constants.AGENT_START_RETRY_WAIT)
+            self._log.warn("Restarting agent, attempt '{0}'".format(attempt))
+        else:
+            self._log.error("Too many errors occurred, giving up. "
+                            "Please review the log and consider filing a bug.")
diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in 
b/ovirt_hosted_engine_ha/agent/constants.py.in
index 14da964..39ad0c3 100644
--- a/ovirt_hosted_engine_ha/agent/constants.py.in
+++ b/ovirt_hosted_engine_ha/agent/constants.py.in
@@ -53,6 +53,8 @@
 MAX_DOMAIN_MONITOR_WAIT_SECS = 240
 METADATA_LOG_PERIOD_SECS = 600
 ENGINE_STARTING_TIMEOUT = 600
+AGENT_START_RETRIES = 10
+AGENT_START_RETRY_WAIT = 5
 
 BASE_SCORE = 2400
 GATEWAY_SCORE_PENALTY = 1600
diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py 
b/ovirt_hosted_engine_ha/agent/hosted_engine.py
index 5b79950..67488bc 100644
--- a/ovirt_hosted_engine_ha/agent/hosted_engine.py
+++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py
@@ -368,7 +368,8 @@
         if not self._broker:
             self._broker = brokerlink.BrokerLink()
         try:
-            self._broker.connect(constants.BROKER_CONNECTION_RETRIES)
+            self._broker.connect(constants.BROKER_CONNECTION_RETRIES,
+                                 constants.BROKER_CONNECTION_WAIT)
         except Exception as e:
             self._log.error("Failed to connect to ha-broker: %s", str(e))
             raise
diff --git a/ovirt_hosted_engine_ha/lib/brokerlink.py 
b/ovirt_hosted_engine_ha/lib/brokerlink.py
index 5394493..58b0baa 100644
--- a/ovirt_hosted_engine_ha/lib/brokerlink.py
+++ b/ovirt_hosted_engine_ha/lib/brokerlink.py
@@ -38,7 +38,7 @@
         self._log = logging.getLogger("%s.BrokerLink" % __name__)
         self._socket = None
 
-    def connect(self, retries=0):
+    def connect(self, retries=5, wait=5):
         """
         Connect to the HA Broker.  Upon failure, reconnection attempts will
         be made approximately once per second until the specified number of
@@ -61,13 +61,14 @@
 
         attempt = 0
         while True:
+            attempt += 1
             try:
                 self._socket.connect(constants.BROKER_SOCKET_FILE)
             except (socket.error, socket.timeout) as e:
                 if attempt < retries:
                     self._log.info("Failed to connect to broker: %s", str(e))
                     self._log.info("Retrying broker connection...")
-                    time.sleep(1)
+                    time.sleep(wait)
                     continue
                 else:
                     self._log.error("Failed to connect to broker: %s", str(e))


-- 
To view, visit http://gerrit.ovirt.org/33849
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: ovirt-hosted-engine-ha-1.1
Gerrit-Owner: Jiří Moskovčák <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to