Jiří Moskovčák has uploaded a new change for review.

Change subject: don't die when broker disconnects
......................................................................
don't die when broker disconnects Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1093646 Signed-off-by: Jiri Moskovcak <[email protected]> --- M ovirt_hosted_engine_ha/agent/agent.py M ovirt_hosted_engine_ha/agent/constants.py.in M ovirt_hosted_engine_ha/agent/hosted_engine.py M ovirt_hosted_engine_ha/lib/brokerlink.py 4 files changed, 31 insertions(+), 4 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/49/33849/1 diff --git a/ovirt_hosted_engine_ha/agent/agent.py b/ovirt_hosted_engine_ha/agent/agent.py index 0693814..965fd89 100644 --- a/ovirt_hosted_engine_ha/agent/agent.py +++ b/ovirt_hosted_engine_ha/agent/agent.py @@ -29,8 +29,10 @@ import pwd import signal import sys +import time from ..lib import util +from ..lib import exceptions as ex from . import constants from . import hosted_engine @@ -151,4 +153,25 @@ def _run_agent(self): # Only one service type for now, run it in the main thread - hosted_engine.HostedEngine(self.shutdown_requested).start_monitoring() + + for attempt in range(constants.AGENT_START_RETRIES): + try: + hosted_engine.HostedEngine(self.shutdown_requested)\ + .start_monitoring() + # if we're here, the agent stopped gracefully, + # so we don't want to restart it + break + except ex.DisconnectionError as e: + self._log.error("Disconnected from broker '{0}'" + " - reinitializing".format(str(e))) + except ex.BrokerInitializationError as e: + self._log.error("Can't initialize brokerlink '{0}'" + " - reinitializing".format(str(e))) + except Exception as e: + self._log.error("") + + time.sleep(constants.AGENT_START_RETRY_WAIT) + self._log.warn("Restarting agent, attempt '{0}'".format(attempt)) + else: + self._log.error("Too many errors occurred, giving up. 
" + "Please review the log and consider filing a bug.") diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in index 14da964..39ad0c3 100644 --- a/ovirt_hosted_engine_ha/agent/constants.py.in +++ b/ovirt_hosted_engine_ha/agent/constants.py.in @@ -53,6 +53,8 @@ MAX_DOMAIN_MONITOR_WAIT_SECS = 240 METADATA_LOG_PERIOD_SECS = 600 ENGINE_STARTING_TIMEOUT = 600 +AGENT_START_RETRIES = 10 +AGENT_START_RETRY_WAIT = 5 BASE_SCORE = 2400 GATEWAY_SCORE_PENALTY = 1600 diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py index 5b79950..67488bc 100644 --- a/ovirt_hosted_engine_ha/agent/hosted_engine.py +++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py @@ -368,7 +368,8 @@ if not self._broker: self._broker = brokerlink.BrokerLink() try: - self._broker.connect(constants.BROKER_CONNECTION_RETRIES) + self._broker.connect(constants.BROKER_CONNECTION_RETRIES, + constants.BROKER_CONNECTION_WAIT) except Exception as e: self._log.error("Failed to connect to ha-broker: %s", str(e)) raise diff --git a/ovirt_hosted_engine_ha/lib/brokerlink.py b/ovirt_hosted_engine_ha/lib/brokerlink.py index 5394493..58b0baa 100644 --- a/ovirt_hosted_engine_ha/lib/brokerlink.py +++ b/ovirt_hosted_engine_ha/lib/brokerlink.py @@ -38,7 +38,7 @@ self._log = logging.getLogger("%s.BrokerLink" % __name__) self._socket = None - def connect(self, retries=0): + def connect(self, retries=5, wait=5): """ Connect to the HA Broker. 
Upon failure, reconnection attempts will be made approximately once per second until the specified number of @@ -61,13 +61,14 @@ attempt = 0 while True: + attempt -= 1 try: self._socket.connect(constants.BROKER_SOCKET_FILE) except (socket.error, socket.timeout) as e: if attempt < retries: self._log.info("Failed to connect to broker: %s", str(e)) self._log.info("Retrying broker connection...") - time.sleep(1) + time.sleep(wait) continue else: self._log.error("Failed to connect to broker: %s", str(e)) -- To view, visit http://gerrit.ovirt.org/33849 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: ovirt-hosted-engine-ha-1.1 Gerrit-Owner: Jiří Moskovčák <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
