Repository: ambari Updated Branches: refs/heads/branch-2.5 bc83d0f1d -> c1f49b244
AMBARI-18825. Make agent to server connect delay configurable (magyari_sandor) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c1f49b24 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c1f49b24 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c1f49b24 Branch: refs/heads/branch-2.5 Commit: c1f49b24484abff1128793c8673e08330068815c Parents: bc83d0f Author: Sandor Magyari <[email protected]> Authored: Thu Nov 10 12:28:12 2016 +0100 Committer: Sandor Magyari <[email protected]> Committed: Tue Nov 15 13:50:37 2016 +0100 ---------------------------------------------------------------------- ambari-agent/conf/unix/ambari-agent.ini | 3 +++ ambari-agent/conf/windows/ambari-agent.ini | 2 ++ ambari-agent/src/main/python/ambari_agent/Controller.py | 8 ++++---- ambari-agent/src/main/python/ambari_agent/NetUtil.py | 10 ++++++---- 4 files changed, 15 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/c1f49b24/ambari-agent/conf/unix/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 0f89106..43740ad 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -16,6 +16,9 @@ hostname=localhost url_port=8440 secured_url_port=8441 +connect_retry_delay=10 +max_reconnect_retry_delay=30 + [agent] logdir=/var/log/ambari-agent http://git-wip-us.apache.org/repos/asf/ambari/blob/c1f49b24/ambari-agent/conf/windows/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/windows/ambari-agent.ini b/ambari-agent/conf/windows/ambari-agent.ini index 3612414..42d65c1 100644 --- a/ambari-agent/conf/windows/ambari-agent.ini +++ b/ambari-agent/conf/windows/ambari-agent.ini @@ -16,6 +16,8 @@ hostname=localhost url_port=8440 secured_url_port=8441 +connect_retry_delay=10 +max_reconnect_retry_delay=30 [agent] prefix=data http://git-wip-us.apache.org/repos/asf/ambari/blob/c1f49b24/ambari-agent/src/main/python/ambari_agent/Controller.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py b/ambari-agent/src/main/python/ambari_agent/Controller.py index 2a4d384..090938a 100644 --- a/ambari-agent/src/main/python/ambari_agent/Controller.py +++ b/ambari-agent/src/main/python/ambari_agent/Controller.py @@ -58,7 +58,7 @@ AGENT_RAM_OVERUSE_MESSAGE = "Ambari-agent RAM usage {used_ram} MB went above {co class Controller(threading.Thread): - def __init__(self, config, server_hostname, heartbeat_stop_callback = None, range=30): + def __init__(self, config, server_hostname, heartbeat_stop_callback = None): threading.Thread.__init__(self) logger.debug('Initializing Controller RPC thread.') if heartbeat_stop_callback is None: @@ -81,7 +81,7 @@ class Controller(threading.Thread): self.repeatRegistration = False self.isRegistered = False self.cachedconnect = None - self.range = range + self.max_reconnect_retry_delay = int(config.get('server','max_reconnect_retry_delay', default=30)) self.hasMappedComponents = True # Event is used for synchronizing heartbeat iterations (to make possible # manual wait() interruption between heartbeats ) @@ -208,7 +208,7 @@ class Controller(threading.Thread): return except Exception, ex: # try a reconnect only after a certain amount of random time - delay = randint(0, self.range) + delay = randint(0, self.max_reconnect_retry_delay) logger.error("Unable to connect to: " + self.registerUrl, exc_info=True) logger.error("Error:" + str(ex)) logger.warn(""" Sleeping for {0} seconds and then trying again """.format(delay,)) @@ -430,7 +430,7 @@ class Controller(threading.Thread): retry = True #randomize the heartbeat - delay = randint(0, self.range) + delay = randint(0, self.max_reconnect_retry_delay) time.sleep(delay) # Sleep for some time http://git-wip-us.apache.org/repos/asf/ambari/blob/c1f49b24/ambari-agent/src/main/python/ambari_agent/NetUtil.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/NetUtil.py b/ambari-agent/src/main/python/ambari_agent/NetUtil.py index 2e9381b..c3cd621 100644 --- a/ambari-agent/src/main/python/ambari_agent/NetUtil.py +++ b/ambari-agent/src/main/python/ambari_agent/NetUtil.py @@ -30,7 +30,7 @@ logger = logging.getLogger(__name__) class NetUtil: - CONNECT_SERVER_RETRY_INTERVAL_SEC = 10 + DEFAULT_CONNECT_RETRY_DELAY_SEC = 10 HEARTBEAT_IDLE_INTERVAL_DEFAULT_MIN_SEC = 1 HEARTBEAT_IDLE_INTERVAL_DEFAULT_MAX_SEC = 10 MINIMUM_INTERVAL_BETWEEN_HEARTBEATS = 0.1 @@ -52,6 +52,8 @@ class NetUtil: stop_callback = HeartbeatStopHandlers() self.stopCallback = stop_callback self.config = config + self.connect_retry_delay = int(config.get('server','connect_retry_delay', + default=self.DEFAULT_CONNECT_RETRY_DELAY_SEC)) def checkURL(self, url): """Try to connect to a given url. Result is True if url returns HTTP code 200, in any other case @@ -94,7 +96,7 @@ class NetUtil: return False, responseBody def try_to_connect(self, server_url, max_retries, logger=None): - """Try to connect to a given url, sleeping for CONNECT_SERVER_RETRY_INTERVAL_SEC seconds + """Try to connect to a given url, sleeping for connect_retry_delay seconds between retries. No more than max_retries is performed. If max_retries is -1, connection attempts will be repeated forever until server is not reachable @@ -113,10 +115,10 @@ class NetUtil: else: if logger is not None: logger.warn('Server at {0} is not reachable, sleeping for {1} seconds...'.format(server_url, - self.CONNECT_SERVER_RETRY_INTERVAL_SEC)) + self.connect_retry_delay)) retries += 1 - if 0 == self.stopCallback.wait(self.CONNECT_SERVER_RETRY_INTERVAL_SEC): + if 0 == self.stopCallback.wait(self.connect_retry_delay): #stop waiting if logger is not None: logger.info("Stop event received")
