This is an automated email from the ASF dual-hosted git repository.

aonishuk pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/branch-2.7 by this push:
     new 5e9fe7f  AMBARI-25455. Ambari-agent does not restart the agent when memory leak happens (aonishuk)
5e9fe7f is described below

commit 5e9fe7fe0550caf7d9ab3e3fd0de139376ffc1a8
Author: Andrew Onishuk <aonis...@hortonworks.com>
AuthorDate: Wed Jan 8 11:03:49 2020 +0200

    AMBARI-25455. Ambari-agent does not restart the agent when memory leak happens (aonishuk)
---
 .../src/main/python/ambari_agent/AmbariConfig.py        |  8 ++++++++
 .../src/main/python/ambari_agent/HeartbeatThread.py     | 17 +++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/ambari-agent/src/main/python/ambari_agent/AmbariConfig.py b/ambari-agent/src/main/python/ambari_agent/AmbariConfig.py
index fedd063..85bca49 100644
--- a/ambari-agent/src/main/python/ambari_agent/AmbariConfig.py
+++ b/ambari-agent/src/main/python/ambari_agent/AmbariConfig.py
@@ -191,6 +191,14 @@ class AmbariConfig:
     return int(self.get('heartbeat', 'state_interval_seconds', '60'))
 
   @property
+  def max_ram_soft(self):
+    return int(self.get('agent', 'memory_threshold_soft_mb', default='0'))
+
+  @property
+  def max_ram_hard(self):
+    return int(self.get('agent', 'memory_threshold_hard_mb', default='0'))
+
+  @property
   def log_max_symbols_size(self):
     return int(self.get('heartbeat', 'log_max_symbols_size', '900000'))
 
diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py
index 9210e79..acce6cf 100644
--- a/ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py
+++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py
@@ -38,10 +38,14 @@ from ambari_agent.listeners.HostLevelParamsEventListener import HostLevelParamsE
 from ambari_agent.listeners.AlertDefinitionsEventListener import AlertDefinitionsEventListener
 from ambari_agent import security
 from ambari_stomp.adapter.websocket import ConnectionIsAlreadyClosed
+from ambari_commons.os_utils import get_used_ram
 
 HEARTBEAT_INTERVAL = 10
 REQUEST_RESPONSE_TIMEOUT = 10
 
+AGENT_AUTO_RESTART_EXIT_CODE = 77
+AGENT_RAM_OVERUSE_MESSAGE = "Ambari-agent RAM usage {used_ram} MB went above {config_name}={max_ram} MB. Restarting ambari-agent to clean the RAM."
+
 logger = logging.getLogger(__name__)
 
 class HeartbeatThread(threading.Thread):
@@ -94,6 +98,8 @@ class HeartbeatThread(threading.Thread):
         if not self.initializer_module.is_registered:
           self.register()
 
+        self.check_for_memory_leak()
+
         heartbeat_body = self.get_heartbeat_body()
         logger.debug("Heartbeat body is {0}".format(heartbeat_body))
         response = self.blocking_request(heartbeat_body, Constants.HEARTBEAT_ENDPOINT)
@@ -276,3 +282,14 @@ class HeartbeatThread(threading.Thread):
       return self.server_responses_listener.responses.blocking_pop(correlation_id, timeout=timeout)
     except BlockingDictionary.DictionaryPopTimeout:
       raise Exception("{0} seconds timeout expired waiting for response from server at {1} to message from {2}".format(timeout, Constants.SERVER_RESPONSES_TOPIC, destination))
+
+  def check_for_memory_leak(self):
+    used_ram = get_used_ram()/1000
+    # dealing with a possible memory leaks
+    if self.config.max_ram_soft and used_ram >= self.config.max_ram_soft and not self.initializer_module.action_queue.tasks_in_progress_or_pending():
+      logger.error(AGENT_RAM_OVERUSE_MESSAGE.format(used_ram=used_ram, config_name="memory_threshold_soft_mb", max_ram=self.config.max_ram_soft))
+      Utils.restartAgent(self.stop_event)
+    if self.config.max_ram_hard and used_ram >= self.config.max_ram_hard:
+      logger.error(AGENT_RAM_OVERUSE_MESSAGE.format(used_ram=used_ram, config_name="memory_threshold_hard_mb", max_ram=self.config.max_ram_hard))
+      Utils.restartAgent(self.stop_event)
+  
\ No newline at end of file

Reply via email to