[
https://issues.apache.org/jira/browse/AMBARI-25087?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16733398#comment-16733398
]
amarnath reddy pappu commented on AMBARI-25087:
-----------------------------------------------
Following change has fixed the issue - at least was unable to reproduce it
after the change
{noformat}
diff --git a/ambari-agent/src/main/python/ambari_agent/Heartbeat.py
b/ambari-agent/src/main/python/ambari_agent/Heartbeat.py
index d7c0325..11ad652 100644
--- a/ambari-agent/src/main/python/ambari_agent/Heartbeat.py
+++ b/ambari-agent/src/main/python/ambari_agent/Heartbeat.py
@@ -75,12 +75,12 @@ class Heartbeat:
if int(id) == 0:
componentsMapped = False
- logger.debug("Building Heartbeat: {responseId = %s, timestamp = %s, "
- "commandsInProgress = %s, componentsMapped = %s,"
- "recoveryTimestamp = %s}",
- str(id), str(timestamp), repr(commandsInProgress),
repr(componentsMapped), str(recovery_timestamp))
-
- logger.debug("Heartbeat: %s", pformat(heartbeat))
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("Building Heartbeat: {responseId = %s, timestamp = %s, "
+ "commandsInProgress = %s, componentsMapped = %s, "
+ "recoveryTimestamp = %s}",
+ id, timestamp, commandsInProgress, componentsMapped,
recovery_timestamp)
+ logger.debug("Heartbeat: %s", pformat(heartbeat))
{noformat}
> Heartbeat lost - Heartbeat building is taking time
> --------------------------------------------------
>
> Key: AMBARI-25087
> URL: https://issues.apache.org/jira/browse/AMBARI-25087
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Affects Versions: 2.6.2
> Reporter: amarnath reddy pappu
> Priority: Major
>
> In the past XXXX has reported several issues where they complained about
> Agents loosing the heartbeat but re-starting the agent is fixing the issue.
> In the logs it shows that building heart beat message and sending to request
> is taking close to 40-50seconds.
> {noformat}
> INFO 2018-12-17 22:30:48,746 Controller.py:304 - Heartbeat (response id = 82)
> with server is running...
> INFO 2018-12-17 22:30:48,851 Controller.py:311 - Building heartbeat message
> INFO 2018-12-17 22:31:29,374 logger.py:75 - Testing the JVM's JCE policy to
> see it if supports an unlimited key length.
> INFO 2018-12-17 22:31:30,017 Hardware.py:188 - Some mount points were
> ignored: /run, /dev/shm, /run/lock, /sys/fs/cgroup, /run/user/2008,
> /run/user/202, /run/user/2002, /run/user/2001, /run/user/203, /run/user/2007,
> /run/user/2019
> INFO 2018-12-17 22:31:51,672 Controller.py:333 - Heartbeat response received
> (id = 83)
> INFO 2018-12-17 22:28:44,155 Controller.py:311 - Building heartbeat message
> INFO 2018-12-17 22:29:14,883 logger.py:75 - Testing the JVM's JCE policy to
> see it if supports an unlimited key length.
> INFO 2018-12-17 22:29:17,051 Hardware.py:188 - Some mount points were
> ignored: /run, /dev/shm, /run/lock, /sys/fs/cgroup, /run/user/2008,
> /run/user/202, /run/user/2002, /run/user/2001, /run/user/203, /run/user/2007,
> /run/user/2019
> {noformat}
> We have generated multiple thread dumps to see what is happening while thread
> is waiting at that point. in multiple dumps thread is kept waiting at below
> log line.
> {noformat}
> # ThreadID: 139846973957888
> File: "/usr/lib/python2.7/threading.py", line 774, in __bootstrap
> self.__bootstrap_inner()
> File: "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
> self.run()
> File: "/usr/lib/ambari-agent/lib/ambari_agent/Controller.py", line 501, in run
> self.registerAndHeartbeat()
> File: "/usr/lib/ambari-agent/lib/ambari_agent/Controller.py", line 529, in
> registerAndHeartbeat
> self.heartbeatWithServer()
> File: "/usr/lib/ambari-agent/lib/ambari_agent/Controller.py", line 313, in
> heartbeatWithServer
> data = json.dumps(self.heartbeat.build(self.responseId, send_state,
> self.hasMappedComponents))
> File: "/usr/lib/ambari-agent/lib/ambari_agent/Heartbeat.py", line 83, in build
> logger.debug("Heartbeat: %s", pformat(heartbeat))
> File: "/usr/lib/python2.7/pprint.py", line 63, in pformat
> return PrettyPrinter(indent=indent, width=width,
> depth=depth).pformat(object)
> File: "/usr/lib/python2.7/pprint.py", line 122, in pformat
> self._format(object, sio, 0, 0, {}, 0)
> File: "/usr/lib/python2.7/pprint.py", line 164, in _format
> allowance + 1, context, level)
> {noformat}
> So for some reason pformat is taking lot of time ( 30-35 seconds ) to format
> the message ( Have not enabled the DEBUG logging)
> {noformat}
> if int(id) == 0:
> componentsMapped = False
> logger.debug("Building Heartbeat: {responseId = %s, timestamp = %s, "
> "commandsInProgress = %s, componentsMapped = %s,"
> "recoveryTimestamp = %s}",
> str(id), str(timestamp), repr(commandsInProgress),
> repr(componentsMapped), str(recovery_timestamp))
> logger.debug("Heartbeat: %s", pformat(heartbeat))
> hostInfo = HostInfo(self.config)
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)