AMBARI-18728. During cluster install, Components get timed out icon while starting (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/a6ef6810 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/a6ef6810 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/a6ef6810 Branch: refs/heads/branch-feature-AMBARI-18634 Commit: a6ef6810053bdf9cc282ec49a76b8277e73f7035 Parents: 57037fe Author: Andrew Onishuk <[email protected]> Authored: Fri Oct 28 17:53:16 2016 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Fri Oct 28 17:53:16 2016 +0300 ---------------------------------------------------------------------- ambari-agent/src/main/python/ambari_agent/ActionQueue.py | 4 ++++ .../main/python/ambari_agent/StatusCommandsExecutor.py | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/a6ef6810/ambari-agent/src/main/python/ambari_agent/ActionQueue.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index bf840e2..1131d21 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -206,6 +206,10 @@ class ActionQueue(threading.Thread): self.process_status_command_result(result) except Queue.Empty: pass + except IOError: + # on race condition in multiprocessing.Queue if get/put and thread kill are executed at the same time. + # During queue.close IOError will be thrown (this prevents from permanently dead-locked get). + pass def createCommandHandle(self, command): if command.has_key('__handle'): http://git-wip-us.apache.org/repos/asf/ambari/blob/a6ef6810/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py index 20acee4..12f58e5 100644 --- a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py +++ b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py @@ -47,7 +47,7 @@ class StatusCommandsExecutor(multiprocessing.Process): bind_debug_signal_handlers() while True: command = self.actionQueue.statusCommandQueue.get(True) # blocks until status status command appears - logger.info("Running status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed + logger.debug("Running status command for {0}".format(command['componentName'])) timeout_timer = threading.Timer( self.status_command_timeout, self.respawn, [command]) timeout_timer.start() @@ -55,7 +55,7 @@ class StatusCommandsExecutor(multiprocessing.Process): self.process_status_command(command) timeout_timer.cancel() - logger.info("Completed status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed + logger.debug("Completed status command for {0}".format(command['componentName'])) except: logger.exception("StatusCommandsExecutor process failed with exception:") raise @@ -83,4 +83,8 @@ class StatusCommandsExecutor(multiprocessing.Process): raise def kill(self): - os.kill(self.pid, signal.SIGKILL) \ No newline at end of file + os.kill(self.pid, signal.SIGKILL) + + # prevent queue from ending up with non-freed semaphores, locks during put. Which would result in dead-lock in process executing get. + self.actionQueue.statusCommandResultQueue.close() + self.actionQueue.statusCommandResultQueue.join_thread() \ No newline at end of file
