[1/3] ambari git commit: Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)"
Repository: ambari Updated Branches: refs/heads/branch-feature-AMBARI-18456 532caef33 -> 159ad0032 Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)" Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c3b31d6f Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c3b31d6f Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c3b31d6f Branch: refs/heads/branch-feature-AMBARI-18456 Commit: c3b31d6f7461625d6a7fe533025c5f604c67ffb5 Parents: 7ed5259 Author: Andrew Onishuk Authored: Wed Oct 19 01:52:05 2016 +0300 Committer: Andrew Onishuk Committed: Wed Oct 19 01:52:05 2016 +0300 -- ambari-agent/conf/unix/ambari-agent.ini | 1 - .../src/main/python/ambari_agent/ActionQueue.py | 16 +--- .../ambari_agent/PythonReflectiveExecutor.py | 25 +++- .../test/python/ambari_agent/TestActionQueue.py | 3 +- .../main/python/ambari_commons/thread_utils.py | 43 5 files changed, 8 insertions(+), 80 deletions(-) -- http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/conf/unix/ambari-agent.ini -- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 1c39c24..914e09a 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -32,7 +32,6 @@ tolerate_download_failures=true run_as_user=root parallel_execution=0 alert_grace_period=5 -status_command_timeout=2 alert_kinit_timeout=1440 system_resource_overrides=/etc/resource_overrides ; memory_threshold_soft_mb=400 http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/src/main/python/ambari_agent/ActionQueue.py -- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index 5962d94..f104939 100644 --- 
a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -29,14 +29,12 @@ import time import signal from AgentException import AgentException -from PythonReflectiveExecutor import PythonReflectiveExecutor from LiveStatus import LiveStatus from ActualConfigHandler import ActualConfigHandler from CommandStatusDict import CommandStatusDict from CustomServiceOrchestrator import CustomServiceOrchestrator from ambari_agent.BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle from ambari_commons.str_utils import split_on_chunks -from ambari_commons.thread_utils import terminate_thread logger = logging.getLogger() @@ -87,7 +85,6 @@ class ActionQueue(threading.Thread): self.tmpdir = config.get('agent', 'prefix') self.customServiceOrchestrator = CustomServiceOrchestrator(config, controller) self.parallel_execution = config.get_parallel_exec_option() -self.status_command_timeout = int(self.config.get('agent', 'status_command_timeout', 2)) if self.parallel_execution == 1: logger.info("Parallel execution is enabled, will execute agent commands in parallel") @@ -228,18 +225,7 @@ class ActionQueue(threading.Thread): if self.controller.recovery_manager.enabled(): self.controller.recovery_manager.stop_execution_command() elif commandType == self.STATUS_COMMAND: -component_name = command['componentName'] - -thread = threading.Thread(target = self.execute_status_command, args = (command,)) -thread.daemon = True # hanging status commands should not be prevent ambari-agent from stopping -thread.start() -thread.join(timeout=self.status_command_timeout) - -if thread.isAlive(): - terminate_thread(thread) - # Force context to reset to normal. By context we mean sys.path, imports, logger setting, etc. They are set by specific status command, and are not relevant to ambari-agent. - PythonReflectiveExecutor.last_context.revert() - logger.warn("Command {0} for {1} was running for more than {2} seconds. 
Terminated due to timeout.".format(commandType, component_name, self.status_command_timeout)) +self.execute_status_command(command) else: logger.error("Unrecognized command " + pprint.pformat(command)) except Exception: http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py -- diff --git
ambari git commit: Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)"
Repository: ambari Updated Branches: refs/heads/trunk 7ed5259d4 -> c3b31d6f7 Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)" Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c3b31d6f Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c3b31d6f Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c3b31d6f Branch: refs/heads/trunk Commit: c3b31d6f7461625d6a7fe533025c5f604c67ffb5 Parents: 7ed5259 Author: Andrew Onishuk Authored: Wed Oct 19 01:52:05 2016 +0300 Committer: Andrew Onishuk Committed: Wed Oct 19 01:52:05 2016 +0300 -- ambari-agent/conf/unix/ambari-agent.ini | 1 - .../src/main/python/ambari_agent/ActionQueue.py | 16 +--- .../ambari_agent/PythonReflectiveExecutor.py | 25 +++- .../test/python/ambari_agent/TestActionQueue.py | 3 +- .../main/python/ambari_commons/thread_utils.py | 43 5 files changed, 8 insertions(+), 80 deletions(-) -- http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/conf/unix/ambari-agent.ini -- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 1c39c24..914e09a 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -32,7 +32,6 @@ tolerate_download_failures=true run_as_user=root parallel_execution=0 alert_grace_period=5 -status_command_timeout=2 alert_kinit_timeout=1440 system_resource_overrides=/etc/resource_overrides ; memory_threshold_soft_mb=400 http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/src/main/python/ambari_agent/ActionQueue.py -- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index 5962d94..f104939 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ 
b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -29,14 +29,12 @@ import time import signal from AgentException import AgentException -from PythonReflectiveExecutor import PythonReflectiveExecutor from LiveStatus import LiveStatus from ActualConfigHandler import ActualConfigHandler from CommandStatusDict import CommandStatusDict from CustomServiceOrchestrator import CustomServiceOrchestrator from ambari_agent.BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle from ambari_commons.str_utils import split_on_chunks -from ambari_commons.thread_utils import terminate_thread logger = logging.getLogger() @@ -87,7 +85,6 @@ class ActionQueue(threading.Thread): self.tmpdir = config.get('agent', 'prefix') self.customServiceOrchestrator = CustomServiceOrchestrator(config, controller) self.parallel_execution = config.get_parallel_exec_option() -self.status_command_timeout = int(self.config.get('agent', 'status_command_timeout', 2)) if self.parallel_execution == 1: logger.info("Parallel execution is enabled, will execute agent commands in parallel") @@ -228,18 +225,7 @@ class ActionQueue(threading.Thread): if self.controller.recovery_manager.enabled(): self.controller.recovery_manager.stop_execution_command() elif commandType == self.STATUS_COMMAND: -component_name = command['componentName'] - -thread = threading.Thread(target = self.execute_status_command, args = (command,)) -thread.daemon = True # hanging status commands should not be prevent ambari-agent from stopping -thread.start() -thread.join(timeout=self.status_command_timeout) - -if thread.isAlive(): - terminate_thread(thread) - # Force context to reset to normal. By context we mean sys.path, imports, logger setting, etc. They are set by specific status command, and are not relevant to ambari-agent. - PythonReflectiveExecutor.last_context.revert() - logger.warn("Command {0} for {1} was running for more than {2} seconds. 
Terminated due to timeout.".format(commandType, component_name, self.status_command_timeout)) +self.execute_status_command(command) else: logger.error("Unrecognized command " + pprint.pformat(command)) except Exception: http://git-wip-us.apache.org/repos/asf/ambari/blob/c3b31d6f/ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py -- diff --git
ambari git commit: Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)"
Repository: ambari Updated Branches: refs/heads/branch-2.5 8564be100 -> ac32c2885 Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)" Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ac32c288 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ac32c288 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ac32c288 Branch: refs/heads/branch-2.5 Commit: ac32c2885865bdb13fcd28b1e6e2283df61a6ec7 Parents: 8564be1 Author: Andrew Onishuk Authored: Wed Oct 19 01:49:30 2016 +0300 Committer: Andrew Onishuk Committed: Wed Oct 19 01:49:30 2016 +0300 -- ambari-agent/conf/unix/ambari-agent.ini | 1 - .../src/main/python/ambari_agent/ActionQueue.py | 16 +--- .../ambari_agent/PythonReflectiveExecutor.py | 25 +++- .../test/python/ambari_agent/TestActionQueue.py | 3 +- .../main/python/ambari_commons/thread_utils.py | 43 5 files changed, 8 insertions(+), 80 deletions(-) -- http://git-wip-us.apache.org/repos/asf/ambari/blob/ac32c288/ambari-agent/conf/unix/ambari-agent.ini -- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 1c39c24..914e09a 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -32,7 +32,6 @@ tolerate_download_failures=true run_as_user=root parallel_execution=0 alert_grace_period=5 -status_command_timeout=2 alert_kinit_timeout=1440 system_resource_overrides=/etc/resource_overrides ; memory_threshold_soft_mb=400 http://git-wip-us.apache.org/repos/asf/ambari/blob/ac32c288/ambari-agent/src/main/python/ambari_agent/ActionQueue.py -- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index 5962d94..f104939 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ 
b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -29,14 +29,12 @@ import time import signal from AgentException import AgentException -from PythonReflectiveExecutor import PythonReflectiveExecutor from LiveStatus import LiveStatus from ActualConfigHandler import ActualConfigHandler from CommandStatusDict import CommandStatusDict from CustomServiceOrchestrator import CustomServiceOrchestrator from ambari_agent.BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle from ambari_commons.str_utils import split_on_chunks -from ambari_commons.thread_utils import terminate_thread logger = logging.getLogger() @@ -87,7 +85,6 @@ class ActionQueue(threading.Thread): self.tmpdir = config.get('agent', 'prefix') self.customServiceOrchestrator = CustomServiceOrchestrator(config, controller) self.parallel_execution = config.get_parallel_exec_option() -self.status_command_timeout = int(self.config.get('agent', 'status_command_timeout', 2)) if self.parallel_execution == 1: logger.info("Parallel execution is enabled, will execute agent commands in parallel") @@ -228,18 +225,7 @@ class ActionQueue(threading.Thread): if self.controller.recovery_manager.enabled(): self.controller.recovery_manager.stop_execution_command() elif commandType == self.STATUS_COMMAND: -component_name = command['componentName'] - -thread = threading.Thread(target = self.execute_status_command, args = (command,)) -thread.daemon = True # hanging status commands should not be prevent ambari-agent from stopping -thread.start() -thread.join(timeout=self.status_command_timeout) - -if thread.isAlive(): - terminate_thread(thread) - # Force context to reset to normal. By context we mean sys.path, imports, logger setting, etc. They are set by specific status command, and are not relevant to ambari-agent. - PythonReflectiveExecutor.last_context.revert() - logger.warn("Command {0} for {1} was running for more than {2} seconds. 
Terminated due to timeout.".format(commandType, component_name, self.status_command_timeout)) +self.execute_status_command(command) else: logger.error("Unrecognized command " + pprint.pformat(command)) except Exception: http://git-wip-us.apache.org/repos/asf/ambari/blob/ac32c288/ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py -- diff --git
ambari git commit: Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)"
Repository: ambari Updated Branches: refs/heads/branch-2.4 9332b381b -> af528b5c9 Revert "AMBARI-18629. HDFS goes down after installing cluster (aonishuk) and AMBARI-18505. Ambari Status commands should enforce a timeout < heartbeat interval (aonishuk)" Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/af528b5c Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/af528b5c Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/af528b5c Branch: refs/heads/branch-2.4 Commit: af528b5c9786b1b2d9a0dc7b3f609728acf4d87e Parents: 9332b38 Author: Andrew Onishuk Authored: Wed Oct 19 01:46:07 2016 +0300 Committer: Andrew Onishuk Committed: Wed Oct 19 01:46:07 2016 +0300 -- ambari-agent/conf/unix/ambari-agent.ini | 1 - .../src/main/python/ambari_agent/ActionQueue.py | 16 +--- .../ambari_agent/PythonReflectiveExecutor.py | 25 +++- .../test/python/ambari_agent/TestActionQueue.py | 3 +- .../main/python/ambari_commons/thread_utils.py | 43 5 files changed, 8 insertions(+), 80 deletions(-) -- http://git-wip-us.apache.org/repos/asf/ambari/blob/af528b5c/ambari-agent/conf/unix/ambari-agent.ini -- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 1c39c24..914e09a 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -32,7 +32,6 @@ tolerate_download_failures=true run_as_user=root parallel_execution=0 alert_grace_period=5 -status_command_timeout=2 alert_kinit_timeout=1440 system_resource_overrides=/etc/resource_overrides ; memory_threshold_soft_mb=400 http://git-wip-us.apache.org/repos/asf/ambari/blob/af528b5c/ambari-agent/src/main/python/ambari_agent/ActionQueue.py -- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index dedef76..064e4f0 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ 
b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -29,14 +29,12 @@ import time import signal from AgentException import AgentException -from PythonReflectiveExecutor import PythonReflectiveExecutor from LiveStatus import LiveStatus from ActualConfigHandler import ActualConfigHandler from CommandStatusDict import CommandStatusDict from CustomServiceOrchestrator import CustomServiceOrchestrator from ambari_agent.BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle from ambari_commons.str_utils import split_on_chunks -from ambari_commons.thread_utils import terminate_thread logger = logging.getLogger() @@ -86,7 +84,6 @@ class ActionQueue(threading.Thread): self.tmpdir = config.get('agent', 'prefix') self.customServiceOrchestrator = CustomServiceOrchestrator(config, controller) self.parallel_execution = config.get_parallel_exec_option() -self.status_command_timeout = int(self.config.get('agent', 'status_command_timeout', 2)) if self.parallel_execution == 1: logger.info("Parallel execution is enabled, will execute agent commands in parallel") @@ -227,18 +224,7 @@ class ActionQueue(threading.Thread): if self.controller.recovery_manager.enabled(): self.controller.recovery_manager.stop_execution_command() elif commandType == self.STATUS_COMMAND: -component_name = command['componentName'] - -thread = threading.Thread(target = self.execute_status_command, args = (command,)) -thread.daemon = True # hanging status commands should not be prevent ambari-agent from stopping -thread.start() -thread.join(timeout=self.status_command_timeout) - -if thread.isAlive(): - terminate_thread(thread) - # Force context to reset to normal. By context we mean sys.path, imports, logger setting, etc. They are set by specific status command, and are not relevant to ambari-agent. - PythonReflectiveExecutor.last_context.revert() - logger.warn("Command {0} for {1} was running for more than {2} seconds. 
Terminated due to timeout.".format(commandType, component_name, self.status_command_timeout)) +self.execute_status_command(command) else: logger.error("Unrecognized command " + pprint.pformat(command)) except Exception: http://git-wip-us.apache.org/repos/asf/ambari/blob/af528b5c/ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py -- diff --git