AMBARI-14657 : Stop All services fails at AMS on cluster with NN HA enabled, with non root Ambari user and AMS in distributed mode. (avijayan)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/99b800c5 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/99b800c5 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/99b800c5 Branch: refs/heads/branch-dev-patch-upgrade Commit: 99b800c599f8692b9b009d8804837f5a8e1927b7 Parents: 2669a7a Author: Aravindan Vijayan <[email protected]> Authored: Wed Jan 13 20:53:27 2016 -0800 Committer: Aravindan Vijayan <[email protected]> Committed: Wed Jan 13 20:53:27 2016 -0800 ---------------------------------------------------------------------- .../0.1.0/package/scripts/ams_service.py | 8 ++++++-- .../0.1.0/package/scripts/hbase_service.py | 2 +- .../stacks/HDP/2.0.6/role_command_order.json | 6 ++++-- .../2.0.6/AMBARI_METRICS/test_metrics_collector.py | 16 ++++++++-------- 4 files changed, 19 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/99b800c5/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py index 3d1ffda..c9188c2 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py @@ -45,8 +45,12 @@ def ams_service(name, action): #no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1") if params.is_hbase_distributed: - hbase_service('master', action=action) - hbase_service('regionserver', action=action) + if action == 'stop': + hbase_service('regionserver', action=action) + hbase_service('master', action=action) + else: + hbase_service('master', action=action) + hbase_service('regionserver', action=action) cmd = format("{cmd} --distributed") if action == 'start': http://git-wip-us.apache.org/repos/asf/ambari/blob/99b800c5/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py index 5f03ca0..4d0d7f3 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py @@ -45,7 +45,7 @@ def hbase_service( user = params.hbase_user, # BUGFIX: hbase regionserver sometimes hangs when nn is in safemode timeout = 30, - on_timeout = format("{no_op_test} && kill -9 `cat {pid_file}`") + on_timeout = format("{no_op_test} && {sudo} -H -E kill -9 `{sudo} cat {pid_file}`") ) File(pid_file, http://git-wip-us.apache.org/repos/asf/ambari/blob/99b800c5/ambari-server/src/main/resources/stacks/HDP/2.0.6/role_command_order.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/role_command_order.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/role_command_order.json index f5ecd7b..6ed1537 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/role_command_order.json +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/role_command_order.json @@ -49,14 +49,16 @@ "RESOURCEMANAGER_SERVICE_CHECK-SERVICE_CHECK": ["RESOURCEMANAGER-START"], "PIG_SERVICE_CHECK-SERVICE_CHECK": ["RESOURCEMANAGER-START", "NODEMANAGER-START"], "NAMENODE-STOP": ["RESOURCEMANAGER-STOP", "NODEMANAGER-STOP", - "HISTORYSERVER-STOP", "HBASE_MASTER-STOP"], + "HISTORYSERVER-STOP", "HBASE_MASTER-STOP", "METRICS_COLLECTOR-STOP"], "DATANODE-STOP": ["RESOURCEMANAGER-STOP", "NODEMANAGER-STOP", "HISTORYSERVER-STOP", "HBASE_MASTER-STOP"] }, "_comment" : "Dependencies that are used in HA NameNode cluster", "namenode_optional_ha": { "NAMENODE-START": ["ZKFC-START", "JOURNALNODE-START", "ZOOKEEPER_SERVER-START"], - "ZKFC-START": ["ZOOKEEPER_SERVER-START"] + "ZKFC-START": ["ZOOKEEPER_SERVER-START"], + "ZKFC-STOP": ["NAMENODE-STOP"], + "JOURNALNODE-STOP": ["NAMENODE-STOP"] }, "_comment" : "Dependencies that are used in ResourceManager HA cluster", "resourcemanager_optional_ha" : { http://git-wip-us.apache.org/repos/asf/ambari/blob/99b800c5/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py b/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py index 307274f..1c83bb7 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py +++ b/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py @@ -39,20 +39,20 @@ class TestMetricsCollector(RMFTestCase): self.assert_hbase_configure('master', distributed=True) self.assert_hbase_configure('regionserver', distributed=True) self.assert_ams('collector', distributed=True) - self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop master', - on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-master.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-master.pid` >/dev/null 2>&1 && kill -9 `cat /var/run/ambari-metrics-collector//hbase-ams-master.pid`', + self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop regionserver', + on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid` >/dev/null 2>&1 && ambari-sudo.sh -H -E kill -9 `ambari-sudo.sh cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid`', timeout = 30, user = 'ams' - ) - self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-master.pid', + ) + self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-regionserver.pid', action = ['delete'] - ) - self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop regionserver', - on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid` >/dev/null 2>&1 && kill -9 `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid`', + ) + self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop master', + on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-master.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-master.pid` >/dev/null 2>&1 && ambari-sudo.sh -H -E kill -9 `ambari-sudo.sh cat /var/run/ambari-metrics-collector//hbase-ams-master.pid`', timeout = 30, user = 'ams' ) - self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-regionserver.pid', + self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-master.pid', action = ['delete'] ) self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf --distributed stop',
