Repository: ambari Updated Branches: refs/heads/branch-2.4 afdbe4918 -> f276ee9a7 refs/heads/trunk bd71e62d3 -> b220d26f7
AMBARI-16914. Ambari uses too small a window for region server shutdown (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b220d26f Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b220d26f Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b220d26f Branch: refs/heads/trunk Commit: b220d26f7c158aa48338018ec281a3dab34929d5 Parents: bd71e62 Author: Andrew Onishuk <[email protected]> Authored: Mon Jun 13 18:26:51 2016 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Mon Jun 13 18:26:51 2016 +0300 ---------------------------------------------------------------------- .../0.1.0/configuration/ams-hbase-env.xml | 15 +++++++++++++++ .../0.1.0/package/scripts/hbase_service.py | 2 +- .../0.1.0/package/scripts/params_linux.py | 3 +++ .../HBASE/0.96.0.2.0/configuration/hbase-env.xml | 15 +++++++++++++++ .../0.96.0.2.0/package/scripts/hbase_service.py | 2 +- .../HBASE/0.96.0.2.0/package/scripts/params_linux.py | 1 + .../0.96.0.2.0/package/scripts/phoenix_service.py | 2 -- .../stacks/2.0.6/HBASE/test_phoenix_queryserver.py | 4 ---- .../test/python/stacks/2.0.6/configs/default.json | 6 ++++-- .../test/python/stacks/2.0.6/configs/secured.json | 3 ++- 10 files changed, 42 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml index b40923a..4c866d9 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml @@ -157,6 +157,21 @@ </description> <on-ambari-upgrade add="true"/> </property> + <property> + <name>hbase_regionserver_shutdown_timeout</name> + <value>30</value> + <display-name>HBase RegionServer shutdown timeout</display-name> + <description> +After this number of seconds waiting for graceful stop of HBase Master it will be forced to exit with SIGKILL. +The timeout is introduced because there is a known bug when from time to time HBase RegionServer hangs forever on stop if NN safemode is on. + </description> + <value-attributes> + <type>directory</type> + <overridable>false</overridable> + <editable-only-at-install>true</editable-only-at-install> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> <!-- hbase-env.sh --> <property> <name>content</name> http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py index ba4725b..42f23bf 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase_service.py @@ -45,7 +45,7 @@ def hbase_service( Execute ( daemon_cmd, user = params.hbase_user, # BUGFIX: hbase regionserver sometimes hangs when nn is in safemode - timeout = 30, + timeout = params.hbase_regionserver_shutdown_timeout, on_timeout = format("{no_op_test} && {sudo} -H -E kill -9 `{sudo} cat {pid_file}`") ) http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py index 13ec279..52e9fe5 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params_linux.py @@ -24,6 +24,7 @@ from resource_management.libraries.functions.default import default from resource_management.libraries.functions.format import format from ambari_commons import OSCheck from ambari_commons.constants import AMBARI_SUDO_BINARY +from resource_management.libraries.functions.expect import expect config = Script.get_config() @@ -53,3 +54,5 @@ sudo = AMBARI_SUDO_BINARY dfs_type = default("/commandParams/dfs_type", "") +hbase_regionserver_shutdown_timeout = expect('/configurations/ams-hbase-env/hbase_regionserver_shutdown_timeout', int) + http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml index eaee3cf..93ca6ba 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/configuration/hbase-env.xml @@ -139,6 +139,21 @@ and the -Xmn ratio (hbase_regionserver_xmn_ratio) exceeds this value. <description>HBase keytab path</description> <on-ambari-upgrade add="true"/> </property> + <property> + <name>hbase_regionserver_shutdown_timeout</name> + <value>30</value> + <display-name>HBase RegionServer shutdown timeout</display-name> + <description> +After this number of seconds waiting for graceful stop of HBase Master it will be forced to exit with SIGKILL. +The timeout is introduced because there is a known bug when from time to time HBase RegionServer hangs forever on stop if NN safemode is on. + </description> + <value-attributes> + <type>directory</type> + <overridable>false</overridable> + <editable-only-at-install>true</editable-only-at-install> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> <!-- hbase-env.sh --> <property> <name>content</name> http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py index e9e8803..1dbd560 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/hbase_service.py @@ -54,7 +54,7 @@ def hbase_service( user = params.hbase_user, only_if = no_op_test, # BUGFIX: hbase regionserver sometimes hangs when nn is in safemode - timeout = 30, + timeout = params.hbase_regionserver_shutdown_timeout, on_timeout = format("! ( {no_op_test} ) || {sudo} -H -E kill -9 `{pid_expression}`"), ) except: http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py index 05bad1c..76cefe7 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/params_linux.py @@ -116,6 +116,7 @@ regionserver_xmn_max = config['configurations']['hbase-env']['hbase_regionserver regionserver_xmn_percent = expect("/configurations/hbase-env/hbase_regionserver_xmn_ratio", float) regionserver_xmn_size = calc_xmn_from_xms(regionserver_heapsize, regionserver_xmn_percent, regionserver_xmn_max) +hbase_regionserver_shutdown_timeout = expect('/configurations/hbase-env/hbase_regionserver_shutdown_timeout', int) phoenix_hosts = default('/clusterHostInfo/phoenix_query_server_hosts', []) phoenix_enabled = default('/configurations/hbase-env/phoenix_sql_enabled', False) http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py index 0a42cda..42d9cd1 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/phoenix_service.py @@ -43,8 +43,6 @@ def phoenix_service(action = 'start'): # 'start', 'stop', 'status' elif action == 'stop': Execute(daemon_cmd, - timeout = 30, - on_timeout = format("! ( {no_op_test} ) || {sudo} -H -E kill -9 `cat {pid_file}`"), user=format("{hbase_user}"), environment=env ) http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py index ac8b153..e4f5b2f 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HBASE/test_phoenix_queryserver.py @@ -80,8 +80,6 @@ class TestPhoenixQueryServer(RMFTestCase): self.assertResourceCalled('Execute', '/usr/hdp/current/phoenix-server/bin/queryserver.py stop', - on_timeout = '! ( ls /var/run/hbase/phoenix-hbase-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hbase/phoenix-hbase-server.pid` >/dev/null 2>&1 ) || ambari-sudo.sh -H -E kill -9 `cat /var/run/hbase/phoenix-hbase-server.pid`', - timeout = 30, environment = {'JAVA_HOME':'/usr/jdk64/jdk1.8.0_40', 'HBASE_CONF_DIR':'/usr/hdp/current/hbase-regionserver/conf'}, user = 'hbase' @@ -140,8 +138,6 @@ class TestPhoenixQueryServer(RMFTestCase): self.assertResourceCalled('Execute', '/usr/hdp/current/phoenix-server/bin/queryserver.py stop', - on_timeout = '! ( ls /var/run/hbase/phoenix-hbase-server.pid >/dev/null 2>&1 && ps -p `cat /var/run/hbase/phoenix-hbase-server.pid` >/dev/null 2>&1 ) || ambari-sudo.sh -H -E kill -9 `cat /var/run/hbase/phoenix-hbase-server.pid`', - timeout = 30, environment = {'JAVA_HOME':'/usr/jdk64/jdk1.8.0_40', 'HBASE_CONF_DIR':'/usr/hdp/current/hbase-regionserver/conf'}, user = 'hbase' http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/configs/default.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json index 04aa828..d4310f1 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json +++ b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json @@ -610,7 +610,8 @@ "hbase_regionserver_xmn_max": "512", "hbase_regionserver_xmn_ratio": "0.2", "hbase_log_dir": "/var/log/hbase", - "hbase_java_io_tmpdir" : "/tmp" + "hbase_java_io_tmpdir" : "/tmp", + "hbase_regionserver_shutdown_timeout": "30" }, "ganglia-env": { "gmond_user": "nobody", @@ -812,7 +813,8 @@ "hbase_regionserver_heapsize": "512m", "hbase_log_dir": "/var/log/ambari-metrics-collector", "hbase_master_xmn_size": "256m", - "content": "\n" + "content": "\n", + "hbase_regionserver_shutdown_timeout": "30" }, "ams-log4j": { "content": "\n" http://git-wip-us.apache.org/repos/asf/ambari/blob/b220d26f/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json index 02f982e..fac0649 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json +++ b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json @@ -658,7 +658,8 @@ "hbase_regionserver_xmn_ratio": "0.2", "hbase_log_dir": "/var/log/hbase", "hbase_user_keytab": "/etc/security/keytabs/hbase.headless.keytab", - "hbase_java_io_tmpdir" : "/tmp" + "hbase_java_io_tmpdir" : "/tmp", + "hbase_regionserver_shutdown_timeout": "30" }, "flume-env": { "content": "export JAVA_HOME={{java64_home}}",
