AMBARI-21532. Namenode restart - PID file delete happens before the call to check status (dlysnichenko)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/f500c9e4 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/f500c9e4 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/f500c9e4 Branch: refs/heads/branch-feature-AMBARI-12556 Commit: f500c9e48d8b9ed7b3aebe547720da9dc9fc10e5 Parents: bfe772b Author: Lisnichenko Dmitro <dlysniche...@hortonworks.com> Authored: Mon Jul 24 16:59:26 2017 +0300 Committer: Lisnichenko Dmitro <dlysniche...@hortonworks.com> Committed: Mon Jul 24 16:59:26 2017 +0300 ---------------------------------------------------------------------- .../libraries/functions/check_process_status.py | 20 ++++++++++++++++++++ .../libraries/script/script.py | 2 +- .../HDFS/2.1.0.2.0/package/scripts/utils.py | 4 ++++ .../HDFS/3.0.0.3.0/package/scripts/utils.py | 2 ++ .../0.8/services/HDFS/package/scripts/utils.py | 3 +++ 5 files changed, 30 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/f500c9e4/ambari-common/src/main/python/resource_management/libraries/functions/check_process_status.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/check_process_status.py b/ambari-common/src/main/python/resource_management/libraries/functions/check_process_status.py index 7961f00..ac54bc9 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/check_process_status.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/check_process_status.py @@ -59,3 +59,23 @@ def check_process_status(pid_file): Logger.info("Process with pid {0} is not running. Stale pid file" " at {1}".format(pid, pid_file)) raise ComponentIsNotRunning() + + +def wait_process_stopped(pid_file): + """ + Waits until component is actually stopped (check is performed using + check_process_status() method. + """ + import time + component_is_stopped = False + counter = 0 + while not component_is_stopped: + try: + if counter % 10 == 0: + Logger.logger.info("Waiting for actual component stop") + check_process_status(pid_file) + time.sleep(1) + counter += 1 + except ComponentIsNotRunning, e: + Logger.logger.debug(" reports ComponentIsNotRunning") + component_is_stopped = True http://git-wip-us.apache.org/repos/asf/ambari/blob/f500c9e4/ambari-common/src/main/python/resource_management/libraries/script/script.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/script/script.py b/ambari-common/src/main/python/resource_management/libraries/script/script.py index 2b374c5..c2c89c4 100644 --- a/ambari-common/src/main/python/resource_management/libraries/script/script.py +++ b/ambari-common/src/main/python/resource_management/libraries/script/script.py @@ -411,7 +411,7 @@ class Script(object): status_method = getattr(self, 'status') component_is_stopped = False counter = 0 - while not component_is_stopped : + while not component_is_stopped: try: if counter % 100 == 0: Logger.logger.info("Waiting for actual component stop") http://git-wip-us.apache.org/repos/asf/ambari/blob/f500c9e4/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py index d861ba9..2535f60 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py @@ -24,6 +24,7 @@ import ambari_simplejson as json # simplejson is much faster comparing to Python from resource_management.core.resources.system import Directory, File, Execute from resource_management.libraries.functions.format import format from resource_management.libraries.functions import check_process_status +from resource_management.libraries.functions.check_process_status import wait_process_stopped from resource_management.libraries.functions import StackFeature from resource_management.libraries.functions.stack_features import check_stack_feature from resource_management.core import shell @@ -281,6 +282,9 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False, except: show_logs(log_dir, user) raise + + wait_process_stopped(pid_file) + File(pid_file, action="delete") def get_jmx_data(nn_address, modeler_type, metric, encrypted=False, security_enabled=False): http://git-wip-us.apache.org/repos/asf/ambari/blob/f500c9e4/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/utils.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/utils.py b/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/utils.py index 53774c6..0c28a00 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/utils.py +++ b/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/utils.py @@ -22,6 +22,7 @@ import urllib2 import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. from resource_management.core.resources.system import Directory, File, Execute +from resource_management.libraries.functions.check_process_status import wait_process_stopped from resource_management.libraries.functions.format import format from resource_management.libraries.functions import check_process_status from resource_management.libraries.functions import StackFeature @@ -278,6 +279,7 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False, except: show_logs(log_dir, user) raise + wait_process_stopped(pid_file) File(pid_file, action="delete") def get_jmx_data(nn_address, modeler_type, metric, encrypted=False, security_enabled=False): http://git-wip-us.apache.org/repos/asf/ambari/blob/f500c9e4/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/scripts/utils.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/scripts/utils.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/scripts/utils.py index 7dcbca8..f7febb0 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/scripts/utils.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/scripts/utils.py @@ -19,6 +19,7 @@ limitations under the License. import os from resource_management import * +from resource_management.libraries.functions.check_process_status import wait_process_stopped import re @@ -123,6 +124,8 @@ def service(action=None, name=None, user=None, create_pid_dir=False, not_if=service_is_up ) if action == "stop": + wait_process_stopped(pid_file) + File(pid_file, action="delete", )