Repository: ambari
Updated Branches:
  refs/heads/trunk 55c5f7e03 -> 693e758a2


AMBARI-16028: Namenode marked as INITIAL standby could potentially never start if other namenode is down (jluniya)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/693e758a
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/693e758a
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/693e758a

Branch: refs/heads/trunk
Commit: 693e758a2e46f0c7b8a92eb96f36efba82331d9d
Parents: 55c5f7e
Author: Jayush Luniya <[email protected]>
Authored: Fri Apr 22 11:41:04 2016 -0700
Committer: Jayush Luniya <[email protected]>
Committed: Fri Apr 22 11:41:04 2016 -0700

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  | 41 +++++++++++++++++---
 .../2.1.0.2.0/package/scripts/params_linux.py   |  8 +++-
 .../python/stacks/2.0.6/HDFS/test_namenode.py   |  9 +++++
 3 files changed, 51 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/693e758a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index 8b6c924..69235d0 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -447,11 +447,11 @@ def decommission():
 
 
 def bootstrap_standby_namenode(params, use_path=False):
-
+  mark_dirs = params.namenode_bootstrapped_mark_dirs
   bin_path = os.path.join(params.hadoop_bin_dir, '') if use_path else ""
-
   try:
     iterations = 50
+    bootstrapped = False
     bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive")
     # Blue print based deployments start both NN in parallel and occasionally
     # the first attempt to bootstrap may fail. Depending on how it fails the
@@ -459,22 +459,53 @@ def bootstrap_standby_namenode(params, use_path=False):
     # bootstrap succeeded). The solution is to call with -force option but only
     # during initial start
     if params.command_phase == "INITIAL_START":
+      # force bootstrap in INITIAL_START phase
       bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive -force")
+    elif is_namenode_bootstrapped(params):
+      # Once out of INITIAL_START phase bootstrap only if we couldnt bootstrap during cluster deployment
+      return True
     Logger.info("Boostrapping standby namenode: %s" % (bootstrap_cmd))
     for i in range(iterations):
       Logger.info('Try %d out of %d' % (i+1, iterations))
       code, out = shell.call(bootstrap_cmd, logoutput=False, user=params.hdfs_user)
       if code == 0:
         Logger.info("Standby namenode bootstrapped successfully")
-        return True
+        bootstrapped = True
+        break
       elif code == 5:
         Logger.info("Standby namenode already bootstrapped")
-        return True
+        bootstrapped = True
+        break
       else:
         Logger.warning('Bootstrap standby namenode failed with %d error code. Will retry' % (code))
   except Exception as ex:
     Logger.error('Bootstrap standby namenode threw an exception. Reason %s' %(str(ex)))
-  return False
+  if bootstrapped:
+    for mark_dir in mark_dirs:
+      Directory(mark_dir,
+                create_parents = True
+      )
+  return bootstrapped
+
+def is_namenode_bootstrapped(params):
+  mark_dirs = params.namenode_bootstrapped_mark_dirs
+  nn_name_dirs = params.dfs_name_dir.split(',')
+  marked = False
+  # Check if name directories have been marked as formatted
+  for mark_dir in mark_dirs:
+    if os.path.isdir(mark_dir):
+      marked = True
+      Logger.info(format("{mark_dir} exists. Standby Namenode already bootstrapped"))
+      break
+
+  # Ensure that all mark dirs created for all name directories
+  if marked:
+    for mark_dir in mark_dirs:
+      Directory(mark_dir,
+                create_parents = True
+      )
+
+  return marked
 
 
 def is_active_namenode(hdfs_binary):

http://git-wip-us.apache.org/repos/asf/ambari/blob/693e758a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params_linux.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params_linux.py
index 5e855e7..e6fd32c 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params_linux.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params_linux.py
@@ -233,14 +233,18 @@ smoke_hdfs_user_mode = 0770
 
 hdfs_namenode_format_disabled = default("/configurations/cluster-env/hdfs_namenode_format_disabled", False)
 hdfs_namenode_formatted_mark_suffix = "/namenode-formatted/"
+hdfs_namenode_bootstrapped_mark_suffix = "/namenode-bootstrapped/"
 namenode_formatted_old_mark_dirs = ["/var/run/hadoop/hdfs/namenode-formatted",
   format("{hadoop_pid_dir_prefix}/hdfs/namenode/formatted"),
   "/var/lib/hdfs/namenode/formatted"]
 dfs_name_dirs = dfs_name_dir.split(",")
 namenode_formatted_mark_dirs = []
+namenode_bootstrapped_mark_dirs = []
 for dn_dir in dfs_name_dirs:
- tmp_mark_dir = format("{dn_dir}{hdfs_namenode_formatted_mark_suffix}")
- namenode_formatted_mark_dirs.append(tmp_mark_dir)
+ tmp_format_mark_dir = format("{dn_dir}{hdfs_namenode_formatted_mark_suffix}")
+ tmp_bootstrap_mark_dir = format("{dn_dir}{hdfs_namenode_bootstrapped_mark_suffix}")
+ namenode_formatted_mark_dirs.append(tmp_format_mark_dir)
+ namenode_bootstrapped_mark_dirs.append(tmp_bootstrap_mark_dir)
 
 # Use the namenode RPC address if configured, otherwise, fallback to the default file system
 namenode_address = None

http://git-wip-us.apache.org/repos/asf/ambari/blob/693e758a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index 1c08d57..49935a1 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -842,6 +842,9 @@ class TestNamenode(RMFTestCase):
                               content = Template('exclude_hosts_list.j2'),
                               group = 'hadoop',
                               )
+    self.assertResourceCalled('Directory', '/hadoop/hdfs/namenode/namenode-bootstrapped/',
+                              create_parents = True
+    )
     self.assertResourceCalled('Directory', '/var/run/hadoop',
                               owner = 'hdfs',
                               group = 'hadoop',
@@ -957,6 +960,9 @@ class TestNamenode(RMFTestCase):
                               content = Template('exclude_hosts_list.j2'),
                               group = 'hadoop',
                               )
+    self.assertResourceCalled('Directory', '/hadoop/hdfs/namenode/namenode-bootstrapped/',
+                              create_parents = True
+    )
     self.assertResourceCalled('Directory', '/var/run/hadoop',
                               owner = 'hdfs',
                               group = 'hadoop',
@@ -1070,6 +1076,9 @@ class TestNamenode(RMFTestCase):
                               content = Template('exclude_hosts_list.j2'),
                               group = 'hadoop',
                               )
+    self.assertResourceCalled('Directory', '/hadoop/hdfs/namenode/namenode-bootstrapped/',
+                              create_parents = True
+    )
     self.assertResourceCalled('Directory', '/var/run/hadoop',
                               owner = 'hdfs',
                               group = 'hadoop',
