AMBARI-22444 - Add Native Libraries To Tez Tarball (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c67a3246 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c67a3246 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c67a3246 Branch: refs/heads/branch-feature-AMBARI-22008 Commit: c67a3246e2121c6977094e10cdb1414f45cf1482 Parents: 99152d3 Author: Jonathan Hurley <jhur...@hortonworks.com> Authored: Tue Nov 14 16:19:30 2017 -0500 Committer: Jonathan Hurley <jhur...@hortonworks.com> Committed: Wed Nov 15 15:30:36 2017 -0500 ---------------------------------------------------------------------- .../libraries/functions/copy_tarball.py | 82 +++++++++++++++++--- .../libraries/functions/tar_archive.py | 6 +- .../stacks/2.1/FALCON/test_falcon_server.py | 6 +- 3 files changed, 80 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py index 219430a..f62aa3d 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py @@ -23,18 +23,74 @@ __all__ = ["copy_to_hdfs", "get_sysprep_skip_copy_tarballs_hdfs"] import os import tempfile import re +import tarfile +from contextlib import closing from resource_management.libraries.script.script import Script from resource_management.libraries.resources.hdfs_resource import HdfsResource from resource_management.libraries.functions.default import default from resource_management.core import shell +from resource_management.core import sudo from resource_management.core.logger import Logger +from resource_management.core.exceptions import Fail +from resource_management.core.resources.system import Execute from resource_management.libraries.functions import stack_tools, stack_features, stack_select +from resource_management.libraries.functions import tar_archive STACK_NAME_PATTERN = "{{ stack_name }}" STACK_ROOT_PATTERN = "{{ stack_root }}" STACK_VERSION_PATTERN = "{{ stack_version }}" +def _prepare_tez_tarball(): + """ + Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball. + :return: the full path of the newly created tez tarball to use + """ + import tempfile + + Logger.info("Preparing the Tez tarball...") + + # get the mapreduce tarball which matches the version of tez + # tez installs the mapreduce tar, so it should always be present + _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce") + _, tez_source_file, _, _ = get_tarball_paths("tez") + + temp_dir = Script.get_tmp_dir() + + mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir) + tez_temp_dir = tempfile.mkdtemp(prefix="tez-tarball-", dir=temp_dir) + + Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir)) + tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir) + + Logger.info("Extracting {0} to {1}".format(tez_source_file, tez_temp_dir)) + tar_archive.untar_archive(tez_source_file, tez_temp_dir) + + hadoop_lib_native_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib", "native") + tez_lib_dir = os.path.join(tez_temp_dir, "lib") + + if not os.path.exists(hadoop_lib_native_dir): + raise Fail("Unable to seed the Tez tarball with native libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_dir)) + + if not os.path.exists(tez_lib_dir): + raise Fail("Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist".format(tez_lib_dir)) + + Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo = True) + + tez_tarball_with_native_lib = os.path.join(os.path.dirname(tez_source_file), "tez-native.tar.gz") + Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib)) + + # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path + with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball: + new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep) + + # cleanup + sudo.rmtree(mapreduce_temp_dir) + sudo.rmtree(tez_temp_dir) + + return tez_tarball_with_native_lib + + # TODO, in the future, each stack can define its own mapping of tarballs # inside the stack definition directory in some sort of xml file. # PLEASE DO NOT put this in cluster-env since it becomes much harder to change, @@ -50,7 +106,8 @@ TARBALL_MAP = { "tez": { "dirs": ("{0}/{1}/tez/lib/tez.tar.gz".format(STACK_ROOT_PATTERN, STACK_VERSION_PATTERN), "/{0}/apps/{1}/tez/tez.tar.gz".format(STACK_NAME_PATTERN, STACK_VERSION_PATTERN)), - "service": "TEZ" + "service": "TEZ", + "prepare_function": _prepare_tez_tarball }, "tez_hive2": { @@ -132,29 +189,29 @@ def get_tarball_paths(name, use_upgrading_version_during_upgrade=True, custom_so :param use_upgrading_version_during_upgrade: :param custom_source_file: If specified, use this source path instead of the default one from the map. :param custom_dest_file: If specified, use this destination path instead of the default one from the map. - :return: A tuple of (success status, source path, destination path) + :return: A tuple of (success status, source path, destination path, optional preparation function which is invoked to setup the tarball) """ stack_name = Script.get_stack_name() if not stack_name: Logger.error("Cannot copy {0} tarball to HDFS because stack name could not be determined.".format(str(name))) - return (False, None, None) + return False, None, None if name is None or name.lower() not in TARBALL_MAP: Logger.error("Cannot copy tarball to HDFS because {0} is not supported in stack {1} for this operation.".format(str(name), str(stack_name))) - return (False, None, None) + return False, None, None service = TARBALL_MAP[name.lower()]['service'] stack_version = get_current_version(service=service, use_upgrading_version_during_upgrade=use_upgrading_version_during_upgrade) if not stack_version: Logger.error("Cannot copy {0} tarball to HDFS because stack version could be be determined.".format(str(name))) - return (False, None, None) + return False, None, None stack_root = Script.get_stack_root() if not stack_root: Logger.error("Cannot copy {0} tarball to HDFS because stack root could be be determined.".format(str(name))) - return (False, None, None) + return False, None, None (source_file, dest_file) = TARBALL_MAP[name.lower()]['dirs'] @@ -173,7 +230,11 @@ def get_tarball_paths(name, use_upgrading_version_during_upgrade=True, custom_so source_file = source_file.replace(STACK_VERSION_PATTERN, stack_version) dest_file = dest_file.replace(STACK_VERSION_PATTERN, stack_version) - return (True, source_file, dest_file) + prepare_function = None + if "prepare_function" in TARBALL_MAP[name.lower()]: + prepare_function = TARBALL_MAP[name.lower()]['prepare_function'] + + return True, source_file, dest_file, prepare_function def get_current_version(service=None, use_upgrading_version_during_upgrade=True): @@ -272,8 +333,8 @@ def copy_to_hdfs(name, user_group, owner, file_mode=0444, custom_source_file=Non import params Logger.info("Called copy_to_hdfs tarball: {0}".format(name)) - (success, source_file, dest_file) = get_tarball_paths(name, use_upgrading_version_during_upgrade, - custom_source_file, custom_dest_file) + (success, source_file, dest_file, prepare_function) = get_tarball_paths(name, use_upgrading_version_during_upgrade, + custom_source_file, custom_dest_file) if not success: Logger.error("Could not copy tarball {0} due to a missing or incorrect parameter.".format(str(name))) @@ -311,6 +372,9 @@ def copy_to_hdfs(name, user_group, owner, file_mode=0444, custom_source_file=Non # The logic above cannot be used until fast-hdfs-resource.jar supports the mv command, or it switches # to WebHDFS. + # if there is a function which is needed to prepare the tarball, then invoke it first + if prepare_function is not None: + source_file = prepare_function() # If the directory already exists, it is a NO-OP dest_dir = os.path.dirname(dest_file) http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py index e6d8924..7976587 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py @@ -46,11 +46,13 @@ def archive_directory_dereference(archive, directory): try_sleep = 1, ) -def untar_archive(archive, directory): +def untar_archive(archive, directory, silent=True): """ :param directory: can be a symlink and is followed """ - Execute(('tar','-xvf',archive,'-C',directory+"/"), + options = "-xf" if silent else "-xvf" + + Execute(('tar',options,archive,'-C',directory+"/"), sudo = True, tries = 3, try_sleep = 1, http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py b/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py index 8c48347..e15cfdb 100644 --- a/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py +++ b/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py @@ -248,7 +248,7 @@ class TestFalconServer(RMFTestCase): sudo = True, ) self.assertResourceCalled('Execute', ('tar', - '-xvf', + '-xf', '/tmp/falcon-upgrade-backup/falcon-local-backup.tar', '-C', u'/hadoop/falcon/'), @@ -433,7 +433,7 @@ class TestFalconServer(RMFTestCase): self.assertResourceCalled('Execute', ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'falcon-server', version), sudo=True,) self.assertResourceCalled('Execute', ('tar', - '-xvf', + '-xf', '/tmp/falcon-upgrade-backup/falcon-local-backup.tar', '-C', u'/hadoop/falcon/'), @@ -473,7 +473,7 @@ class TestFalconServer(RMFTestCase): ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'falcon-server', version), sudo=True,) self.assertResourceCalled('Execute', ('tar', - '-xvf', + '-xf', '/tmp/falcon-upgrade-backup/falcon-local-backup.tar', '-C', u'/hadoop/falcon/'),