AMBARI-22444 - Add Native Libraries To Tez Tarball (jonathanhurley)

Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c67a3246
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c67a3246
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c67a3246

Branch: refs/heads/branch-feature-AMBARI-20859
Commit: c67a3246e2121c6977094e10cdb1414f45cf1482
Parents: 99152d3
Author: Jonathan Hurley <jhur...@hortonworks.com>
Authored: Tue Nov 14 16:19:30 2017 -0500
Committer: Jonathan Hurley <jhur...@hortonworks.com>
Committed: Wed Nov 15 15:30:36 2017 -0500

----------------------------------------------------------------------
 .../libraries/functions/copy_tarball.py         | 82 +++++++++++++++++---
 .../libraries/functions/tar_archive.py          |  6 +-
 .../stacks/2.1/FALCON/test_falcon_server.py     |  6 +-
 3 files changed, 80 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
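
At a high level, this change threads an optional "prepare" callback through
get_tarball_paths() and has copy_to_hdfs() invoke it before the upload, so the
Tez tarball can be rebuilt with the Hadoop native libraries first. A minimal
sketch of the resulting flow (simplified, hypothetical names and paths -- the
real code is in the diff below):

# Sketch only: simplified stand-in for copy_tarball.py, not the actual code.
TARBALL_MAP = {
    "tez": {
        "dirs": ("/usr/hdp/<version>/tez/lib/tez.tar.gz",
                 "/hdp/apps/<version>/tez/tez.tar.gz"),
        "service": "TEZ",
        # hypothetical stand-in for _prepare_tez_tarball
        "prepare_function": lambda: "/usr/hdp/<version>/tez/lib/tez-native.tar.gz",
    },
}

def get_tarball_paths(name):
    entry = TARBALL_MAP[name.lower()]
    source_file, dest_file = entry["dirs"]
    # the optional hook becomes a fourth element of the returned tuple
    return True, source_file, dest_file, entry.get("prepare_function")

def copy_to_hdfs(name):
    success, source_file, dest_file, prepare_function = get_tarball_paths(name)
    if prepare_function is not None:
        # the hook may hand back a brand new source tarball (tez-native.tar.gz)
        source_file = prepare_function()
    # ... upload source_file to dest_file via HdfsResource ...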


http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
index 219430a..f62aa3d 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
@@ -23,18 +23,74 @@ __all__ = ["copy_to_hdfs", "get_sysprep_skip_copy_tarballs_hdfs"]
 import os
 import tempfile
 import re
+import tarfile
+from contextlib import closing
 
 from resource_management.libraries.script.script import Script
 from resource_management.libraries.resources.hdfs_resource import HdfsResource
 from resource_management.libraries.functions.default import default
 from resource_management.core import shell
+from resource_management.core import sudo
 from resource_management.core.logger import Logger
+from resource_management.core.exceptions import Fail
+from resource_management.core.resources.system import Execute
 from resource_management.libraries.functions import stack_tools, stack_features, stack_select
+from resource_management.libraries.functions import tar_archive
 
 STACK_NAME_PATTERN = "{{ stack_name }}"
 STACK_ROOT_PATTERN = "{{ stack_root }}"
 STACK_VERSION_PATTERN = "{{ stack_version }}"
 
+def _prepare_tez_tarball():
+  """
+  Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball.
+  :return:  the full path of the newly created tez tarball to use
+  """
+  import tempfile
+
+  Logger.info("Preparing the Tez tarball...")
+
+  # get the mapreduce tarball which matches the version of tez
+  # tez installs the mapreduce tar, so it should always be present
+  _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce")
+  _, tez_source_file, _, _ = get_tarball_paths("tez")
+
+  temp_dir = Script.get_tmp_dir()
+
+  mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir)
+  tez_temp_dir = tempfile.mkdtemp(prefix="tez-tarball-", dir=temp_dir)
+
+  Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir))
+  tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir)
+
+  Logger.info("Extracting {0} to {1}".format(tez_source_file, tez_temp_dir))
+  tar_archive.untar_archive(tez_source_file, tez_temp_dir)
+
+  hadoop_lib_native_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib", "native")
+  tez_lib_dir = os.path.join(tez_temp_dir, "lib")
+
+  if not os.path.exists(hadoop_lib_native_dir):
+    raise Fail("Unable to seed the Tez tarball with native libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_dir))
+
+  if not os.path.exists(tez_lib_dir):
+    raise Fail("Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist".format(tez_lib_dir))
+
+  Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo = True)
+
+  tez_tarball_with_native_lib = os.path.join(os.path.dirname(tez_source_file), "tez-native.tar.gz")
+  Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib))
+
+  # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
+  with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball:
+    new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)
+
+  # cleanup
+  sudo.rmtree(mapreduce_temp_dir)
+  sudo.rmtree(tez_temp_dir)
+
+  return tez_tarball_with_native_lib
+
+
 # TODO, in the future, each stack can define its own mapping of tarballs
 # inside the stack definition directory in some sort of xml file.
 # PLEASE DO NOT put this in cluster-env since it becomes much harder to change,
@@ -50,7 +106,8 @@ TARBALL_MAP = {
   "tez": {
     "dirs": ("{0}/{1}/tez/lib/tez.tar.gz".format(STACK_ROOT_PATTERN, 
STACK_VERSION_PATTERN),
            "/{0}/apps/{1}/tez/tez.tar.gz".format(STACK_NAME_PATTERN, 
STACK_VERSION_PATTERN)),
-    "service": "TEZ"
+    "service": "TEZ",
+    "prepare_function": _prepare_tez_tarball
   },
 
   "tez_hive2": {
@@ -132,29 +189,29 @@ def get_tarball_paths(name, use_upgrading_version_during_upgrade=True, custom_so
   :param use_upgrading_version_during_upgrade:
  :param custom_source_file: If specified, use this source path instead of the default one from the map.
  :param custom_dest_file: If specified, use this destination path instead of the default one from the map.
-  :return: A tuple of (success status, source path, destination path)
+  :return: A tuple of (success status, source path, destination path, optional preparation function which is invoked to set up the tarball)
   """
   stack_name = Script.get_stack_name()
 
   if not stack_name:
     Logger.error("Cannot copy {0} tarball to HDFS because stack name could not 
be determined.".format(str(name)))
-    return (False, None, None)
+    return False, None, None
 
   if name is None or name.lower() not in TARBALL_MAP:
     Logger.error("Cannot copy tarball to HDFS because {0} is not supported in 
stack {1} for this operation.".format(str(name), str(stack_name)))
-    return (False, None, None)
+    return False, None, None
 
   service = TARBALL_MAP[name.lower()]['service']
 
  stack_version = get_current_version(service=service, use_upgrading_version_during_upgrade=use_upgrading_version_during_upgrade)
  if not stack_version:
    Logger.error("Cannot copy {0} tarball to HDFS because stack version could not be determined.".format(str(name)))
-    return (False, None, None)
+    return False, None, None
 
   stack_root = Script.get_stack_root()
   if not stack_root:
     Logger.error("Cannot copy {0} tarball to HDFS because stack root could be 
be determined.".format(str(name)))
-    return (False, None, None)
+    return False, None, None
 
   (source_file, dest_file) = TARBALL_MAP[name.lower()]['dirs']
 
@@ -173,7 +230,11 @@ def get_tarball_paths(name, use_upgrading_version_during_upgrade=True, custom_so
   source_file = source_file.replace(STACK_VERSION_PATTERN, stack_version)
   dest_file = dest_file.replace(STACK_VERSION_PATTERN, stack_version)
 
-  return (True, source_file, dest_file)
+  prepare_function = None
+  if "prepare_function" in TARBALL_MAP[name.lower()]:
+    prepare_function = TARBALL_MAP[name.lower()]['prepare_function']
+
+  return True, source_file, dest_file, prepare_function
 
 
 def get_current_version(service=None, use_upgrading_version_during_upgrade=True):
@@ -272,8 +333,8 @@ def copy_to_hdfs(name, user_group, owner, file_mode=0444, custom_source_file=Non
   import params
 
   Logger.info("Called copy_to_hdfs tarball: {0}".format(name))
-  (success, source_file, dest_file) = get_tarball_paths(name, use_upgrading_version_during_upgrade,
-                                                         custom_source_file, custom_dest_file)
+  (success, source_file, dest_file, prepare_function) = get_tarball_paths(name, use_upgrading_version_during_upgrade,
+                                                                          custom_source_file, custom_dest_file)
 
   if not success:
     Logger.error("Could not copy tarball {0} due to a missing or incorrect 
parameter.".format(str(name)))
@@ -311,6 +372,9 @@ def copy_to_hdfs(name, user_group, owner, file_mode=0444, custom_source_file=Non
  # The logic above cannot be used until fast-hdfs-resource.jar supports the mv command, or it switches
  # to WebHDFS.
 
+  # if there is a function which is needed to prepare the tarball, then invoke it first
+  if prepare_function is not None:
+    source_file = prepare_function()
 
   # If the directory already exists, it is a NO-OP
   dest_dir = os.path.dirname(dest_file)
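
A subtle detail in _prepare_tez_tarball() above is arcname=os.path.sep: by
default TarFile.add() records the path it is given as the member name, which
here would be the absolute temp directory. Since tarfile strips leading "/"
from archive names, passing os.path.sep yields relative member names. A
standalone sketch (illustrative file names only, not Ambari code):

# Sketch: shows that arcname=os.path.sep produces relative member names.
import os
import tarfile
import tempfile
from contextlib import closing

staging_dir = tempfile.mkdtemp(prefix="tez-tarball-")
open(os.path.join(staging_dir, "tez-example.jar"), "w").close()

tarball = os.path.join(tempfile.gettempdir(), "tez-native-example.tar.gz")
with closing(tarfile.open(tarball, "w:gz")) as tar:
    tar.add(staging_dir, arcname=os.path.sep)

with closing(tarfile.open(tarball, "r:gz")) as tar:
    # member names are relative (e.g. "tez-example.jar"), not the absolute
    # staging directory path, so extraction lands wherever -C points
    print(tar.getnames())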

http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
index e6d8924..7976587 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
@@ -46,11 +46,13 @@ def archive_directory_dereference(archive, directory):
     try_sleep = 1,
   )
 
-def untar_archive(archive, directory):
+def untar_archive(archive, directory, silent=True):
   """
   :param directory:   can be a symlink and is followed
   """
-  Execute(('tar','-xvf',archive,'-C',directory+"/"),
+  options = "-xf" if silent else "-xvf"
+
+  Execute(('tar',options,archive,'-C',directory+"/"),
     sudo = True,
     tries = 3,
     try_sleep = 1,
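
For reference, the new silent flag simply drops tar's -v switch. A
behavior-equivalent sketch using plain subprocess instead of Ambari's Execute
resource (no sudo or retry handling here):

# Sketch only: same option logic as untar_archive() above.
import subprocess

def untar_archive(archive, directory, silent=True):
    # -x extract, -f archive file; add -v only when verbose output is wanted
    options = "-xf" if silent else "-xvf"
    subprocess.check_call(["tar", options, archive, "-C", directory + "/"])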

http://git-wip-us.apache.org/repos/asf/ambari/blob/c67a3246/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py b/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py
index 8c48347..e15cfdb 100644
--- a/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py
+++ b/ambari-server/src/test/python/stacks/2.1/FALCON/test_falcon_server.py
@@ -248,7 +248,7 @@ class TestFalconServer(RMFTestCase):
         sudo = True,
     )
     self.assertResourceCalled('Execute', ('tar',
-     '-xvf',
+     '-xf',
      '/tmp/falcon-upgrade-backup/falcon-local-backup.tar',
      '-C',
      u'/hadoop/falcon/'),
@@ -433,7 +433,7 @@ class TestFalconServer(RMFTestCase):
     self.assertResourceCalled('Execute',
                              ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'falcon-server', version), sudo=True,)
     self.assertResourceCalled('Execute', ('tar',
-                                          '-xvf',
+                                          '-xf',
                                           '/tmp/falcon-upgrade-backup/falcon-local-backup.tar',
                                           '-C',
                                           u'/hadoop/falcon/'),
@@ -473,7 +473,7 @@ class TestFalconServer(RMFTestCase):
                              ('ambari-python-wrap', '/usr/bin/hdp-select', 'set', 'falcon-server', version), sudo=True,)
 
     self.assertResourceCalled('Execute', ('tar',
-     '-xvf',
+     '-xf',
      '/tmp/falcon-upgrade-backup/falcon-local-backup.tar',
      '-C',
      u'/hadoop/falcon/'),
