Repository: ambari Updated Branches: refs/heads/branch-feature-AMBARI-22457 519f52706 -> 58c7f784a
AMBARI-22486 - Conditionally Rebuild MapReduce and Tez Tarballs with LZO if Enabled (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/58c7f784 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/58c7f784 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/58c7f784 Branch: refs/heads/branch-feature-AMBARI-22457 Commit: 58c7f784abe9b6f05a1358c14a8f94afc6725f79 Parents: 519f527 Author: Jonathan Hurley <[email protected]> Authored: Mon Nov 20 14:23:20 2017 -0500 Committer: Jonathan Hurley <[email protected]> Committed: Tue Nov 21 20:32:13 2017 -0500 ---------------------------------------------------------------------- .../libraries/functions/copy_tarball.py | 99 +++++++++++++++++++- .../TEZ/0.4.0.2.1/package/scripts/tez.py | 5 +- .../configuration-mapred/mapred-site.xml | 2 +- .../2.2/services/TEZ/configuration/tez-site.xml | 4 +- .../YARN/configuration-mapred/mapred-site.xml | 2 +- .../stacks/HDP/2.6/upgrades/config-upgrade.xml | 6 +- 6 files changed, 106 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py index b05c97c..bf0701c 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py @@ -28,6 +28,8 @@ from contextlib import closing from resource_management.libraries.script.script import Script from resource_management.libraries.resources.hdfs_resource import HdfsResource +from resource_management.libraries.functions import component_version +from resource_management.libraries.functions import lzo_utils from resource_management.libraries.functions.default import default from resource_management.core import shell from resource_management.core import sudo @@ -45,6 +47,9 @@ STACK_VERSION_PATTERN = "{{ stack_version }}" def _prepare_tez_tarball(): """ Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball. + It's very important to use the version of mapreduce which matches tez here. + Additionally, this will also copy native LZO to the tez tarball if LZO is enabled and the + GPL license has been accepted. :return: the full path of the newly created tez tarball to use """ import tempfile @@ -79,12 +84,31 @@ def _prepare_tez_tarball(): if not os.path.exists(tez_lib_dir): raise Fail("Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist".format(tez_lib_dir)) - # ensure that the tez/lib directory is readable by non-root (which it typically is not) - sudo.chmod(tez_lib_dir, 0755) - # copy native libraries from hadoop to tez Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo = True) + # if enabled, LZO GPL libraries must be copied as well + if lzo_utils.should_install_lzo(): + stack_root = Script.get_stack_root() + tez_version = component_version.get_component_repository_version("TEZ") + hadoop_lib_native_lzo_dir = os.path.join(stack_root, tez_version, "hadoop", "lib", "native") + + if not sudo.path_isdir(hadoop_lib_native_lzo_dir): + Logger.warning("Unable to located native LZO libraries at {0}, falling back to hadoop home".format(hadoop_lib_native_lzo_dir)) + hadoop_lib_native_lzo_dir = os.path.join(stack_root, "current", "hadoop-client", "lib", "native") + + if not sudo.path_isdir(hadoop_lib_native_lzo_dir): + raise Fail("Unable to seed the Tez tarball with native libraries since LZO is enabled but the native LZO libraries could not be found at {0}".format(hadoop_lib_native_lzo_dir)) + + Execute(("cp", "-a", hadoop_lib_native_lzo_dir, tez_lib_dir), sudo = True) + + + # ensure that the tez/lib directory is readable by non-root (which it typically is not) + Directory(tez_lib_dir, + mode = 0755, + cd_access = 'a', + recursive_ownership = True) + # create the staging directory so that non-root agents can write to it tez_native_tarball_staging_dir = os.path.join(temp_dir, "tez-native-tarball-staging") if not os.path.exists(tez_native_tarball_staging_dir): @@ -111,6 +135,72 @@ def _prepare_tez_tarball(): return tez_tarball_with_native_lib +def _prepare_mapreduce_tarball(): + """ + Prepares the mapreduce tarball by including the native LZO libraries if necessary. If LZO is + not enabled or has not been opted-in, then this will do nothing and return the original + tarball to upload to HDFS. + :return: the full path of the newly created mapreduce tarball to use or the original path + if no changes were made + """ + # get the mapreduce tarball to crack open and add LZO libraries to + _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce") + + if not lzo_utils.should_install_lzo(): + return mapreduce_source_file + + Logger.info("Preparing the mapreduce tarball with native LZO libraries...") + + temp_dir = Script.get_tmp_dir() + + # create the temp staging directories ensuring that non-root agents using tarfile can work with them + mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir) + sudo.chmod(mapreduce_temp_dir, 0777) + + # calculate the source directory for LZO + hadoop_lib_native_source_dir = os.path.join(os.path.dirname(mapreduce_source_file), "lib", "native") + if not sudo.path_exists(hadoop_lib_native_source_dir): + raise Fail("Unable to seed the mapreduce tarball with native LZO libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_source_dir)) + + Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir)) + tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir) + + mapreduce_lib_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib") + + # copy native libraries from source hadoop to target + Execute(("cp", "-af", hadoop_lib_native_source_dir, mapreduce_lib_dir), sudo = True) + + # ensure that the hadoop/lib/native directory is readable by non-root (which it typically is not) + Directory(mapreduce_lib_dir, + mode = 0755, + cd_access = 'a', + recursive_ownership = True) + + # create the staging directory so that non-root agents can write to it + mapreduce_native_tarball_staging_dir = os.path.join(temp_dir, "mapreduce-native-tarball-staging") + if not os.path.exists(mapreduce_native_tarball_staging_dir): + Directory(mapreduce_native_tarball_staging_dir, + mode = 0777, + cd_access = 'a', + create_parents = True, + recursive_ownership = True) + + mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz") + Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib)) + + # tar up mapreduce, making sure to specify nothing for the arcname so that it does not include an absolute path + with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball: + new_tarball.add(mapreduce_temp_dir, arcname = os.path.sep) + + # ensure that the tarball can be read and uploaded + sudo.chmod(mapreduce_tarball_with_native_lib, 0744) + + # cleanup + sudo.rmtree(mapreduce_temp_dir) + + return mapreduce_tarball_with_native_lib + + # TODO, in the future, each stack can define its own mapping of tarballs # inside the stack definition directory in some sort of xml file. # PLEASE DO NOT put this in cluster-env since it becomes much harder to change, @@ -163,7 +253,8 @@ TARBALL_MAP = { "mapreduce": { "dirs": ("{0}/{1}/hadoop/mapreduce.tar.gz".format(STACK_ROOT_PATTERN, STACK_VERSION_PATTERN), "/{0}/apps/{1}/mapreduce/mapreduce.tar.gz".format(STACK_NAME_PATTERN, STACK_VERSION_PATTERN)), - "service": "MAPREDUCE2" + "service": "MAPREDUCE2", + "prepare_function": _prepare_mapreduce_tarball }, "spark": { http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py b/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py index dfa6501..35647e4 100644 --- a/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py +++ b/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py @@ -23,8 +23,8 @@ import os # Local Imports from resource_management.core.resources.system import Directory, File +from resource_management.libraries.functions import lzo_utils from resource_management.libraries.resources.xml_config import XmlConfig -from resource_management.libraries.functions.format import format from resource_management.core.source import InlineTemplate from ambari_commons import OSConst from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl @@ -37,6 +37,9 @@ def tez(config_dir): """ import params + # ensure that matching LZO libraries are installed for Tez + lzo_utils.install_lzo_if_needed() + Directory(params.tez_etc_dir, mode=0755) Directory(config_dir, http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml index 3438c45..398c9d7 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml @@ -438,7 +438,7 @@ </property> <property> <name>mapreduce.admin.user.env</name> - <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64</value> + <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64</value> <description> Additional execution environment entries for map and reduce task processes. This is not an additive property. You must preserve the original value if http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml index 4ffb7a4..5513ab1 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml @@ -78,7 +78,7 @@ </property> <property> <name>tez.am.launch.env</name> - <value>LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> + <value>LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> <description> Additional execution environment entries for tez. This is not an additive property. You must preserve the original value if you want to have access to native libraries. @@ -124,7 +124,7 @@ </property> <property> <name>tez.task.launch.env</name> - <value>LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> + <value>LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> <description> Additional execution environment entries for tez. This is not an additive property. You must preserve the original value if you want to have access to native libraries. http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml index 084e912..099e388 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml @@ -20,7 +20,7 @@ <configuration xmlns:xi="http://www.w3.org/2001/XInclude" supports_final="true"> <property> <name>mapreduce.admin.user.env</name> - <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> + <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value> <description> Additional execution environment entries for map and reduce task processes. This is not an additive property. You must preserve the original value if http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml index 4538072..5c672ba 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml @@ -269,8 +269,8 @@ <changes> <definition xsi:type="configure" id="hdp_2_6_tez_tarball_ld_library"> <type>tez-site</type> - <set key="tez.am.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/> - <set key="tez.task.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/> + <set key="tez.am.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/> + <set key="tez.task.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/> </definition> </changes> </component> @@ -281,7 +281,7 @@ <changes> <definition xsi:type="configure" id="hdp_2_6_mapreduce_tarball_ld_library"> <type>mapred-site</type> - <set key="mapreduce.admin.user.env" value="LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64"/> + <set key="mapreduce.admin.user.env" value="LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64"/> </definition> </changes> </component>
