IMPALA-4277: allow overriding of Hive/Hadoop versions/locations. This change helps with IMPALA-4277 by making it easier to build against Hadoop/Hive distributions whose directory layout doesn't exactly match our current CDH dependencies, or where we may want to temporarily override a version without making a source change.
Change-Id: I7da10e38f9c4309f2d193dc25f14a6ea308c9639 Reviewed-on: http://gerrit.cloudera.org:8080/4720 Reviewed-by: Sailesh Mukil <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df680cfe Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df680cfe Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df680cfe Branch: refs/heads/master Commit: df680cfe3a99fa295d25d39f2eab4a9cd98509be Parents: d0a2d1d Author: Tim Armstrong <[email protected]> Authored: Thu Oct 13 15:00:08 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Oct 18 05:54:09 2016 +0000 ---------------------------------------------------------------------- bin/impala-config.sh | 33 ++++++++++++++++++++++----------- buildall.sh | 2 +- cmake_modules/FindHDFS.cmake | 11 +++-------- common/thrift/CMakeLists.txt | 2 +- 4 files changed, 27 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/bin/impala-config.sh ---------------------------------------------------------------------- diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 90e8fc0..2a25248 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -295,13 +295,13 @@ if [[ $OSTYPE == "darwin"* ]]; then IMPALA_THRIFT_JAVA_VERSION=0.9.2 fi -export IMPALA_HADOOP_VERSION=2.6.0-cdh5.10.0-SNAPSHOT -export IMPALA_HBASE_VERSION=1.2.0-cdh5.10.0-SNAPSHOT -export IMPALA_HIVE_VERSION=1.1.0-cdh5.10.0-SNAPSHOT -export IMPALA_SENTRY_VERSION=1.5.1-cdh5.10.0-SNAPSHOT -export IMPALA_LLAMA_VERSION=1.0.0-cdh5.10.0-SNAPSHOT -export IMPALA_PARQUET_VERSION=1.5.0-cdh5.10.0-SNAPSHOT -export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0 +export IMPALA_HADOOP_VERSION=${IMPALA_HADOOP_VERSION:-2.6.0-cdh5.10.0-SNAPSHOT} +export 
IMPALA_HBASE_VERSION=${IMPALA_HBASE_VERSION:-1.2.0-cdh5.10.0-SNAPSHOT} +export IMPALA_HIVE_VERSION=${IMPALA_HIVE_VERSION:-1.1.0-cdh5.10.0-SNAPSHOT} +export IMPALA_SENTRY_VERSION=${IMPALA_SENTRY_VERSION:-1.5.1-cdh5.10.0-SNAPSHOT} +export IMPALA_LLAMA_VERSION=${IMPALA_LLAMA_VERSION:-1.0.0-cdh5.10.0-SNAPSHOT} +export IMPALA_PARQUET_VERSION=${IMPALA_PARQUET_VERSION:-1.5.0-cdh5.10.0-SNAPSHOT} +export IMPALA_LLAMA_MINIKDC_VERSION=${IMPALA_LLAMA_MINIKDC_VERSION:-1.0.0} export IMPALA_FE_DIR="$IMPALA_HOME/fe" export IMPALA_BE_DIR="$IMPALA_HOME/be" @@ -319,12 +319,17 @@ else export CDH_COMPONENTS_HOME="$IMPALA_HOME/thirdparty" fi -# Hadoop dependencies are snapshots in the Impala tree +# Typically we build against a snapshot build of Hadoop that includes everything we need +# for building Impala and running a minicluster. export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/" export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" +# The include and lib paths are needed to pick up hdfs.h and libhdfs.* +# Allow overriding in case we want to point to a package/install with a different layout. +export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR:-"${HADOOP_HOME}/include"} +export HADOOP_LIB_DIR=${HADOOP_LIB_DIR:-"${HADOOP_HOME}/lib"} : ${HADOOP_CLASSPATH=} -# Please note that the * is inside quotes, thus it won't get exanded by bash but +# Please note that the * is inside quotes, thus it won't get expanded by bash but # by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:${HADOOP_HOME}/share/hadoop/tools/lib/*" # YARN is configured to use LZO so the LZO jar needs to be in the hadoop classpath. @@ -341,6 +346,9 @@ export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources" export HIVE_HOME="$CDH_COMPONENTS_HOME/hive-${IMPALA_HIVE_VERSION}/" export PATH="$HIVE_HOME/bin:$PATH" +# Allow overriding of Hive source location in case we want to build Impala without +# a complete Hive build. 
+export HIVE_SRC_DIR=${HIVE_SRC_DIR:-"${HIVE_HOME}/src"} export HIVE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" # Hive looks for jar files in a single directory from HIVE_AUX_JARS_PATH plus @@ -392,7 +400,7 @@ export USER="${USER-`id -un`}" #LIBHDFS_OPTS="-Xcheck:jni -Xcheck:nabounds" # - Points to the location of libbackend.so. LIBHDFS_OPTS="${LIBHDFS_OPTS:-}" -LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_HOME}/lib/native/" +LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_LIB_DIR}/native/" # READER BEWARE: This always points to the debug build. # TODO: Consider having cmake scripts change this value depending on # the build type. @@ -412,7 +420,7 @@ LIB_JVM=` find "${JAVA_HOME}/" -name libjvm.so | head -1` LD_LIBRARY_PATH="${LD_LIBRARY_PATH-}" LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JAVA}`:`dirname ${LIB_JSIG}`" LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JVM}`" -LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native" +LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_LIB_DIR}/native" LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_HOME}/be/build/debug/service" LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_SNAPPY_PATH}" LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_LZO}/build" @@ -443,9 +451,12 @@ export IMPALA_CONFIG_SOURCED=1 echo "IMPALA_HOME = $IMPALA_HOME" echo "HADOOP_HOME = $HADOOP_HOME" echo "HADOOP_CONF_DIR = $HADOOP_CONF_DIR" +echo "HADOOP_INCLUDE_DIR = $HADOOP_INCLUDE_DIR" +echo "HADOOP_LIB_DIR = $HADOOP_LIB_DIR" echo "MINI_DFS_BASE_DATA_DIR = $MINI_DFS_BASE_DATA_DIR" echo "HIVE_HOME = $HIVE_HOME" echo "HIVE_CONF_DIR = $HIVE_CONF_DIR" +echo "HIVE_SRC_DIR = $HIVE_SRC_DIR" echo "HBASE_HOME = $HBASE_HOME" echo "HBASE_CONF_DIR = $HBASE_CONF_DIR" echo "MINIKDC_HOME = $MINIKDC_HOME" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/buildall.sh ---------------------------------------------------------------------- diff --git a/buildall.sh b/buildall.sh index a7858a3..d7159e7 100755 --- a/buildall.sh 
+++ b/buildall.sh @@ -365,7 +365,7 @@ reconfigure_test_cluster() { # Copy Hadoop-lzo dependencies if available (required to generate Lzo data). if stat "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* > /dev/null ; then - cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_HOME/lib/native" + cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_LIB_DIR/native" else echo "No hadoop-lzo found" fi http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/cmake_modules/FindHDFS.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/FindHDFS.cmake b/cmake_modules/FindHDFS.cmake index 6affc56..f0916e1 100644 --- a/cmake_modules/FindHDFS.cmake +++ b/cmake_modules/FindHDFS.cmake @@ -28,9 +28,9 @@ exec_program(hadoop ARGS version OUTPUT_VARIABLE Hadoop_VERSION RETURN_VALUE Hadoop_RETURN) -# currently only looking in HADOOP_HOME +# Only look in HADOOP_INCLUDE_DIR find_path(HDFS_INCLUDE_DIR hdfs.h PATHS - $ENV{HADOOP_HOME}/include/ + $ENV{HADOOP_INCLUDE_DIR} # make sure we don't accidentally pick up a different version NO_DEFAULT_PATH ) @@ -44,12 +44,7 @@ else () endif() message(STATUS "Architecture: ${arch_hint}") - -if ("${arch_hint}" STREQUAL "x64") - set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native) -else () - set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native) -endif () +set(HDFS_LIB_PATHS $ENV{HADOOP_LIB_DIR}/native) message(STATUS "HDFS_LIB_PATHS: ${HDFS_LIB_PATHS}") http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/common/thrift/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/common/thrift/CMakeLists.txt b/common/thrift/CMakeLists.txt index 3104ee2..08fabd8 100644 --- a/common/thrift/CMakeLists.txt +++ b/common/thrift/CMakeLists.txt @@ -119,7 +119,7 @@ function(THRIFT_GEN_DS VAR) endfunction(THRIFT_GEN_DS) message("Using Thrift compiler: ${THRIFT_COMPILER}") -set(THRIFT_INCLUDE_DIR_OPTION -I 
${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_HOME}/src/metastore/if) +set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_SRC_DIR}/metastore/if) set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources) set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources) # TODO: avoid duplicating generated java classes
