This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 38ce7ce95a4 branch-3.1:[feat](thirdparty) Refactor HDFS dependency
management: move HDFS client jars to FE and slim down BE thirdparty (#57202)
(#57527)
38ce7ce95a4 is described below
commit 38ce7ce95a422bd9e1e4bf1ad0e2f6e01aa75ae1
Author: Calvin Kirs <[email protected]>
AuthorDate: Mon Nov 3 11:19:44 2025 +0800
branch-3.1:[feat](thirdparty) Refactor HDFS dependency management: move
HDFS client jars to FE and slim down BE thirdparty (#57202) (#57527)
#57202
---
bin/start_be.sh | 10 +-
build.sh | 175 ++++++++++++++++++------------
cloud/script/start.sh | 11 +-
fe/be-java-extensions/hadoop-deps/pom.xml | 114 +++++++++++++++++++
fe/be-java-extensions/pom.xml | 1 +
5 files changed, 223 insertions(+), 88 deletions(-)
diff --git a/bin/start_be.sh b/bin/start_be.sh
index abffdebe96e..fec1eb01723 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -225,16 +225,10 @@ done
if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
# add hadoop libs
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs"/*.jar; do
DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/lib"/*.jar; do
DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
done
fi
diff --git a/build.sh b/build.sh
index 75cb2f3a118..90621ae7461 100755
--- a/build.sh
+++ b/build.sh
@@ -40,6 +40,7 @@ TARGET_SYSTEM="$(uname -s)"
TARGET_ARCH="$(uname -m)"
echo "Target system: ${TARGET_SYSTEM}; Target arch: ${TARGET_ARCH}"
+HADOOP_DEPS_NAME="hadoop-deps"
. "${DORIS_HOME}/env.sh"
# Check args
@@ -54,6 +55,8 @@ Usage: $0 <options>
--file-cache-microbench build Backend file cache microbench tool.
Default OFF.
--cloud build Cloud. Default OFF.
--index-tool build Backend inverted index tool. Default OFF.
+ --benchmark build Google Benchmark. Default OFF.
+ --task-executor-simulator build Backend task executor simulator. Default
OFF.
--broker build Broker. Default ON.
--hive-udf build Hive UDF library for Ingestion Load.
Default ON.
--be-java-extensions build Backend java extensions. Default ON.
@@ -69,6 +72,7 @@ Usage: $0 <options>
DISABLE_BE_JAVA_EXTENSIONS If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will
do not build binary with java-udf,hadoop-hudi-scanner,jdbc-scanner and so on
Default is OFF.
DISABLE_JAVA_CHECK_STYLE If set DISABLE_JAVA_CHECK_STYLE=ON, it will
skip style check of java code in FE.
DISABLE_BUILD_AZURE If set DISABLE_BUILD_AZURE=ON, it will not
build azure into BE.
+
Eg.
$0 build all
$0 --be build Backend
@@ -76,9 +80,10 @@ Usage: $0 <options>
$0 --file-cache-microbench build Backend file cache
microbench tool
$0 --cloud build Cloud
$0 --index-tool build Backend inverted index tool
- $0 --fe --clean clean and build Frontend and Spark
Dpp application
+ $0 --benchmark build Google Benchmark of Backend
+ $0 --fe --clean clean and build Frontend.
$0 --fe --be --clean clean and build Frontend and
Backend
- $0 --spark-dpp build Spark DPP application alone
+ $0 --task-executor-simulator build task executor simulator
$0 --broker build Broker
$0 --be --fe build Backend, Frontend, and Java
UDF library
$0 --be --coverage build Backend with coverage enabled
@@ -137,6 +142,8 @@ if ! OPTS="$(getopt \
-l 'meta-tool' \
-l 'file-cache-microbench' \
-l 'index-tool' \
+ -l 'benchmark' \
+ -l 'task-executor-simulator' \
-l 'spark-dpp' \
-l 'hive-udf' \
-l 'be-java-extensions' \
@@ -160,6 +167,8 @@ BUILD_BROKER=0
BUILD_META_TOOL='OFF'
BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
BUILD_INDEX_TOOL='OFF'
+BUILD_BENCHMARK='OFF'
+BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
BUILD_BE_JAVA_EXTENSIONS=0
BUILD_HIVE_UDF=0
CLEAN=0
@@ -178,7 +187,9 @@ if [[ "$#" == 1 ]]; then
BUILD_BROKER=1
BUILD_META_TOOL='OFF'
BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
+ BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
BUILD_INDEX_TOOL='OFF'
+ BUILD_BENCHMARK='OFF'
BUILD_HIVE_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
CLEAN=0
@@ -198,6 +209,7 @@ else
;;
--cloud)
BUILD_CLOUD=1
+ BUILD_BE_JAVA_EXTENSIONS=1
shift
;;
--broker)
@@ -216,6 +228,16 @@ else
BUILD_INDEX_TOOL='ON'
shift
;;
+ --benchmark)
+ BUILD_BENCHMARK='ON'
+ BUILD_BE=1 # go into BE cmake building, but benchmark instead of
doris_be
+ shift
+ ;;
+ --task-executor-simulator)
+ BUILD_TASK_EXECUTOR_SIMULATOR='ON'
+ BUILD_BE=1
+ shift
+ ;;
--spark-dpp)
BUILD_SPARK_DPP=1
shift
@@ -276,6 +298,7 @@ else
BUILD_META_TOOL='ON'
BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
BUILD_INDEX_TOOL='ON'
+ BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
BUILD_HIVE_UDF=1
BUILD_BE_JAVA_EXTENSIONS=1
CLEAN=0
@@ -285,8 +308,13 @@ fi
if [[ "${HELP}" -eq 1 ]]; then
usage
fi
-# build thirdparty libraries if necessary
-if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
+# Build thirdparty libraries if necessary: check whether the last thirdparty lib
+# to be installed is present.
+if [[ "${TARGET_SYSTEM}" == 'Darwin' ]]; then
+ LAST_THIRDPARTY_LIB='libbrotlienc.a'
+else
+ LAST_THIRDPARTY_LIB='hadoop_hdfs/native/libhdfs.a'
+fi
+if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/${LAST_THIRDPARTY_LIB}" ]]; then
echo "Thirdparty libraries need to be build ..."
# need remove all installed pkgs because some lib like lz4 will throw
error if its lib alreay exists
rm -rf "${DORIS_THIRDPARTY}/installed"
@@ -341,9 +369,6 @@ if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 &&
"${BUILD_FE}" -eq 0 && ${BUILD_
exit 0
fi
-if [[ -z "${WITH_MYSQL}" ]]; then
- WITH_MYSQL='OFF'
-fi
if [[ -z "${GLIBC_COMPATIBILITY}" ]]; then
if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
GLIBC_COMPATIBILITY='ON'
@@ -367,41 +392,21 @@ fi
if [[ -z "${STRIP_DEBUG_INFO}" ]]; then
STRIP_DEBUG_INFO='OFF'
fi
-if [[ -z "${USE_MEM_TRACKER}" ]]; then
- if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
- USE_MEM_TRACKER='ON'
- else
- USE_MEM_TRACKER='OFF'
- fi
-fi
BUILD_TYPE_LOWWER=$(echo "${BUILD_TYPE}" | tr '[:upper:]' '[:lower:]')
if [[ "${BUILD_TYPE_LOWWER}" == "asan" ]]; then
USE_JEMALLOC='OFF'
elif [[ -z "${USE_JEMALLOC}" ]]; then
- USE_JEMALLOC='ON'
-fi
-if [[ -f "${TP_INCLUDE_DIR}/jemalloc/jemalloc_doris_with_prefix.h" ]]; then
- # compatible with old thirdparty
- rm -rf "${TP_INCLUDE_DIR}/jemalloc/jemalloc.h"
- rm -rf "${TP_LIB_DIR}/libjemalloc_doris.a"
- rm -rf "${TP_LIB_DIR}/libjemalloc_doris_pic.a"
- rm -rf "${TP_INCLUDE_DIR}/rocksdb"
- rm -rf "${TP_LIB_DIR}/librocksdb.a"
-
- mv "${TP_INCLUDE_DIR}/jemalloc/jemalloc_doris_with_prefix.h"
"${TP_INCLUDE_DIR}/jemalloc/jemalloc.h"
- mv "${TP_LIB_DIR}/libjemalloc_doris_with_prefix.a"
"${TP_LIB_DIR}/libjemalloc_doris.a"
- mv "${TP_LIB_DIR}/libjemalloc_doris_with_prefix_pic.a"
"${TP_LIB_DIR}/libjemalloc_doris_pic.a"
- mv "${TP_LIB_DIR}/librocksdb_jemalloc_with_prefix.a"
"${TP_LIB_DIR}/librocksdb.a"
- mv -f "${TP_INCLUDE_DIR}/rocksdb_jemalloc_with_prefix"
"${TP_INCLUDE_DIR}/rocksdb"
+ if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
+ USE_JEMALLOC='ON'
+ else
+ USE_JEMALLOC='OFF'
+ fi
fi
+
if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
USE_BTHREAD_SCANNER='OFF'
fi
-if [[ -z "${USE_DWARF}" ]]; then
- USE_DWARF='OFF'
-fi
-
if [[ -z "${USE_UNWIND}" ]]; then
if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
USE_UNWIND='ON'
@@ -442,7 +447,7 @@ if [[ -z "${DISABLE_JAVA_CHECK_STYLE}" ]]; then
DISABLE_JAVA_CHECK_STYLE='OFF'
fi
-if [[ -n "${DISABLE_BUILD_AZURE}" ]]; then
+if [[ "$(echo "${DISABLE_BUILD_AZURE}" | tr '[:lower:]' '[:upper:]')" == "ON"
]]; then
BUILD_AZURE='OFF'
fi
@@ -450,8 +455,8 @@ if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
ENABLE_INJECTION_POINT='OFF'
fi
-if [[ -z "${ENABLE_CACHE_LOCK_DEBUG}" ]]; then
- ENABLE_CACHE_LOCK_DEBUG='ON'
+if [[ -z "${BUILD_BENCHMARK}" ]]; then
+ BUILD_BENCHMARK='OFF'
fi
if [[ -z "${RECORD_COMPILER_SWITCHES}" ]]; then
@@ -489,22 +494,20 @@ echo "Get params:
BUILD_META_TOOL -- ${BUILD_META_TOOL}
BUILD_FILE_CACHE_MICROBENCH_TOOL -- ${BUILD_FILE_CACHE_MICROBENCH_TOOL}
BUILD_INDEX_TOOL -- ${BUILD_INDEX_TOOL}
+ BUILD_BENCHMARK -- ${BUILD_BENCHMARK}
+ BUILD_TASK_EXECUTOR_SIMULATOR -- ${BUILD_TASK_EXECUTOR_SIMULATOR}
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
- WITH_MYSQL -- ${WITH_MYSQL}
GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY}
USE_AVX2 -- ${USE_AVX2}
USE_LIBCPP -- ${USE_LIBCPP}
- USE_DWARF -- ${USE_DWARF}
USE_UNWIND -- ${USE_UNWIND}
STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO}
- USE_MEM_TRACKER -- ${USE_MEM_TRACKER}
USE_JEMALLOC -- ${USE_JEMALLOC}
USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER}
ENABLE_INJECTION_POINT -- ${ENABLE_INJECTION_POINT}
- ENABLE_CACHE_LOCK_DEBUG -- ${ENABLE_CACHE_LOCK_DEBUG}
DENABLE_CLANG_COVERAGE -- ${DENABLE_CLANG_COVERAGE}
DISPLAY_BUILD_TIME -- ${DISPLAY_BUILD_TIME}
ENABLE_PCH -- ${ENABLE_PCH}
@@ -530,9 +533,6 @@ fi
# Assesmble FE modules
FE_MODULES=''
-# TODO: docs are temporarily removed, so this var is always OFF
-# Fix it later
-BUILD_DOCS='OFF'
modules=("")
if [[ "${BUILD_FE}" -eq 1 ]]; then
modules+=("fe-common")
@@ -559,6 +559,7 @@ if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 ]]; then
# lakesoul-scanner has been deprecated
# modules+=("be-java-extensions/lakesoul-scanner")
modules+=("be-java-extensions/preload-extensions")
+ modules+=("be-java-extensions/${HADOOP_DEPS_NAME}")
# If the BE_EXTENSION_IGNORE variable is not empty, remove the modules
that need to be ignored from FE_MODULES
if [[ -n "${BE_EXTENSION_IGNORE}" ]]; then
@@ -575,8 +576,10 @@ FE_MODULES="$(
# Clean and build Backend
if [[ "${BUILD_BE}" -eq 1 ]]; then
- update_submodule "be/src/apache-orc" "apache-orc"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
- update_submodule "be/src/clucene" "clucene"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene-3.0.tar.gz"
+ update_submodule "contrib/apache-orc" "apache-orc"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
+ update_submodule "contrib/clucene" "clucene"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
+ update_submodule "contrib/openblas" "openblas"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/openblas.tar.gz"
+ update_submodule "contrib/faiss" "faiss"
"https://github.com/apache/doris-thirdparty/archive/refs/heads/faiss.tar.gz"
if [[ -e "${DORIS_HOME}/gensrc/build/gen_cpp/version.h" ]]; then
rm -f "${DORIS_HOME}/gensrc/build/gen_cpp/version.h"
fi
@@ -592,10 +595,20 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
BUILD_FS_BENCHMARK=OFF
fi
+ if [[ -z "${BUILD_TASK_EXECUTOR_SIMULATOR}" ]]; then
+ BUILD_TASK_EXECUTOR_SIMULATOR=OFF
+ fi
+
+ if [[ -z "${BUILD_FILE_CACHE_LRU_TOOL}" ]]; then
+ BUILD_FILE_CACHE_LRU_TOOL=OFF
+ fi
+
echo "-- Make program: ${MAKE_PROGRAM}"
echo "-- Use ccache: ${CMAKE_USE_CCACHE}"
echo "-- Extra cxx flags: ${EXTRA_CXX_FLAGS:-}"
echo "-- Build fs benchmark tool: ${BUILD_FS_BENCHMARK}"
+ echo "-- Build task executor simulator: ${BUILD_TASK_EXECUTOR_SIMULATOR}"
+ echo "-- Build file cache lru tool: ${BUILD_FILE_CACHE_LRU_TOOL}"
mkdir -p "${CMAKE_BUILD_DIR}"
cd "${CMAKE_BUILD_DIR}"
@@ -604,21 +617,20 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
-DENABLE_INJECTION_POINT="${ENABLE_INJECTION_POINT}" \
- -DENABLE_CACHE_LOCK_DEBUG="${ENABLE_CACHE_LOCK_DEBUG}" \
-DMAKE_TEST=OFF \
+ -DBUILD_BENCHMARK="${BUILD_BENCHMARK}" \
-DBUILD_FS_BENCHMARK="${BUILD_FS_BENCHMARK}" \
+ -DBUILD_TASK_EXECUTOR_SIMULATOR="${BUILD_TASK_EXECUTOR_SIMULATOR}" \
+ -DBUILD_FILE_CACHE_LRU_TOOL="${BUILD_FILE_CACHE_LRU_TOOL}" \
${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \
- -DWITH_MYSQL="${WITH_MYSQL}" \
-DUSE_LIBCPP="${USE_LIBCPP}" \
-DBUILD_META_TOOL="${BUILD_META_TOOL}" \
-DBUILD_FILE_CACHE_MICROBENCH_TOOL="${BUILD_FILE_CACHE_MICROBENCH_TOOL}" \
-DBUILD_INDEX_TOOL="${BUILD_INDEX_TOOL}" \
-DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
- -DUSE_DWARF="${USE_DWARF}" \
-DUSE_UNWIND="${USE_UNWIND}" \
-DDISPLAY_BUILD_TIME="${DISPLAY_BUILD_TIME}" \
-DENABLE_PCH="${ENABLE_PCH}" \
- -DUSE_MEM_TRACKER="${USE_MEM_TRACKER}" \
-DUSE_JEMALLOC="${USE_JEMALLOC}" \
-DUSE_AVX2="${USE_AVX2}" \
-DARM_MARCH="${ARM_MARCH}" \
@@ -665,7 +677,6 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
-DUSE_LIBCPP="${USE_LIBCPP}" \
-DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
-DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
- -DUSE_DWARF="${USE_DWARF}" \
-DUSE_JEMALLOC="${USE_JEMALLOC}" \
-DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
-DBUILD_AZURE="${BUILD_AZURE}" \
@@ -677,14 +688,6 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
echo "Build cloud done"
fi
-if [[ "${BUILD_DOCS}" = "ON" ]]; then
- # Build docs, should be built before Frontend
- echo "Build docs"
- cd "${DORIS_HOME}/docs"
- ./build_help_zip.sh
- cd "${DORIS_HOME}"
-fi
-
function build_ui() {
NPM='npm'
if ! ${NPM} --version; then
@@ -815,10 +818,6 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
- if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
- cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/"
"${DORIS_OUTPUT}/be/lib/"
- fi
-
if [[ -f "${DORIS_THIRDPARTY}/installed/lib/libz.so" ]]; then
cp -r -p "${DORIS_THIRDPARTY}/installed/lib/libz.so"*
"${DORIS_OUTPUT}/be/lib/"
fi
@@ -864,10 +863,19 @@ EOF
cp -r -p "${DORIS_HOME}/be/output/lib/debug_info"
"${DORIS_OUTPUT}/be/lib"/
fi
+ if [[ "${BUILD_BENCHMARK}" = "ON" ]]; then
+ cp -r -p "${DORIS_HOME}/be/output/lib/benchmark_test"
"${DORIS_OUTPUT}/be/lib/"/
+ fi
+
if [[ "${BUILD_FS_BENCHMARK}" = "ON" ]]; then
cp -r -p "${DORIS_HOME}/bin/run-fs-benchmark.sh"
"${DORIS_OUTPUT}/be/bin/"/
fi
+ if [[ "${BUILD_TASK_EXECUTOR_SIMULATOR}" = "ON" ]]; then
+ cp -r -p "${DORIS_HOME}/bin/run-task-executor-simulator.sh"
"${DORIS_OUTPUT}/be/bin/"/
+ cp -r -p "${DORIS_HOME}/be/output/lib/task_executor_simulator"
"${DORIS_OUTPUT}/be/lib/"/
+ fi
+
extensions_modules=("java-udf")
extensions_modules+=("jdbc-scanner")
extensions_modules+=("hadoop-hudi-scanner")
@@ -879,6 +887,7 @@ EOF
# extensions_modules+=("lakesoul-scanner")
extensions_modules+=("preload-extensions")
extensions_modules+=("iceberg-metadata-scanner")
+ extensions_modules+=("${HADOOP_DEPS_NAME}")
if [[ -n "${BE_EXTENSION_IGNORE}" ]]; then
IFS=',' read -r -a ignore_modules <<<"${BE_EXTENSION_IGNORE}"
@@ -908,13 +917,34 @@ EOF
module_jar="${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/${extensions_module}-jar-with-dependencies.jar"
module_proj_jar="${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/${extensions_module}-project.jar"
mkdir "${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
- if [[ -f "${module_jar}" ]]; then
- cp "${module_jar}"
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
- fi
- if [[ -f "${module_proj_jar}" ]]; then
- cp "${module_proj_jar}"
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+ echo "Copy Be Extensions ${extensions_module} jar to
${BE_JAVA_EXTENSIONS_DIR}/${extensions_module}"
+ if [[ "${extensions_module}" == "${HADOOP_DEPS_NAME}" ]]; then
+
+ BE_HADOOP_HDFS_DIR="${DORIS_OUTPUT}/be/lib/hadoop_hdfs/"
+ echo "Copy Be Extensions hadoop deps jars to ${BE_HADOOP_HDFS_DIR}"
+ rm -rf "${BE_HADOOP_HDFS_DIR}"
+ mkdir "${BE_HADOOP_HDFS_DIR}"
+
HADOOP_DEPS_JAR_DIR="${DORIS_HOME}/fe/be-java-extensions/${HADOOP_DEPS_NAME}/target"
+ echo "HADOOP_DEPS_JAR_DIR: ${HADOOP_DEPS_JAR_DIR}"
+ if [[ -f "${HADOOP_DEPS_JAR_DIR}/${HADOOP_DEPS_NAME}.jar" ]]; then
+ echo "Copy Be Extensions hadoop deps jar to
${BE_HADOOP_HDFS_DIR}"
+ cp "${HADOOP_DEPS_JAR_DIR}/${HADOOP_DEPS_NAME}.jar"
"${BE_HADOOP_HDFS_DIR}"
+ fi
+ if [[ -d "${HADOOP_DEPS_JAR_DIR}/lib" ]]; then
+ cp -r "${HADOOP_DEPS_JAR_DIR}/lib" "${BE_HADOOP_HDFS_DIR}/"
+ fi
+ else
+ if [[ -f "${module_jar}" ]]; then
+ cp "${module_jar}"
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+ fi
+ if [[ -f "${module_proj_jar}" ]]; then
+ cp "${module_proj_jar}"
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+ fi
+ if [[ -d
"${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/lib" ]]; then
+ cp -r
"${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/lib"
"${BE_JAVA_EXTENSIONS_DIR}/${extensions_module}/"
+ fi
fi
- done
+ done
# copy jindofs jars, only support for Linux x64 or arm
install -d "${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
@@ -953,8 +983,13 @@ fi
if [[ ${BUILD_CLOUD} -eq 1 ]]; then
rm -rf "${DORIS_HOME}/output/ms"
rm -rf "${DORIS_HOME}/cloud/output/lib/hadoop_hdfs"
- if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
- cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/"
"${DORIS_HOME}/cloud/output/lib"
+ # If hadoop dependencies are required, the cloud module must be built
after be-java-extensions,
+ # so when running ./build.sh --cloud, we also build be-java-extensions
automatically.
+ # If hadoop dependencies are not needed, you can disable this explicitly by
setting DISABLE_BE_JAVA_EXTENSIONS during the build.
+
HADOOP_DEPS_JAR_DIR="${DORIS_HOME}/fe/be-java-extensions/${HADOOP_DEPS_NAME}/target"
+ if [[ -d "${HADOOP_DEPS_JAR_DIR}/lib" ]]; then
+ mkdir -p "${DORIS_HOME}/cloud/output/lib/hadoop_hdfs"
+ cp -r "${HADOOP_DEPS_JAR_DIR}/lib/"*
"${DORIS_HOME}/cloud/output/lib/hadoop_hdfs/"
fi
cp -r -p "${DORIS_HOME}/cloud/output" "${DORIS_HOME}/output/ms"
fi
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index c1577ebdb48..d8d708b5ab0 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -117,16 +117,7 @@ if [[ ${enable_hdfs} -eq 1 ]]; then
if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
# add hadoop libs
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs"/*.jar; do
DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
done
fi
diff --git a/fe/be-java-extensions/hadoop-deps/pom.xml
b/fe/be-java-extensions/hadoop-deps/pom.xml
new file mode 100644
index 00000000000..4829b5a3273
--- /dev/null
+++ b/fe/be-java-extensions/hadoop-deps/pom.xml
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>be-java-extensions</artifactId>
+ <groupId>org.apache.doris</groupId>
+ <version>${revision}</version>
+ </parent>
+
+ <artifactId>hadoop-deps</artifactId>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.fasterxml.woodstox</groupId>
+ <artifactId>woodstox-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-configuration2</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.thirdparty</groupId>
+ <artifactId>hadoop-shaded-protobuf_3_7</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <finalName>${project.artifactId}</finalName>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>3.6.0</version>
+ <executions>
+ <execution>
+ <id>copy-dependencies</id>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+
<outputDirectory>${project.build.directory}/lib</outputDirectory>
+ <includeScope>runtime</includeScope>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git a/fe/be-java-extensions/pom.xml b/fe/be-java-extensions/pom.xml
index 39a457bcd78..8151cd179d1 100644
--- a/fe/be-java-extensions/pom.xml
+++ b/fe/be-java-extensions/pom.xml
@@ -33,6 +33,7 @@ under the License.
<!-- <module>lakesoul-scanner</module> -->
<module>preload-extensions</module>
<module>trino-connector-scanner</module>
+ <module>hadoop-deps</module>
</modules>
<parent>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]