This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 38ce7ce95a4 branch-3.1:[feat](thirdparty) Refactor HDFS dependency 
management: move HDFS client jars to FE and slim down BE thirdparty (#57202) 
(#57527)
38ce7ce95a4 is described below

commit 38ce7ce95a422bd9e1e4bf1ad0e2f6e01aa75ae1
Author: Calvin Kirs <[email protected]>
AuthorDate: Mon Nov 3 11:19:44 2025 +0800

    branch-3.1:[feat](thirdparty) Refactor HDFS dependency management: move 
HDFS client jars to FE and slim down BE thirdparty (#57202) (#57527)
    
    #57202
---
 bin/start_be.sh                           |  10 +-
 build.sh                                  | 175 ++++++++++++++++++------------
 cloud/script/start.sh                     |  11 +-
 fe/be-java-extensions/hadoop-deps/pom.xml | 114 +++++++++++++++++++
 fe/be-java-extensions/pom.xml             |   1 +
 5 files changed, 223 insertions(+), 88 deletions(-)

diff --git a/bin/start_be.sh b/bin/start_be.sh
index abffdebe96e..fec1eb01723 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -225,16 +225,10 @@ done
 
 if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     # add hadoop libs
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs"/*.jar; do
         DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
     done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/lib"/*.jar; do
         DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
     done
 fi
diff --git a/build.sh b/build.sh
index 75cb2f3a118..90621ae7461 100755
--- a/build.sh
+++ b/build.sh
@@ -40,6 +40,7 @@ TARGET_SYSTEM="$(uname -s)"
 TARGET_ARCH="$(uname -m)"
 echo "Target system: ${TARGET_SYSTEM}; Target arch: ${TARGET_ARCH}"
 
+HADOOP_DEPS_NAME="hadoop-deps"
 . "${DORIS_HOME}/env.sh"
 
 # Check args
@@ -54,6 +55,8 @@ Usage: $0 <options>
      --file-cache-microbench    build Backend file cache microbench tool. 
Default OFF.
      --cloud                    build Cloud. Default OFF.
      --index-tool               build Backend inverted index tool. Default OFF.
+     --benchmark                build Google Benchmark. Default OFF.
+     --task-executor-simulator  build Backend task executor simulator. Default 
OFF.
      --broker                   build Broker. Default ON.
      --hive-udf                 build Hive UDF library for Ingestion Load. 
Default ON.
      --be-java-extensions       build Backend java extensions. Default ON.
@@ -69,6 +72,7 @@ Usage: $0 <options>
    DISABLE_BE_JAVA_EXTENSIONS  If set DISABLE_BE_JAVA_EXTENSIONS=ON, we will 
not build the binary with java-udf, hadoop-hudi-scanner, jdbc-scanner and so on. 
Default is OFF.
     DISABLE_JAVA_CHECK_STYLE    If set DISABLE_JAVA_CHECK_STYLE=ON, it will 
skip style check of java code in FE.
     DISABLE_BUILD_AZURE         If set DISABLE_BUILD_AZURE=ON, it will not 
build azure into BE.
+
   Eg.
     $0                                      build all
     $0 --be                                 build Backend
@@ -76,9 +80,10 @@ Usage: $0 <options>
     $0 --file-cache-microbench              build Backend file cache 
microbench tool
     $0 --cloud                              build Cloud
     $0 --index-tool                         build Backend inverted index tool
-    $0 --fe --clean                         clean and build Frontend and Spark 
Dpp application
+    $0 --benchmark                          build Google Benchmark of Backend
+    $0 --fe --clean                         clean and build Frontend.
     $0 --fe --be --clean                    clean and build Frontend and 
Backend
-    $0 --spark-dpp                          build Spark DPP application alone
+    $0 --task-executor-simulator            build task executor simulator
     $0 --broker                             build Broker
     $0 --be --fe                            build Backend, Frontend, and Java 
UDF library
     $0 --be --coverage                      build Backend with coverage enabled
@@ -137,6 +142,8 @@ if ! OPTS="$(getopt \
     -l 'meta-tool' \
     -l 'file-cache-microbench' \
     -l 'index-tool' \
+    -l 'benchmark' \
+    -l 'task-executor-simulator' \
     -l 'spark-dpp' \
     -l 'hive-udf' \
     -l 'be-java-extensions' \
@@ -160,6 +167,8 @@ BUILD_BROKER=0
 BUILD_META_TOOL='OFF'
 BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
 BUILD_INDEX_TOOL='OFF'
+BUILD_BENCHMARK='OFF'
+BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
 BUILD_BE_JAVA_EXTENSIONS=0
 BUILD_HIVE_UDF=0
 CLEAN=0
@@ -178,7 +187,9 @@ if [[ "$#" == 1 ]]; then
     BUILD_BROKER=1
     BUILD_META_TOOL='OFF'
     BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
+    BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
     BUILD_INDEX_TOOL='OFF'
+    BUILD_BENCHMARK='OFF'
     BUILD_HIVE_UDF=1
     BUILD_BE_JAVA_EXTENSIONS=1
     CLEAN=0
@@ -198,6 +209,7 @@ else
             ;;
         --cloud)
             BUILD_CLOUD=1
+            BUILD_BE_JAVA_EXTENSIONS=1
             shift
             ;;
         --broker)
@@ -216,6 +228,16 @@ else
             BUILD_INDEX_TOOL='ON'
             shift
             ;;
+        --benchmark)
+            BUILD_BENCHMARK='ON'
+            BUILD_BE=1 # go into BE cmake building, but benchmark instead of 
doris_be
+            shift
+            ;;
+        --task-executor-simulator)
+            BUILD_TASK_EXECUTOR_SIMULATOR='ON'
+            BUILD_BE=1
+            shift
+            ;;
         --spark-dpp)
             BUILD_SPARK_DPP=1
             shift
@@ -276,6 +298,7 @@ else
         BUILD_META_TOOL='ON'
         BUILD_FILE_CACHE_MICROBENCH_TOOL='OFF'
         BUILD_INDEX_TOOL='ON'
+       BUILD_TASK_EXECUTOR_SIMULATOR='OFF'
         BUILD_HIVE_UDF=1
         BUILD_BE_JAVA_EXTENSIONS=1
         CLEAN=0
@@ -285,8 +308,13 @@ fi
 if [[ "${HELP}" -eq 1 ]]; then
     usage
 fi
-# build thirdparty libraries if necessary
-if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
+# build thirdparty libraries if necessary. check last thirdparty lib 
installation
+if [[ "${TARGET_SYSTEM}" == 'Darwin' ]]; then
+    LAST_THIRDPARTY_LIB='libbrotlienc.a'
+else
+    LAST_THIRDPARTY_LIB='hadoop_hdfs/native/libhdfs.a'
+fi
+if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/${LAST_THIRDPARTY_LIB}" ]]; then
     echo "Thirdparty libraries need to be build ..."
    # need to remove all installed pkgs because some lib like lz4 will throw 
an error if its lib already exists
     rm -rf "${DORIS_THIRDPARTY}/installed"
@@ -341,9 +369,6 @@ if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && 
"${BUILD_FE}" -eq 0 && ${BUILD_
     exit 0
 fi
 
-if [[ -z "${WITH_MYSQL}" ]]; then
-    WITH_MYSQL='OFF'
-fi
 if [[ -z "${GLIBC_COMPATIBILITY}" ]]; then
     if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
         GLIBC_COMPATIBILITY='ON'
@@ -367,41 +392,21 @@ fi
 if [[ -z "${STRIP_DEBUG_INFO}" ]]; then
     STRIP_DEBUG_INFO='OFF'
 fi
-if [[ -z "${USE_MEM_TRACKER}" ]]; then
-    if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
-        USE_MEM_TRACKER='ON'
-    else
-        USE_MEM_TRACKER='OFF'
-    fi
-fi
 BUILD_TYPE_LOWWER=$(echo "${BUILD_TYPE}" | tr '[:upper:]' '[:lower:]')
 if [[ "${BUILD_TYPE_LOWWER}" == "asan" ]]; then
     USE_JEMALLOC='OFF'
 elif [[ -z "${USE_JEMALLOC}" ]]; then
-    USE_JEMALLOC='ON'
-fi
-if [[ -f "${TP_INCLUDE_DIR}/jemalloc/jemalloc_doris_with_prefix.h" ]]; then
-    # compatible with old thirdparty
-    rm -rf "${TP_INCLUDE_DIR}/jemalloc/jemalloc.h"
-    rm -rf "${TP_LIB_DIR}/libjemalloc_doris.a"
-    rm -rf "${TP_LIB_DIR}/libjemalloc_doris_pic.a"
-    rm -rf "${TP_INCLUDE_DIR}/rocksdb"
-    rm -rf "${TP_LIB_DIR}/librocksdb.a"
-
-    mv "${TP_INCLUDE_DIR}/jemalloc/jemalloc_doris_with_prefix.h" 
"${TP_INCLUDE_DIR}/jemalloc/jemalloc.h"
-    mv "${TP_LIB_DIR}/libjemalloc_doris_with_prefix.a" 
"${TP_LIB_DIR}/libjemalloc_doris.a"
-    mv "${TP_LIB_DIR}/libjemalloc_doris_with_prefix_pic.a" 
"${TP_LIB_DIR}/libjemalloc_doris_pic.a"
-    mv "${TP_LIB_DIR}/librocksdb_jemalloc_with_prefix.a" 
"${TP_LIB_DIR}/librocksdb.a"
-    mv -f "${TP_INCLUDE_DIR}/rocksdb_jemalloc_with_prefix" 
"${TP_INCLUDE_DIR}/rocksdb"
+    if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
+        USE_JEMALLOC='ON'
+    else
+        USE_JEMALLOC='OFF'
+    fi
 fi
+
 if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
     USE_BTHREAD_SCANNER='OFF'
 fi
 
-if [[ -z "${USE_DWARF}" ]]; then
-    USE_DWARF='OFF'
-fi
-
 if [[ -z "${USE_UNWIND}" ]]; then
     if [[ "${TARGET_SYSTEM}" != 'Darwin' ]]; then
         USE_UNWIND='ON'
@@ -442,7 +447,7 @@ if [[ -z "${DISABLE_JAVA_CHECK_STYLE}" ]]; then
     DISABLE_JAVA_CHECK_STYLE='OFF'
 fi
 
-if [[ -n "${DISABLE_BUILD_AZURE}" ]]; then
+if [[ "$(echo "${DISABLE_BUILD_AZURE}" | tr '[:lower:]' '[:upper:]')" == "ON" 
]]; then
     BUILD_AZURE='OFF'
 fi
 
@@ -450,8 +455,8 @@ if [[ -z "${ENABLE_INJECTION_POINT}" ]]; then
     ENABLE_INJECTION_POINT='OFF'
 fi
 
-if [[ -z "${ENABLE_CACHE_LOCK_DEBUG}" ]]; then
-    ENABLE_CACHE_LOCK_DEBUG='ON'
+if [[ -z "${BUILD_BENCHMARK}" ]]; then
+    BUILD_BENCHMARK='OFF'
 fi
 
 if [[ -z "${RECORD_COMPILER_SWITCHES}" ]]; then
@@ -489,22 +494,20 @@ echo "Get params:
     BUILD_META_TOOL                     -- ${BUILD_META_TOOL}
     BUILD_FILE_CACHE_MICROBENCH_TOOL    -- ${BUILD_FILE_CACHE_MICROBENCH_TOOL}
     BUILD_INDEX_TOOL                    -- ${BUILD_INDEX_TOOL}
+    BUILD_BENCHMARK                     -- ${BUILD_BENCHMARK}
+    BUILD_TASK_EXECUTOR_SIMULATOR       -- ${BUILD_TASK_EXECUTOR_SIMULATOR}
     BUILD_BE_JAVA_EXTENSIONS            -- ${BUILD_BE_JAVA_EXTENSIONS}
     BUILD_HIVE_UDF                      -- ${BUILD_HIVE_UDF}
     PARALLEL                            -- ${PARALLEL}
     CLEAN                               -- ${CLEAN}
-    WITH_MYSQL                          -- ${WITH_MYSQL}
     GLIBC_COMPATIBILITY                 -- ${GLIBC_COMPATIBILITY}
     USE_AVX2                            -- ${USE_AVX2}
     USE_LIBCPP                          -- ${USE_LIBCPP}
-    USE_DWARF                           -- ${USE_DWARF}
     USE_UNWIND                          -- ${USE_UNWIND}
     STRIP_DEBUG_INFO                    -- ${STRIP_DEBUG_INFO}
-    USE_MEM_TRACKER                     -- ${USE_MEM_TRACKER}
     USE_JEMALLOC                        -- ${USE_JEMALLOC}
     USE_BTHREAD_SCANNER                 -- ${USE_BTHREAD_SCANNER}
     ENABLE_INJECTION_POINT              -- ${ENABLE_INJECTION_POINT}
-    ENABLE_CACHE_LOCK_DEBUG             -- ${ENABLE_CACHE_LOCK_DEBUG}
     DENABLE_CLANG_COVERAGE              -- ${DENABLE_CLANG_COVERAGE}
     DISPLAY_BUILD_TIME                  -- ${DISPLAY_BUILD_TIME}
     ENABLE_PCH                          -- ${ENABLE_PCH}
@@ -530,9 +533,6 @@ fi
 
 # Assesmble FE modules
 FE_MODULES=''
-# TODO: docs are temporarily removed, so this var is always OFF
-# Fix it later
-BUILD_DOCS='OFF'
 modules=("")
 if [[ "${BUILD_FE}" -eq 1 ]]; then
     modules+=("fe-common")
@@ -559,6 +559,7 @@ if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 ]]; then
     # lakesoul-scanner has been deprecated
     # modules+=("be-java-extensions/lakesoul-scanner")
     modules+=("be-java-extensions/preload-extensions")
+    modules+=("be-java-extensions/${HADOOP_DEPS_NAME}")
 
     # If the BE_EXTENSION_IGNORE variable is not empty, remove the modules 
that need to be ignored from FE_MODULES
     if [[ -n "${BE_EXTENSION_IGNORE}" ]]; then
@@ -575,8 +576,10 @@ FE_MODULES="$(
 
 # Clean and build Backend
 if [[ "${BUILD_BE}" -eq 1 ]]; then
-    update_submodule "be/src/apache-orc" "apache-orc" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz";
-    update_submodule "be/src/clucene" "clucene" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene-3.0.tar.gz";
+    update_submodule "contrib/apache-orc" "apache-orc" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz";
+    update_submodule "contrib/clucene" "clucene" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz";
+    update_submodule "contrib/openblas" "openblas" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/openblas.tar.gz";
+    update_submodule "contrib/faiss" "faiss" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/faiss.tar.gz";
     if [[ -e "${DORIS_HOME}/gensrc/build/gen_cpp/version.h" ]]; then
         rm -f "${DORIS_HOME}/gensrc/build/gen_cpp/version.h"
     fi
@@ -592,10 +595,20 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
         BUILD_FS_BENCHMARK=OFF
     fi
 
+    if [[ -z "${BUILD_TASK_EXECUTOR_SIMULATOR}" ]]; then
+        BUILD_TASK_EXECUTOR_SIMULATOR=OFF
+    fi
+
+    if [[ -z "${BUILD_FILE_CACHE_LRU_TOOL}" ]]; then
+        BUILD_FILE_CACHE_LRU_TOOL=OFF
+    fi
+
     echo "-- Make program: ${MAKE_PROGRAM}"
     echo "-- Use ccache: ${CMAKE_USE_CCACHE}"
     echo "-- Extra cxx flags: ${EXTRA_CXX_FLAGS:-}"
     echo "-- Build fs benchmark tool: ${BUILD_FS_BENCHMARK}"
+    echo "-- Build task executor simulator: ${BUILD_TASK_EXECUTOR_SIMULATOR}"
+    echo "-- Build file cache lru tool: ${BUILD_FILE_CACHE_LRU_TOOL}"
 
     mkdir -p "${CMAKE_BUILD_DIR}"
     cd "${CMAKE_BUILD_DIR}"
@@ -604,21 +617,20 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
         -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
         -DENABLE_INJECTION_POINT="${ENABLE_INJECTION_POINT}" \
-        -DENABLE_CACHE_LOCK_DEBUG="${ENABLE_CACHE_LOCK_DEBUG}" \
         -DMAKE_TEST=OFF \
+        -DBUILD_BENCHMARK="${BUILD_BENCHMARK}" \
         -DBUILD_FS_BENCHMARK="${BUILD_FS_BENCHMARK}" \
+        -DBUILD_TASK_EXECUTOR_SIMULATOR="${BUILD_TASK_EXECUTOR_SIMULATOR}" \
+        -DBUILD_FILE_CACHE_LRU_TOOL="${BUILD_FILE_CACHE_LRU_TOOL}" \
         ${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \
-        -DWITH_MYSQL="${WITH_MYSQL}" \
         -DUSE_LIBCPP="${USE_LIBCPP}" \
         -DBUILD_META_TOOL="${BUILD_META_TOOL}" \
         
-DBUILD_FILE_CACHE_MICROBENCH_TOOL="${BUILD_FILE_CACHE_MICROBENCH_TOOL}" \
         -DBUILD_INDEX_TOOL="${BUILD_INDEX_TOOL}" \
         -DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
-        -DUSE_DWARF="${USE_DWARF}" \
         -DUSE_UNWIND="${USE_UNWIND}" \
         -DDISPLAY_BUILD_TIME="${DISPLAY_BUILD_TIME}" \
         -DENABLE_PCH="${ENABLE_PCH}" \
-        -DUSE_MEM_TRACKER="${USE_MEM_TRACKER}" \
         -DUSE_JEMALLOC="${USE_JEMALLOC}" \
         -DUSE_AVX2="${USE_AVX2}" \
         -DARM_MARCH="${ARM_MARCH}" \
@@ -665,7 +677,6 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
         -DUSE_LIBCPP="${USE_LIBCPP}" \
         -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
         -DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
-        -DUSE_DWARF="${USE_DWARF}" \
         -DUSE_JEMALLOC="${USE_JEMALLOC}" \
         -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
         -DBUILD_AZURE="${BUILD_AZURE}" \
@@ -677,14 +688,6 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
     echo "Build cloud done"
 fi
 
-if [[ "${BUILD_DOCS}" = "ON" ]]; then
-    # Build docs, should be built before Frontend
-    echo "Build docs"
-    cd "${DORIS_HOME}/docs"
-    ./build_help_zip.sh
-    cd "${DORIS_HOME}"
-fi
-
 function build_ui() {
     NPM='npm'
     if ! ${NPM} --version; then
@@ -815,10 +818,6 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
     cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
 
-    if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
-        cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_OUTPUT}/be/lib/"
-    fi
-
     if [[ -f "${DORIS_THIRDPARTY}/installed/lib/libz.so" ]]; then
         cp -r -p "${DORIS_THIRDPARTY}/installed/lib/libz.so"* 
"${DORIS_OUTPUT}/be/lib/"
     fi
@@ -864,10 +863,19 @@ EOF
         cp -r -p "${DORIS_HOME}/be/output/lib/debug_info" 
"${DORIS_OUTPUT}/be/lib"/
     fi
 
+    if [[ "${BUILD_BENCHMARK}" = "ON" ]]; then
+        cp -r -p "${DORIS_HOME}/be/output/lib/benchmark_test" 
"${DORIS_OUTPUT}/be/lib/"/
+    fi
+
     if [[ "${BUILD_FS_BENCHMARK}" = "ON" ]]; then
         cp -r -p "${DORIS_HOME}/bin/run-fs-benchmark.sh" 
"${DORIS_OUTPUT}/be/bin/"/
     fi
 
+    if [[ "${BUILD_TASK_EXECUTOR_SIMULATOR}" = "ON" ]]; then
+        cp -r -p "${DORIS_HOME}/bin/run-task-executor-simulator.sh" 
"${DORIS_OUTPUT}/be/bin/"/
+        cp -r -p "${DORIS_HOME}/be/output/lib/task_executor_simulator" 
"${DORIS_OUTPUT}/be/lib/"/
+    fi
+
     extensions_modules=("java-udf")
     extensions_modules+=("jdbc-scanner")
     extensions_modules+=("hadoop-hudi-scanner")
@@ -879,6 +887,7 @@ EOF
     # extensions_modules+=("lakesoul-scanner")
     extensions_modules+=("preload-extensions")
     extensions_modules+=("iceberg-metadata-scanner")
+    extensions_modules+=("${HADOOP_DEPS_NAME}")
 
     if [[ -n "${BE_EXTENSION_IGNORE}" ]]; then
         IFS=',' read -r -a ignore_modules <<<"${BE_EXTENSION_IGNORE}"
@@ -908,13 +917,34 @@ EOF
         
module_jar="${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/${extensions_module}-jar-with-dependencies.jar"
         
module_proj_jar="${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/${extensions_module}-project.jar"
         mkdir "${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
-        if [[ -f "${module_jar}" ]]; then
-            cp "${module_jar}" 
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
-        fi
-        if [[ -f "${module_proj_jar}" ]]; then
-            cp "${module_proj_jar}" 
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+        echo "Copy Be Extensions ${extensions_module} jar to 
${BE_JAVA_EXTENSIONS_DIR}/${extensions_module}"
+     if [[ "${extensions_module}" == "${HADOOP_DEPS_NAME}" ]]; then
+          
+            BE_HADOOP_HDFS_DIR="${DORIS_OUTPUT}/be/lib/hadoop_hdfs/"
+            echo "Copy Be Extensions hadoop deps jars to ${BE_HADOOP_HDFS_DIR}"
+            rm -rf "${BE_HADOOP_HDFS_DIR}"
+            mkdir "${BE_HADOOP_HDFS_DIR}"
+            
HADOOP_DEPS_JAR_DIR="${DORIS_HOME}/fe/be-java-extensions/${HADOOP_DEPS_NAME}/target"
+            echo "HADOOP_DEPS_JAR_DIR: ${HADOOP_DEPS_JAR_DIR}"
+            if [[ -f "${HADOOP_DEPS_JAR_DIR}/${HADOOP_DEPS_NAME}.jar" ]]; then
+                echo "Copy Be Extensions hadoop deps jar to 
${BE_HADOOP_HDFS_DIR}"
+                cp "${HADOOP_DEPS_JAR_DIR}/${HADOOP_DEPS_NAME}.jar" 
"${BE_HADOOP_HDFS_DIR}"
+            fi
+            if [[ -d "${HADOOP_DEPS_JAR_DIR}/lib" ]]; then
+                cp -r "${HADOOP_DEPS_JAR_DIR}/lib" "${BE_HADOOP_HDFS_DIR}/"
+            fi
+        else
+            if [[ -f "${module_jar}" ]]; then
+                cp "${module_jar}" 
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+            fi
+            if [[ -f "${module_proj_jar}" ]]; then
+                cp "${module_proj_jar}" 
"${BE_JAVA_EXTENSIONS_DIR}"/"${extensions_module}"
+            fi
+            if [[ -d 
"${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/lib" ]]; then
+                cp -r 
"${DORIS_HOME}/fe/be-java-extensions/${extensions_module}/target/lib" 
"${BE_JAVA_EXTENSIONS_DIR}/${extensions_module}/"
+            fi
         fi
-    done
+    done        
 
     # copy jindofs jars, only support for Linux x64 or arm
     install -d "${DORIS_OUTPUT}/be/lib/java_extensions/jindofs"/
@@ -953,8 +983,13 @@ fi
 if [[ ${BUILD_CLOUD} -eq 1 ]]; then
     rm -rf "${DORIS_HOME}/output/ms"
     rm -rf "${DORIS_HOME}/cloud/output/lib/hadoop_hdfs"
-    if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
-        cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_HOME}/cloud/output/lib"
+    # If hadoop dependencies are required, building the cloud module must be done 
after building be-java-extensions first,
+    # so when running ./build.sh --cloud, we also build be-java-extensions 
automatically.
+    # If hadoop dependencies are not needed, you can disable them explicitly by 
setting DISABLE_BE_JAVA_EXTENSIONS during the build.
+    
HADOOP_DEPS_JAR_DIR="${DORIS_HOME}/fe/be-java-extensions/${HADOOP_DEPS_NAME}/target"
+    if [[ -d "${HADOOP_DEPS_JAR_DIR}/lib" ]]; then
+        mkdir -p "${DORIS_HOME}/cloud/output/lib/hadoop_hdfs"
+        cp -r "${HADOOP_DEPS_JAR_DIR}/lib/"* 
"${DORIS_HOME}/cloud/output/lib/hadoop_hdfs/"
     fi
     cp -r -p "${DORIS_HOME}/cloud/output" "${DORIS_HOME}/output/ms"
 fi
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index c1577ebdb48..d8d708b5ab0 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -117,16 +117,7 @@ if [[ ${enable_hdfs} -eq 1 ]]; then
 
     if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
         # add hadoop libs
-        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
-            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-        done
-        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
-            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-        done
-        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
-            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-        done
-        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs"/*.jar; do
             DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
         done
     fi
diff --git a/fe/be-java-extensions/hadoop-deps/pom.xml 
b/fe/be-java-extensions/hadoop-deps/pom.xml
new file mode 100644
index 00000000000..4829b5a3273
--- /dev/null
+++ b/fe/be-java-extensions/hadoop-deps/pom.xml
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0";
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd";>
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>be-java-extensions</artifactId>
+        <groupId>org.apache.doris</groupId>
+        <version>${revision}</version>
+    </parent>
+
+    <artifactId>hadoop-deps</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.fasterxml.woodstox</groupId>
+            <artifactId>woodstox-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>commons-collections</groupId>
+            <artifactId>commons-collections</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-configuration2</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-auth</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop.thirdparty</groupId>
+            <artifactId>hadoop-shaded-protobuf_3_7</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-client</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-mapreduce-client-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-jar-plugin</artifactId>
+                <configuration>
+                    <finalName>${project.artifactId}</finalName>
+                </configuration>
+            </plugin>
+            <plugin>
+                <artifactId>maven-dependency-plugin</artifactId>
+                <version>3.6.0</version>
+                <executions>
+                    <execution>
+                        <id>copy-dependencies</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>copy-dependencies</goal>
+                        </goals>
+                        <configuration>
+                            
<outputDirectory>${project.build.directory}/lib</outputDirectory>
+                            <includeScope>runtime</includeScope>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/fe/be-java-extensions/pom.xml b/fe/be-java-extensions/pom.xml
index 39a457bcd78..8151cd179d1 100644
--- a/fe/be-java-extensions/pom.xml
+++ b/fe/be-java-extensions/pom.xml
@@ -33,6 +33,7 @@ under the License.
         <!-- <module>lakesoul-scanner</module> -->
         <module>preload-extensions</module>
         <module>trino-connector-scanner</module>
+        <module>hadoop-deps</module>
     </modules>
 
     <parent>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to