This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c3612374bb3 [chore](cloud) Update build and start script (#56031)
c3612374bb3 is described below

commit c3612374bb306fad284ecb61a24e7872c882f145
Author: Gavin Chou <[email protected]>
AuthorDate: Mon Sep 15 15:13:10 2025 +0800

    [chore](cloud) Update build and start script (#56031)
    
    * make HDFS storage optional when building in cloud mode
    * optimize meta-service start script
---
 build.sh                                         |  1 +
 cloud/CMakeLists.txt                             | 33 +++++----
 cloud/script/start.sh                            | 90 +++++++++++++-----------
 cloud/src/meta-service/meta_service_resource.cpp | 11 +++
 cloud/src/recycler/CMakeLists.txt                |  4 ++
 cloud/src/recycler/checker.cpp                   |  7 ++
 cloud/src/recycler/recycler.cpp                  |  7 ++
 cloud/test/CMakeLists.txt                        | 11 ++-
 run-cloud-ut.sh                                  |  1 +
 9 files changed, 107 insertions(+), 58 deletions(-)

diff --git a/build.sh b/build.sh
index a93c98483cf..cfac0453ca3 100755
--- a/build.sh
+++ b/build.sh
@@ -656,6 +656,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
         -DMAKE_TEST=OFF \
         "${CMAKE_USE_CCACHE}" \
         -DUSE_LIBCPP="${USE_LIBCPP}" \
+        -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
         -DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
         -DUSE_JEMALLOC="${USE_JEMALLOC}" \
         -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
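
With this change, build.sh reads ENABLE_HDFS_STORAGE_VAULT from the environment and defaults it to ON. A minimal sketch of turning it off (the --cloud flag is assumed from build.sh's usual interface; it is not part of this diff):

    # build the cloud module (meta-service/recycler) without HDFS storage vault support
    ENABLE_HDFS_STORAGE_VAULT=OFF ./build.sh --cloud
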
diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt
index 21cabcd72ec..d2da775e184 100644
--- a/cloud/CMakeLists.txt
+++ b/cloud/CMakeLists.txt
@@ -262,15 +262,19 @@ include_directories(
     ${GPERFTOOLS_HOME}/include
 )
 
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
-    set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+    if ("${DORIS_JAVA_HOME}" STREQUAL "")
+        set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
+    endif()
 
-include_directories(${DORIS_JAVA_HOME}/include)
-if (NOT OS_MACOSX)
-    include_directories(${DORIS_JAVA_HOME}/include/linux)
-else()
-    include_directories(${DORIS_JAVA_HOME}/include/darwin)
+    include_directories(${DORIS_JAVA_HOME}/include)
+    if (NOT OS_MACOSX)
+        include_directories(${DORIS_JAVA_HOME}/include/linux)
+    else()
+        include_directories(${DORIS_JAVA_HOME}/include/darwin)
+    endif()
 endif()
 
 set(WL_START_GROUP "-Wl,--start-group")
@@ -312,10 +316,6 @@ set(DORIS_DEPENDENCIES
 
 message(STATUS "DORIS_DEPENDENCIES is ${DORIS_DEPENDENCIES}")
 
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
-    set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
-
 # Add all external dependencies. They should come after the project's libs.
 # static link gcc's lib
 set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
@@ -333,9 +333,14 @@ set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
     -static-libstdc++
     -static-libgcc
     -lresolv
-    -L${DORIS_JAVA_HOME}/lib/server
-    -ljvm
 )
+
+if (ENABLE_HDFS_STORAGE_VAULT)
+    set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
+        -L${DORIS_JAVA_HOME}/lib/server
+        -ljvm)
+endif()
+
 if (NOT (USE_LIBCPP AND COMPILER_CLANG))
     set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS} -lstdc++fs)
 endif()
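
When the option is OFF, the JAVA_HOME lookup, the JNI include directories, and the -ljvm link line above are all skipped, so a JDK is not required to build the cloud module. A hypothetical direct configure step under that assumption (the normal path goes through build.sh, which passes additional flags):

    # configure cloud/ without the JVM/HDFS dependency; JAVA_HOME need not be set
    cd cloud && mkdir -p build && cd build
    cmake -DENABLE_HDFS_STORAGE_VAULT=OFF ..
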
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index 506a279ad78..befe0a9f2e9 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -54,8 +54,15 @@ if [[ ${RUN_RECYCLYER} -eq 1 ]]; then
 fi
 # echo "$@" "daemonized=${daemonized}"}
 
-# export env variables from doris_cloud.conf
-# read from doris_cloud.conf
+custom_start="${DORIS_HOME}/bin/custom_start.sh" 
+if [[ -f "${custom_start}" ]]; then
+  source "${custom_start}" 
+fi
+enable_hdfs=${enable_hdfs:-1}
+process_name="${process_name:-doris_cloud}"
+
+# export env variables from ${process_name}.conf
+# read from ${process_name}.conf
 while read -r line; do
     envline="$(echo "${line}" |
         sed 's/[[:blank:]]*=[[:blank:]]*/=/g' |
@@ -66,7 +73,7 @@ while read -r line; do
     if [[ "${envline}" == *"="* ]]; then
         eval 'export "${envline}"'
     fi
-done <"${DORIS_HOME}/conf/doris_cloud.conf"
+done <"${DORIS_HOME}/conf/${process_name}.conf"
 
 role=''
 if [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 0 ]]; then
@@ -78,53 +85,59 @@ elif [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
 elif [[ ${RUN_METASERVICE} -eq 1 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
     role='MetaService and Recycler'
 fi
-process=doris_cloud
 
-if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process}.pid" ]]; then
-    pid=$(cat "${DORIS_HOME}/bin/${process}.pid")
+if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process_name}.pid" ]]; then
+    pid=$(cat "${DORIS_HOME}/bin/${process_name}.pid")
     if [[ "${pid}" != "" ]]; then
-        if kill -0 "$(cat "${DORIS_HOME}/bin/${process}.pid")" >/dev/null 2>&1; then
+        if kill -0 "$(cat "${DORIS_HOME}/bin/${process_name}.pid")" >/dev/null 2>&1; then
             echo "pid file existed, ${role} have already started, pid=${pid}"
             exit 1
         fi
     fi
     echo "pid file existed but process not alive, remove it, pid=${pid}"
-    rm -f "${DORIS_HOME}/bin/${process}.pid"
+    rm -f "${DORIS_HOME}/bin/${process_name}.pid"
 fi
 
 lib_path="${DORIS_HOME}/lib"
-bin="${DORIS_HOME}/lib/doris_cloud"
+bin="${DORIS_HOME}/lib/${process_name}"
 export LD_LIBRARY_PATH="${lib_path}:${LD_LIBRARY_PATH}"
 
-chmod 550 "${DORIS_HOME}/lib/doris_cloud"
+chmod 550 "${DORIS_HOME}/lib/${process_name}"
 
-if [[ -z "${JAVA_HOME}" ]]; then
-    echo "The JAVA_HOME environment variable is not defined correctly"
-    echo "This environment variable is needed to run this program"
-    echo "NB: JAVA_HOME should point to a JDK not a JRE"
-    echo "You can set it in doris_cloud.conf"
-    exit 1
-fi
+if [[ ${enable_hdfs} -eq 1 ]]; then
+    if [[ -z "${JAVA_HOME}" ]]; then
+        echo "The JAVA_HOME environment variable is not defined correctly"
+        echo "This environment variable is needed to run this program"
+        echo "NB: JAVA_HOME should point to a JDK not a JRE"
+        echo "You can set it in doris_cloud.conf"
+        exit 1
+    fi
 
-if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
-    # add hadoop libs
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-fi
+    if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+        # add hadoop libs
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+    fi
+
+    export CLASSPATH="${DORIS_CLASSPATH}"
 
-export CLASSPATH="${DORIS_CLASSPATH}"
+    export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
 
-export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
+    ## set libhdfs3 conf
+    if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
+        export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
+    fi
+fi
 
 # filter known leak
 export LSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/lsan_suppr.conf
@@ -136,13 +149,6 @@ export UBSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/ubsan_suppr.conf
 export ASAN_OPTIONS=symbolize=1:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1:detect_container_overflow=0:check_malloc_usable_size=0:${ASAN_OPTIONS}
 export UBSAN_OPTIONS=print_stacktrace=1:${UBSAN_OPTIONS}
 
-## set libhdfs3 conf
-if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
-    export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
-fi
-
-# echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}"
-
 # to enable dump jeprof heap stats prodigally, change `prof_active:false` to `prof_active:true` or curl http://be_host:be_webport/jeheap/prof/true
 # to control the dump interval change `lg_prof_interval` to a specific value, it is pow/exponent of 2 in size of bytes, default 34 means 2 ** 34 = 16GB
 # to control the dump path, change `prof_prefix` to a specific path, e.g. /doris_cloud/log/ms_, by default it dumps at the path where the start command called
@@ -155,7 +161,7 @@ fi
 
 mkdir -p "${DORIS_HOME}/log"
 echo "$(date +'%F %T') start with args: $*"
-out_file=${DORIS_HOME}/log/${process}.out
+out_file=${DORIS_HOME}/log/${process_name}.out
 if [[ "${RUN_DAEMON}" -eq 1 ]]; then
     # append 10 blank lines to ensure the following tail -n10 works correctly
     printf "\n\n\n\n\n\n\n\n\n\n" >>"${out_file}"
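
The rewritten start.sh sources ${DORIS_HOME}/bin/custom_start.sh when it exists and otherwise falls back to enable_hdfs=1 and process_name=doris_cloud. A minimal sketch of such an override file (the file is user-provided and the values below are illustrative; process_name also selects the binary lib/${process_name}, the config conf/${process_name}.conf, the pid file, and the out file):

    # ${DORIS_HOME}/bin/custom_start.sh -- optional, sourced by start.sh if present
    enable_hdfs=0            # skip the JAVA_HOME check, hadoop jars, and libhdfs3 conf
    process_name=doris_cloud # binary, conf, pid, and log names derive from this value
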
diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp
index dfa528010f4..1e1b30bcf7b 100644
--- a/cloud/src/meta-service/meta_service_resource.cpp
+++ b/cloud/src/meta-service/meta_service_resource.cpp
@@ -364,6 +364,17 @@ bool normalize_hdfs_fs_name(std::string& fs_name) {
 static int add_hdfs_storage_vault(InstanceInfoPB& instance, Transaction* txn,
                                   StorageVaultPB& hdfs_param, MetaServiceCode& code,
                                   std::string& msg) {
+#ifndef ENABLE_HDFS_STORAGE_VAULT
+    code = MetaServiceCode::INVALID_ARGUMENT;
+    msg = fmt::format(
+            "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build 
option), "
+            "but HDFS storage vaults were detected: {}",
+            hdfs_param.name());
+    LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build 
option), "
+               << "but HDFS storage vaults were detected: " << 
hdfs_param.name();
+    return -1;
+#endif
+
     if (!hdfs_param.has_hdfs_info()) {
         code = MetaServiceCode::INVALID_ARGUMENT;
         msg = fmt::format("vault_name={} passed invalid argument", 
hdfs_param.name());
diff --git a/cloud/src/recycler/CMakeLists.txt b/cloud/src/recycler/CMakeLists.txt
index 6dbb8a0d696..12dc7351853 100644
--- a/cloud/src/recycler/CMakeLists.txt
+++ b/cloud/src/recycler/CMakeLists.txt
@@ -9,6 +9,10 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR
 
 file(GLOB_RECURSE SRC_LIST CONFIGURE_DEPENDS *.cpp)
 
+if (NOT ENABLE_HDFS_STORAGE_VAULT)
+    list(REMOVE_ITEM SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/hdfs_accessor.cpp)
+endif()
+
 if(BUILD_AZURE STREQUAL "OFF")
     list(REMOVE_ITEM SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/azure_obj_client.cpp")
 endif()
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 6096551f6e0..a83d4725e27 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -52,7 +52,9 @@
 #include "meta-store/keys.h"
 #include "meta-store/txn_kv.h"
 #include "meta-store/txn_kv_error.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
 #include "recycler/hdfs_accessor.h"
+#endif
 #include "recycler/s3_accessor.h"
 #include "recycler/storage_vault_accessor.h"
 #ifdef UNIT_TEST
@@ -471,6 +473,7 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
         TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
                                  &accessor_map_, &vault);
         if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
             auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
             int ret = accessor->init();
             if (ret != 0) {
@@ -480,6 +483,10 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
             }
 
             accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT 
build option), "
+                       << "but HDFS storage vaults were detected";
+#endif
         } else if (vault.has_obj_info()) {
 #ifdef UNIT_TEST
             auto accessor = std::make_shared<MockAccessor>();
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index 95c26385693..d5d368964a9 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -49,7 +49,9 @@
 #include "meta-store/txn_kv_error.h"
 #include "meta-store/versioned_value.h"
 #include "recycler/checker.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
 #include "recycler/hdfs_accessor.h"
+#endif
 #include "recycler/s3_accessor.h"
 #include "recycler/storage_vault_accessor.h"
 #ifdef UNIT_TEST
@@ -600,6 +602,7 @@ int InstanceRecycler::init_storage_vault_accessors() {
         TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
                                  &accessor_map_, &vault);
         if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
             auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
             int ret = accessor->init();
             if (ret != 0) {
@@ -612,6 +615,10 @@ int InstanceRecycler::init_storage_vault_accessors() {
                       << " resource_id=" << vault.id() << " name=" << 
vault.name()
                       << " hdfs_vault=" << 
vault.hdfs_info().ShortDebugString();
             accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT 
build option), "
+                       << "but HDFS storage vaults were detected";
+#endif
         } else if (vault.has_obj_info()) {
             auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
             if (!s3_conf) {
diff --git a/cloud/test/CMakeLists.txt b/cloud/test/CMakeLists.txt
index ffd768809b8..e1bfb42a626 100644
--- a/cloud/test/CMakeLists.txt
+++ b/cloud/test/CMakeLists.txt
@@ -63,7 +63,11 @@ add_executable(s3_accessor_test s3_accessor_test.cpp)
 
 add_executable(s3_accessor_mock_test s3_accessor_mock_test.cpp)
 
-add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+    add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+endif()
 
 add_executable(stopwatch_test stopwatch_test.cpp)
 
@@ -110,7 +114,10 @@ target_link_libraries(s3_accessor_test ${TEST_LINK_LIBS})
 
 target_link_libraries(s3_accessor_mock_test ${TEST_LINK_LIBS})
 
-target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+endif()
 
 target_link_libraries(stopwatch_test ${TEST_LINK_LIBS})
 
diff --git a/run-cloud-ut.sh b/run-cloud-ut.sh
index 2aa3fa80a56..fe9ea3de61c 100755
--- a/run-cloud-ut.sh
+++ b/run-cloud-ut.sh
@@ -187,6 +187,7 @@ find . -name "*.gcda" -exec rm {} \;
     -DMAKE_TEST=ON \
     -DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \
     -DUSE_LIBCPP="${USE_LIBCPP}" \
+    -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
     -DUSE_MEM_TRACKER=ON \
     -DUSE_JEMALLOC=OFF \
     -DSTRICT_MEMORY_USE=OFF \
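
run-cloud-ut.sh honors the same variable, and hdfs_accessor_test is neither built nor linked when the vault is disabled. A sketch of the corresponding invocation (any additional run-cloud-ut.sh options are omitted):

    # build and run cloud unit tests without the HDFS accessor
    ENABLE_HDFS_STORAGE_VAULT=OFF ./run-cloud-ut.sh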

