This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1983a9cf0b048cc7e6af814e5015a63cb598f641
Author: Qi Chen <[email protected]>
AuthorDate: Thu Jul 6 15:00:26 2023 +0800

    [Fix](multi-catalog) Fix hadoop short circuit reading not being enabled in 
some environments. (#21516)
    
    Fix hadoop short circuit reading not being enabled in some environments.
    - Revert #21430 because it will cause performance degradation issue.
    - Add `$HADOOP_CONF_DIR` to `$CLASSPATH`.
    - Remove the empty `hdfs-site.xml`, because in some environments it 
prevents hadoop short circuit reading from being enabled.
    - Copy the hadoop common native libs (which are copied from 
https://github.com/apache/doris-thirdparty/pull/98
    ) and add them to `LD_LIBRARY_PATH`. In some environments 
`LD_LIBRARY_PATH` doesn't contain the hadoop common native libs, which 
prevents hadoop short circuit reading from being enabled.
---
 be/CMakeLists.txt                               |  1 -
 bin/run-fs-benchmark.sh                         |  8 +++++++-
 bin/start_be.sh                                 | 11 +++++++----
 bin/start_fe.sh                                 |  5 ++++-
 build.sh                                        |  2 --
 conf/hdfs-site.xml                              | 23 -----------------------
 docs/en/docs/lakehouse/multi-catalog/hive.md    |  2 +-
 docs/zh-CN/docs/lakehouse/multi-catalog/hive.md |  2 +-
 thirdparty/build-thirdparty.sh                  |  3 +++
 9 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 5804067da9..5844e80d42 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -1073,7 +1073,6 @@ install(FILES
     ${BASE_DIR}/../conf/odbcinst.ini
     ${BASE_DIR}/../conf/asan_suppr.conf
     ${BASE_DIR}/../conf/lsan_suppr.conf
-    ${BASE_DIR}/../conf/hdfs-site.xml
     DESTINATION ${OUTPUT_DIR}/conf)
 
 get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY 
INCLUDE_DIRECTORIES)
diff --git a/bin/run-fs-benchmark.sh b/bin/run-fs-benchmark.sh
index a128217fa2..fe3ab8bc15 100755
--- a/bin/run-fs-benchmark.sh
+++ b/bin/run-fs-benchmark.sh
@@ -71,12 +71,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     done
 fi
 
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+    export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}"
+fi
+
 # the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs
 # and conf/ dir so that hadoop libhdfs can read .xml config file in conf/
-export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}"
 # DORIS_CLASSPATH is for self-managed jni
 export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
 
+export 
LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}"
+
 jdk_version() {
     local java_cmd="${1}"
     local result
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 8984986551..60d2677399 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -102,15 +102,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     done
 fi
 
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+    export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}"
+fi
+
 # the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs
 # and conf/ dir so that hadoop libhdfs can read .xml config file in conf/
-if command -v hadoop >/dev/null 2>&1; then
-    HADOOP_SYSTEM_CLASSPATH="$(hadoop classpath --glob)"
-fi
-export 
CLASSPATH="${DORIS_CLASSPATH}:${HADOOP_SYSTEM_CLASSPATH}:${DORIS_HOME}/conf/"
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}"
 # DORIS_CLASSPATH is for self-managed jni
 export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
 
+export 
LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}"
+
 jdk_version() {
     local java_cmd="${1}"
     local result
diff --git a/bin/start_fe.sh b/bin/start_fe.sh
index 9d21651fc0..3cdebe8f4b 100755
--- a/bin/start_fe.sh
+++ b/bin/start_fe.sh
@@ -179,7 +179,10 @@ done
 # make sure the doris-fe.jar is at first order, so that some classed
 # with same qualified name can be loaded priority from doris-fe.jar
 CLASSPATH="${DORIS_FE_JAR}:${CLASSPATH}"
-export CLASSPATH="${CLASSPATH}:${DORIS_HOME}/lib:${DORIS_HOME}/conf"
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+    CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}"
+fi
+export CLASSPATH="${DORIS_HOME}/conf:${CLASSPATH}:${DORIS_HOME}/lib"
 
 pidfile="${PID_DIR}/fe.pid"
 
diff --git a/build.sh b/build.sh
index 260bc1dd11..7a316b4d77 100755
--- a/build.sh
+++ b/build.sh
@@ -570,7 +570,6 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/bin"/*_fe.sh "${DORIS_OUTPUT}/fe/bin"/
     cp -r -p "${DORIS_HOME}/conf/fe.conf" "${DORIS_OUTPUT}/fe/conf"/
     cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/
-    cp -r -p "${DORIS_HOME}/conf"/*.xml "${DORIS_OUTPUT}/fe/conf"/
     cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate" 
"${DORIS_OUTPUT}/fe/"/
     rm -rf "${DORIS_OUTPUT}/fe/lib"/*
     cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/
@@ -603,7 +602,6 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
 
     if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
         cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_OUTPUT}/be/lib/"
-        rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/"
     fi
 
     if [[ "${BUILD_BE_JAVA_EXTENSIONS_IN_CONF}" -eq 1 ]]; then
diff --git a/conf/hdfs-site.xml b/conf/hdfs-site.xml
deleted file mode 100644
index 32235bf8bc..0000000000
--- a/conf/hdfs-site.xml
+++ /dev/null
@@ -1,23 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-</configuration>
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md 
b/docs/en/docs/lakehouse/multi-catalog/hive.md
index 04ef0de50e..9daacbcd22 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -32,7 +32,7 @@ In addition to Hive, many other systems also use the Hive 
Metastore to store met
 
 ## Terms and Conditions
 
-1. Need to put core-site.xml, hdfs-site.xml and hive-site.xml in the conf 
directory of FE and BE.
+1. Need to put core-site.xml, hdfs-site.xml and hive-site.xml in the conf 
directory of FE and BE. First read the hadoop configuration file in the conf 
directory, and then read the related to the environment variable 
`HADOOP_CONF_DIR` configuration file.
 2. hive supports version 1/2/3.
 3. Support Managed Table and External Table and part of Hive View.
 4. Can identify hive, iceberg, hudi metadata stored in Hive Metastore.
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md 
b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 0b8c4ac62a..25c5c6bd26 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -32,7 +32,7 @@ under the License.
 
 ## 使用须知
 
-1. 需将 core-site.xml,hdfs-site.xml 和 hive-site.xml  放到 FE 和 BE 的 conf 目录下。
+1. 将 core-site.xml,hdfs-site.xml 和 hive-site.xml  放到 FE 和 BE 的 conf 目录下。优先读取 
conf 目录下的 hadoop 配置文件,再读取环境变量 `HADOOP_CONF_DIR` 的相关配置文件。 
 2. hive 支持 1/2/3 版本。
 3. 支持 Managed Table 和 External Table,支持部分 Hive View。
 4. 可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 2b12012316..1fd8f0f7f7 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1583,6 +1583,9 @@ build_hadoop_libs() {
     mkdir -p "${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
     cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/* 
"${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
     cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/include/hdfs.h 
"${TP_INSTALL_DIR}/include/hadoop_hdfs/"
+    rm -rf "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/*.a"
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type f ! -name '*.a' 
-exec cp {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type l -exec cp -P {} 
"${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
 }
 
 if [[ "${#packages[@]}" -eq 0 ]]; then


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to