This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1983a9cf0b048cc7e6af814e5015a63cb598f641 Author: Qi Chen <[email protected]> AuthorDate: Thu Jul 6 15:00:26 2023 +0800 [Fix](multi-catalog) Fix hadoop short circuit reading can not enabled in some environments. (#21516) Fix the problem that hadoop short-circuit reading cannot be enabled in some environments. - Revert #21430 because it causes a performance degradation. - Add `$HADOOP_CONF_DIR` to `$CLASSPATH`. - Remove the empty `hdfs-site.xml`, because in some environments it prevents hadoop short-circuit reading from being enabled. - Copy the hadoop common native libs (taken from https://github.com/apache/doris-thirdparty/pull/98 ) and add them to `LD_LIBRARY_PATH`, because in some environments `LD_LIBRARY_PATH` doesn't contain the hadoop common native libs, which prevents hadoop short-circuit reading from being enabled. --- be/CMakeLists.txt | 1 - bin/run-fs-benchmark.sh | 8 +++++++- bin/start_be.sh | 11 +++++++---- bin/start_fe.sh | 5 ++++- build.sh | 2 -- conf/hdfs-site.xml | 23 ----------------------- docs/en/docs/lakehouse/multi-catalog/hive.md | 2 +- docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 2 +- thirdparty/build-thirdparty.sh | 3 +++ 9 files changed, 23 insertions(+), 34 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 5804067da9..5844e80d42 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -1073,7 +1073,6 @@ install(FILES ${BASE_DIR}/../conf/odbcinst.ini ${BASE_DIR}/../conf/asan_suppr.conf ${BASE_DIR}/../conf/lsan_suppr.conf - ${BASE_DIR}/../conf/hdfs-site.xml DESTINATION ${OUTPUT_DIR}/conf) get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) diff --git a/bin/run-fs-benchmark.sh b/bin/run-fs-benchmark.sh index a128217fa2..fe3ab8bc15 100755 --- a/bin/run-fs-benchmark.sh +++ b/bin/run-fs-benchmark.sh @@ -71,12 +71,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then done fi +if [[ -n "${HADOOP_CONF_DIR}" ]]; then + export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}" +fi + # the 
CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs # and conf/ dir so that hadoop libhdfs can read .xml config file in conf/ -export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}" +export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}" # DORIS_CLASSPATH is for self-managed jni export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}" +export LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}" + jdk_version() { local java_cmd="${1}" local result diff --git a/bin/start_be.sh b/bin/start_be.sh index 8984986551..60d2677399 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -102,15 +102,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then done fi +if [[ -n "${HADOOP_CONF_DIR}" ]]; then + export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}" +fi + # the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs # and conf/ dir so that hadoop libhdfs can read .xml config file in conf/ -if command -v hadoop >/dev/null 2>&1; then - HADOOP_SYSTEM_CLASSPATH="$(hadoop classpath --glob)" -fi -export CLASSPATH="${DORIS_CLASSPATH}:${HADOOP_SYSTEM_CLASSPATH}:${DORIS_HOME}/conf/" +export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}" # DORIS_CLASSPATH is for self-managed jni export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}" +export LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}" + jdk_version() { local java_cmd="${1}" local result diff --git a/bin/start_fe.sh b/bin/start_fe.sh index 9d21651fc0..3cdebe8f4b 100755 --- a/bin/start_fe.sh +++ b/bin/start_fe.sh @@ -179,7 +179,10 @@ done # make sure the doris-fe.jar is at first order, so that some classed # with same qualified name can be loaded priority from doris-fe.jar CLASSPATH="${DORIS_FE_JAR}:${CLASSPATH}" -export CLASSPATH="${CLASSPATH}:${DORIS_HOME}/lib:${DORIS_HOME}/conf" +if [[ -n "${HADOOP_CONF_DIR}" ]]; then + CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}" +fi +export 
CLASSPATH="${DORIS_HOME}/conf:${CLASSPATH}:${DORIS_HOME}/lib" pidfile="${PID_DIR}/fe.pid" diff --git a/build.sh b/build.sh index 260bc1dd11..7a316b4d77 100755 --- a/build.sh +++ b/build.sh @@ -570,7 +570,6 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then cp -r -p "${DORIS_HOME}/bin"/*_fe.sh "${DORIS_OUTPUT}/fe/bin"/ cp -r -p "${DORIS_HOME}/conf/fe.conf" "${DORIS_OUTPUT}/fe/conf"/ cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/ - cp -r -p "${DORIS_HOME}/conf"/*.xml "${DORIS_OUTPUT}/fe/conf"/ cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate" "${DORIS_OUTPUT}/fe/"/ rm -rf "${DORIS_OUTPUT}/fe/lib"/* cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/ @@ -603,7 +602,6 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/" - rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/" fi if [[ "${BUILD_BE_JAVA_EXTENSIONS_IN_CONF}" -eq 1 ]]; then diff --git a/conf/hdfs-site.xml b/conf/hdfs-site.xml deleted file mode 100644 index 32235bf8bc..0000000000 --- a/conf/hdfs-site.xml +++ /dev/null @@ -1,23 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. 
---> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<!-- Put site-specific property overrides in this file. --> - -<configuration> -</configuration> diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md index 04ef0de50e..9daacbcd22 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hive.md +++ b/docs/en/docs/lakehouse/multi-catalog/hive.md @@ -32,7 +32,7 @@ In addition to Hive, many other systems also use the Hive Metastore to store met ## Terms and Conditions -1. Need to put core-site.xml, hdfs-site.xml and hive-site.xml in the conf directory of FE and BE. +1. Need to put core-site.xml, hdfs-site.xml and hive-site.xml in the conf directory of FE and BE. The hadoop configuration files in the conf directory are read first, and then the configuration files in the directory specified by the environment variable `HADOOP_CONF_DIR`. 2. hive supports version 1/2/3. 3. Support Managed Table and External Table and part of Hive View. 4. Can identify hive, iceberg, hudi metadata stored in Hive Metastore. diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md index 0b8c4ac62a..25c5c6bd26 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md @@ -32,7 +32,7 @@ under the License. ## 使用须知 -1. 需将 core-site.xml,hdfs-site.xml 和 hive-site.xml 放到 FE 和 BE 的 conf 目录下。 +1. 将 core-site.xml,hdfs-site.xml 和 hive-site.xml 放到 FE 和 BE 的 conf 目录下。优先读取 conf 目录下的 hadoop 配置文件,再读取环境变量 `HADOOP_CONF_DIR` 的相关配置文件。 2. hive 支持 1/2/3 版本。 3. 支持 Managed Table 和 External Table,支持部分 Hive View。 4. 
可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。 diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 2b12012316..1fd8f0f7f7 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1583,6 +1583,9 @@ build_hadoop_libs() { mkdir -p "${TP_INSTALL_DIR}/lib/hadoop_hdfs/" cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/* "${TP_INSTALL_DIR}/lib/hadoop_hdfs/" cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/include/hdfs.h "${TP_INSTALL_DIR}/include/hadoop_hdfs/" + rm -rf "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/*.a" + find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type f ! -name '*.a' -exec cp {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \; + find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type l -exec cp -P {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \; } if [[ "${#packages[@]}" -eq 0 ]]; then --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
