This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 680199e855 [Fix](multi-catalog) Fix Hadoop short-circuit reading not being enabled in some environments. (#21548)
680199e855 is described below
commit 680199e8557de1a806321ce81981f838b2fb590f
Author: Qi Chen <[email protected]>
AuthorDate: Thu Jul 6 13:44:55 2023 +0800
[Fix](multi-catalog) Fix Hadoop short-circuit reading not being enabled in some environments. (#21548)

Merge #21516 to branch-1.2-lts.

Fix Hadoop short-circuit reading not being enabled in some environments.

- Revert #21430 because it causes a performance degradation issue.
- Add `$HADOOP_CONF_DIR` to `$CLASSPATH`.
- Remove the empty `hdfs-site.xml`, because in some environments it prevents Hadoop short-circuit reading from being enabled.
- Copy the Hadoop common native libs (copied from https://github.com/apache/doris-thirdparty/pull/98) and add them to `LD_LIBRARY_PATH`, because in some environments `LD_LIBRARY_PATH` does not contain the Hadoop common native libs, which prevents Hadoop short-circuit reading from being enabled.
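
For context, HDFS short-circuit reads only take effect when the client picks up the cluster's `dfs.client.read.shortcircuit` and `dfs.domain.socket.path` settings and can load libhadoop.so. A quick way to check what the client will resolve (a sketch, assuming the hadoop CLI is installed and `HADOOP_CONF_DIR` points at the cluster config):

    # Prints "true" when the site config enables short-circuit reads
    hdfs getconf -confKey dfs.client.read.shortcircuit
    # Prints the site-specific domain socket used for short-circuit reads
    hdfs getconf -confKey dfs.domain.socket.path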
---
be/CMakeLists.txt | 1 -
bin/start_be.sh | 11 +++++++----
bin/start_fe.sh | 5 ++++-
build.sh | 2 --
conf/hdfs-site.xml | 23 -----------------------
docs/en/docs/lakehouse/multi-catalog/hive.md | 13 +++++++------
docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 17 +++++++++--------
thirdparty/build-thirdparty.sh | 3 +++
8 files changed, 30 insertions(+), 45 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 12621403fb..9307e018f0 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -951,7 +951,6 @@ install(FILES
${BASE_DIR}/../conf/be.conf
${BASE_DIR}/../conf/odbcinst.ini
${BASE_DIR}/../conf/asan_suppr.conf
- ${BASE_DIR}/../conf/hdfs-site.xml
DESTINATION ${OUTPUT_DIR}/conf)
get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
diff --git a/bin/start_be.sh b/bin/start_be.sh
index e4bd71a940..a26241862b 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -102,15 +102,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
done
fi
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+ export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}"
+fi
+
# the CLASSPATH and LIBHDFS_OPTS are used for hadoop libhdfs
# and conf/ dir so that hadoop libhdfs can read .xml config files in conf/
-if command -v hadoop >/dev/null 2>&1; then
- HADOOP_SYSTEM_CLASSPATH="$(hadoop classpath --glob)"
-fi
-export CLASSPATH="${HADOOP_SYSTEM_CLASSPATH}:${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}"
# DORIS_CLASSPATH is for self-managed jni
export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
+export LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}"
+
jdk_version() {
local java_cmd="${1}"
local result
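
With this change the BE's effective config lookup order for libhdfs becomes: ${DORIS_HOME}/conf/ first, then ${HADOOP_CONF_DIR}, then the Doris jars and any pre-existing CLASSPATH. A minimal sketch of how the value is assembled (the config path is an assumption; DORIS_HOME and DORIS_CLASSPATH are set earlier in the script):

    export HADOOP_CONF_DIR=/etc/hadoop/conf
    DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}"
    export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}"
    echo "${CLASSPATH}"   # conf/ wins, HADOOP_CONF_DIR second, then the rest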
diff --git a/bin/start_fe.sh b/bin/start_fe.sh
index 5028baedaf..03b0e8e8d5 100755
--- a/bin/start_fe.sh
+++ b/bin/start_fe.sh
@@ -179,7 +179,10 @@ done
# make sure the doris-fe.jar is at first order, so that some classes
# with the same qualified name can be loaded preferentially from doris-fe.jar
CLASSPATH="${DORIS_FE_JAR}:${CLASSPATH}"
-export CLASSPATH="${CLASSPATH}:${DORIS_HOME}/lib:${DORIS_HOME}/conf"
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+ CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}"
+fi
+export CLASSPATH="${DORIS_HOME}/conf:${CLASSPATH}:${DORIS_HOME}/lib"
pidfile="${PID_DIR}/fe.pid"
diff --git a/build.sh b/build.sh
index 531ba4ea13..a7f46b018d 100755
--- a/build.sh
+++ b/build.sh
@@ -507,7 +507,6 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/bin"/*_fe.sh "${DORIS_OUTPUT}/fe/bin"/
cp -r -p "${DORIS_HOME}/conf/fe.conf" "${DORIS_OUTPUT}/fe/conf"/
cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/
- cp -r -p "${DORIS_HOME}/conf"/*.xml "${DORIS_OUTPUT}/fe/conf"/
cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate"
"${DORIS_OUTPUT}/fe/"/
rm -rf "${DORIS_OUTPUT}/fe/lib"/*
cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/
@@ -541,7 +540,6 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/"
"${DORIS_OUTPUT}/be/lib/"
- rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/"
fi
if [[ "${BUILD_JAVA_UDF}" -eq 0 ]]; then
diff --git a/conf/hdfs-site.xml b/conf/hdfs-site.xml
deleted file mode 100644
index 32235bf8bc..0000000000
--- a/conf/hdfs-site.xml
+++ /dev/null
@@ -1,23 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-</configuration>
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md
index f82df22ab7..1a269d6774 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -34,17 +34,18 @@ Besides Hive, many other systems, such as Iceberg and Hudi, use Hive Metastore t
When connecting to Hive, Doris:
-1. Supports Hive version 1/2/3;
-2. Supports both Managed Table and External Table;
-3. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore;
-4. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`).
-5. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
+1. Put core-site.xml, hdfs-site.xml and hive-site.xml in the conf directory of FE and BE. The Hadoop configuration files in the conf directory are read first, and then the configuration files under the environment variable `HADOOP_CONF_DIR` are read.
+2. Supports Hive version 1/2/3;
+3. Supports both Managed Table and External Table;
+4. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore;
+5. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`).
+6. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
    1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. copy core-site.xml and hdfs-site.xml from the hive cluster to fe/conf/ and apache_hdfs_broker/conf
<version since="dev">
-6. Supports Hive / Iceberg tables with data stored in GooseFS (GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
+7. Supports Hive / Iceberg tables with data stored in GooseFS (GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
    1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating the catalog
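
A sketch of the setup described in the new item 1, for a typical deployment (all paths are assumptions):

    # Hadoop/Hive client configs -> Doris FE and BE conf directories
    cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml /opt/doris/fe/conf/
    cp /etc/hive/conf/hive-site.xml /opt/doris/fe/conf/
    cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml /opt/doris/be/conf/
    cp /etc/hive/conf/hive-site.xml /opt/doris/be/conf/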
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 98d32b8631..6cc16bdbe1 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -30,19 +30,20 @@ under the License.
Besides Hive, many other systems also use Hive Metastore to store metadata. So through the Hive Catalog we can access not only Hive but also systems that use Hive Metastore as their metadata store, such as Iceberg and Hudi.
-## Usage Restrictions
-
-1. Supports Hive version 1/2/3.
-2. Supports both Managed Table and External Table.
-3. Can identify Hive, Iceberg, and Hudi metadata stored in Hive Metastore.
-4. Supports Hive tables with data stored in JuiceFS, used the same way as normal Hive tables (put juicefs-hadoop-x.x.x.jar in fe/lib/ and apache_hdfs_broker/lib/).
-5. Supports Hive tables with data stored in CHDFS. Prepare the environment as follows:
+## Usage Notes
+
+1. Put core-site.xml, hdfs-site.xml and hive-site.xml in the conf directory of FE and BE. The Hadoop configuration files in the conf directory are read first, and then the configuration files under the environment variable `HADOOP_CONF_DIR` are read.
+2. Supports Hive version 1/2/3.
+3. Supports both Managed Table and External Table.
+4. Can identify Hive, Iceberg, and Hudi metadata stored in Hive Metastore.
+5. Supports Hive tables with data stored in JuiceFS, used the same way as normal Hive tables (put juicefs-hadoop-x.x.x.jar in fe/lib/ and apache_hdfs_broker/lib/).
+6. Supports Hive tables with data stored in CHDFS. Prepare the environment as follows:
    1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. copy core-site.xml and hdfs-site.xml from the Hadoop cluster where Hive resides to fe/conf/ and apache_hdfs_broker/conf
<version since="dev">
-6. Supports Hive and Iceberg tables with data stored in GooseFS (GFS). Prepare the environment as follows:
+7. Supports Hive and Iceberg tables with data stored in GooseFS (GFS). Prepare the environment as follows:
    1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. add properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating the catalog
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 58b629654a..ac6850a094 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1576,6 +1576,9 @@ build_hadoop_libs() {
mkdir -p "${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/* "${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/include/hdfs.h "${TP_INSTALL_DIR}/include/hadoop_hdfs/"
+ rm -rf "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/*.a"
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type f ! -name '*.a' -exec cp {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type l -exec cp -P {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
}
if [[ "${#packages[@]}" -eq 0 ]]; then
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]