This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7e61a85331 [refactor](libhdfs) introduce hadoop libhdfs (#18204)
7e61a85331 is described below
commit 7e61a8533151d80a903d2452119b77bca59b43bc
Author: Mingyu Chen <[email protected]>
AuthorDate: Fri Mar 31 18:41:39 2023 +0800
[refactor](libhdfs) introduce hadoop libhdfs (#18204)
1. Introduce hadoop libhdfs
2. For the Linux x86_64 platform, use hadoop libhdfs
3. For other platforms, use libhdfs3, because we currently have no hadoop libhdfs binary for them (see the sketch below)
Co-authored-by: adonis0147 <[email protected]>
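As a quick illustration of points 2 and 3, here is a minimal sketch of how the switch looks from BE code; it mirrors the new be/src/io/fs/hdfs.h and the USE_HADOOP_HDFS / USE_LIBHDFS3 definitions added to be/CMakeLists.txt below (the small main() is illustrative only):

    // the wrapper header picks the hdfs client library per platform
    #ifdef USE_HADOOP_HDFS
    #include <hadoop_hdfs/hdfs.h>   // hadoop native libhdfs (Linux x86_64 for now)
    #else
    #include <hdfs/hdfs.h>          // libhdfs3 on other platforms
    #endif

    int main() {
        hdfsBuilder* builder = hdfsNewBuilder();  // builder API is the same in both libraries
        hdfsFreeBuilder(builder);
        return 0;
    }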
---
be/CMakeLists.txt | 25 ++++++--
be/src/common/config.h | 2 -
be/src/io/fs/err_utils.cpp | 12 +++-
be/src/{util/hdfs_util.h => io/fs/hdfs.h} | 34 ++---------
be/src/io/fs/hdfs_file_reader.cpp | 7 ++-
be/src/io/fs/hdfs_file_system.cpp | 7 +++
be/src/io/fs/hdfs_file_system.h | 4 +-
be/src/io/hdfs_builder.cpp | 6 +-
be/src/io/hdfs_builder.h | 8 ++-
be/src/tools/meta_tool.cpp | 13 +++--
be/src/util/hdfs_util.h | 3 +-
be/src/util/jni-util.cpp | 68 ++++++++++++++--------
be/src/util/jni-util.h | 5 ++
be/src/util/libjvm_loader.cpp | 9 +++
.../vec/exec/format/parquet/bool_rle_decoder.cpp | 3 +
.../parquet/vparquet_column_chunk_reader.cpp | 3 +-
bin/start_be.sh | 52 ++++++++++++++---
build.sh | 4 ++
conf/be.conf | 3 +
.../org/apache/doris/clone/BeLoadRebalancer.java | 2 +-
20 files changed, 181 insertions(+), 89 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 8bb1100c51..cc0b6bbdc1 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -420,9 +420,6 @@ set_target_properties(k5crypto PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/li
add_library(gssapi_krb5 STATIC IMPORTED)
set_target_properties(gssapi_krb5 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgssapi_krb5.a)
-add_library(hdfs3 STATIC IMPORTED)
-set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
-
find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)
if (OS_MACOSX)
@@ -762,12 +759,32 @@ set(COMMON_THIRDPARTY
# put this after lz4 to avoid using lz4 lib in librdkafka
librdkafka_cpp
librdkafka
- hdfs3
xml2
lzma
simdjson
)
+if (ARCH_AMD64 AND OS_LINUX)
+ add_library(hadoop_hdfs STATIC IMPORTED)
+    set_target_properties(hadoop_hdfs PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/hadoop_hdfs/native/libhdfs.a)
+
+ set(COMMON_THIRDPARTY
+ ${COMMON_THIRDPARTY}
+ hadoop_hdfs
+ )
+ add_definitions(-DUSE_HADOOP_HDFS)
+else()
+ add_library(hdfs3 STATIC IMPORTED)
+    set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
+
+ # TODO: use arm hadoop hdfs to replace this
+ set(COMMON_THIRDPARTY
+ ${COMMON_THIRDPARTY}
+ hdfs3
+ )
+ add_definitions(-DUSE_LIBHDFS3)
+endif()
+
if (absl_FOUND)
set(COMMON_THIRDPARTY
${COMMON_THIRDPARTY}
diff --git a/be/src/common/config.h b/be/src/common/config.h
index b59d9d42e3..8f34155280 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -879,8 +879,6 @@ CONF_Int32(segcompaction_threshold_segment_num, "10");
// The segment whose row number above the threshold will be compacted during segcompaction
CONF_Int32(segcompaction_small_threshold, "1048576");
-CONF_String(jvm_max_heap_size, "1024M");
-
// enable java udf and jdbc scannode
CONF_Bool(enable_java_support, "true");
diff --git a/be/src/io/fs/err_utils.cpp b/be/src/io/fs/err_utils.cpp
index 1e788165e5..d01c7e7488 100644
--- a/be/src/io/fs/err_utils.cpp
+++ b/be/src/io/fs/err_utils.cpp
@@ -18,10 +18,11 @@
#include "io/fs/err_utils.h"
#include <fmt/format.h>
-#include <hdfs/hdfs.h>
#include <sstream>
+#include "io/fs/hdfs.h"
+
namespace doris {
namespace io {
@@ -37,8 +38,15 @@ std::string errcode_to_str(const std::error_code& ec) {
std::string hdfs_error() {
std::stringstream ss;
char buf[1024];
- ss << "(" << errno << "), " << strerror_r(errno, buf, 1024);
+ ss << "(" << errno << "), " << strerror_r(errno, buf, 1024) << ")";
+#ifdef USE_HADOOP_HDFS
+ char* root_cause = hdfsGetLastExceptionRootCause();
+ if (root_cause != nullptr) {
+ ss << ", reason: " << root_cause;
+ }
+#else
ss << ", reason: " << hdfsGetLastError();
+#endif
return ss.str();
}
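For reference, a standalone sketch of the message hdfs_error() builds (GNU strerror_r assumed, as in the Linux BE build; the root-cause text is a placeholder for what hdfsGetLastExceptionRootCause() / hdfsGetLastError() would return):

    #include <cerrno>
    #include <cstring>
    #include <iostream>
    #include <sstream>

    int main() {
        errno = ENOENT;  // pretend an hdfs call just failed and set errno
        char buf[1024];
        std::stringstream ss;
        // same formatting as hdfs_error() above
        ss << "(" << errno << "), " << strerror_r(errno, buf, 1024) << ")";
        // under USE_HADOOP_HDFS the reason comes from hdfsGetLastExceptionRootCause(),
        // otherwise from hdfsGetLastError(); a placeholder string stands in here
        ss << ", reason: " << "java.io.FileNotFoundException (placeholder)";
        std::cout << ss.str() << std::endl;
        return 0;
    }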
diff --git a/be/src/util/hdfs_util.h b/be/src/io/fs/hdfs.h
similarity index 59%
copy from be/src/util/hdfs_util.h
copy to be/src/io/fs/hdfs.h
index f98bdd5ab3..eb9e1b2c07 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/io/fs/hdfs.h
@@ -17,34 +17,8 @@
#pragma once
+#ifdef USE_HADOOP_HDFS
+#include <hadoop_hdfs/hdfs.h>
+#else
#include <hdfs/hdfs.h>
-
-#include <map>
-#include <memory>
-#include <string>
-
-#include "common/status.h"
-#include "io/fs/path.h"
-#include "io/hdfs_builder.h"
-
-namespace doris {
-namespace io {
-
-class HDFSHandle {
-public:
- ~HDFSHandle() {}
-
- static HDFSHandle& instance();
-
- hdfsFS create_hdfs_fs(HDFSCommonBuilder& builder);
-
-private:
- HDFSHandle() {}
-};
-
-// if the format of path is hdfs://ip:port/path, replace it to /path.
-// path like hdfs://ip:port/path can't be used by libhdfs3.
-Path convert_path(const Path& path, const std::string& namenode);
-
-} // namespace io
-} // namespace doris
+#endif
diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp
index 219410ac18..ddd035213b 100644
--- a/be/src/io/fs/hdfs_file_reader.cpp
+++ b/be/src/io/fs/hdfs_file_reader.cpp
@@ -17,9 +17,11 @@
#include "io/fs/hdfs_file_reader.h"
+#include "io/fs/err_utils.h"
#include "io/fs/hdfs_file_system.h"
#include "service/backend_options.h"
#include "util/doris_metrics.h"
+
namespace doris {
namespace io {
HdfsFileReader::HdfsFileReader(Path path, size_t file_size, const std::string& name_node,
@@ -66,7 +68,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
int res = hdfsSeek(handle->hdfs_fs, _hdfs_file, offset);
if (res != 0) {
return Status::InternalError("Seek to offset failed. (BE: {})
offset={}, err: {}",
- BackendOptions::get_localhost(), offset,
hdfsGetLastError());
+ BackendOptions::get_localhost(), offset,
hdfs_error());
}
size_t bytes_req = result.size;
@@ -84,8 +86,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
if (loop_read < 0) {
return Status::InternalError(
"Read hdfs file failed. (BE: {}) namenode:{}, path:{}, err: {}",
-                    BackendOptions::get_localhost(), _name_node, _path.string(),
-                    hdfsGetLastError());
+                    BackendOptions::get_localhost(), _name_node, _path.string(), hdfs_error());
}
if (loop_read == 0) {
break;
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 3eb5bea4c4..0d33bd30c9 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -68,6 +68,13 @@ private:
Status HdfsFileSystem::create(const THdfsParams& hdfs_params, const std::string& path,
std::shared_ptr<HdfsFileSystem>* fs) {
+#ifdef USE_HADOOP_HDFS
+ if (!config::enable_java_support) {
+ return Status::InternalError(
+ "hdfs file system is not enabled, you can change be config
enable_java_support to "
+ "true.");
+ }
+#endif
(*fs).reset(new HdfsFileSystem(hdfs_params, path));
return (*fs)->connect();
}
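Side note: hadoop libhdfs goes through an in-process JVM, which is why the create() path above is gated on enable_java_support. A standalone sketch of that guard pattern (config flag name taken from be/src/common/config.h; everything else is illustrative):

    #include <iostream>
    #include <string>

    namespace config {
    bool enable_java_support = false;  // stand-in for the BE config flag of the same name
    }

    std::string create_hdfs_fs() {
    #ifdef USE_HADOOP_HDFS
        if (!config::enable_java_support) {
            return "hdfs file system is not enabled, you can change be config enable_java_support to true.";
        }
    #endif
        return "OK";
    }

    int main() {
        std::cout << create_hdfs_fs() << std::endl;
        return 0;
    }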
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index 75663bf198..9cec56b86c 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -18,13 +18,13 @@
#pragma once
#include <gen_cpp/PlanNodes_types.h>
-#include <hdfs/hdfs.h>
#include <atomic>
+#include "io/fs/hdfs.h"
#include "io/fs/remote_file_system.h"
-namespace doris {
+namespace doris {
namespace io {
class HdfsFileSystemHandle {
diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp
index b08b973860..8647a7450a 100644
--- a/be/src/io/hdfs_builder.cpp
+++ b/be/src/io/hdfs_builder.cpp
@@ -26,6 +26,7 @@
#include "util/string_util.h"
#include "util/uid_util.h"
#include "util/url_coding.h"
+
namespace doris {
Status HDFSCommonBuilder::init_hdfs_builder() {
@@ -35,6 +36,7 @@ Status HDFSCommonBuilder::init_hdfs_builder() {
return Status::InternalError(
"failed to init HDFSCommonBuilder, please check check
be/conf/hdfs-site.xml");
}
+ hdfsBuilderSetForceNewInstance(hdfs_builder);
return Status::OK();
}
@@ -53,7 +55,10 @@ Status HDFSCommonBuilder::run_kinit() {
if (!rc) {
return Status::InternalError("Kinit failed, errMsg: " + msg);
}
+#ifdef USE_LIBHDFS3
+ hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str());
hdfsBuilderSetKerbTicketCachePath(hdfs_builder, ticket_path.c_str());
+#endif
return Status::OK();
}
@@ -100,7 +105,6 @@ Status createHDFSBuilder(const THdfsParams& hdfsParams, HDFSCommonBuilder* build
if (hdfsParams.__isset.hdfs_kerberos_principal) {
builder->need_kinit = true;
builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal;
-        hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
}
if (hdfsParams.__isset.hdfs_kerberos_keytab) {
builder->need_kinit = true;
diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h
index ecc08d5a71..7d448cb1cb 100644
--- a/be/src/io/hdfs_builder.h
+++ b/be/src/io/hdfs_builder.h
@@ -17,10 +17,9 @@
#pragma once
-#include <hdfs/hdfs.h>
-
#include "common/status.h"
#include "gen_cpp/PlanNodes_types.h"
+#include "io/fs/hdfs.h"
namespace doris {
@@ -38,9 +37,12 @@ class HDFSCommonBuilder {
public:
HDFSCommonBuilder() {}
~HDFSCommonBuilder() {
+#ifdef USE_LIBHDFS3
+ // for hadoop hdfs, the hdfs_builder will be freed in hdfsConnect
if (hdfs_builder != nullptr) {
hdfsFreeBuilder(hdfs_builder);
}
+#endif
}
// Must call this to init hdfs_builder first.
@@ -51,7 +53,7 @@ public:
Status run_kinit();
private:
- hdfsBuilder* hdfs_builder;
+ hdfsBuilder* hdfs_builder = nullptr;
bool need_kinit {false};
std::string hdfs_kerberos_keytab;
std::string hdfs_kerberos_principal;
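The #ifdef in the destructor above encodes an ownership difference between the two libraries: hadoop libhdfs frees the builder inside the connect call, while libhdfs3 leaves it to the caller. A hedged sketch of that difference (namenode address made up; the builder calls exist in both libraries):

    #include "io/fs/hdfs.h"  // resolves to hadoop libhdfs or libhdfs3, see above

    int main() {
        hdfsBuilder* builder = hdfsNewBuilder();
        hdfsBuilderSetNameNode(builder, "hdfs://127.0.0.1:8020");  // made-up namenode
        hdfsFS fs = hdfsBuilderConnect(builder);
    #ifdef USE_LIBHDFS3
        // libhdfs3: the builder is still owned by the caller and must be freed explicitly
        hdfsFreeBuilder(builder);
    #else
        // hadoop libhdfs: hdfsBuilderConnect() already freed the builder; freeing it
        // again here would be a double free
    #endif
        if (fs != nullptr) {
            hdfsDisconnect(fs);
        }
        return 0;
    }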
diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp
index 02dffcd9c8..452402852f 100644
--- a/be/src/tools/meta_tool.cpp
+++ b/be/src/tools/meta_tool.cpp
@@ -142,7 +142,7 @@ void delete_meta(DataDir* data_dir) {
Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
std::string root_path;
-    RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(dir, &root_path));
+    RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path));
doris::StorePath path;
auto res = parse_root_path(root_path, &path);
if (!res.ok()) {
@@ -156,8 +156,8 @@ Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
std::cout << "new data dir failed" << std::endl;
return Status::InternalError("new data dir failed");
}
- st = p->init();
- if (!st.ok()) {
+ res = p->init();
+ if (!res.ok()) {
std::cout << "data_dir load failed" << std::endl;
return Status::InternalError("data_dir load failed");
}
@@ -188,7 +188,7 @@ void batch_delete_meta(const std::string& tablet_file) {
}
// 1. get dir
std::string dir;
- Status st = io::global_local_filesystem()->canonicalize(v[0], &dir);
+    Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir);
if (!st.ok()) {
std::cout << "invalid root dir in tablet_file: " << line << std::endl;
err_num++;
@@ -295,7 +295,7 @@ Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* f
void show_segment_footer(const std::string& file_name) {
doris::io::FileReaderSPtr file_reader;
-    Status st = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
+    Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
if (!status.ok()) {
std::cout << "open file failed: " << status << std::endl;
return;
@@ -327,7 +327,8 @@ int main(int argc, char** argv) {
show_meta();
} else if (FLAGS_operation == "batch_delete_meta") {
std::string tablet_file;
-        Status st = io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
+        Status st =
+                doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
if (!st.ok()) {
std::cout << "invalid tablet file: " << FLAGS_tablet_file
<< ", error: " << st.to_string() << std::endl;
diff --git a/be/src/util/hdfs_util.h b/be/src/util/hdfs_util.h
index f98bdd5ab3..2e56181df7 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/util/hdfs_util.h
@@ -17,13 +17,12 @@
#pragma once
-#include <hdfs/hdfs.h>
-
#include <map>
#include <memory>
#include <string>
#include "common/status.h"
+#include "io/fs/hdfs.h"
#include "io/fs/path.h"
#include "io/hdfs_builder.h"
diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp
index e7a6eb8b8b..955efb78e7 100644
--- a/be/src/util/jni-util.cpp
+++ b/be/src/util/jni-util.cpp
@@ -25,11 +25,14 @@
#include <filesystem>
#include <mutex>
#include <sstream>
+#include <string>
+#include <vector>
#include "common/config.h"
#include "gutil/strings/substitute.h"
-#include "jni_native_method.h"
-#include "libjvm_loader.h"
+#include "util/defer_op.h"
+#include "util/jni_native_method.h"
+#include "util/libjvm_loader.h"
using std::string;
@@ -37,10 +40,10 @@ namespace doris {
namespace {
JavaVM* g_vm;
-std::once_flag g_vm_once;
+[[maybe_unused]] std::once_flag g_vm_once;
const std::string GetDorisJNIClasspath() {
- const auto* classpath = getenv("DORIS_JNI_CLASSPATH_PARAMETER");
+ const auto* classpath = getenv("DORIS_CLASSPATH");
if (classpath) {
return classpath;
} else {
@@ -66,37 +69,47 @@ const std::string GetDorisJNIClasspath() {
}
}
-void FindOrCreateJavaVM() {
+// Only used on non-x86 platform
+[[maybe_unused]] void FindOrCreateJavaVM() {
int num_vms;
- int rv = LibJVMLoader::JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
+ int rv = JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
if (rv == 0) {
- auto classpath = GetDorisJNIClasspath();
-        std::string heap_size = fmt::format("-Xmx{}", config::jvm_max_heap_size);
-        std::string log_path = fmt::format("-DlogPath={}/log/udf-jdbc.log", getenv("DORIS_HOME"));
- std::string jvm_name = fmt::format("-Dsun.java.command={}", "DorisBE");
-
- JavaVMOption options[] = {
- {const_cast<char*>(classpath.c_str()), nullptr},
- {const_cast<char*>(heap_size.c_str()), nullptr},
- {const_cast<char*>(log_path.c_str()), nullptr},
- {const_cast<char*>(jvm_name.c_str()), nullptr},
+ std::vector<std::string> options;
+
+ char* java_opts = getenv("JAVA_OPTS");
+ if (java_opts == nullptr) {
+ options = {
+ GetDorisJNIClasspath(), fmt::format("-Xmx{}", "1g"),
+                    fmt::format("-DlogPath={}/log/jni.log", getenv("DORIS_HOME")),
+ fmt::format("-Dsun.java.command={}", "DorisBE"),
"-XX:-CriticalJNINatives",
#ifdef __APPLE__
-            // On macOS, we should disable MaxFDLimit, otherwise the RLIMIT_NOFILE
-            // will be assigned the minimum of OPEN_MAX (10240) and rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
-            // and it can not pass the check performed by storage engine.
-            // The newer JDK has fixed this issue.
-            {const_cast<char*>("-XX:-MaxFDLimit"), nullptr},
+            // On macOS, we should disable MaxFDLimit, otherwise the RLIMIT_NOFILE
+            // will be assigned the minimum of OPEN_MAX (10240) and rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
+            // and it can not pass the check performed by storage engine.
+            // The newer JDK has fixed this issue.
+                    "-XX:-MaxFDLimit"
#endif
- };
+ };
+ } else {
+ std::istringstream stream(java_opts);
+            options = std::vector<std::string>(std::istream_iterator<std::string> {stream},
+                                               std::istream_iterator<std::string>());
+ options.push_back(GetDorisJNIClasspath());
+ }
+        std::unique_ptr<JavaVMOption[]> jvm_options(new JavaVMOption[options.size()]);
+ for (int i = 0; i < options.size(); ++i) {
+ jvm_options[i] = {const_cast<char*>(options[i].c_str()), nullptr};
+ }
+
JNIEnv* env;
JavaVMInitArgs vm_args;
vm_args.version = JNI_VERSION_1_8;
- vm_args.options = options;
- vm_args.nOptions = sizeof(options) / sizeof(JavaVMOption);
+ vm_args.options = jvm_options.get();
+ vm_args.nOptions = options.size();
// Set it to JNI_FALSE because JNI_TRUE will let JVM ignore the max size config.
vm_args.ignoreUnrecognized = JNI_FALSE;
-        jint res = LibJVMLoader::JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
+ jint res = JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
if (JNI_OK != res) {
DCHECK(false) << "Failed to create JVM, code= " << res;
}
@@ -152,6 +165,7 @@ Status JniLocalFrame::push(JNIEnv* env, int max_local_ref) {
Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
DCHECK(!tls_env_) << "Call GetJNIEnv() fast path";
+#ifdef USE_LIBHDFS3
std::call_once(g_vm_once, FindOrCreateJavaVM);
int rc = g_vm->GetEnv(reinterpret_cast<void**>(&tls_env_), JNI_VERSION_1_8);
if (rc == JNI_EDETACHED) {
@@ -160,6 +174,10 @@ Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
if (rc != 0 || tls_env_ == nullptr) {
return Status::InternalError("Unable to get JVM: {}", rc);
}
+#else
+ // the hadoop libhdfs will do all the stuff
+ tls_env_ = getJNIEnv();
+#endif
*env = tls_env_;
return Status::OK();
}
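For illustration, a standalone sketch of the JAVA_OPTS tokenization used above; each whitespace-separated token becomes one JavaVMOption (the option string is only an example, similar to the default exported by bin/start_be.sh below):

    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        std::string java_opts = "-Xmx1024m -DlogPath=/opt/doris/log/jni.log -Dsun.java.command=DorisBE";
        std::istringstream stream(java_opts);
        std::vector<std::string> options(std::istream_iterator<std::string>{stream},
                                         std::istream_iterator<std::string>());
        for (const auto& opt : options) {
            std::cout << opt << '\n';  // each token would be handed to the JVM as one option
        }
        return 0;
    }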
diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h
index 5aa8be9a1f..ec5f6abf6e 100644
--- a/be/src/util/jni-util.h
+++ b/be/src/util/jni-util.h
@@ -23,6 +23,11 @@
#include "gutil/macros.h"
#include "util/thrift_util.h"
+#ifdef USE_HADOOP_HDFS
+// defined in hadoop_hdfs/hdfs.h
+extern "C" JNIEnv* getJNIEnv(void);
+#endif
+
namespace doris {
#define RETURN_ERROR_IF_EXC(env) \
diff --git a/be/src/util/libjvm_loader.cpp b/be/src/util/libjvm_loader.cpp
index 127d28c2de..6175da6081 100644
--- a/be/src/util/libjvm_loader.cpp
+++ b/be/src/util/libjvm_loader.cpp
@@ -25,6 +25,15 @@
#include "common/status.h"
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetCreatedJavaVMs(JavaVM** vm_buf, jsize bufLen,
+ jsize* numVMs) {
+ return doris::LibJVMLoader::JNI_GetCreatedJavaVMs(vm_buf, bufLen, numVMs);
+}
+
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM** pvm, void** penv, void* args) {
+ return doris::LibJVMLoader::JNI_CreateJavaVM(pvm, penv, args);
+}
+
namespace {
#ifndef __APPLE__
diff --git a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
index 0856687bbf..c954f98b25 100644
--- a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
@@ -24,6 +24,7 @@ void BoolRLEDecoder::set_data(Slice* slice) {
_data = slice;
_num_bytes = slice->size;
_offset = 0;
+
if (_num_bytes < 4) {
LOG(FATAL) << "Received invalid length : " + std::to_string(_num_bytes) +
" (corrupt data page?)";
@@ -62,6 +63,8 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
case ColumnSelectVector::CONTENT: {
bool value; // Can't use uint8_t directly, we should correct it.
for (size_t i = 0; i < run_length; ++i) {
+ DCHECK(_current_value_idx < max_values)
+ << _current_value_idx << " vs. " << max_values;
value = _values[_current_value_idx++];
column_data[data_index++] = (UInt8)value;
}
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
index b74d9c3db0..b08e316c22 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
@@ -233,7 +233,8 @@ void ColumnChunkReader::_reserve_decompress_buf(size_t size) {
Status ColumnChunkReader::skip_values(size_t num_values, bool skip_data) {
if (UNLIKELY(_remaining_num_values < num_values)) {
- return Status::IOError("Skip too many values in current page");
+        return Status::IOError("Skip too many values in current page. {} vs. {}",
+ _remaining_num_values, num_values);
}
_remaining_num_values -= num_values;
if (skip_data) {
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 7204d65114..bbaea90c02 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -20,6 +20,7 @@ set -eo pipefail
curdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+MACHINE_OS=$(uname -s)
if [[ "$(uname -s)" == 'Darwin' ]] && command -v brew &>/dev/null; then
PATH="$(brew --prefix)/opt/gnu-getopt/bin:${PATH}"
export PATH
@@ -70,16 +71,36 @@ if [[ "$(uname -s)" != 'Darwin' ]]; then
fi
fi
-# add libs to CLASSPATH
+# add java libs
for f in "${DORIS_HOME}/lib"/*.jar; do
- if [[ -z "${DORIS_JNI_CLASSPATH_PARAMETER}" ]]; then
- export DORIS_JNI_CLASSPATH_PARAMETER="${f}"
+ if [[ -z "${DORIS_CLASSPATH}" ]]; then
+ export DORIS_CLASSPATH="${f}"
else
-        export DORIS_JNI_CLASSPATH_PARAMETER="${f}:${DORIS_JNI_CLASSPATH_PARAMETER}"
+ export DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
fi
done
-# DORIS_JNI_CLASSPATH_PARAMETER is used to configure additional jar path to jvm. e.g. -Djava.class.path=$DORIS_HOME/lib/java-udf.jar
-export DORIS_JNI_CLASSPATH_PARAMETER="-Djava.class.path=${DORIS_JNI_CLASSPATH_PARAMETER}"
+
+if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+ # add hadoop libs
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+ DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+ DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+ DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+ DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+ done
+fi
+
+# the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs
+# and conf/ dir so that hadoop libhdfs can read .xml config file in conf/
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+# DORIS_CLASSPATH is for self-managed jni
+export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
jdk_version() {
local java_cmd="${1}"
@@ -230,11 +251,28 @@ set_tcmalloc_heap_limit() {
# set_tcmalloc_heap_limit || exit 1
-## set hdfs conf
+## set hdfs3 conf
if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
fi
+if [[ -z ${JAVA_OPTS} ]]; then
+ # set default JAVA_OPTS
+ CUR_DATE=$(date +%Y%m%d-%H%M%S)
+    JAVA_OPTS="-Xmx1024m -DlogPath=${DORIS_HOME}/log/jni.log -Xloggc:${DORIS_HOME}/log/be.gc.log.${CUR_DATE} -Dsun.java.command=DorisBE -XX:-CriticalJNINatives"
+fi
+
+if [[ "${MACHINE_OS}" == "Darwin" ]]; then
+ JAVA_OPTS="${JAVA_OPTS} -XX:-MaxFDLimit"
+fi
+
+# set LIBHDFS_OPTS for hadoop libhdfs
+export LIBHDFS_OPTS="${JAVA_OPTS}"
+
+#echo "CLASSPATH: ${CLASSPATH}"
+#echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
+#echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}"
+
# see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
diff --git a/build.sh b/build.sh
index 8c5f18fa9a..e89d71ca72 100755
--- a/build.sh
+++ b/build.sh
@@ -547,6 +547,10 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
+ if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
+        cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/"
+ fi
+
if [[ "${DISABLE_JAVA_UDF_IN_CONF}" -eq 1 ]]; then
echo -e "\033[33;1mWARNNING: \033[37;1mDisable Java UDF support in be.conf due to the BE was built without Java UDF.\033[0m"
cat >>"${DORIS_OUTPUT}/be/conf/be.conf" <<EOF
diff --git a/conf/be.conf b/conf/be.conf
index 30eee9e088..cc1b8f6c59 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -17,6 +17,9 @@
PPROF_TMPDIR="$DORIS_HOME/log/"
+DATE = `date +%Y%m%d-%H%M%S`
+JAVA_OPTS="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives"
+
# since 1.2, the JAVA_HOME need to be set to run BE process.
# JAVA_HOME=/path/to/jdk/
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
index 1f5aa9bcd8..67b9a16d8e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
@@ -271,7 +271,7 @@ public class BeLoadRebalancer extends Rebalancer {
if (lowBackend == null) {
continue;
}
- if (hosts.contains(lowBackend.getIp())) {
+            if (!Config.allow_replica_on_same_host && hosts.contains(lowBackend.getIp())) {
continue;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]