This is an automated email from the ASF dual-hosted git repository.
wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
The following commit(s) were added to refs/heads/master by this push:
new 817a18d2a feat(rocksdb): support building RocksDB with the HDFS plugin
in the Pegasus server (#2362)
817a18d2a is described below
commit 817a18d2a80e290d4e2fa4f4ada538a9d232258c
Author: Dan Wang <[email protected]>
AuthorDate: Wed Feb 11 11:14:09 2026 +0800
feat(rocksdb): support building RocksDB with the HDFS plugin in the Pegasus
server (#2362)
https://github.com/apache/incubator-pegasus/issues/2361
Enable building RocksDB with the HDFS plugin through the following steps:
1. Introduce the
[rocksdb-hdfs-env](https://github.com/riversand963/rocksdb-hdfs-env)
plugin when building RocksDB as a third-party dependency.
2. Configure the Java and Hadoop environment variables, as well as the
dynamic library search paths, to support compiling and linking the plugin.
3. Apply patches to fix issues encountered during compilation.
---
.../rebuild_thirdparty_if_needed/action.yaml | 9 +++--
cmake_modules/BaseFunctions.cmake | 41 +++++++++++++++++++---
run.sh | 13 +++++++
thirdparty/CMakeLists.txt | 5 ++-
thirdparty/fix_rocksdb-plugin-hdfs.patch | 33 +++++++++++++++++
5 files changed, 93 insertions(+), 8 deletions(-)
diff --git a/.github/actions/rebuild_thirdparty_if_needed/action.yaml
b/.github/actions/rebuild_thirdparty_if_needed/action.yaml
index 9b4e7098a..809e2449e 100644
--- a/.github/actions/rebuild_thirdparty_if_needed/action.yaml
+++ b/.github/actions/rebuild_thirdparty_if_needed/action.yaml
@@ -37,15 +37,18 @@ runs:
# Build third-parties and leave some necessary libraries and source.
run: |
rm -f /root/thirdparties-src.zip
+ ../admin_tools/download_hadoop.sh hadoop-bin
+ rm -rf hadoop-bin/share/doc
+ mv hadoop-bin ..
+ # The RocksDB HDFS plugin (rocksdb-hdfs-env) in thirdparty relies on ${HADOOP_HOME}
+ # environment variable to locate the libraries to link against.
+ export HADOOP_HOME="$(dirname "$(pwd)")"/hadoop-bin
mkdir build
cmake -DCMAKE_BUILD_TYPE=Release -DROCKSDB_PORTABLE=1
-DUSE_JEMALLOC=${USE_JEMALLOC} -DENABLE_ASAN=${ENABLE_ASAN} -B build/
cmake --build build/ -j $(nproc)
rm -rf build/Build build/Download/[a-y]* build/Source/[a-g]*
build/Source/[i-q]* build/Source/[s-z]*
find ./ -name '*CMakeFiles*' -type d -exec rm -rf "{}" +
- ../admin_tools/download_hadoop.sh hadoop-bin
../admin_tools/download_zk.sh zookeeper-bin
- rm -rf hadoop-bin/share/doc
rm -rf zookeeper-bin/docs
- mv hadoop-bin ..
mv zookeeper-bin ..
shell: bash
diff --git a/cmake_modules/BaseFunctions.cmake
b/cmake_modules/BaseFunctions.cmake
index 650aea062..4ad5319b3 100644
--- a/cmake_modules/BaseFunctions.cmake
+++ b/cmake_modules/BaseFunctions.cmake
@@ -325,6 +325,42 @@ function(dsn_setup_include_path)#TODO(huangwei5): remove
this
include_directories(${THIRDPARTY_INSTALL_DIR}/include)
endfunction(dsn_setup_include_path)
+# Verifies that JAVA_HOME points at a JDK that ships the JVM shared library and
+# registers the directories holding the JVM libraries as link search paths.
+#
+# Required variables (the error messages below suggest passing them with -D):
+#   ARCH_TYPE - architecture directory name used in the jre/lib/<ARCH_TYPE> layout.
+#   JAVA_HOME - root directory of the JDK.
+#
+# Two directory layouts are probed, in this order:
+#   ${JAVA_HOME}/lib/server
+#   ${JAVA_HOME}/jre/lib/server (macOS) or ${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server (others)
+# The first one that contains libjvm is the one handed to link_directories(), so the
+# directories given to the linker are always the ones that were validated.
+#
+# Aborts configuration with FATAL_ERROR if a required variable is missing, JAVA_HOME
+# does not exist, or libjvm is not found in any probed layout.
+function(dsn_setup_java_libs)
+    if (NOT DEFINED ARCH_TYPE)
+        message(FATAL_ERROR "ARCH_TYPE is not defined. Please configure with -DARCH_TYPE=...")
+    endif()
+
+    if (NOT DEFINED JAVA_HOME)
+        message(FATAL_ERROR "JAVA_HOME is not defined. Please configure with -DJAVA_HOME=...")
+    endif()
+
+    if (NOT EXISTS "${JAVA_HOME}")
+        message(FATAL_ERROR "JAVA_HOME does not exist: ${JAVA_HOME}")
+    endif()
+
+    message(STATUS "JAVA_HOME = ${JAVA_HOME}")
+
+    # Candidate library roots; libjvm itself is expected in the "server" sub-directory
+    # of the root. The order matches the order of the existence checks.
+    if (APPLE)
+        set(jvm_lib_name "libjvm.dylib")
+        set(jvm_lib_roots "${JAVA_HOME}/lib" "${JAVA_HOME}/jre/lib")
+    else()
+        set(jvm_lib_name "libjvm.so")
+        set(jvm_lib_roots "${JAVA_HOME}/lib" "${JAVA_HOME}/jre/lib/${ARCH_TYPE}")
+    endif()
+
+    # Pick the first layout that actually contains the JVM library.
+    set(jvm_lib_root "")
+    foreach(candidate_root IN LISTS jvm_lib_roots)
+        if (EXISTS "${candidate_root}/server/${jvm_lib_name}")
+            set(jvm_lib_root "${candidate_root}")
+            break()
+        endif()
+    endforeach()
+
+    if ("${jvm_lib_root}" STREQUAL "")
+        message(FATAL_ERROR "${jvm_lib_name} not found under JAVA_HOME: ${JAVA_HOME}")
+    endif()
+
+    # Provide directories to be searched for JVM libraries such as libjvm.so, libjava.so
+    # and libverify.so.
+    #
+    # Currently these directories are used by the RocksDB HDFS plugin (rocksdb-hdfs-env)
+    # in thirdparty to be searched while linking against JVM libraries for JNI.
+    #
+    # Only the layout detected above is registered; previously the jre/lib/${ARCH_TYPE}
+    # directories were added unconditionally, even when libjvm had been found under
+    # ${JAVA_HOME}/lib/server (or on macOS, where no ${ARCH_TYPE} directory is checked).
+    link_directories("${jvm_lib_root}/server")
+    link_directories("${jvm_lib_root}")
+endfunction(dsn_setup_java_libs)
+
function(dsn_setup_thirdparty_libs)
set(BOOST_ROOT ${THIRDPARTY_INSTALL_DIR})
set(Boost_USE_MULTITHREADED ON)
@@ -360,10 +396,7 @@ function(dsn_setup_thirdparty_libs)
endif()
find_package(RocksDB REQUIRED)
- # libhdfs
- find_package(JNI REQUIRED)
- message (STATUS "JAVA_JVM_LIBRARY=${JAVA_JVM_LIBRARY}")
- link_libraries(${JAVA_JVM_LIBRARY})
+ dsn_setup_java_libs()
find_package(OpenSSL REQUIRED)
include_directories(${OPENSSL_INCLUDE_DIR})
diff --git a/run.sh b/run.sh
index 1c7909577..304e171c6 100755
--- a/run.sh
+++ b/run.sh
@@ -276,6 +276,17 @@ function run_build()
echo "Build start time: `date`"
start_time=`date +%s`
+ case "$(uname)" in
+ Darwin)
+ echo "Currently, macOS does not support
${ROOT}/admin_tools/config_hdfs.sh"
+ ;;
+ *)
+ # The RocksDB HDFS plugin (rocksdb-hdfs-env) in thirdparty relies on ${HADOOP_HOME}
+ # environment variable to locate the libraries to link against.
+ source "${ROOT}"/admin_tools/config_hdfs.sh
+ ;;
+ esac
+
if [[ ${SKIP_THIRDPARTY} == "YES" ]]; then
echo "Skip building third-parties..."
else
@@ -301,6 +312,8 @@ function run_build()
fi
CMAKE_OPTIONS="${CMAKE_OPTIONS}
+ -DARCH_TYPE=${ARCH_TYPE}
+ -DJAVA_HOME=${JAVA_HOME}
-DENABLE_GCOV=${ENABLE_GCOV}
-DENABLE_GPERF=${ENABLE_GPERF}
-DBoost_NO_BOOST_CMAKE=ON
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index d139262fa..a76d894e3 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -502,7 +502,7 @@ set(ROCKSDB_OPTIONS
if (NOT APPLE)
set(ROCKSDB_OPTIONS
${ROCKSDB_OPTIONS}
- -DROCKSDB_PLUGINS=encfs)
+ "-DROCKSDB_PLUGINS=encfs hdfs")
endif ()
ExternalProject_Add(rocksdb
URL ${OSS_URL_PREFIX}/rocksdb-v8.5.3.tar.gz
@@ -511,6 +511,9 @@ ExternalProject_Add(rocksdb
PATCH_COMMAND patch -p1 <
${TP_DIR}/fix_rocksdb-cmake-PORTABLE-option.patch
COMMAND rm -rf ${TP_DIR}/build/Source/rocksdb/plugin/encfs
COMMAND git clone -b main --depth=1
https://github.com/pegasus-kv/encfs.git
${TP_DIR}/build/Source/rocksdb/plugin/encfs
+ COMMAND rm -rf ${TP_DIR}/build/Source/rocksdb/plugin/hdfs
+ COMMAND git clone -b master --depth=1
https://github.com/riversand963/rocksdb-hdfs-env.git
${TP_DIR}/build/Source/rocksdb/plugin/hdfs
+ COMMAND cd ${TP_DIR}/build/Source/rocksdb/plugin/hdfs && patch -p1 <
${TP_DIR}/fix_rocksdb-plugin-hdfs.patch
DEPENDS googletest jemalloc lz4 snappy zstd
CMAKE_ARGS ${ROCKSDB_OPTIONS}
DOWNLOAD_EXTRACT_TIMESTAMP true
diff --git a/thirdparty/fix_rocksdb-plugin-hdfs.patch
b/thirdparty/fix_rocksdb-plugin-hdfs.patch
new file mode 100644
index 000000000..a02492fac
--- /dev/null
+++ b/thirdparty/fix_rocksdb-plugin-hdfs.patch
@@ -0,0 +1,33 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index c660747..5fef10e 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -5,6 +5,7 @@ cmake_minimum_required(VERSION 3.4)
+ # Windows is not supported.
+
+ set(hdfs_SOURCES "env_hdfs.cc" "env_hdfs_impl.cc" PARENT_SCOPE)
++set(hdfs_HEADERS "env_hdfs.h" PARENT_SCOPE)
+ set(hdfs_LIBS "hdfs" "dl" "verify" "java" "jvm" PARENT_SCOPE)
+ set(hdfs_INCLUDE_PATHS "$ENV{JAVA_HOME}/include"
"$ENV{JAVA_HOME}/include/linux" "$ENV{HADOOP_HOME}/include" PARENT_SCOPE)
+ set(hdfs_LINK_PATHS "$ENV{JAVA_HOME}/jre/lib/amd64/server"
"$ENV{JAVA_HOME}/jre/lib/amd64" "$ENV{HADOOP_HOME}/lib/native" PARENT_SCOPE)
+diff --git a/env_hdfs_impl.cc b/env_hdfs_impl.cc
+index 01574bc..3927e5d 100644
+--- a/env_hdfs_impl.cc
++++ b/env_hdfs_impl.cc
+@@ -14,6 +14,7 @@
+ #include <iostream>
+ #include <sstream>
+ #include "logging/logging.h"
++#include "port/sys_time.h"
+ #include "rocksdb/status.h"
+ #include "util/string_util.h"
+
+@@ -524,7 +525,7 @@ IOStatus HdfsFileSystem::FileExists(const std::string&
fname,
+ default: // anything else should be an error
+ ROCKS_LOG_FATAL(mylog, "FileExists hdfsExists call failed");
+ return IOStatus::IOError("hdfsExists call failed with error " +
+- ROCKSDB_NAMESPACE::ToString(value) + " on path
" + fname + ".\n");
++ std::to_string(value) + " on path " + fname +
".\n");
+ }
+ }
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]