This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 832b91c2f [VL] Link lib gluten to arrow's static libraries (#6231)
832b91c2f is described below
commit 832b91c2fb3546efb45a8da3d953c35be01fa3d5
Author: PHILO-HE <[email protected]>
AuthorDate: Tue Jul 2 14:11:30 2024 +0800
[VL] Link lib gluten to arrow's static libraries (#6231)
---
.../backendsapi/velox/VeloxListenerApi.scala | 20 +----------
cpp/CMake/ConfigArrow.cmake | 41 +++++++++-------------
cpp/core/CMakeLists.txt | 4 ++-
cpp/velox/CMakeLists.txt | 5 ---
dev/build_arrow.sh | 17 ++++-----
dev/builddeps-veloxbe.sh | 5 ++-
ep/build-velox/src/modify_velox.patch | 23 +++++++++---
7 files changed, 48 insertions(+), 67 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index 81f06478c..e1abbdd7c 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -134,28 +134,10 @@ class VeloxListenerApi extends ListenerApi {
) {
loadLibFromJar(loader, conf)
}
- loader
- .newTransaction()
- .loadAndCreateLink(s"libarrow.so.$ARROW_VERSION.0.0", s"libarrow.so.$ARROW_VERSION", false)
- .loadAndCreateLink(
- s"libparquet.so.$ARROW_VERSION.0.0",
- s"libparquet.so.$ARROW_VERSION",
- false)
- .commit()
}
private def loadLibWithMacOS(loader: JniLibLoader): Unit = {
- loader
- .newTransaction()
- .loadAndCreateLink(
- s"libarrow.$ARROW_VERSION.0.0.dylib",
- s"libarrow.$ARROW_VERSION.dylib",
- false)
- .loadAndCreateLink(
- s"libparquet.$ARROW_VERSION.0.0.dylib",
- s"libparquet.$ARROW_VERSION.dylib",
- false)
- .commit()
+ // Placeholder for loading shared libs on MacOS if user needs.
}
private def initialize(conf: SparkConf, isDriver: Boolean): Unit = {
diff --git a/cpp/CMake/ConfigArrow.cmake b/cpp/CMake/ConfigArrow.cmake
index 110836347..e27a3414d 100644
--- a/cpp/CMake/ConfigArrow.cmake
+++ b/cpp/CMake/ConfigArrow.cmake
@@ -15,24 +15,22 @@
# specific language governing permissions and limitations
# under the License.
-if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
- set(ARROW_SHARED_LIBRARY_SUFFIX ".1500.dylib")
- set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".1500.1.0.dylib")
-else()
- set(ARROW_SHARED_LIBRARY_SUFFIX ".so.1500")
- set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".so.1500.1.0")
-endif()
+set(ARROW_STATIC_LIBRARY_SUFFIX ".a")
set(ARROW_LIB_NAME "arrow")
set(PARQUET_LIB_NAME "parquet")
-set(ARROW_DATASET_LIB_NAME "arrow_dataset")
-set(ARROW_SUBSTRAIT_LIB_NAME "arrow_substrait")
+set(ARROW_BUNDLED_DEPS "arrow_bundled_dependencies")
+
+set(ARROW_INSTALL_DIR "${ARROW_HOME}/install")
+set(ARROW_LIB_DIR "${ARROW_INSTALL_DIR}/lib")
+set(ARROW_LIB64_DIR "${ARROW_INSTALL_DIR}/lib64")
+set(ARROW_INCLUDE_DIR "${ARROW_INSTALL_DIR}/include")
function(FIND_ARROW_LIB LIB_NAME)
if(NOT TARGET Arrow::${LIB_NAME})
set(ARROW_LIB_FULL_NAME
- ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX})
- add_library(Arrow::${LIB_NAME} SHARED IMPORTED)
+ ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_STATIC_LIBRARY_SUFFIX})
+ add_library(Arrow::${LIB_NAME} STATIC IMPORTED)
# Firstly find the lib from velox's arrow build path. If not found, try to
# find it from system.
find_library(
@@ -41,22 +39,17 @@ function(FIND_ARROW_LIB LIB_NAME)
PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR})
if(NOT ARROW_LIB_${LIB_NAME})
message(FATAL_ERROR "Arrow library Not Found: ${ARROW_LIB_FULL_NAME}")
+ endif()
+ message(STATUS "Found Arrow library: ${ARROW_LIB_${LIB_NAME}}")
+ if(LIB_NAME STREQUAL ${ARROW_BUNDLED_DEPS})
+ set_target_properties(
+ Arrow::${LIB_NAME} PROPERTIES IMPORTED_LOCATION
+ ${ARROW_LIB_${LIB_NAME}})
else()
- message(STATUS "Found Arrow library: ${ARROW_LIB_${LIB_NAME}}")
set_target_properties(
Arrow::${LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARROW_LIB_${LIB_NAME}}"
- INTERFACE_INCLUDE_DIRECTORIES
- "${ARROW_HOME}/install/include")
+ PROPERTIES IMPORTED_LOCATION ${ARROW_LIB_${LIB_NAME}}
+ INTERFACE_INCLUDE_DIRECTORIES ${ARROW_HOME}/install/include)
endif()
- file(
- COPY ${ARROW_LIB_${LIB_NAME}}
- DESTINATION ${root_directory}/releases/
- FOLLOW_SYMLINK_CHAIN)
endif()
endfunction()
-
-set(ARROW_INSTALL_DIR "${ARROW_HOME}/install")
-set(ARROW_LIB_DIR "${ARROW_INSTALL_DIR}/lib")
-set(ARROW_LIB64_DIR "${ARROW_INSTALL_DIR}/lib64")
-set(ARROW_INCLUDE_DIR "${ARROW_INSTALL_DIR}/include")
diff --git a/cpp/core/CMakeLists.txt b/cpp/core/CMakeLists.txt
index e17d13581..cc5b6c7e9 100644
--- a/cpp/core/CMakeLists.txt
+++ b/cpp/core/CMakeLists.txt
@@ -238,6 +238,7 @@ endif()
find_arrow_lib(${ARROW_LIB_NAME})
find_arrow_lib(${PARQUET_LIB_NAME})
+find_arrow_lib(${ARROW_BUNDLED_DEPS})
if(ENABLE_HBM)
include(BuildMemkind)
@@ -314,7 +315,8 @@ else()
set(LIBHDFS3_DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
-target_link_libraries(gluten PUBLIC Arrow::arrow Arrow::parquet)
+target_link_libraries(gluten PUBLIC Arrow::parquet Arrow::arrow
+ Arrow::arrow_bundled_dependencies)
target_link_libraries(gluten PRIVATE google::glog)
install(TARGETS gluten DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index 716a5f68a..b734669b8 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -590,11 +590,6 @@ endif()
target_link_libraries(velox PUBLIC gluten)
add_velox_dependencies()
-# Arrow libraries appear after Velox dependencies to avoid linker error
-find_arrow_lib(${ARROW_LIB_NAME})
-find_arrow_lib(${PARQUET_LIB_NAME})
-target_link_libraries(velox PUBLIC Arrow::arrow Arrow::parquet)
-
target_link_libraries(velox PUBLIC Folly::folly)
find_re2()
target_link_libraries(velox PUBLIC ${RE2_LIBRARY})
diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh
index a822c4119..897dfcd26 100755
--- a/dev/build_arrow.sh
+++ b/dev/build_arrow.sh
@@ -17,15 +17,13 @@
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
source ${CURRENT_DIR}/build_helper_functions.sh
VELOX_ARROW_BUILD_VERSION=15.0.0
-ARROW_PREFIX=$CURRENT_DIR/arrow_ep
-# Always uses BUNDLED in case of that thrift is not installed.
-THRIFT_SOURCE="BUNDLED"
+ARROW_PREFIX=$CURRENT_DIR/../ep/_ep/arrow_ep
BUILD_TYPE=Release
function prepare_arrow_build() {
- sudo rm -rf arrow_ep/
+ mkdir -p ${ARROW_PREFIX}/../ && cd ${ARROW_PREFIX}/../ && sudo rm -rf arrow_ep/
wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep
- cd arrow_ep/
+ cd arrow_ep
patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch
patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow_dataset_scan_option.patch
}
@@ -38,15 +36,14 @@ function install_arrow_deps {
}
function build_arrow_cpp() {
- if [ -n "$1" ]; then
- BUILD_TYPE=$1
- fi
pushd $ARROW_PREFIX/cpp
cmake_install \
-DARROW_PARQUET=ON \
-DARROW_FILESYSTEM=ON \
-DARROW_PROTOBUF_USE_SHARED=OFF \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_DEPENDENCY_SOURCE=BUNDLED \
-DARROW_WITH_THRIFT=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
@@ -59,8 +56,8 @@ function build_arrow_cpp() {
-DARROW_TESTING=ON \
-DCMAKE_INSTALL_PREFIX=/usr/local \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
- -DARROW_BUILD_STATIC=ON \
- -DThrift_SOURCE=${THRIFT_SOURCE}
+ -DARROW_BUILD_SHARED=OFF \
+ -DARROW_BUILD_STATIC=ON
popd
}
diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh
index 4e0882a83..6668d0871 100755
--- a/dev/builddeps-veloxbe.sh
+++ b/dev/builddeps-veloxbe.sh
@@ -192,7 +192,7 @@ function build_arrow {
cd $GLUTEN_DIR/dev
source build_arrow.sh
prepare_arrow_build
- build_arrow_cpp $BUILD_TYPE
+ build_arrow_cpp
echo "Finished building arrow CPP"
build_arrow_java
echo "Finished building arrow Java"
@@ -208,9 +208,8 @@ function build_velox {
--num_threads=$NUM_THREADS
}
-## compile gluten cpp
function build_gluten_cpp {
- echo "Start to Gluten CPP"
+ echo "Start to build Gluten CPP"
cd $GLUTEN_DIR/cpp
rm -rf build
mkdir build
diff --git a/ep/build-velox/src/modify_velox.patch b/ep/build-velox/src/modify_velox.patch
index cc05d3f91..7e1f19b3c 100644
--- a/ep/build-velox/src/modify_velox.patch
+++ b/ep/build-velox/src/modify_velox.patch
@@ -36,25 +36,38 @@ index d49115f12..1aaa8e532 100644
+ endif()
endif()
diff --git a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
-index 3f01df2fd..8c1c493f3 100644
+index 3f01df2fd..a8da374a2 100644
--- a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
+++ b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
-@@ -24,6 +24,9 @@ if(VELOX_ENABLE_ARROW)
+@@ -23,7 +23,11 @@ if(VELOX_ENABLE_ARROW)
+
set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep")
set(ARROW_CMAKE_ARGS
- -DARROW_PARQUET=OFF
+- -DARROW_PARQUET=OFF
+ -DARROW_PARQUET=ON
+ -DARROW_FILESYSTEM=ON
+ -DARROW_PROTOBUF_USE_SHARED=OFF
++ -DARROW_DEPENDENCY_USE_SHARED=OFF
++ -DARROW_DEPENDENCY_SOURCE=BUNDLED
-DARROW_WITH_THRIFT=ON
-DARROW_WITH_LZ4=ON
-DARROW_WITH_SNAPPY=ON
-@@ -66,6 +69,8 @@ if(VELOX_ENABLE_ARROW)
+@@ -37,7 +41,7 @@ if(VELOX_ENABLE_ARROW)
+ -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DARROW_BUILD_STATIC=ON
+- -DThrift_SOURCE=${THRIFT_SOURCE})
++ -DARROW_BUILD_SHARED=OFF)
+ set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR})
+
+ add_library(thrift STATIC IMPORTED GLOBAL)
+@@ -66,6 +70,9 @@ if(VELOX_ENABLE_ARROW)
arrow_ep
PREFIX ${ARROW_PREFIX}
URL ${VELOX_ARROW_SOURCE_URL}
+ PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow.patch
-+ COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow_dataset_scan_option.patch
++ COMMAND patch -p1 <
++ ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow_dataset_scan_option.patch
URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM}
SOURCE_SUBDIR cpp
CMAKE_ARGS ${ARROW_CMAKE_ARGS}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]