This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 832b91c2f [VL] Link lib gluten to arrow's static libraries (#6231)
832b91c2f is described below

commit 832b91c2fb3546efb45a8da3d953c35be01fa3d5
Author: PHILO-HE <[email protected]>
AuthorDate: Tue Jul 2 14:11:30 2024 +0800

    [VL] Link lib gluten to arrow's static libraries (#6231)
---
 .../backendsapi/velox/VeloxListenerApi.scala       | 20 +----------
 cpp/CMake/ConfigArrow.cmake                        | 41 +++++++++-------------
 cpp/core/CMakeLists.txt                            |  4 ++-
 cpp/velox/CMakeLists.txt                           |  5 ---
 dev/build_arrow.sh                                 | 17 ++++-----
 dev/builddeps-veloxbe.sh                           |  5 ++-
 ep/build-velox/src/modify_velox.patch              | 23 +++++++++---
 7 files changed, 48 insertions(+), 67 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index 81f06478c..e1abbdd7c 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -134,28 +134,10 @@ class VeloxListenerApi extends ListenerApi {
     ) {
       loadLibFromJar(loader, conf)
     }
-    loader
-      .newTransaction()
-      .loadAndCreateLink(s"libarrow.so.$ARROW_VERSION.0.0", s"libarrow.so.$ARROW_VERSION", false)
-      .loadAndCreateLink(
-        s"libparquet.so.$ARROW_VERSION.0.0",
-        s"libparquet.so.$ARROW_VERSION",
-        false)
-      .commit()
   }
 
   private def loadLibWithMacOS(loader: JniLibLoader): Unit = {
-    loader
-      .newTransaction()
-      .loadAndCreateLink(
-        s"libarrow.$ARROW_VERSION.0.0.dylib",
-        s"libarrow.$ARROW_VERSION.dylib",
-        false)
-      .loadAndCreateLink(
-        s"libparquet.$ARROW_VERSION.0.0.dylib",
-        s"libparquet.$ARROW_VERSION.dylib",
-        false)
-      .commit()
+    // Placeholder for loading shared libs on MacOS if user needs.
   }
 
   private def initialize(conf: SparkConf, isDriver: Boolean): Unit = {
diff --git a/cpp/CMake/ConfigArrow.cmake b/cpp/CMake/ConfigArrow.cmake
index 110836347..e27a3414d 100644
--- a/cpp/CMake/ConfigArrow.cmake
+++ b/cpp/CMake/ConfigArrow.cmake
@@ -15,24 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 
-if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
-  set(ARROW_SHARED_LIBRARY_SUFFIX ".1500.dylib")
-  set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".1500.1.0.dylib")
-else()
-  set(ARROW_SHARED_LIBRARY_SUFFIX ".so.1500")
-  set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".so.1500.1.0")
-endif()
+set(ARROW_STATIC_LIBRARY_SUFFIX ".a")
 
 set(ARROW_LIB_NAME "arrow")
 set(PARQUET_LIB_NAME "parquet")
-set(ARROW_DATASET_LIB_NAME "arrow_dataset")
-set(ARROW_SUBSTRAIT_LIB_NAME "arrow_substrait")
+set(ARROW_BUNDLED_DEPS "arrow_bundled_dependencies")
+
+set(ARROW_INSTALL_DIR "${ARROW_HOME}/install")
+set(ARROW_LIB_DIR "${ARROW_INSTALL_DIR}/lib")
+set(ARROW_LIB64_DIR "${ARROW_INSTALL_DIR}/lib64")
+set(ARROW_INCLUDE_DIR "${ARROW_INSTALL_DIR}/include")
 
 function(FIND_ARROW_LIB LIB_NAME)
   if(NOT TARGET Arrow::${LIB_NAME})
     set(ARROW_LIB_FULL_NAME
-        ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX})
-    add_library(Arrow::${LIB_NAME} SHARED IMPORTED)
+        ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_STATIC_LIBRARY_SUFFIX})
+    add_library(Arrow::${LIB_NAME} STATIC IMPORTED)
     # Firstly find the lib from velox's arrow build path. If not found, try to
     # find it from system.
     find_library(
@@ -41,22 +39,17 @@ function(FIND_ARROW_LIB LIB_NAME)
       PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR})
     if(NOT ARROW_LIB_${LIB_NAME})
       message(FATAL_ERROR "Arrow library Not Found: ${ARROW_LIB_FULL_NAME}")
+    endif()
+    message(STATUS "Found Arrow library: ${ARROW_LIB_${LIB_NAME}}")
+    if(LIB_NAME STREQUAL ${ARROW_BUNDLED_DEPS})
+      set_target_properties(
+        Arrow::${LIB_NAME} PROPERTIES IMPORTED_LOCATION
+                                      ${ARROW_LIB_${LIB_NAME}})
     else()
-      message(STATUS "Found Arrow library: ${ARROW_LIB_${LIB_NAME}}")
       set_target_properties(
         Arrow::${LIB_NAME}
-        PROPERTIES IMPORTED_LOCATION "${ARROW_LIB_${LIB_NAME}}"
-                   INTERFACE_INCLUDE_DIRECTORIES
-                   "${ARROW_HOME}/install/include")
+        PROPERTIES IMPORTED_LOCATION ${ARROW_LIB_${LIB_NAME}}
+                   INTERFACE_INCLUDE_DIRECTORIES ${ARROW_HOME}/install/include)
     endif()
-    file(
-      COPY ${ARROW_LIB_${LIB_NAME}}
-      DESTINATION ${root_directory}/releases/
-      FOLLOW_SYMLINK_CHAIN)
   endif()
 endfunction()
-
-set(ARROW_INSTALL_DIR "${ARROW_HOME}/install")
-set(ARROW_LIB_DIR "${ARROW_INSTALL_DIR}/lib")
-set(ARROW_LIB64_DIR "${ARROW_INSTALL_DIR}/lib64")
-set(ARROW_INCLUDE_DIR "${ARROW_INSTALL_DIR}/include")
diff --git a/cpp/core/CMakeLists.txt b/cpp/core/CMakeLists.txt
index e17d13581..cc5b6c7e9 100644
--- a/cpp/core/CMakeLists.txt
+++ b/cpp/core/CMakeLists.txt
@@ -238,6 +238,7 @@ endif()
 
 find_arrow_lib(${ARROW_LIB_NAME})
 find_arrow_lib(${PARQUET_LIB_NAME})
+find_arrow_lib(${ARROW_BUNDLED_DEPS})
 
 if(ENABLE_HBM)
   include(BuildMemkind)
@@ -314,7 +315,8 @@ else()
   set(LIBHDFS3_DESTINATION ${CMAKE_INSTALL_LIBDIR})
 endif()
 
-target_link_libraries(gluten PUBLIC Arrow::arrow Arrow::parquet)
+target_link_libraries(gluten PUBLIC Arrow::parquet Arrow::arrow
+                                    Arrow::arrow_bundled_dependencies)
 target_link_libraries(gluten PRIVATE google::glog)
 
 install(TARGETS gluten DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index 716a5f68a..b734669b8 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -590,11 +590,6 @@ endif()
 target_link_libraries(velox PUBLIC gluten)
 add_velox_dependencies()
 
-# Arrow libraries appear after Velox dependencies to avoid linker error
-find_arrow_lib(${ARROW_LIB_NAME})
-find_arrow_lib(${PARQUET_LIB_NAME})
-target_link_libraries(velox PUBLIC Arrow::arrow Arrow::parquet)
-
 target_link_libraries(velox PUBLIC Folly::folly)
 find_re2()
 target_link_libraries(velox PUBLIC ${RE2_LIBRARY})
diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh
index a822c4119..897dfcd26 100755
--- a/dev/build_arrow.sh
+++ b/dev/build_arrow.sh
@@ -17,15 +17,13 @@
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 source ${CURRENT_DIR}/build_helper_functions.sh
 VELOX_ARROW_BUILD_VERSION=15.0.0
-ARROW_PREFIX=$CURRENT_DIR/arrow_ep
-# Always uses BUNDLED in case of that thrift is not installed.
-THRIFT_SOURCE="BUNDLED"
+ARROW_PREFIX=$CURRENT_DIR/../ep/_ep/arrow_ep
 BUILD_TYPE=Release
 
 function prepare_arrow_build() {
-  sudo rm -rf arrow_ep/
+  mkdir -p ${ARROW_PREFIX}/../ && cd ${ARROW_PREFIX}/../ && sudo rm -rf arrow_ep/
   wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep
-  cd arrow_ep/
+  cd arrow_ep
   patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch
   patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow_dataset_scan_option.patch
 }
@@ -38,15 +36,14 @@ function install_arrow_deps {
 }
 
 function build_arrow_cpp() {
- if [ -n "$1" ]; then
-   BUILD_TYPE=$1
- fi
  pushd $ARROW_PREFIX/cpp
 
  cmake_install \
        -DARROW_PARQUET=ON \
        -DARROW_FILESYSTEM=ON \
        -DARROW_PROTOBUF_USE_SHARED=OFF \
+       -DARROW_DEPENDENCY_USE_SHARED=OFF \
+       -DARROW_DEPENDENCY_SOURCE=BUNDLED \
        -DARROW_WITH_THRIFT=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
@@ -59,8 +56,8 @@ function build_arrow_cpp() {
        -DARROW_TESTING=ON \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-       -DARROW_BUILD_STATIC=ON \
-       -DThrift_SOURCE=${THRIFT_SOURCE}
+       -DARROW_BUILD_SHARED=OFF \
+       -DARROW_BUILD_STATIC=ON
  popd
 }
 
diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh
index 4e0882a83..6668d0871 100755
--- a/dev/builddeps-veloxbe.sh
+++ b/dev/builddeps-veloxbe.sh
@@ -192,7 +192,7 @@ function build_arrow {
   cd $GLUTEN_DIR/dev
   source build_arrow.sh
   prepare_arrow_build
-  build_arrow_cpp $BUILD_TYPE
+  build_arrow_cpp
   echo "Finished building arrow CPP"
   build_arrow_java
   echo "Finished building arrow Java"
@@ -208,9 +208,8 @@ function build_velox {
                    --num_threads=$NUM_THREADS
 }
 
-## compile gluten cpp
 function build_gluten_cpp {
-  echo "Start to Gluten CPP"
+  echo "Start to build Gluten CPP"
   cd $GLUTEN_DIR/cpp
   rm -rf build
   mkdir build
diff --git a/ep/build-velox/src/modify_velox.patch b/ep/build-velox/src/modify_velox.patch
index cc05d3f91..7e1f19b3c 100644
--- a/ep/build-velox/src/modify_velox.patch
+++ b/ep/build-velox/src/modify_velox.patch
@@ -36,25 +36,38 @@ index d49115f12..1aaa8e532 100644
 +  endif()
  endif()
 diff --git a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
-index 3f01df2fd..8c1c493f3 100644
+index 3f01df2fd..a8da374a2 100644
 --- a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
 +++ b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt
-@@ -24,6 +24,9 @@ if(VELOX_ENABLE_ARROW)
+@@ -23,7 +23,11 @@ if(VELOX_ENABLE_ARROW)
+
    set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep")
    set(ARROW_CMAKE_ARGS
-       -DARROW_PARQUET=OFF
+-      -DARROW_PARQUET=OFF
 +      -DARROW_PARQUET=ON
 +      -DARROW_FILESYSTEM=ON
 +      -DARROW_PROTOBUF_USE_SHARED=OFF
++      -DARROW_DEPENDENCY_USE_SHARED=OFF
++      -DARROW_DEPENDENCY_SOURCE=BUNDLED
        -DARROW_WITH_THRIFT=ON
        -DARROW_WITH_LZ4=ON
        -DARROW_WITH_SNAPPY=ON
-@@ -66,6 +69,8 @@ if(VELOX_ENABLE_ARROW)
+@@ -37,7 +41,7 @@ if(VELOX_ENABLE_ARROW)
+       -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install
+       -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+       -DARROW_BUILD_STATIC=ON
+-      -DThrift_SOURCE=${THRIFT_SOURCE})
++      -DARROW_BUILD_SHARED=OFF)
+   set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR})
+
+   add_library(thrift STATIC IMPORTED GLOBAL)
+@@ -66,6 +70,9 @@ if(VELOX_ENABLE_ARROW)
      arrow_ep
      PREFIX ${ARROW_PREFIX}
      URL ${VELOX_ARROW_SOURCE_URL}
 +    PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow.patch
-+    COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow_dataset_scan_option.patch
++    COMMAND patch -p1 <
++            ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow_dataset_scan_option.patch
      URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM}
      SOURCE_SUBDIR cpp
      CMAKE_ARGS ${ARROW_CMAKE_ARGS}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to