This is an automated email from the ASF dual-hosted git repository.

weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new 241b6447 feat(c++): recover the BUILD_ARROW_FROM_SOURCE option (#621)
241b6447 is described below

commit 241b644783fd4da92dbb75c3977730c4e98df749
Author: Weibin Zeng <[email protected]>
AuthorDate: Tue Sep 10 11:18:53 2024 +0800

    feat(c++): recover the BUILD_ARROW_FROM_SOURCE option (#621)
    
    
    Signed-off-by: acezen <[email protected]>
---
 .github/workflows/ci-nightly.yml |  76 +++++++++++++
 cpp/CMakeLists.txt               | 232 +++++++++++++++++++++++++--------------
 cpp/README.md                    |  10 ++
 cpp/cmake/apache-arrow.cmake     |   7 +-
 cpp/src/graphar/filesystem.cc    |  12 +-
 5 files changed, 248 insertions(+), 89 deletions(-)

diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml
new file mode 100644
index 00000000..09ae5c6c
--- /dev/null
+++ b/.github/workflows/ci-nightly.yml
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: GraphAr C++ CI Nightly
+
+on:
+  workflow_dispatch:
+  schedule:
+    # The notifications for scheduled workflows are sent to the user who
+    # last modified the cron syntax in the workflow file.
+    # Trigger the workflow at 03:00(CST) every day.
+    - cron:  '00 19 * * *'
+jobs:
+  GraphAr-ubuntu-arrow-from-source:
+    if: ${{ github.ref == 'refs/heads/main' && github.repository == 
'apache/incubator-graphar' }}
+    runs-on: ubuntu-latest
+    env:
+      GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
+    steps:
+    - uses: actions/checkout@v4
+      with:
+          submodules: true
+
+    # Cache ~/.ccache across workflow runs.
+    # This job declares no build matrix, so the cache key is derived from
+    # `runner.os`; the `matrix` context would expand to an empty string here.
+    - name: Cache for ccache
+      uses: actions/cache@v3
+      with:
+        path: ~/.ccache
+        key: ${{ runner.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-build-ccache-
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update -y
+        sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
+
+        # install Catch2 v3
+        git clone --branch v3.6.0 https://github.com/catchorg/Catch2.git 
--depth 1
+        pushd Catch2
+        cmake -Bbuild -H. -DBUILD_TESTING=OFF
+        sudo cmake --build build/ --target install
+        popd
+
+        git clone https://github.com/apache/incubator-graphar-testing.git 
$GAR_TEST_DATA --depth 1
+
+    - name: CMake
+      run: |
+        mkdir build
+        pushd build
+        cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON 
-DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON
+        popd
+
+    - name: Build GraphAr
+      run: |
+        pushd build
+        make -j$(nproc)
+        make graphar-ccache-stats
+        popd
+    - name: Test
+      run: |
+        cd build
+        ctest --output-on-failure
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ef54cbf7..d162392b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -48,6 +48,7 @@ option(ENABLE_DOCS "Enable documentation" OFF)
 option(BUILD_DOCS_ONLY "Build docs only" OFF)
 option(USE_STATIC_ARROW "Link arrow static library" OFF)
 option(GRAPHAR_BUILD_STATIC "Build GraphAr as static libraries" OFF)
+option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source" OFF)
 
 if (USE_STATIC_ARROW)
     set(GRAPHAR_BUILD_STATIC ON)
@@ -176,59 +177,7 @@ macro(install_graphar_target target)
   )
 endmacro()
 
-# 
------------------------------------------------------------------------------
-# building or find third party library
-# 
------------------------------------------------------------------------------
-# check if arrow is installed
-find_package(Arrow QUIET)
-if (NOT ${Arrow_FOUND})
-    message(FATAL_ERROR "apache-arrow is required, please install it and 
retry")
-endif()
-find_package(ArrowDataset QUIET)
-if (NOT ${ArrowDataset_FOUND})
-    message(FATAL_ERROR "apache-arrow-dataset is required, please install it 
and retry")
-endif()
-if (${Arrow_VERSION} VERSION_GREATER_EQUAL "12.0.0")
-    # ArrowAcero is available in Arrow 12.0.0 and later
-    find_package(ArrowAcero QUIET)
-    if (NOT ${ArrowAcero_FOUND})
-        message(FATAL_ERROR "apache-arrow-acero is required, please install it 
and retry")
-    endif()
-endif()
-# Check if ORC is enabled.
-if (NOT ${ARROW_ORC})
-    message(WARNING "apache-arrow is built without ORC extension, ORC related 
functionalities will be disabled.")
-else()
-    add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in build 
phase on ubuntu.
-endif()
-
-find_package(Parquet QUIET)
-if (NOT ${Parquet_FOUND})
-    message(FATAL_ERROR "parquet is required, please install it and retry")
-endif()
-
-find_package(Protobuf REQUIRED)
-include_directories(${PROTOBUF_INCLUDE_DIRS})
-
-macro(get_target_location var target)
-  if(TARGET ${target})
-    foreach(prop LOCATION LOCATION_NOCONFIG LOCATION_DEBUG LOCATION_RELEASE)
-      get_target_property(${var} ${target} ${prop})
-      if(NOT ("${${var}}" STREQUAL "${var}-NOTFOUND"))
-        break ()
-      endif()
-    endforeach()
-  endif()
-endmacro()
-
-include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)
-include_directories(src)
-
-# 
------------------------------------------------------------------------------
-# generate graphar library
-# 
------------------------------------------------------------------------------
 macro(build_graphar)
-
     file(GLOB_RECURSE CORE_SRC_FILES "src/graphar/*.cc" 
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml/yaml/*.cpp)
     if(GRAPHAR_BUILD_STATIC)
         add_library(graphar STATIC ${CORE_SRC_FILES})
@@ -237,9 +186,6 @@ macro(build_graphar)
     endif()
     install_graphar_target(graphar)
     target_compile_features(graphar PRIVATE cxx_std_17)
-    target_include_directories(graphar PUBLIC 
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
-                                          
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
-    )
     target_include_directories(graphar PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty)
     target_link_libraries(graphar PRIVATE ${CMAKE_DL_LIBS})
 
@@ -272,7 +218,101 @@ macro(build_graphar)
     endif()
 endmacro()
 
-build_graphar()
+# Define the `graphar` library target and link it against the Arrow static
+# archives referenced by the GAR_* variables (gar_arrow_static, Parquet,
+# Dataset, Acero and the bundled third-party dependencies).
+# Reads: GRAPHAR_BUILD_STATIC, GAR_ARROW_INCLUDE_DIR, GAR_*_STATIC_LIB,
+#        OPENSSL_FOUND, CURL_FOUND, CURL_LIBRARIES.
+macro(build_graphar_with_arrow_bundled)
+    file(GLOB_RECURSE CORE_SRC_FILES "src/graphar/*.cc" ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml/yaml/*.cpp)
+    if(GRAPHAR_BUILD_STATIC)
+        add_library(graphar STATIC ${CORE_SRC_FILES})
+    else()
+        add_library(graphar SHARED ${CORE_SRC_FILES})
+    endif()
+    install_graphar_target(graphar)
+    target_compile_features(graphar PRIVATE cxx_std_17)
+    target_include_directories(graphar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty)
+    # Put the Arrow headers first on the include path of this target.
+    target_include_directories(graphar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
+    target_link_libraries(graphar PRIVATE ${CMAKE_DL_LIBS})
+
+    if(APPLE)
+        # Log the same Acero variable that is linked below.
+        message(STATUS "Linking arrow bundled dependencies ${GAR_PARQUET_STATIC_LIB} ${GAR_ARROW_ACERO_STATIC_LIB}")
+        target_link_libraries(graphar PRIVATE -Wl,-force_load gar_arrow_static
+                "${GAR_PARQUET_STATIC_LIB}"
+                "${GAR_DATASET_STATIC_LIB}"
+                "${GAR_ARROW_ACERO_STATIC_LIB}"
+                "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}"
+                "-framework CoreFoundation"
+                "-framework Security")
+    else()
+        target_link_libraries(graphar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
+                "${GAR_PARQUET_STATIC_LIB}"
+                "${GAR_DATASET_STATIC_LIB}"
+                "${GAR_ARROW_ACERO_STATIC_LIB}"
+                "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
+    endif()
+
+    # if OpenSSL library exists, link the OpenSSL library.
+    # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB
+    if(OPENSSL_FOUND)
+        target_link_libraries(graphar PUBLIC OpenSSL::SSL)
+    endif()
+    if (CURL_FOUND)
+        target_link_libraries(graphar PUBLIC ${CURL_LIBRARIES})
+    endif()
+endmacro()
+
+# 
------------------------------------------------------------------------------
+# building or find third party library
+# 
------------------------------------------------------------------------------
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)
+include_directories(src)
+
+# Either build Arrow from source and link it into graphar, or locate an
+# installed Arrow (plus Dataset/Acero/Parquet/Protobuf) and build against it.
+if (BUILD_ARROW_FROM_SOURCE)
+    # the necessary dependencies for building arrow from source
+    # (REQUIRED already aborts configuration when OpenSSL is missing)
+    find_package(OpenSSL REQUIRED)
+    # OPENSSL_VERSION is a dotted version string: it has to be compared with
+    # VERSION_LESS (numeric LESS never matches it), and a too-old OpenSSL
+    # must stop configuration, hence FATAL_ERROR.
+    if(OPENSSL_VERSION VERSION_LESS "1.1.0")
+        message(FATAL_ERROR "The OpenSSL must be greater than or equal to 1.1.0, current version is  ${OPENSSL_VERSION}")
+    endif()
+    find_package(CURL REQUIRED)
+
+    include(apache-arrow)
+    build_arrow()
+    add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in build phase with not from source.
+    build_graphar_with_arrow_bundled()
+else()
+    # check if arrow is installed
+    find_package(Arrow QUIET)
+    if (NOT ${Arrow_FOUND})
+        message(FATAL_ERROR "apache-arrow is required, please install it and retry")
+    endif()
+    find_package(ArrowDataset QUIET)
+    if (NOT ${ArrowDataset_FOUND})
+        message(FATAL_ERROR "apache-arrow-dataset is required, please install it and retry")
+    endif()
+    if (${Arrow_VERSION} VERSION_GREATER_EQUAL "12.0.0")
+        # ArrowAcero is available in Arrow 12.0.0 and later
+        find_package(ArrowAcero QUIET)
+        if (NOT ${ArrowAcero_FOUND})
+            message(FATAL_ERROR "apache-arrow-acero is required, please install it and retry")
+        endif()
+    endif()
+    # Check if ORC is enabled.
+    if (NOT ${ARROW_ORC})
+        message(WARNING "apache-arrow is built without ORC extension, ORC related functionalities will be disabled.")
+    else()
+        add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in build phase on ubuntu.
+    endif()
+
+    find_package(Parquet QUIET)
+    if (NOT ${Parquet_FOUND})
+        message(FATAL_ERROR "parquet is required, please install it and retry")
+    endif()
+
+    find_package(Protobuf REQUIRED)
+    include_directories(${PROTOBUF_INCLUDE_DIRS})
+
+    build_graphar()
+endif()
 
 # 
------------------------------------------------------------------------------
 # build example
@@ -287,29 +327,40 @@ if (BUILD_EXAMPLES)
         message(STATUS "Found example - " ${E_NAME})
         add_executable(${E_NAME} examples/${E_NAME}.cc)
         target_include_directories(${E_NAME} PRIVATE examples
-                                                     
${PROJECT_SOURCE_DIR}/include
                                                      
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty
-                                                     
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>
         )
         target_include_directories(${E_NAME} SYSTEM PRIVATE 
${Boost_INCLUDE_DIRS})
         target_link_libraries(${E_NAME} PRIVATE graphar ${Boost_LIBRARIES} 
${CMAKE_DL_LIBS})
-        if(APPLE)
-            if(USE_STATIC_ARROW)
-                target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load
-                    Arrow::arrow_static
-                    Parquet::parquet_static)
+        if (BUILD_ARROW_FROM_SOURCE)
+            target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE 
${GAR_ARROW_INCLUDE_DIR})
+            if (APPLE)
+                target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load 
gar_arrow_static
+                    "${GAR_PARQUET_STATIC_LIB}"
+                    "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
             else()
-                target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
-                    Parquet::parquet_shared)
+                target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL 
-Wl,--whole-archive gar_arrow_static
+                    "${GAR_PARQUET_STATIC_LIB}"
+                    "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" 
-Wl,--no-whole-archive) 
             endif()
         else()
-            if(USE_STATIC_ARROW)
-                target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL 
-Wl,--whole-archive 
-                    Arrow::arrow_static 
-                    Parquet::parquet_static -Wl,--no-whole-archive)
+            if(APPLE)
+                if(USE_STATIC_ARROW)
+                    target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load
+                        Arrow::arrow_static
+                        Parquet::parquet_static)
+                else()
+                    target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
+                        Parquet::parquet_shared)
+                endif()
             else()
-                target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
-                    Parquet::parquet_shared)
+                if(USE_STATIC_ARROW)
+                    target_link_libraries(${E_NAME} PRIVATE 
-Wl,--exclude-libs,ALL -Wl,--whole-archive 
+                        Arrow::arrow_static 
+                        Parquet::parquet_static -Wl,--no-whole-archive)
+                else()
+                    target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
+                        Parquet::parquet_shared)
+                endif()
             endif()
         endif()
     endforeach()
@@ -363,23 +414,36 @@ if (BUILD_TESTS)
         target_compile_features(${target} PRIVATE cxx_std_17)
         target_include_directories(${target} PRIVATE 
${PROJECT_SOURCE_DIR}/thirdparty)
         target_link_libraries(${target} PRIVATE Catch2::Catch2WithMain graphar 
${CMAKE_DL_LIBS})
-        if(APPLE)
-            if(USE_STATIC_ARROW)
-                target_link_libraries(${target} PRIVATE -Wl,-force_load 
-                    Arrow::arrow_static
-                    Parquet::parquet_static)
+        if (BUILD_ARROW_FROM_SOURCE)
+            target_include_directories(${target} SYSTEM BEFORE PRIVATE 
${GAR_ARROW_INCLUDE_DIR})
+            if (APPLE)
+                target_link_libraries(${target} PRIVATE -Wl,-force_load 
gar_arrow_static
+                    "${GAR_PARQUET_STATIC_LIB}"
+                    "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
             else()
-                target_link_libraries(${target} PRIVATE Arrow::arrow_shared
-                    Parquet::parquet_shared)
+                target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL 
-Wl,--whole-archive gar_arrow_static
+                    "${GAR_PARQUET_STATIC_LIB}"
+                    "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" 
-Wl,--no-whole-archive) 
             endif()
         else()
-            if(USE_STATIC_ARROW)
-                target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL 
-Wl,--whole-archive
-                    Arrow::arrow_static
-                    Parquet::parquet_static -Wl,--no-whole-archive)
+            if(APPLE)
+                if(USE_STATIC_ARROW)
+                    target_link_libraries(${target} PRIVATE -Wl,-force_load 
+                        Arrow::arrow_static
+                        Parquet::parquet_static)
+                else()
+                    target_link_libraries(${target} PRIVATE Arrow::arrow_shared
+                        Parquet::parquet_shared)
+                endif()
             else()
-                target_link_libraries(${target} PRIVATE Arrow::arrow_shared
-                    Parquet::parquet_shared)
+                if(USE_STATIC_ARROW)
+                    target_link_libraries(${target} PRIVATE 
-Wl,--exclude-libs,ALL -Wl,--whole-archive
+                        Arrow::arrow_static
+                        Parquet::parquet_static -Wl,--no-whole-archive)
+                else()
+                    target_link_libraries(${target} PRIVATE Arrow::arrow_shared
+                        Parquet::parquet_shared)
+                endif()
             endif()
         endif()
         target_include_directories(${target} PRIVATE 
${PROJECT_SOURCE_DIR}/include 
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
diff --git a/cpp/README.md b/cpp/README.md
index 43e131f4..5bb6b800 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -113,6 +113,16 @@ Extra Build Options:
 1. `-DGRAPHAR_BUILD_STATIC=ON`: Build GraphAr as static libraries.
 2. `-DUSE_STATIC_ARROW=ON`: Link arrow static library to build GraphAr. If set 
this option, the option `GRAPHAR_BUILD_STATIC=ON` will be set.
 
+### Building with Arrow from source
+In case you want to build GraphAr as a single static library including all dependencies, we include an [apache-arrow.cmake](cmake/apache-arrow.cmake) file that allows you to build Arrow and its dependencies from source and link them statically. To do this, you can follow the steps below:
+
+```bash
+mkdir build-static
+cd build-static
+cmake -DGRAPHAR_BUILD_STATIC=ON -DBUILD_ARROW_FROM_SOURCE=ON ..
+make -j8    # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all 
cores
+```
+
 ### Install
 
 After the building, you can install the GraphAr C++ library with:
diff --git a/cpp/cmake/apache-arrow.cmake b/cpp/cmake/apache-arrow.cmake
index b3e36f09..2708a4dc 100644
--- a/cpp/cmake/apache-arrow.cmake
+++ b/cpp/cmake/apache-arrow.cmake
@@ -83,6 +83,7 @@ function(build_arrow)
                              "-DARROW_WITH_RE2=OFF"
                              "-DARROW_FILESYSTEM=ON"
                              "-DARROW_CSV=ON"
+                             "-DARROW_JSON=ON"
                              "-DARROW_PYTHON=OFF"
                              "-DARROW_BUILD_BENCHMAKRS=OFF"
                              "-DARROW_BUILD_TESTS=OFF"
@@ -107,10 +108,10 @@ function(build_arrow)
 
     find_package(Threads)
     find_package(Arrow QUIET)
-    set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version")
+    set(ARROW_VERSION_TO_BUILD "15.0.0" CACHE INTERNAL "arrow version")
     if (Arrow_FOUND) # arrow is installed, build the same version as the 
installed one
-        message(STATUS "Found Arrow installed, align to version: 
${Arrow_VERSION}")
-        set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow 
version")
+       message(STATUS "Found Arrow installed, align to version: 
${Arrow_VERSION}")
+       set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow 
version")
     endif ()
     set(GAR_ARROW_SOURCE_FILE 
"https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${ARROW_VERSION_TO_BUILD}/apache-arrow-${ARROW_VERSION_TO_BUILD}.tar.gz";)
 
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index 0ccbc5a9..c871ea4e 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -23,6 +23,9 @@
 #include "arrow/api.h"
 #include "arrow/csv/api.h"
 #include "arrow/dataset/api.h"
+#if defined(ARROW_VERSION) && ARROW_VERSION <= 12000000
+#include "arrow/dataset/file_json.h"
+#endif
 #include "arrow/filesystem/api.h"
 #include "arrow/filesystem/s3fs.h"
 #include "arrow/ipc/writer.h"
@@ -314,8 +317,13 @@ Result<std::shared_ptr<FileSystem>> 
FileSystemFromUriOrPath(
 }
 
 Status InitializeS3() {
-  RETURN_NOT_ARROW_OK(
-      arrow::fs::InitializeS3(arrow::fs::S3GlobalOptions::Defaults()));
+#if defined(ARROW_VERSION) && ARROW_VERSION > 12000000
+  auto options = arrow::fs::S3GlobalOptions::Defaults();
+#else
+  arrow::fs::S3GlobalOptions options;
+  options.log_level = arrow::fs::S3LogLevel::Fatal;
+#endif
+  RETURN_NOT_ARROW_OK(arrow::fs::InitializeS3(options));
   return Status::OK();
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to