This is an automated email from the ASF dual-hosted git repository.
weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 241b6447 feat(c++): recover the BUILD_ARROW_FROM_SOURCE option (#621)
241b6447 is described below
commit 241b644783fd4da92dbb75c3977730c4e98df749
Author: Weibin Zeng <[email protected]>
AuthorDate: Tue Sep 10 11:18:53 2024 +0800
feat(c++): recover the BUILD_ARROW_FROM_SOURCE option (#621)
Signed-off-by: acezen <[email protected]>
---
.github/workflows/ci-nightly.yml | 76 +++++++++++++
cpp/CMakeLists.txt | 232 +++++++++++++++++++++++++--------------
cpp/README.md | 10 ++
cpp/cmake/apache-arrow.cmake | 7 +-
cpp/src/graphar/filesystem.cc | 12 +-
5 files changed, 248 insertions(+), 89 deletions(-)
diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml
new file mode 100644
index 00000000..09ae5c6c
--- /dev/null
+++ b/.github/workflows/ci-nightly.yml
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: GraphAr C++ CI Nightly
+
+on:
+ workflow_dispatch:
+ schedule:
+ # The notifications for scheduled workflows are sent to the user who
+ # last modified the cron syntax in the workflow file.
+ # Trigger the workflow at 03:00(CST) every day.
+ - cron: '00 19 * * *'
+jobs:
+ GraphAr-ubuntu-arrow-from-source:
+ if: ${{ github.ref == 'refs/heads/main' && github.repository ==
'apache/incubator-graphar' }}
+ runs-on: ubuntu-latest
+ env:
+ GAR_TEST_DATA: ${{ github.workspace }}/graphar-testing/
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: true
+
+      - name: Cache for ccache
+        uses: actions/cache@v3
+        with:  # keyed on runner.os: this job has no strategy.matrix, so matrix.os would expand to an empty string
+          path: ~/.ccache
+          key: ${{ runner.os }}-build-ccache-${{ hashFiles('**/git-modules.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-build-ccache-
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -y
+ sudo apt-get install -y libboost-graph-dev ccache libcurl4-openssl-dev
+
+ # install Catch2 v3
+ git clone --branch v3.6.0 https://github.com/catchorg/Catch2.git
--depth 1
+ pushd Catch2
+ cmake -Bbuild -H. -DBUILD_TESTING=OFF
+ sudo cmake --build build/ --target install
+ popd
+
+ git clone https://github.com/apache/incubator-graphar-testing.git
$GAR_TEST_DATA --depth 1
+
+ - name: CMake
+ run: |
+ mkdir build
+ pushd build
+ cmake ../cpp -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON
-DBUILD_EXAMPLES=ON -DBUILD_ARROW_FROM_SOURCE=ON
+ popd
+
+ - name: Build GraphAr
+ run: |
+ pushd build
+ make -j$(nproc)
+ make graphar-ccache-stats
+ popd
+ - name: Test
+ run: |
+ cd build
+ ctest --output-on-failure
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ef54cbf7..d162392b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -48,6 +48,7 @@ option(ENABLE_DOCS "Enable documentation" OFF)
option(BUILD_DOCS_ONLY "Build docs only" OFF)
option(USE_STATIC_ARROW "Link arrow static library" OFF)
option(GRAPHAR_BUILD_STATIC "Build GraphAr as static libraries" OFF)
+option(BUILD_ARROW_FROM_SOURCE "Build Arrow from source" OFF)
if (USE_STATIC_ARROW)
set(GRAPHAR_BUILD_STATIC ON)
@@ -176,59 +177,7 @@ macro(install_graphar_target target)
)
endmacro()
-#
------------------------------------------------------------------------------
-# building or find third party library
-#
------------------------------------------------------------------------------
-# check if arrow is installed
-find_package(Arrow QUIET)
-if (NOT ${Arrow_FOUND})
- message(FATAL_ERROR "apache-arrow is required, please install it and
retry")
-endif()
-find_package(ArrowDataset QUIET)
-if (NOT ${ArrowDataset_FOUND})
- message(FATAL_ERROR "apache-arrow-dataset is required, please install it
and retry")
-endif()
-if (${Arrow_VERSION} VERSION_GREATER_EQUAL "12.0.0")
- # ArrowAcero is available in Arrow 12.0.0 and later
- find_package(ArrowAcero QUIET)
- if (NOT ${ArrowAcero_FOUND})
- message(FATAL_ERROR "apache-arrow-acero is required, please install it
and retry")
- endif()
-endif()
-# Check if ORC is enabled.
-if (NOT ${ARROW_ORC})
- message(WARNING "apache-arrow is built without ORC extension, ORC related
functionalities will be disabled.")
-else()
- add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in build
phase on ubuntu.
-endif()
-
-find_package(Parquet QUIET)
-if (NOT ${Parquet_FOUND})
- message(FATAL_ERROR "parquet is required, please install it and retry")
-endif()
-
-find_package(Protobuf REQUIRED)
-include_directories(${PROTOBUF_INCLUDE_DIRS})
-
-macro(get_target_location var target)
- if(TARGET ${target})
- foreach(prop LOCATION LOCATION_NOCONFIG LOCATION_DEBUG LOCATION_RELEASE)
- get_target_property(${var} ${target} ${prop})
- if(NOT ("${${var}}" STREQUAL "${var}-NOTFOUND"))
- break ()
- endif()
- endforeach()
- endif()
-endmacro()
-
-include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)
-include_directories(src)
-
-#
------------------------------------------------------------------------------
-# generate graphar library
-#
------------------------------------------------------------------------------
macro(build_graphar)
-
file(GLOB_RECURSE CORE_SRC_FILES "src/graphar/*.cc"
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml/yaml/*.cpp)
if(GRAPHAR_BUILD_STATIC)
add_library(graphar STATIC ${CORE_SRC_FILES})
@@ -237,9 +186,6 @@ macro(build_graphar)
endif()
install_graphar_target(graphar)
target_compile_features(graphar PRIVATE cxx_std_17)
- target_include_directories(graphar PUBLIC
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
-
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
- )
target_include_directories(graphar PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty)
target_link_libraries(graphar PRIVATE ${CMAKE_DL_LIBS})
@@ -272,7 +218,101 @@ macro(build_graphar)
endif()
endmacro()
-build_graphar()
+macro(build_graphar_with_arrow_bundled)  # defines the graphar target and links the static Arrow libraries named by the GAR_* variables
+  file(GLOB_RECURSE CORE_SRC_FILES "src/graphar/*.cc" ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/mini-yaml/yaml/*.cpp)
+  if(GRAPHAR_BUILD_STATIC)
+    add_library(graphar STATIC ${CORE_SRC_FILES})
+  else()
+    add_library(graphar SHARED ${CORE_SRC_FILES})
+  endif()
+  install_graphar_target(graphar)
+  target_compile_features(graphar PRIVATE cxx_std_17)
+  target_include_directories(graphar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty)
+  target_include_directories(graphar SYSTEM BEFORE PRIVATE ${GAR_ARROW_INCLUDE_DIR})
+  target_link_libraries(graphar PRIVATE ${CMAKE_DL_LIBS})
+
+  if(APPLE)
+    message(STATUS "Linking arrow bundled dependencies ${GAR_PARQUET_STATIC_LIB} ${GAR_ARROW_ACERO_STATIC_LIB}")  # report the same variables that are linked below
+    target_link_libraries(graphar PRIVATE -Wl,-force_load gar_arrow_static
+      "${GAR_PARQUET_STATIC_LIB}"
+      "${GAR_DATASET_STATIC_LIB}"
+      "${GAR_ARROW_ACERO_STATIC_LIB}"
+      "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}"
+      "-framework CoreFoundation"
+      "-framework Security")
+  else()
+    target_link_libraries(graphar PRIVATE -Wl,--exclude-libs,ALL -Wl,--whole-archive gar_arrow_static
+      "${GAR_PARQUET_STATIC_LIB}"
+      "${GAR_DATASET_STATIC_LIB}"
+      "${GAR_ARROW_ACERO_STATIC_LIB}"
+      "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}" -Wl,--no-whole-archive)
+  endif()
+
+  # if OpenSSL library exists, link the OpenSSL library.
+  # OpenSSL has to be linked after GAR_ARROW_BUNDLED_DEPS_STATIC_LIB
+  if(OPENSSL_FOUND)
+    target_link_libraries(graphar PUBLIC OpenSSL::SSL)
+  endif()
+  if (CURL_FOUND)
+    target_link_libraries(graphar PUBLIC ${CURL_LIBRARIES})
+  endif()
+endmacro()
+
+#
------------------------------------------------------------------------------
+# building or find third party library
+#
------------------------------------------------------------------------------
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)
+include_directories(src)
+
+if (BUILD_ARROW_FROM_SOURCE)
+  # the necessary dependencies for building arrow from source
+  find_package(OpenSSL REQUIRED)
+  if(OPENSSL_FOUND)
+    if(OPENSSL_VERSION VERSION_LESS "1.1.0")  # version-aware comparison; plain LESS only compares real numbers
+      message(FATAL_ERROR "The OpenSSL must be greater than or equal to 1.1.0, current version is ${OPENSSL_VERSION}")  # FATAL_ERROR stops configuration; ERROR is not a message() mode
+    endif()
+  endif()
+ find_package(CURL REQUIRED)
+
+ include(apache-arrow)
+ build_arrow()
+ add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in build
phase with not from source.
+ build_graphar_with_arrow_bundled()
+else()
+ # check if arrow is installed
+ find_package(Arrow QUIET)
+ if (NOT ${Arrow_FOUND})
+ message(FATAL_ERROR "apache-arrow is required, please install it and
retry")
+ endif()
+ find_package(ArrowDataset QUIET)
+ if (NOT ${ArrowDataset_FOUND})
+ message(FATAL_ERROR "apache-arrow-dataset is required, please install
it and retry")
+ endif()
+ if (${Arrow_VERSION} VERSION_GREATER_EQUAL "12.0.0")
+ # ArrowAcero is available in Arrow 12.0.0 and later
+ find_package(ArrowAcero QUIET)
+ if (NOT ${ArrowAcero_FOUND})
+ message(FATAL_ERROR "apache-arrow-acero is required, please
install it and retry")
+ endif()
+ endif()
+ # Check if ORC is enabled.
+ if (NOT ${ARROW_ORC})
+ message(WARNING "apache-arrow is built without ORC extension, ORC
related functionalities will be disabled.")
+ else()
+ add_definitions(-DARROW_ORC) # Add macro, otherwise inconsistent in
build phase on ubuntu.
+ endif()
+
+ find_package(Parquet QUIET)
+ if (NOT ${Parquet_FOUND})
+ message(FATAL_ERROR "parquet is required, please install it and retry")
+ endif()
+
+ find_package(Protobuf REQUIRED)
+ include_directories(${PROTOBUF_INCLUDE_DIRS})
+
+ build_graphar()
+endif()
#
------------------------------------------------------------------------------
# build example
@@ -287,29 +327,40 @@ if (BUILD_EXAMPLES)
message(STATUS "Found example - " ${E_NAME})
add_executable(${E_NAME} examples/${E_NAME}.cc)
target_include_directories(${E_NAME} PRIVATE examples
-
${PROJECT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/thirdparty
-
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>
)
target_include_directories(${E_NAME} SYSTEM PRIVATE
${Boost_INCLUDE_DIRS})
target_link_libraries(${E_NAME} PRIVATE graphar ${Boost_LIBRARIES}
${CMAKE_DL_LIBS})
- if(APPLE)
- if(USE_STATIC_ARROW)
- target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load
- Arrow::arrow_static
- Parquet::parquet_static)
+ if (BUILD_ARROW_FROM_SOURCE)
+ target_include_directories(${E_NAME} SYSTEM BEFORE PRIVATE
${GAR_ARROW_INCLUDE_DIR})
+ if (APPLE)
+ target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load
gar_arrow_static
+ "${GAR_PARQUET_STATIC_LIB}"
+ "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
- target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
- Parquet::parquet_shared)
+ target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL
-Wl,--whole-archive gar_arrow_static
+ "${GAR_PARQUET_STATIC_LIB}"
+ "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}"
-Wl,--no-whole-archive)
endif()
else()
- if(USE_STATIC_ARROW)
- target_link_libraries(${E_NAME} PRIVATE -Wl,--exclude-libs,ALL
-Wl,--whole-archive
- Arrow::arrow_static
- Parquet::parquet_static -Wl,--no-whole-archive)
+ if(APPLE)
+ if(USE_STATIC_ARROW)
+ target_link_libraries(${E_NAME} PRIVATE -Wl,-force_load
+ Arrow::arrow_static
+ Parquet::parquet_static)
+ else()
+ target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
+ Parquet::parquet_shared)
+ endif()
else()
- target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
- Parquet::parquet_shared)
+ if(USE_STATIC_ARROW)
+ target_link_libraries(${E_NAME} PRIVATE
-Wl,--exclude-libs,ALL -Wl,--whole-archive
+ Arrow::arrow_static
+ Parquet::parquet_static -Wl,--no-whole-archive)
+ else()
+ target_link_libraries(${E_NAME} PRIVATE Arrow::arrow_shared
+ Parquet::parquet_shared)
+ endif()
endif()
endif()
endforeach()
@@ -363,23 +414,36 @@ if (BUILD_TESTS)
target_compile_features(${target} PRIVATE cxx_std_17)
target_include_directories(${target} PRIVATE
${PROJECT_SOURCE_DIR}/thirdparty)
target_link_libraries(${target} PRIVATE Catch2::Catch2WithMain graphar
${CMAKE_DL_LIBS})
- if(APPLE)
- if(USE_STATIC_ARROW)
- target_link_libraries(${target} PRIVATE -Wl,-force_load
- Arrow::arrow_static
- Parquet::parquet_static)
+ if (BUILD_ARROW_FROM_SOURCE)
+ target_include_directories(${target} SYSTEM BEFORE PRIVATE
${GAR_ARROW_INCLUDE_DIR})
+ if (APPLE)
+ target_link_libraries(${target} PRIVATE -Wl,-force_load
gar_arrow_static
+ "${GAR_PARQUET_STATIC_LIB}"
+ "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}")
else()
- target_link_libraries(${target} PRIVATE Arrow::arrow_shared
- Parquet::parquet_shared)
+ target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL
-Wl,--whole-archive gar_arrow_static
+ "${GAR_PARQUET_STATIC_LIB}"
+ "${GAR_ARROW_BUNDLED_DEPS_STATIC_LIB}"
-Wl,--no-whole-archive)
endif()
else()
- if(USE_STATIC_ARROW)
- target_link_libraries(${target} PRIVATE -Wl,--exclude-libs,ALL
-Wl,--whole-archive
- Arrow::arrow_static
- Parquet::parquet_static -Wl,--no-whole-archive)
+ if(APPLE)
+ if(USE_STATIC_ARROW)
+ target_link_libraries(${target} PRIVATE -Wl,-force_load
+ Arrow::arrow_static
+ Parquet::parquet_static)
+ else()
+ target_link_libraries(${target} PRIVATE Arrow::arrow_shared
+ Parquet::parquet_shared)
+ endif()
else()
- target_link_libraries(${target} PRIVATE Arrow::arrow_shared
- Parquet::parquet_shared)
+ if(USE_STATIC_ARROW)
+ target_link_libraries(${target} PRIVATE
-Wl,--exclude-libs,ALL -Wl,--whole-archive
+ Arrow::arrow_static
+ Parquet::parquet_static -Wl,--no-whole-archive)
+ else()
+ target_link_libraries(${target} PRIVATE Arrow::arrow_shared
+ Parquet::parquet_shared)
+ endif()
endif()
endif()
target_include_directories(${target} PRIVATE
${PROJECT_SOURCE_DIR}/include
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/Catch2/single_include>)
diff --git a/cpp/README.md b/cpp/README.md
index 43e131f4..5bb6b800 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -113,6 +113,16 @@ Extra Build Options:
1. `-DGRAPHAR_BUILD_STATIC=ON`: Build GraphAr as static libraries.
2. `-DUSE_STATIC_ARROW=ON`: Link arrow static library to build GraphAr. If set
this option, the option `GRAPHAR_BUILD_STATIC=ON` will be set.
+### Building with Arrow from source
+In case you want to build GraphAr as a single static library including all dependencies, we include an [apache-arrow.cmake](cmake/apache-arrow.cmake) file that allows you to build Arrow and its dependencies from source and link them statically. To do this, you can follow the steps below:
+
+```bash
+mkdir build-static
+cd build-static
+cmake -DGRAPHAR_BUILD_STATIC=ON -DBUILD_ARROW_FROM_SOURCE=ON ..
+make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all
cores
+```
+
### Install
After the building, you can install the GraphAr C++ library with:
diff --git a/cpp/cmake/apache-arrow.cmake b/cpp/cmake/apache-arrow.cmake
index b3e36f09..2708a4dc 100644
--- a/cpp/cmake/apache-arrow.cmake
+++ b/cpp/cmake/apache-arrow.cmake
@@ -83,6 +83,7 @@ function(build_arrow)
"-DARROW_WITH_RE2=OFF"
"-DARROW_FILESYSTEM=ON"
"-DARROW_CSV=ON"
+ "-DARROW_JSON=ON"
"-DARROW_PYTHON=OFF"
"-DARROW_BUILD_BENCHMAKRS=OFF"
"-DARROW_BUILD_TESTS=OFF"
@@ -107,10 +108,10 @@ function(build_arrow)
find_package(Threads)
find_package(Arrow QUIET)
- set(ARROW_VERSION_TO_BUILD "12.0.0" CACHE INTERNAL "arrow version")
+ set(ARROW_VERSION_TO_BUILD "15.0.0" CACHE INTERNAL "arrow version")
if (Arrow_FOUND) # arrow is installed, build the same version as the
installed one
- message(STATUS "Found Arrow installed, align to version:
${Arrow_VERSION}")
- set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow
version")
+ message(STATUS "Found Arrow installed, align to version:
${Arrow_VERSION}")
+ set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow
version")
endif ()
set(GAR_ARROW_SOURCE_FILE
"https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${ARROW_VERSION_TO_BUILD}/apache-arrow-${ARROW_VERSION_TO_BUILD}.tar.gz")
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index 0ccbc5a9..c871ea4e 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -23,6 +23,9 @@
#include "arrow/api.h"
#include "arrow/csv/api.h"
#include "arrow/dataset/api.h"
+#if defined(ARROW_VERSION) && ARROW_VERSION <= 12000000
+#include "arrow/dataset/file_json.h"
+#endif
#include "arrow/filesystem/api.h"
#include "arrow/filesystem/s3fs.h"
#include "arrow/ipc/writer.h"
@@ -314,8 +317,13 @@ Result<std::shared_ptr<FileSystem>>
FileSystemFromUriOrPath(
}
Status InitializeS3() {
- RETURN_NOT_ARROW_OK(
- arrow::fs::InitializeS3(arrow::fs::S3GlobalOptions::Defaults()));
+#if defined(ARROW_VERSION) && ARROW_VERSION > 12000000
+ auto options = arrow::fs::S3GlobalOptions::Defaults();
+#else
+ arrow::fs::S3GlobalOptions options;
+ options.log_level = arrow::fs::S3LogLevel::Fatal;
+#endif
+ RETURN_NOT_ARROW_OK(arrow::fs::InitializeS3(options));
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]