This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 48de821 ARROW-3846: [Gandiva][C++] Build Gandiva C++ libraries and get unit tests passing on Windows 48de821 is described below commit 48de821ea79bdf4d0480a2f6b377300ddc5bbd9a Author: Wes McKinney <wesm+...@apache.org> AuthorDate: Thu Jan 31 23:04:59 2019 -0600 ARROW-3846: [Gandiva][C++] Build Gandiva C++ libraries and get unit tests passing on Windows The tests pass cleanly for me on Windows with these changes. Can't say this was the most enjoyable project. I will fix up the CI but wanted to get eyes on the changes in case anything looks undesirable. Some notes: * This requires that LLVM was built with CMake, which is not true of the Windows installers from llvm.org. It works for me if I `conda install llvmdev=6.0.1 clangdev=6.0.1` * I had to suppress a ton of compiler warnings that seem to be coming from the LLVM headers. See gandiva/llvm_includes.h * Unix targets are still using strptime for date parsing. We might want to use `arrow/vendored/datetime.h` on all platforms unless there is some significant performance difference * This doesn't build in Appveyor yet. 
I wanted to wait to see which builds folks think we should add this to (build times will go up because installing llvmdev/clangdev will be time consuming) * Python or other bindings will have to be investigated in a separate patch Author: Wes McKinney <wesm+...@apache.org> Closes #3295 from wesm/gandiva-windows and squashes the following commits: 1fb56714 <Wes McKinney> Add missing file 1e0fdbc8 <Wes McKinney> Revert Flatbuffers changes that are causing CI flakiness 5fb78bc4 <Wes McKinney> Bump flatbuffers version to v1.10.0 707e9d9f <Wes McKinney> Visibility fixes for windows with the cast_time changes fac669e0 <Wes McKinney> Rebase, remove failing unit test f3b60bf3 <Wes McKinney> Some basic fixes to build system, clang IR generation, suppress endogenous LLVM warnings with MSVC 2017 --- appveyor.yml | 1 + ci/appveyor-cpp-build.bat | 7 +++ ci/cpp-msvc-build-main.bat | 1 + cpp/CMakeLists.txt | 6 ++- cpp/cmake_modules/FindProtobuf.cmake | 9 ++-- cpp/cmake_modules/FindRE2.cmake | 8 ++- cpp/cmake_modules/ThirdpartyToolchain.cmake | 24 ++++----- cpp/src/arrow/util/bit-util-test.cc | 24 +++++++++ cpp/src/arrow/util/bit-util.h | 51 ++++++++++++++++++ cpp/src/arrow/util/parsing.h | 2 +- .../vendored/datetime.h} | 20 ++----- cpp/src/arrow/vendored/datetime/ios.h | 4 +- cpp/src/arrow/vendored/datetime/tz.cpp | 3 ++ .../vendored/datetime/visibility.h} | 25 +++------ cpp/src/gandiva/CMakeLists.txt | 23 +++++++- cpp/src/gandiva/annotator.h | 3 +- cpp/src/gandiva/bitmap_accumulator.h | 3 +- cpp/src/gandiva/bitmap_accumulator_test.cc | 16 +++--- cpp/src/gandiva/cast_time.cc | 5 +- cpp/src/gandiva/compiled_expr.h | 2 +- cpp/src/gandiva/configuration.h | 11 ++-- cpp/src/gandiva/date_utils.cc | 3 +- cpp/src/gandiva/date_utils.h | 61 +++++++++++++++++++++- cpp/src/gandiva/decimal_type_util.cc | 5 -- cpp/src/gandiva/decimal_type_util.h | 26 +++++---- cpp/src/gandiva/dex.h | 35 +++++++------ cpp/src/gandiva/dex_visitor.h | 5 +- cpp/src/gandiva/engine.cc | 14 +++++ 
cpp/src/gandiva/engine.h | 9 ++-- cpp/src/gandiva/expr_decomposer.cc | 6 +-- cpp/src/gandiva/expr_decomposer.h | 8 ++- cpp/src/gandiva/expression.h | 3 +- cpp/src/gandiva/expression_registry.h | 5 +- cpp/src/gandiva/filter.cc | 2 + cpp/src/gandiva/filter.h | 12 ++--- cpp/src/gandiva/func_descriptor.h | 3 +- cpp/src/gandiva/function_holder.h | 4 +- cpp/src/gandiva/function_registry.h | 3 +- cpp/src/gandiva/function_signature.h | 3 +- cpp/src/gandiva/like_holder.h | 5 +- cpp/src/gandiva/llvm_generator.h | 3 +- .../gandiva/{function_holder.h => llvm_includes.h} | 32 ++++++------ cpp/src/gandiva/llvm_types.h | 6 +-- cpp/src/gandiva/lvalue.h | 7 +-- cpp/src/gandiva/native_function.h | 3 +- cpp/src/gandiva/node.h | 13 ++--- cpp/src/gandiva/node_visitor.h | 3 +- cpp/src/gandiva/precompiled/CMakeLists.txt | 33 +++++++++--- .../gandiva/precompiled/epoch_time_point_test.cc | 37 +++++++------ cpp/src/gandiva/precompiled/extended_math_ops.cc | 38 +++++++++----- .../{function_holder.h => precompiled/testing.h} | 26 ++++----- cpp/src/gandiva/precompiled/time_test.cc | 7 +-- cpp/src/gandiva/projector.cc | 2 + cpp/src/gandiva/projector.h | 12 +++-- cpp/src/gandiva/regex_util.h | 3 +- cpp/src/gandiva/selection_vector.cc | 14 ++++- cpp/src/gandiva/selection_vector.h | 3 +- cpp/src/gandiva/tests/date_time_test.cc | 2 +- cpp/src/gandiva/tests/projector_test.cc | 10 ++-- cpp/src/gandiva/to_date_holder.cc | 21 +++----- cpp/src/gandiva/to_date_holder.h | 3 +- cpp/src/gandiva/to_date_holder_test.cc | 57 ++++++++++++-------- cpp/src/gandiva/tree_expr_builder.h | 3 +- cpp/src/gandiva/value_validity_pair.h | 3 +- .../gandiva/{function_holder.h => visibility.h} | 41 ++++++++++----- cpp/thirdparty/versions.txt | 2 +- 66 files changed, 550 insertions(+), 294 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index dbf13ff..d955484 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -47,6 +47,7 @@ matrix: environment: global: USE_CLCACHE: true + ARROW_BUILD_GANDIVA: "OFF" PYTHON: "3.6" ARCH: "64" 
diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 78f5e41..f95b88e 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -104,6 +104,13 @@ conda create -n arrow -q -y -c conda-forge ^ call activate arrow +set ARROW_LLVM_VERSION=6.0.1 + +if "%ARROW_BUILD_GANDIVA%" == "ON" ( + @rem Install llvmdev in the toolchain if building gandiva.dll + conda install -q -y llvmdev=%ARROW_LLVM_VERSION% || exit /B +) + @rem Use Boost from Anaconda set BOOST_ROOT=%CONDA_PREFIX%\Library set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib diff --git a/ci/cpp-msvc-build-main.bat b/ci/cpp-msvc-build-main.bat index ccd64e3..779af15 100644 --- a/ci/cpp-msvc-build-main.bat +++ b/ci/cpp-msvc-build-main.bat @@ -55,6 +55,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ -DARROW_VERBOSE_THIRDPARTY_BUILD=ON ^ -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^ -DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^ + -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^ -DARROW_PARQUET=ON ^ -DARROW_PYTHON=ON ^ .. 
|| exit /B diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3ec430e..9cdbe7d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -309,8 +309,12 @@ Note that this requires linking Boost statically" set(BROTLI_MSVC_STATIC_LIB_SUFFIX "-static" CACHE STRING "Brotli static lib suffix used on Windows with MSVC (default -static)") + set(PROTOBUF_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING + "Protobuf static lib suffix used on Windows with MSVC (default is empty string)") + set(RE2_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING + "re2 static lib suffix used on Windows with MSVC (default is _static)") set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING - "Snappy static lib suffix used on Windows with MSVC (default is empty string)") + "Snappy static lib suffix used on Windows with MSVC (default is _static)") set(LZ4_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING "Lz4 static lib suffix used on Windows with MSVC (default _static)") set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING diff --git a/cpp/cmake_modules/FindProtobuf.cmake b/cpp/cmake_modules/FindProtobuf.cmake index e4a87f4..f53f48d 100644 --- a/cpp/cmake_modules/FindProtobuf.cmake +++ b/cpp/cmake_modules/FindProtobuf.cmake @@ -44,12 +44,12 @@ if (EXISTS "${_protobuf_path}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") set (lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}" ${lib_dirs}) endif () -find_library (PROTOBUF_LIBRARY NAMES protobuf PATHS +find_library (PROTOBUF_LIBRARY NAMES protobuf libprotobuf PATHS ${_protobuf_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs}) -find_library (PROTOC_LIBRARY NAMES protoc PATHS +find_library (PROTOC_LIBRARY NAMES protoc libprotoc PATHS ${_protobuf_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs}) @@ -66,7 +66,7 @@ if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOC_LIBRARY AND PROTOBUF_EX get_filename_component (PROTOBUF_LIBS ${PROTOBUF_LIBRARY} PATH) set (PROTOBUF_LIB_NAME protobuf) set (PROTOC_LIB_NAME protoc) - set (PROTOBUF_STATIC_LIB 
${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOBUF_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set (PROTOBUF_STATIC_LIB ${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOBUF_LIB_NAME}${PROTOBUF_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) set (PROTOC_STATIC_LIB ${PROTOBUF_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PROTOC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) else () set (PROTOBUF_FOUND FALSE) @@ -81,7 +81,7 @@ if (PROTOBUF_FOUND) message (STATUS "Found the Protoc executable: ${PROTOBUF_EXECUTABLE}") else() if (_protobuf_path) - set (PROTOBUF_ERR_MSG "Could not find Protobuf. Looked in ${_protobuf_path}.") + set (PROTOBUF_ERR_MSG "Could not find Protobuf. Looked in ${_protobuf_path}") else () set (PROTOBUF_ERR_MSG "Could not find Protobuf in system search paths.") endif() @@ -100,4 +100,3 @@ mark_as_advanced ( PROTOBUF_STATIC_LIB PROTOC_STATIC_LIB ) - diff --git a/cpp/cmake_modules/FindRE2.cmake b/cpp/cmake_modules/FindRE2.cmake index ae0f182..51b093f 100644 --- a/cpp/cmake_modules/FindRE2.cmake +++ b/cpp/cmake_modules/FindRE2.cmake @@ -45,14 +45,18 @@ if (EXISTS "${_re2_path}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") set (lib_dirs "lib/${CMAKE_LIBRARY_ARCHITECTURE}" ${lib_dirs}) endif () -find_library(RE2_STATIC_LIB NAMES libre2${CMAKE_STATIC_LIBRARY_SUFFIX} +set(RE2_LIB_NAME re2) +set(RE2_STATIC_LIB_NAME ${CMAKE_STATIC_LIBRARY_PREFIX}${RE2_LIB_NAME}${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) +set(RE2_SHARED_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${RE2_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + +find_library(RE2_STATIC_LIB NAMES ${RE2_STATIC_LIB_NAME} PATHS ${_re2_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs} DOC "Google's re2 regex static library" ) -find_library(RE2_SHARED_LIB NAMES libre2${CMAKE_SHARED_LIBRARY_SUFFIX} +find_library(RE2_SHARED_LIB NAMES ${RE2_SHARED_LIB_NAME} PATHS ${_re2_path} NO_DEFAULT_PATH PATH_SUFFIXES ${lib_dirs} diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ce5073f..fedeed4 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -769,24 +769,22 @@ if (ARROW_WITH_RAPIDJSON) ## Flatbuffers if("${FLATBUFFERS_HOME}" STREQUAL "") - set(FLATBUFFERS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) set(FLATBUFFERS_PREFIX "${THIRDPARTY_PREFIX}") - if (MSVC) - set(FLATBUFFERS_CMAKE_CXX_FLAGS "/EHsc") + set(FLATBUFFERS_CMAKE_CXX_FLAGS /EHsc) + else() + set(FLATBUFFERS_CMAKE_CXX_FLAGS -fPIC) endif() - - # RELEASE build is required for `flatc` to be installed. - set(FLATBUFFERS_BUILD_TYPE RELEASE) - set(FLATBUFFERS_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} - -DCMAKE_BUILD_TYPE=${FLATBUFFERS_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${FLATBUFFERS_PREFIX} - -DCMAKE_CXX_FLAGS=${FLATBUFFERS_CMAKE_CXX_FLAGS} - -DFLATBUFFERS_BUILD_TESTS=OFF) - + # We always need to do release builds, otherwise flatc will not be installed. ExternalProject_Add(flatbuffers_ep URL ${FLATBUFFERS_SOURCE_URL} - CMAKE_ARGS ${FLATBUFFERS_CMAKE_ARGS} + CMAKE_ARGS + "-DCMAKE_CXX_FLAGS=${FLATBUFFERS_CMAKE_CXX_FLAGS}" + "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" + "-DFLATBUFFERS_BUILD_TESTS=OFF" + "-DCMAKE_BUILD_TYPE=RELEASE" + "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}" + "-DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS}" ${EP_LOG_OPTIONS}) set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index 6709ae4..6bcb6ea 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ b/cpp/src/arrow/util/bit-util-test.cc @@ -788,6 +788,30 @@ TEST(BitUtil, CountLeadingZeros) { EXPECT_EQ(BitUtil::CountLeadingZeros(U64(ULLONG_MAX)), 0); } +TEST(BitUtil, CountTrailingZeros) { + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(0)), 32); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 31), 31); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 30), 30); + 
EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 29), 29); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1) << 28), 28); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(8)), 3); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(4)), 2); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(2)), 1); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(1)), 0); + EXPECT_EQ(BitUtil::CountTrailingZeros(U32(ULONG_MAX)), 0); + + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(0)), 64); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 63), 63); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 62), 62); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 61), 61); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1) << 60), 60); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(8)), 3); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(4)), 2); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(2)), 1); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(1)), 0); + EXPECT_EQ(BitUtil::CountTrailingZeros(U64(ULLONG_MAX)), 0); +} + #undef U32 #undef U64 diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 8e6979f..bfdb44f 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -45,6 +45,7 @@ #if defined(_MSC_VER) #include <intrin.h> #pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) #define ARROW_BYTE_SWAP64 _byteswap_uint64 #define ARROW_BYTE_SWAP32 _byteswap_ulong #else @@ -182,6 +183,56 @@ static inline int CountLeadingZeros(uint64_t value) { #endif } +static inline int CountTrailingZeros(uint32_t value) { +#if defined(__clang__) || defined(__GNUC__) + if (value == 0) return 32; + return static_cast<int>(__builtin_ctzl(value)); +#elif defined(_MSC_VER) + unsigned long index; // NOLINT + if (_BitScanForward(&index, value)) { + return static_cast<int>(index); + } else { + return 32; + } +#else + int bitpos = 0; + if (value) { + while (value & 1 == 0) { + value >>= 1; + ++bitpos; + } + } else { + bitpos = 32; + } + return bitpos; +#endif +} + +static inline int 
CountTrailingZeros(uint64_t value) { +#if defined(__clang__) || defined(__GNUC__) + if (value == 0) return 64; + return static_cast<int>(__builtin_ctzll(value)); +#elif defined(_MSC_VER) + unsigned long index; // NOLINT + if (_BitScanForward64(&index, value)) { + return static_cast<int>(index); + } else { + return 64; + } +#else + int bitpos = 0; + if (value) { + while (value & 1 == 0) { + value >>= 1; + ++bitpos; + } + } else { + bitpos = 64; + } + return bitpos; +#endif +} + // Returns the minimum number of bits needed to represent an unsigned value static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); } diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h index 0d8eb97..fc6ca04 100644 --- a/cpp/src/arrow/util/parsing.h +++ b/cpp/src/arrow/util/parsing.h @@ -34,7 +34,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" -#include "arrow/vendored/datetime/date.h" +#include "arrow/vendored/datetime.h" namespace arrow { namespace internal { diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/arrow/vendored/datetime.h similarity index 68% copy from cpp/src/gandiva/function_holder.h copy to cpp/src/arrow/vendored/datetime.h index 4d007d1..424313a 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/arrow/vendored/datetime.h @@ -15,21 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_FUNCTION_HOLDER_H -#define GANDIVA_FUNCTION_HOLDER_H +#pragma once -#include <memory> - -namespace gandiva { - -/// Holder for a function that can be invoked from LLVM. 
-class FunctionHolder { - public: - virtual ~FunctionHolder() = default; -}; - -using FunctionHolderPtr = std::shared_ptr<FunctionHolder>; - -} // namespace gandiva - -#endif // GANDIVA_FUNCTION_HOLDER_H +#include "arrow/vendored/datetime/date.h" +#include "arrow/vendored/datetime/tz.h" diff --git a/cpp/src/arrow/vendored/datetime/ios.h b/cpp/src/arrow/vendored/datetime/ios.h index ec8342f..23dc167 100644 --- a/cpp/src/arrow/vendored/datetime/ios.h +++ b/cpp/src/arrow/vendored/datetime/ios.h @@ -40,10 +40,10 @@ { namespace iOSUtils { - + std::string get_tzdata_path(); std::string get_current_timezone(); - + } // namespace iOSUtils } // namespace date } // namespace util diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index ffea8d6..e05423e 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -30,6 +30,9 @@ // been invented (that would involve another several millennia of evolution). // We did not mean to shout. +// wesm: This is required so that symbols are properly exported from the DLL +#include "visibility.h" + #ifdef _WIN32 // windows.h will be included directly and indirectly (e.g. by curl). // We need to define these macros to prevent windows.h bringing in diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/arrow/vendored/datetime/visibility.h similarity index 68% copy from cpp/src/gandiva/function_holder.h copy to cpp/src/arrow/vendored/datetime/visibility.h index 4d007d1..ae03123 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/arrow/vendored/datetime/visibility.h @@ -15,21 +15,12 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_FUNCTION_HOLDER_H -#define GANDIVA_FUNCTION_HOLDER_H +#pragma once -#include <memory> - -namespace gandiva { - -/// Holder for a function that can be invoked from LLVM. 
-class FunctionHolder { - public: - virtual ~FunctionHolder() = default; -}; - -using FunctionHolderPtr = std::shared_ptr<FunctionHolder>; - -} // namespace gandiva - -#endif // GANDIVA_FUNCTION_HOLDER_H +#if defined(ARROW_STATIC) +// intentially empty +#elif defined(ARROW_EXPORTING) +#define DATE_BUILD_DLL +#else +#define DATE_USE_DLL +#endif diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 52784e7..d5f4364 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -32,8 +32,8 @@ set(GANDIVA_BC_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/gandiva) set(GANDIVA_BC_FILE_NAME irhelpers.bc) -set(GANDIVA_BC_INSTALL_PATH ${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}) -set(GANDIVA_BC_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}) +set(GANDIVA_BC_INSTALL_PATH "${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}") +set(GANDIVA_BC_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}") install(FILES ${GANDIVA_BC_OUTPUT_PATH} DESTINATION ${GANDIVA_BC_INSTALL_DIR}) @@ -95,16 +95,35 @@ if (ARROW_GANDIVA_STATIC_LIBSTDCPP -static-libgcc) endif() +# if (MSVC) +# # Symbols that need to be made public in gandiva.dll for LLVM IR +# # compilation +# set(MSVC_SYMBOL_EXPORTS _Init_thread_header) +# foreach(SYMBOL ${MSVC_SYMBOL_EXPORTS}) +# set(GANDIVA_SHARED_LINK_FLAGS "${GANDIVA_SHARED_LINK_FLAGS} /EXPORT:${SYMBOL}") +# endforeach() +# endif() + ADD_ARROW_LIB(gandiva SOURCES ${SRC_FILES} OUTPUTS GANDIVA_LIBRARIES DEPENDENCIES arrow_dependencies precompiled EXTRA_INCLUDES $<TARGET_PROPERTY:LLVM::LLVM_INTERFACE,INTERFACE_INCLUDE_DIRECTORIES> + SHARED_LINK_FLAGS ${GANDIVA_SHARED_LINK_FLAGS} SHARED_LINK_LIBS arrow_shared SHARED_PRIVATE_LINK_LIBS ${GANDIVA_SHARED_PRIVATE_LINK_LIBS} STATIC_LINK_LIBS ${GANDIVA_STATIC_LINK_LIBS}) +foreach(LIB_TARGET ${GANDIVA_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} + PRIVATE GANDIVA_EXPORTING) +endforeach() + +if (ARROW_BUILD_STATIC AND 
WIN32) + target_compile_definitions(gandiva_static PUBLIC GANDIVA_STATIC) +endif() + add_dependencies(gandiva ${GANDIVA_LIBRARIES}) # install for gandiva diff --git a/cpp/src/gandiva/annotator.h b/cpp/src/gandiva/annotator.h index 6c2cd05..c0ddc02 100644 --- a/cpp/src/gandiva/annotator.h +++ b/cpp/src/gandiva/annotator.h @@ -27,12 +27,13 @@ #include "gandiva/eval_batch.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief annotate the arrow fields in an expression, and use that /// to convert the incoming arrow-format row batch to an EvalBatch. -class Annotator { +class GANDIVA_EXPORT Annotator { public: Annotator() : buffer_count_(0), local_bitmap_count_(0) {} diff --git a/cpp/src/gandiva/bitmap_accumulator.h b/cpp/src/gandiva/bitmap_accumulator.h index 157405d..15a2044 100644 --- a/cpp/src/gandiva/bitmap_accumulator.h +++ b/cpp/src/gandiva/bitmap_accumulator.h @@ -24,12 +24,13 @@ #include "gandiva/dex.h" #include "gandiva/dex_visitor.h" #include "gandiva/eval_batch.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Extract bitmap buffer from either the input/buffer vectors or the /// local validity bitmap, and accumultes them to do the final computation. 
-class BitMapAccumulator : public DexDefaultVisitor { +class GANDIVA_EXPORT BitMapAccumulator : public DexDefaultVisitor { public: explicit BitMapAccumulator(const EvalBatch& eval_batch) : eval_batch_(eval_batch), all_invalid_(false) {} diff --git a/cpp/src/gandiva/bitmap_accumulator_test.cc b/cpp/src/gandiva/bitmap_accumulator_test.cc index 53e8aac..51a8b09 100644 --- a/cpp/src/gandiva/bitmap_accumulator_test.cc +++ b/cpp/src/gandiva/bitmap_accumulator_test.cc @@ -21,24 +21,22 @@ #include <vector> #include <gtest/gtest.h> + +#include "arrow/test-util.h" + #include "gandiva/dex.h" namespace gandiva { class TestBitMapAccumulator : public ::testing::Test { protected: - void FillBitMap(uint8_t* bmap, int nrecords); + void FillBitMap(uint8_t* bmap, uint32_t seed, int nrecords); void ByteWiseIntersectBitMaps(uint8_t* dst, const std::vector<uint8_t*>& srcs, int nrecords); }; -void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, int nbytes) { - unsigned int cur = 0; - - for (int i = 0; i < nbytes; ++i) { - rand_r(&cur); - bmap[i] = static_cast<uint8_t>(cur % UINT8_MAX); - } +void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, uint32_t seed, int nbytes) { + ::arrow::random_bytes(nbytes, seed, bmap); } void TestBitMapAccumulator::ByteWiseIntersectBitMaps(uint8_t* dst, @@ -61,7 +59,7 @@ TEST_F(TestBitMapAccumulator, TestIntersectBitMaps) { uint8_t expected_bitmap[length]; for (int i = 0; i < 4; i++) { - FillBitMap(src_bitmaps[i], length); + FillBitMap(src_bitmaps[i], i, length); } for (int i = 0; i < 4; i++) { diff --git a/cpp/src/gandiva/cast_time.cc b/cpp/src/gandiva/cast_time.cc index ee3fd31..1d4293b 100644 --- a/cpp/src/gandiva/cast_time.cc +++ b/cpp/src/gandiva/cast_time.cc @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/vendored/datetime/tz.h" +#include <cstdint> + +#include "arrow/vendored/datetime.h" + #include "gandiva/precompiled/time_fields.h" #ifndef GANDIVA_UNIT_TEST diff --git a/cpp/src/gandiva/compiled_expr.h b/cpp/src/gandiva/compiled_expr.h index 2f23971..b7799f1 100644 --- a/cpp/src/gandiva/compiled_expr.h +++ b/cpp/src/gandiva/compiled_expr.h @@ -18,7 +18,7 @@ #ifndef GANDIVA_COMPILED_EXPR_H #define GANDIVA_COMPILED_EXPR_H -#include <llvm/IR/IRBuilder.h> +#include "gandiva/llvm_includes.h" #include "gandiva/value_validity_pair.h" namespace gandiva { diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index 04e2eed..480a95e 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_CONFIGURATION_H -#define GANDIVA_CONFIGURATION_H +#pragma once #include <memory> #include <string> #include "arrow/status.h" +#include "gandiva/visibility.h" + namespace gandiva { +GANDIVA_EXPORT extern const char kByteCodeFilePath[]; class ConfigurationBuilder; @@ -32,7 +34,7 @@ class ConfigurationBuilder; /// /// It contains elements to customize gandiva execution /// at run time. 
-class Configuration { +class GANDIVA_EXPORT Configuration { public: friend class ConfigurationBuilder; @@ -53,7 +55,7 @@ class Configuration { /// /// Provides a default configuration and convenience methods /// to override specific values and build a custom instance -class ConfigurationBuilder { +class GANDIVA_EXPORT ConfigurationBuilder { public: ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {} @@ -83,4 +85,3 @@ class ConfigurationBuilder { }; } // namespace gandiva -#endif // GANDIVA_CONFIGURATION_H diff --git a/cpp/src/gandiva/date_utils.cc b/cpp/src/gandiva/date_utils.cc index 8a7e1f0..f0a80d3 100644 --- a/cpp/src/gandiva/date_utils.cc +++ b/cpp/src/gandiva/date_utils.cc @@ -16,6 +16,7 @@ // under the License. #include <algorithm> +#include <cstdint> #include <memory> #include <sstream> #include <vector> @@ -57,7 +58,7 @@ Status DateUtils::ToInternalFormat(const std::string& format, std::stringstream buffer; bool is_in_quoted_text = false; - for (uint i = 0; i < format.length(); i++) { + for (size_t i = 0; i < format.size(); i++) { char currentChar = format[i]; // logic before we append to the buffer diff --git a/cpp/src/gandiva/date_utils.h b/cpp/src/gandiva/date_utils.h index 64a150b..e87203b 100644 --- a/cpp/src/gandiva/date_utils.h +++ b/cpp/src/gandiva/date_utils.h @@ -23,12 +23,22 @@ #include <unordered_map> #include <vector> +#if defined(_MSC_VER) +#include <ctime> +#include <iomanip> +#include <sstream> +#endif + +#include "arrow/util/macros.h" +#include "arrow/vendored/datetime.h" + #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Utility class for converting sql date patterns to internal date patterns. 
-class DateUtils { +class GANDIVA_EXPORT DateUtils { public: static Status ToInternalFormat(const std::string& format, std::shared_ptr<std::string>* internal_format); @@ -47,6 +57,55 @@ class DateUtils { static std::vector<std::string> GetExactMatches(const std::string& pattern); }; +namespace internal { + +/// \brief Returns seconds since the UNIX epoch +static inline bool ParseTimestamp(const char* buf, const char* format, + bool ignoreTimeInDay, int64_t* out) { +#if defined(_MSC_VER) + static std::locale lc_all(setlocale(LC_ALL, NULLPTR)); + std::istringstream stream(buf); + stream.imbue(lc_all); + + // TODO: date::parse fails parsing when the hour value is 0. + // eg.1886-12-01 00:00:00 + arrow::util::date::sys_seconds seconds; + if (ignoreTimeInDay) { + arrow::util::date::sys_days days; + stream >> arrow::util::date::parse(format, days); + if (stream.fail()) { + return false; + } + seconds = days; + } else { + stream >> arrow::util::date::parse(format, seconds); + if (stream.fail()) { + return false; + } + } + auto seconds_in_epoch = seconds.time_since_epoch().count(); + *out = seconds_in_epoch; + return true; +#else + struct tm result; + char* ret = strptime(buf, format, &result); + if (ret == NULLPTR) { + return false; + } + // ignore the time part + arrow::util::date::sys_seconds secs = + arrow::util::date::sys_days(arrow::util::date::year(result.tm_year + 1900) / + (result.tm_mon + 1) / result.tm_mday); + if (!ignoreTimeInDay) { + secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) + + std::chrono::seconds(result.tm_sec)); + } + *out = secs.time_since_epoch().count(); + return true; +#endif +} + +} // namespace internal } // namespace gandiva #endif // TO_DATE_HELPER_H diff --git a/cpp/src/gandiva/decimal_type_util.cc b/cpp/src/gandiva/decimal_type_util.cc index 2795e91..74c9326 100644 --- a/cpp/src/gandiva/decimal_type_util.cc +++ b/cpp/src/gandiva/decimal_type_util.cc @@ -20,11 +20,6 @@ namespace gandiva { -constexpr int32_t 
DecimalTypeUtil::kMaxDecimal32Precision; -constexpr int32_t DecimalTypeUtil::kMaxDecimal64Precision; -constexpr int32_t DecimalTypeUtil::kMaxPrecision; - -constexpr int32_t DecimalTypeUtil::kMaxScale; constexpr int32_t DecimalTypeUtil::kMinAdjustedScale; #define DCHECK_TYPE(type) \ diff --git a/cpp/src/gandiva/decimal_type_util.h b/cpp/src/gandiva/decimal_type_util.h index 2c095c1..aa3c255 100644 --- a/cpp/src/gandiva/decimal_type_util.h +++ b/cpp/src/gandiva/decimal_type_util.h @@ -24,12 +24,13 @@ #include <memory> #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// @brief Handles conversion of scale/precision for operations on decimal types. /// TODO : do validations for all of these. -class DecimalTypeUtil { +class GANDIVA_EXPORT DecimalTypeUtil { public: enum Op { kOpAdd, @@ -65,7 +66,16 @@ class DecimalTypeUtil { static Decimal128TypePtr MakeType(int32_t precision, int32_t scale); private: - static Decimal128TypePtr MakeAdjustedType(int32_t precision, int32_t scale); + // Reduce the scale if possible so that precision stays <= kMaxPrecision + static Decimal128TypePtr MakeAdjustedType(int32_t precision, int32_t scale) { + if (precision > kMaxPrecision) { + int32_t min_scale = std::min(scale, kMinAdjustedScale); + int32_t delta = precision - kMaxPrecision; + precision = kMaxPrecision; + scale = std::max(scale - delta, min_scale); + } + return MakeType(precision, scale); + } }; inline Decimal128TypePtr DecimalTypeUtil::MakeType(int32_t precision, int32_t scale) { @@ -73,18 +83,6 @@ inline Decimal128TypePtr DecimalTypeUtil::MakeType(int32_t precision, int32_t sc arrow::decimal(precision, scale)); } -// Reduce the scale if possible so that precision stays <= kMaxPrecision -inline Decimal128TypePtr DecimalTypeUtil::MakeAdjustedType(int32_t precision, - int32_t scale) { - if (precision > kMaxPrecision) { - int32_t min_scale = std::min(scale, kMinAdjustedScale); - int32_t delta = precision - kMaxPrecision; - precision = kMaxPrecision; - 
scale = std::max(scale - delta, min_scale); - } - return MakeType(precision, scale); -} - } // namespace gandiva #endif // GANDIVA_DECIMAL_TYPE_SQL_H diff --git a/cpp/src/gandiva/dex.h b/cpp/src/gandiva/dex.h index afce44e..894d961 100644 --- a/cpp/src/gandiva/dex.h +++ b/cpp/src/gandiva/dex.h @@ -32,11 +32,12 @@ #include "gandiva/literal_holder.h" #include "gandiva/native_function.h" #include "gandiva/value_validity_pair.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Decomposed expression : the validity and value are separated. -class Dex { +class GANDIVA_EXPORT Dex { public: /// Derived classes should simply invoke the Visit api of the visitor. virtual void Accept(DexVisitor& visitor) = 0; @@ -44,7 +45,7 @@ class Dex { }; /// Base class for other Vector related Dex. -class VectorReadBaseDex : public Dex { +class GANDIVA_EXPORT VectorReadBaseDex : public Dex { public: explicit VectorReadBaseDex(FieldDescriptorPtr field_desc) : field_desc_(field_desc) {} @@ -59,7 +60,7 @@ class VectorReadBaseDex : public Dex { }; /// validity component of a ValueVector -class VectorReadValidityDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadValidityDex : public VectorReadBaseDex { public: explicit VectorReadValidityDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -70,7 +71,7 @@ class VectorReadValidityDex : public VectorReadBaseDex { }; /// value component of a fixed-len ValueVector -class VectorReadFixedLenValueDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadFixedLenValueDex : public VectorReadBaseDex { public: explicit VectorReadFixedLenValueDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -81,7 +82,7 @@ class VectorReadFixedLenValueDex : public VectorReadBaseDex { }; /// value component of a variable-len ValueVector -class VectorReadVarLenValueDex : public VectorReadBaseDex { +class GANDIVA_EXPORT VectorReadVarLenValueDex : public VectorReadBaseDex { public: explicit 
VectorReadVarLenValueDex(FieldDescriptorPtr field_desc) : VectorReadBaseDex(field_desc) {} @@ -94,7 +95,7 @@ class VectorReadVarLenValueDex : public VectorReadBaseDex { }; /// validity based on a local bitmap. -class LocalBitMapValidityDex : public Dex { +class GANDIVA_EXPORT LocalBitMapValidityDex : public Dex { public: explicit LocalBitMapValidityDex(int local_bitmap_idx) : local_bitmap_idx_(local_bitmap_idx) {} @@ -108,7 +109,7 @@ class LocalBitMapValidityDex : public Dex { }; /// base function expression -class FuncDex : public Dex { +class GANDIVA_EXPORT FuncDex : public Dex { public: FuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, FunctionHolderPtr function_holder, const ValueValidityPairVector& args) @@ -134,7 +135,7 @@ class FuncDex : public Dex { /// A function expression that only deals with non-null inputs, and generates non-null /// outputs. -class NonNullableFuncDex : public FuncDex { +class GANDIVA_EXPORT NonNullableFuncDex : public FuncDex { public: NonNullableFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -147,7 +148,7 @@ class NonNullableFuncDex : public FuncDex { /// A function expression that deals with nullable inputs, but generates non-null /// outputs. -class NullableNeverFuncDex : public FuncDex { +class GANDIVA_EXPORT NullableNeverFuncDex : public FuncDex { public: NullableNeverFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -160,7 +161,7 @@ class NullableNeverFuncDex : public FuncDex { /// A function expression that deals with nullable inputs, and /// nullable outputs. -class NullableInternalFuncDex : public FuncDex { +class GANDIVA_EXPORT NullableInternalFuncDex : public FuncDex { public: NullableInternalFuncDex(FuncDescriptorPtr func_descriptor, const NativeFunction* native_function, @@ -179,17 +180,17 @@ class NullableInternalFuncDex : public FuncDex { }; /// special validity type that always returns true. 
-class TrueDex : public Dex { +class GANDIVA_EXPORT TrueDex : public Dex { void Accept(DexVisitor& visitor) override { visitor.Visit(*this); } }; /// special validity type that always returns false. -class FalseDex : public Dex { +class GANDIVA_EXPORT FalseDex : public Dex { void Accept(DexVisitor& visitor) override { visitor.Visit(*this); } }; /// decomposed expression for a literal. -class LiteralDex : public Dex { +class GANDIVA_EXPORT LiteralDex : public Dex { public: LiteralDex(DataTypePtr type, const LiteralHolder& holder) : type_(type), holder_(holder) {} @@ -206,7 +207,7 @@ class LiteralDex : public Dex { }; /// decomposed if-else expression. -class IfDex : public Dex { +class GANDIVA_EXPORT IfDex : public Dex { public: IfDex(ValueValidityPairPtr condition_vv, ValueValidityPairPtr then_vv, ValueValidityPairPtr else_vv, DataTypePtr result_type, int local_bitmap_idx, @@ -242,7 +243,7 @@ class IfDex : public Dex { }; // decomposed boolean expression. -class BooleanDex : public Dex { +class GANDIVA_EXPORT BooleanDex : public Dex { public: BooleanDex(const ValueValidityPairVector& args, int local_bitmap_idx) : args_(args), local_bitmap_idx_(local_bitmap_idx) {} @@ -258,7 +259,7 @@ class BooleanDex : public Dex { }; /// Boolean-AND expression -class BooleanAndDex : public BooleanDex { +class GANDIVA_EXPORT BooleanAndDex : public BooleanDex { public: BooleanAndDex(const ValueValidityPairVector& args, int local_bitmap_idx) : BooleanDex(args, local_bitmap_idx) {} @@ -267,7 +268,7 @@ class BooleanAndDex : public BooleanDex { }; /// Boolean-OR expression -class BooleanOrDex : public BooleanDex { +class GANDIVA_EXPORT BooleanOrDex : public BooleanDex { public: BooleanOrDex(const ValueValidityPairVector& args, int local_bitmap_idx) : BooleanDex(args, local_bitmap_idx) {} diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h index 456fe43..c34629a 100644 --- a/cpp/src/gandiva/dex_visitor.h +++ b/cpp/src/gandiva/dex_visitor.h @@ -21,6 +21,7 @@ 
#include <string> #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -41,7 +42,7 @@ template <typename Type> class InExprDexBase; /// \brief Visitor for decomposed expression. -class DexVisitor { +class GANDIVA_EXPORT DexVisitor { public: virtual ~DexVisitor() = default; @@ -67,7 +68,7 @@ class DexVisitor { #define VISIT_DCHECK(DEX_CLASS) \ void Visit(const DEX_CLASS& dex) override { DCHECK(0); } -class DexDefaultVisitor : public DexVisitor { +class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor { VISIT_DCHECK(VectorReadValidityDex) VISIT_DCHECK(VectorReadFixedLenValueDex) VISIT_DCHECK(VectorReadVarLenValueDex) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 9aaafea..d073a3e 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -23,6 +23,15 @@ #include <unordered_set> #include <utility> +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4141) +#pragma warning(disable : 4146) +#pragma warning(disable : 4244) +#pragma warning(disable : 4267) +#pragma warning(disable : 4624) +#endif + #include <llvm/Analysis/Passes.h> #include <llvm/Analysis/TargetTransformInfo.h> #include <llvm/Bitcode/BitcodeReader.h> @@ -39,6 +48,11 @@ #include <llvm/Transforms/Scalar.h> #include <llvm/Transforms/Scalar/GVN.h> #include <llvm/Transforms/Vectorize.h> + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #include "gandiva/decimal_ir.h" #include "gandiva/exported_funcs_registry.h" diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 1248014..7a976d5 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -23,24 +23,21 @@ #include <string> #include <vector> -#include <llvm/ExecutionEngine/ExecutionEngine.h> -#include <llvm/IR/IRBuilder.h> -#include <llvm/IR/LLVMContext.h> -#include <llvm/IR/Module.h> - #include "arrow/status.h" #include "arrow/util/macros.h" #include "gandiva/configuration.h" +#include "gandiva/llvm_includes.h" #include 
"gandiva/llvm_types.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { class FunctionIRBuilder; /// \brief LLVM Execution engine wrapper. -class Engine { +class GANDIVA_EXPORT Engine { public: llvm::LLVMContext* context() { return context_.get(); } llvm::IRBuilder<>* ir_builder() { return ir_builder_.get(); } diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index bed84ed..91014f1 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -232,7 +232,7 @@ int ExprDecomposer::PushThenEntry(const IfNode& node) { // push new entry to the stack. std::unique_ptr<IfStackEntry> entry(new IfStackEntry( node, kStackEntryThen, false /*is_terminal_else*/, local_bitmap_idx)); - if_entries_stack_.push(std::move(entry)); + if_entries_stack_.emplace(std::move(entry)); return local_bitmap_idx; } @@ -250,7 +250,7 @@ void ExprDecomposer::PopThenEntry(const IfNode& node) { void ExprDecomposer::PushElseEntry(const IfNode& node, int local_bitmap_idx) { std::unique_ptr<IfStackEntry> entry(new IfStackEntry( node, kStackEntryElse, true /*is_terminal_else*/, local_bitmap_idx)); - if_entries_stack_.push(std::move(entry)); + if_entries_stack_.emplace(std::move(entry)); } bool ExprDecomposer::PopElseEntry(const IfNode& node) { @@ -268,7 +268,7 @@ bool ExprDecomposer::PopElseEntry(const IfNode& node) { void ExprDecomposer::PushConditionEntry(const IfNode& node) { std::unique_ptr<IfStackEntry> entry(new IfStackEntry(node, kStackEntryCondition)); - if_entries_stack_.push(std::move(entry)); + if_entries_stack_.emplace(std::move(entry)); } void ExprDecomposer::PopConditionEntry(const IfNode& node) { diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h index bc21ed0..ab92ca3 100644 --- a/cpp/src/gandiva/expr_decomposer.h +++ b/cpp/src/gandiva/expr_decomposer.h @@ -27,6 +27,7 @@ #include "gandiva/expression.h" #include "gandiva/node.h" #include "gandiva/node_visitor.h" 
+#include "gandiva/visibility.h" namespace gandiva { @@ -35,7 +36,7 @@ class Annotator; /// \brief Decomposes an expression tree to seperate out the validity and /// value expressions. -class ExprDecomposer : public NodeVisitor { +class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor { public: explicit ExprDecomposer(const FunctionRegistry& registry, Annotator& annotator) : registry_(registry), annotator_(annotator) {} @@ -49,6 +50,8 @@ class ExprDecomposer : public NodeVisitor { } private: + ARROW_DISALLOW_COPY_AND_ASSIGN(ExprDecomposer); + FRIEND_TEST(TestExprDecomposer, TestStackSimple); FRIEND_TEST(TestExprDecomposer, TestNested); FRIEND_TEST(TestExprDecomposer, TestInternalIf); @@ -83,6 +86,9 @@ class ExprDecomposer : public NodeVisitor { StackEntryType entry_type_; bool is_terminal_else_; int local_bitmap_idx_; + + private: + ARROW_DISALLOW_COPY_AND_ASSIGN(IfStackEntry); }; // pop 'condition entry' into stack. diff --git a/cpp/src/gandiva/expression.h b/cpp/src/gandiva/expression.h index e3ae18f..2141e87 100644 --- a/cpp/src/gandiva/expression.h +++ b/cpp/src/gandiva/expression.h @@ -22,11 +22,12 @@ #include "gandiva/arrow.h" #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief An expression tree with a root node, and a result field. 
-class Expression { +class GANDIVA_EXPORT Expression { public: Expression(const NodePtr root, const FieldPtr result) : root_(root), result_(result) {} diff --git a/cpp/src/gandiva/expression_registry.h b/cpp/src/gandiva/expression_registry.h index a03deab..4524a07 100644 --- a/cpp/src/gandiva/expression_registry.h +++ b/cpp/src/gandiva/expression_registry.h @@ -24,6 +24,7 @@ #include "gandiva/arrow.h" #include "gandiva/function_signature.h" #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -33,13 +34,13 @@ class FunctionRegistry; /// /// Has helper methods for clients to programatically discover /// data types and functions supported by Gandiva. -class ExpressionRegistry { +class GANDIVA_EXPORT ExpressionRegistry { public: using iterator = const NativeFunction*; ExpressionRegistry(); ~ExpressionRegistry(); static DataTypeVector supported_types() { return supported_types_; } - class FunctionSignatureIterator { + class GANDIVA_EXPORT FunctionSignatureIterator { public: explicit FunctionSignatureIterator(iterator it) : it_(it) {} diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc index 6075e25..3bba190 100644 --- a/cpp/src/gandiva/filter.cc +++ b/cpp/src/gandiva/filter.cc @@ -37,6 +37,8 @@ Filter::Filter(std::unique_ptr<LLVMGenerator> llvm_generator, SchemaPtr schema, schema_(schema), configuration_(configuration) {} +Filter::~Filter() {} + Status Filter::Make(SchemaPtr schema, ConditionPtr condition, std::shared_ptr<Configuration> configuration, std::shared_ptr<Filter>* filter) { diff --git a/cpp/src/gandiva/filter.h b/cpp/src/gandiva/filter.h index 6ff7010..4fbda80 100644 --- a/cpp/src/gandiva/filter.h +++ b/cpp/src/gandiva/filter.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef GANDIVA_EXPR_FILTER_H -#define GANDIVA_EXPR_FILTER_H +#pragma once #include <memory> #include <string> @@ -29,6 +28,7 @@ #include "gandiva/condition.h" #include "gandiva/configuration.h" #include "gandiva/selection_vector.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -38,12 +38,14 @@ class LLVMGenerator; /// /// A filter is built for a specific schema and condition. Once the filter is built, it /// can be used to evaluate many row batches. -class Filter { +class GANDIVA_EXPORT Filter { public: Filter(std::unique_ptr<LLVMGenerator> llvm_generator, SchemaPtr schema, std::shared_ptr<Configuration> config); - ~Filter() = default; + // Inline dtor will attempt to resolve the destructor for + // LLVMGenerator on MSVC, so we compile the dtor in the object code + ~Filter(); /// Build a filter for the given schema and condition, with the default configuration. /// @@ -81,5 +83,3 @@ class Filter { }; } // namespace gandiva - -#endif // GANDIVA_EXPR_FILTER_H diff --git a/cpp/src/gandiva/func_descriptor.h b/cpp/src/gandiva/func_descriptor.h index 9b18a9b..08f7199 100644 --- a/cpp/src/gandiva/func_descriptor.h +++ b/cpp/src/gandiva/func_descriptor.h @@ -22,11 +22,12 @@ #include <vector> #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// Descriptor for a function in the expression. -class FuncDescriptor { +class GANDIVA_EXPORT FuncDescriptor { public: FuncDescriptor(const std::string& name, const DataTypeVector& params, DataTypePtr return_type) diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/gandiva/function_holder.h index 4d007d1..43dbeac 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/gandiva/function_holder.h @@ -20,10 +20,12 @@ #include <memory> +#include "gandiva/visibility.h" + namespace gandiva { /// Holder for a function that can be invoked from LLVM. 
-class FunctionHolder { +class GANDIVA_EXPORT FunctionHolder { public: virtual ~FunctionHolder() = default; }; diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 810bf2d..f7aa3de 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -22,11 +22,12 @@ #include "gandiva/function_registry_common.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/native_function.h" +#include "gandiva/visibility.h" namespace gandiva { ///\brief Registry of pre-compiled IR functions. -class FunctionRegistry { +class GANDIVA_EXPORT FunctionRegistry { public: using iterator = const NativeFunction*; diff --git a/cpp/src/gandiva/function_signature.h b/cpp/src/gandiva/function_signature.h index ee82abc..a5015ce 100644 --- a/cpp/src/gandiva/function_signature.h +++ b/cpp/src/gandiva/function_signature.h @@ -24,12 +24,13 @@ #include "gandiva/arrow.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Signature for a function : includes the base name, input param types and /// output types. 
-class FunctionSignature { +class GANDIVA_EXPORT FunctionSignature { public: FunctionSignature(const std::string& base_name, const DataTypeVector& param_types, DataTypePtr ret_type) diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h index 23ed367..eab30bf 100644 --- a/cpp/src/gandiva/like_holder.h +++ b/cpp/src/gandiva/like_holder.h @@ -22,14 +22,17 @@ #include <string> #include <re2/re2.h> + #include "arrow/status.h" + #include "gandiva/function_holder.h" #include "gandiva/node.h" +#include "gandiva/visibility.h" namespace gandiva { /// Function Holder for SQL 'like' -class LikeHolder : public FunctionHolder { +class GANDIVA_EXPORT LikeHolder : public FunctionHolder { public: ~LikeHolder() override = default; diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 937e5ac..2c1d5c1 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -36,13 +36,14 @@ #include "gandiva/llvm_types.h" #include "gandiva/lvalue.h" #include "gandiva/value_validity_pair.h" +#include "gandiva/visibility.h" namespace gandiva { class FunctionHolder; /// Builds an LLVM module and generates code for the specified set of expressions. -class LLVMGenerator { +class GANDIVA_EXPORT LLVMGenerator { public: /// \brief Factory method to initialize the generator. static Status Make(std::shared_ptr<Configuration> config, diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/gandiva/llvm_includes.h similarity index 64% copy from cpp/src/gandiva/function_holder.h copy to cpp/src/gandiva/llvm_includes.h index 4d007d1..9de1f45 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/gandiva/llvm_includes.h @@ -15,21 +15,23 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef GANDIVA_FUNCTION_HOLDER_H -#define GANDIVA_FUNCTION_HOLDER_H +#pragma once -#include <memory> +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4141) +#pragma warning(disable : 4146) +#pragma warning(disable : 4244) +#pragma warning(disable : 4267) +#pragma warning(disable : 4291) +#pragma warning(disable : 4624) +#endif -namespace gandiva { +#include <llvm/ExecutionEngine/ExecutionEngine.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/IR/LLVMContext.h> +#include <llvm/IR/Module.h> -/// Holder for a function that can be invoked from LLVM. -class FunctionHolder { - public: - virtual ~FunctionHolder() = default; -}; - -using FunctionHolderPtr = std::shared_ptr<FunctionHolder>; - -} // namespace gandiva - -#endif // GANDIVA_FUNCTION_HOLDER_H +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index 9cf4dd5..2629d32 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -21,15 +21,15 @@ #include <map> #include <vector> -#include <llvm/IR/IRBuilder.h> -#include <llvm/IR/LLVMContext.h> #include "gandiva/arrow.h" +#include "gandiva/llvm_includes.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Holder for llvm types, and mappings between arrow types and llvm types. -class LLVMTypes { +class GANDIVA_EXPORT LLVMTypes { public: explicit LLVMTypes(llvm::LLVMContext& context); diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index ce5040f..6c9814c 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -20,14 +20,15 @@ #include <vector> -#include <llvm/IR/IRBuilder.h> #include "arrow/util/macros.h" + +#include "gandiva/llvm_includes.h" #include "gandiva/logging.h" namespace gandiva { /// \brief Tracks validity/value builders in LLVM. 
-class LValue { +class GANDIVA_EXPORT LValue { public: explicit LValue(llvm::Value* data, llvm::Value* length = NULLPTR, llvm::Value* validity = NULLPTR) @@ -54,7 +55,7 @@ class LValue { llvm::Value* validity_; }; -class DecimalLValue : public LValue { +class GANDIVA_EXPORT DecimalLValue : public LValue { public: DecimalLValue(llvm::Value* data, llvm::Value* validity, llvm::Value* precision, llvm::Value* scale) diff --git a/cpp/src/gandiva/native_function.h b/cpp/src/gandiva/native_function.h index 5b130a9..82714c7 100644 --- a/cpp/src/gandiva/native_function.h +++ b/cpp/src/gandiva/native_function.h @@ -23,6 +23,7 @@ #include <vector> #include "gandiva/function_signature.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -37,7 +38,7 @@ enum ResultNullableType { /// \brief Holder for the mapping from a function in an expression to a /// precompiled function. -class NativeFunction { +class GANDIVA_EXPORT NativeFunction { public: // fucntion attributes. static constexpr int32_t kNeedsContext = (1 << 1); diff --git a/cpp/src/gandiva/node.h b/cpp/src/gandiva/node.h index 77cde68..ca51123 100644 --- a/cpp/src/gandiva/node.h +++ b/cpp/src/gandiva/node.h @@ -30,12 +30,13 @@ #include "gandiva/gandiva_aliases.h" #include "gandiva/literal_holder.h" #include "gandiva/node_visitor.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Represents a node in the expression tree. Validity and value are /// in a joined state. -class Node { +class GANDIVA_EXPORT Node { public: explicit Node(DataTypePtr return_type) : return_type_(return_type) {} @@ -53,7 +54,7 @@ class Node { }; /// \brief Node in the expression tree, representing a literal. 
-class LiteralNode : public Node { +class GANDIVA_EXPORT LiteralNode : public Node { public: LiteralNode(DataTypePtr type, const LiteralHolder& holder, bool is_null) : Node(type), holder_(holder), is_null_(is_null) {} @@ -95,7 +96,7 @@ class LiteralNode : public Node { }; /// \brief Node in the expression tree, representing an arrow field. -class FieldNode : public Node { +class GANDIVA_EXPORT FieldNode : public Node { public: explicit FieldNode(FieldPtr field) : Node(field->type()), field_(field) {} @@ -112,7 +113,7 @@ class FieldNode : public Node { }; /// \brief Node in the expression tree, representing a function. -class FunctionNode : public Node { +class GANDIVA_EXPORT FunctionNode : public Node { public: FunctionNode(const std::string& name, const NodeVector& children, DataTypePtr retType); @@ -154,7 +155,7 @@ inline FunctionNode::FunctionNode(const std::string& name, const NodeVector& chi } /// \brief Node in the expression tree, representing an if-else expression. -class IfNode : public Node { +class GANDIVA_EXPORT IfNode : public Node { public: IfNode(NodePtr condition, NodePtr then_node, NodePtr else_node, DataTypePtr result_type) : Node(result_type), @@ -183,7 +184,7 @@ class IfNode : public Node { }; /// \brief Node in the expression tree, representing an and/or boolean expression. -class BooleanNode : public Node { +class GANDIVA_EXPORT BooleanNode : public Node { public: enum ExprType : char { AND, OR }; diff --git a/cpp/src/gandiva/node_visitor.h b/cpp/src/gandiva/node_visitor.h index ba3645a..27d0564 100644 --- a/cpp/src/gandiva/node_visitor.h +++ b/cpp/src/gandiva/node_visitor.h @@ -23,6 +23,7 @@ #include "arrow/status.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -35,7 +36,7 @@ template <typename Type> class InExpressionNode; /// \brief Visitor for nodes in the expression tree. 
-class NodeVisitor { +class GANDIVA_EXPORT NodeVisitor { public: virtual ~NodeVisitor() = default; diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index 83183bc..5c40a6c 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -30,6 +30,18 @@ set(PRECOMPILED_SRCS timestamp_arithmetic.cc ../../arrow/util/basic_decimal.cc) +if (MSVC) + # clang pretends to be a particular version of MSVC. Version 1900 is + # Visual Studio 2015, and the standard library uses C++14 features, + # so we have to use that -std version to get the IR compilation to + # work + set(PLATFORM_CLANG_OPTIONS + -std=c++14 -fms-compatibility -fms-compatibility-version=19) +else() + set(PLATFORM_CLANG_OPTIONS + -std=c++11) +endif() + # Create bitcode for each of the source files. foreach(SRC_FILE ${PRECOMPILED_SRCS}) get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE) @@ -38,10 +50,13 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS}) add_custom_command( OUTPUT ${BC_FILE} COMMAND ${CLANG_EXECUTABLE} + ${PLATFORM_CLANG_OPTIONS} -DGANDIVA_IR - -std=c++11 -emit-llvm + -DNDEBUG # DCHECK macros not implemented in precompiled code + -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols + -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols -fno-use-cxa-atexit # Workaround for unresolved __dso_handle - -O3 -c ${ABSOLUTE_SRC} -o ${BC_FILE} + -emit-llvm -O3 -c ${ABSOLUTE_SRC} -o ${BC_FILE} ${ARROW_GANDIVA_PC_CXX_FLAGS} -I${CMAKE_SOURCE_DIR}/src DEPENDS ${SRC_FILE}) @@ -64,14 +79,20 @@ function(add_precompiled_unit_test REL_TEST_NAME) set(TEST_NAME "gandiva-precompiled-${TEST_NAME}") add_executable(${TEST_NAME} ${REL_TEST_NAME} ${ARGN}) - add_dependencies(gandiva-tests ${TEST_NAME}) target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/src) target_link_libraries(${TEST_NAME} PRIVATE ${ARROW_TEST_LINK_LIBS} ${RE2_LIBRARY} ) - 
target_compile_definitions(${TEST_NAME} PRIVATE GANDIVA_UNIT_TEST=1) - add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - set_property(TEST ${TEST_NAME} PROPERTY LABELS gandiva-tests {TEST_NAME}) + target_compile_definitions(${TEST_NAME} PRIVATE + GANDIVA_UNIT_TEST=1 + ARROW_STATIC + GANDIVA_STATIC) + set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}") + add_test(${TEST_NAME} ${TEST_PATH}) + set_property(TEST ${TEST_NAME} + APPEND PROPERTY + LABELS "unittest;gandiva-tests") + add_dependencies(gandiva-tests ${TEST_NAME}) endfunction(add_precompiled_unit_test REL_TEST_NAME) # testing diff --git a/cpp/src/gandiva/precompiled/epoch_time_point_test.cc b/cpp/src/gandiva/precompiled/epoch_time_point_test.cc index f489b7d..32cb9e8 100644 --- a/cpp/src/gandiva/precompiled/epoch_time_point_test.cc +++ b/cpp/src/gandiva/precompiled/epoch_time_point_test.cc @@ -15,36 +15,39 @@ // specific language governing permissions and limitations // under the License. -#include <time.h> +#include <ctime> #include <gtest/gtest.h> #include "./epoch_time_point.h" +#include "gandiva/precompiled/testing.h" #include "gandiva/precompiled/types.h" -namespace gandiva { +#include "gandiva/date_utils.h" -timestamp StringToTimestamp(const char* buf) { - struct tm tm; - strptime(buf, "%Y-%m-%d %H:%M:%S", &tm); - return timegm(&tm) * 1000; // to millis -} +namespace gandiva { TEST(TestEpochTimePoint, TestTm) { auto ts = StringToTimestamp("2015-05-07 10:20:34"); EpochTimePoint tp(ts); + struct tm* tm_ptr; +#if defined(_MSC_VER) + __time64_t tsec = ts / 1000; + tm_ptr = _gmtime64(&tsec); +#else struct tm tm; time_t tsec = ts / 1000; - gmtime_r(&tsec, &tm); - - EXPECT_EQ(tp.TmYear(), tm.tm_year); - EXPECT_EQ(tp.TmMon(), tm.tm_mon); - EXPECT_EQ(tp.TmYday(), tm.tm_yday); - EXPECT_EQ(tp.TmMday(), tm.tm_mday); - EXPECT_EQ(tp.TmWday(), tm.tm_wday); - EXPECT_EQ(tp.TmHour(), tm.tm_hour); - EXPECT_EQ(tp.TmMin(), tm.tm_min); - EXPECT_EQ(tp.TmSec(), tm.tm_sec); + tm_ptr = gmtime_r(&tsec, &tm); +#endif + + 
EXPECT_EQ(tp.TmYear(), tm_ptr->tm_year); + EXPECT_EQ(tp.TmMon(), tm_ptr->tm_mon); + EXPECT_EQ(tp.TmYday(), tm_ptr->tm_yday); + EXPECT_EQ(tp.TmMday(), tm_ptr->tm_mday); + EXPECT_EQ(tp.TmWday(), tm_ptr->tm_wday); + EXPECT_EQ(tp.TmHour(), tm_ptr->tm_hour); + EXPECT_EQ(tp.TmMin(), tm_ptr->tm_min); + EXPECT_EQ(tp.TmSec(), tm_ptr->tm_sec); } TEST(TestEpochTimePoint, TestAddYears) { diff --git a/cpp/src/gandiva/precompiled/extended_math_ops.cc b/cpp/src/gandiva/precompiled/extended_math_ops.cc index 1b7642c..b17ccd8 100644 --- a/cpp/src/gandiva/precompiled/extended_math_ops.cc +++ b/cpp/src/gandiva/precompiled/extended_math_ops.cc @@ -33,30 +33,40 @@ extern "C" { INNER(float64, OUT_TYPE) // Cubic root -#define CBRT(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE cbrt_##IN_TYPE(IN_TYPE in) { return static_cast<float64>(cbrtl(in)); } +#define CBRT(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE cbrt_##IN_TYPE(IN_TYPE in) { \ + return static_cast<float64>(cbrtl(static_cast<long double>(in))); \ + } ENUMERIC_TYPES_UNARY(CBRT, float64) // Exponent -#define EXP(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE exp_##IN_TYPE(IN_TYPE in) { return static_cast<float64>(expl(in)); } +#define EXP(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE exp_##IN_TYPE(IN_TYPE in) { \ + return static_cast<float64>(expl(static_cast<long double>(in))); \ + } ENUMERIC_TYPES_UNARY(EXP, float64) // log -#define LOG(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE log_##IN_TYPE(IN_TYPE in) { return static_cast<float64>(logl(in)); } +#define LOG(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE log_##IN_TYPE(IN_TYPE in) { \ + return static_cast<float64>(logl(static_cast<long double>(in))); \ + } ENUMERIC_TYPES_UNARY(LOG, float64) // log base 10 -#define LOG10(IN_TYPE, OUT_TYPE) \ - FORCE_INLINE \ - OUT_TYPE log10_##IN_TYPE(IN_TYPE in) { return static_cast<float64>(log10l(in)); } +#define LOG10(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + OUT_TYPE log10_##IN_TYPE(IN_TYPE in) { \ + return 
static_cast<float64>(log10l(static_cast<long double>(in))); \ + } + +#define LOGL(VALUE) static_cast<float64>(logl(static_cast<long double>(VALUE))) ENUMERIC_TYPES_UNARY(LOG10, float64) @@ -74,12 +84,12 @@ void set_error_for_logbase(int64_t execution_context, double base) { #define LOG_WITH_BASE(IN_TYPE1, IN_TYPE2, OUT_TYPE) \ FORCE_INLINE \ OUT_TYPE log_##IN_TYPE1##_##IN_TYPE2(int64 context, IN_TYPE1 base, IN_TYPE2 value) { \ - OUT_TYPE log_of_base = static_cast<float64>(logl(base)); \ + OUT_TYPE log_of_base = LOGL(base); \ if (log_of_base == 0) { \ set_error_for_logbase(context, static_cast<float64>(base)); \ return 0; \ } \ - return static_cast<float64>(logl(value) / logl(base)); \ + return LOGL(value) / LOGL(base); \ } LOG_WITH_BASE(int32, int32, float64) diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/gandiva/precompiled/testing.h similarity index 72% copy from cpp/src/gandiva/function_holder.h copy to cpp/src/gandiva/precompiled/testing.h index 4d007d1..3214eec 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/gandiva/precompiled/testing.h @@ -15,21 +15,23 @@ // specific language governing permissions and limitations // under the License. -#ifndef GANDIVA_FUNCTION_HOLDER_H -#define GANDIVA_FUNCTION_HOLDER_H +#pragma once -#include <memory> +#include <ctime> -namespace gandiva { +#include <gtest/gtest.h> -/// Holder for a function that can be invoked from LLVM. 
-class FunctionHolder { - public: - virtual ~FunctionHolder() = default; -}; +#include "arrow/util/logging.h" -using FunctionHolderPtr = std::shared_ptr<FunctionHolder>; +#include "gandiva/date_utils.h" +#include "gandiva/precompiled/types.h" -} // namespace gandiva +namespace gandiva { -#endif // GANDIVA_FUNCTION_HOLDER_H +timestamp StringToTimestamp(const char* buf) { + int64_t out = 0; + DCHECK(internal::ParseTimestamp(buf, "%Y-%m-%d %H:%M:%S", false, &out)); + return out * 1000; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 36ba9b3..b8f8069 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -18,16 +18,11 @@ #include <gtest/gtest.h> #include <time.h> #include "../execution_context.h" +#include "gandiva/precompiled/testing.h" #include "gandiva/precompiled/types.h" namespace gandiva { -timestamp StringToTimestamp(const char* buf) { - struct tm tm; - strptime(buf, "%Y-%m-%d %H:%M:%S", &tm); - return timegm(&tm) * 1000; // to millis -} - TEST(TestTime, TestCastDate) { ExecutionContext context; int64_t context_ptr = reinterpret_cast<int64_t>(&context); diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 8fc5b8c..7950fc7 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -36,6 +36,8 @@ Projector::Projector(std::unique_ptr<LLVMGenerator> llvm_generator, SchemaPtr sc output_fields_(output_fields), configuration_(configuration) {} +Projector::~Projector() {} + Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, std::shared_ptr<Projector>* projector) { return Projector::Make(schema, exprs, ConfigurationBuilder::DefaultConfiguration(), diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index c9d7271..58bac78 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -15,8 +15,7 @@ // specific language governing permissions 
and limitations // under the License. -#ifndef GANDIVA_EXPR_PROJECTOR_H -#define GANDIVA_EXPR_PROJECTOR_H +#pragma once #include <memory> #include <string> @@ -28,6 +27,7 @@ #include "gandiva/arrow.h" #include "gandiva/configuration.h" #include "gandiva/expression.h" +#include "gandiva/visibility.h" namespace gandiva { @@ -37,8 +37,12 @@ class LLVMGenerator; /// /// A projector is built for a specific schema and vector of expressions. /// Once the projector is built, it can be used to evaluate many row batches. -class Projector { +class GANDIVA_EXPORT Projector { public: + // Inline dtor will attempt to resolve the destructor for + // LLVMGenerator on MSVC, so we compile the dtor in the object code + ~Projector(); + /// Build a default projector for the given schema to evaluate /// the vector of expressions. /// @@ -99,5 +103,3 @@ class Projector { }; } // namespace gandiva - -#endif // GANDIVA_EXPR_PROJECTOR_H diff --git a/cpp/src/gandiva/regex_util.h b/cpp/src/gandiva/regex_util.h index 6a22af2..7ea7060 100644 --- a/cpp/src/gandiva/regex_util.h +++ b/cpp/src/gandiva/regex_util.h @@ -23,11 +23,12 @@ #include <string> #include "gandiva/arrow.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Utility class for converting sql patterns to pcre patterns. 
-class RegexUtil { +class GANDIVA_EXPORT RegexUtil { public: // Convert an sql pattern to a pcre pattern static Status SqlLikePatternToPcre(const std::string& like_pattern, char escape_char, diff --git a/cpp/src/gandiva/selection_vector.cc b/cpp/src/gandiva/selection_vector.cc index f89b80c..e643cec 100644 --- a/cpp/src/gandiva/selection_vector.cc +++ b/cpp/src/gandiva/selection_vector.cc @@ -22,6 +22,8 @@ #include <utility> #include <vector> +#include "arrow/util/bit-util.h" + #include "gandiva/selection_vector_impl.h" namespace gandiva { @@ -48,8 +50,18 @@ Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap uint64_t current_word = bitmap_64[bitmap_idx]; while (current_word != 0) { +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + // MSVC warns about negating an unsigned type. We suppress it for now uint64_t highest_only = current_word & -current_word; - int pos_in_word = __builtin_ctzl(highest_only); + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + int pos_in_word = arrow::BitUtil::CountTrailingZeros(highest_only); int64_t pos_in_bitmap = bitmap_idx * 64 + pos_in_word; if (pos_in_bitmap > max_bitmap_index) { diff --git a/cpp/src/gandiva/selection_vector.h b/cpp/src/gandiva/selection_vector.h index dcd2f6b..2e99417 100644 --- a/cpp/src/gandiva/selection_vector.h +++ b/cpp/src/gandiva/selection_vector.h @@ -24,12 +24,13 @@ #include "gandiva/arrow.h" #include "gandiva/logging.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Selection Vector : vector of indices in a row-batch for a selection, /// backed by an arrow-array. 
-class SelectionVector { +class GANDIVA_EXPORT SelectionVector { public: virtual ~SelectionVector() = default; diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc index 643b8c8..7867a95 100644 --- a/cpp/src/gandiva/tests/date_time_test.cc +++ b/cpp/src/gandiva/tests/date_time_test.cc @@ -57,7 +57,7 @@ int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_ given_ts.tm_min = min; given_ts.tm_sec = sec; - return (lround(difftime(mktime(&given_ts), base_line)) * 1000 + millis); + return (static_cast<int64_t>(difftime(mktime(&given_ts), base_line)) * 1000 + millis); } TEST_F(TestProjector, TestIsNull) { diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 5c32f50..ba0e632 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -86,10 +86,14 @@ TEST_F(TestProjector, TestProjectCache) { EXPECT_EQ(cached_projector, projector); // if configuration is different, should return a new projector. 
+ + // build a new path by replacing the first '/' with '//' + std::string alt_path(GANDIVA_BYTE_COMPILE_FILE_PATH); + auto pos = alt_path.find('/', 0); + EXPECT_NE(pos, std::string::npos); + alt_path.replace(pos, 1, "//"); auto other_configuration = - ConfigurationBuilder() - .set_byte_code_file_path("/" + std::string(GANDIVA_BYTE_COMPILE_FILE_PATH)) - .build(); + ConfigurationBuilder().set_byte_code_file_path(alt_path).build(); std::shared_ptr<Projector> should_be_new_projector2; status = Projector::Make(schema, {sum_expr, sub_expr}, other_configuration, &should_be_new_projector2); diff --git a/cpp/src/gandiva/to_date_holder.cc b/cpp/src/gandiva/to_date_holder.cc index 6e02a6a..824654f 100644 --- a/cpp/src/gandiva/to_date_holder.cc +++ b/cpp/src/gandiva/to_date_holder.cc @@ -18,7 +18,7 @@ #include <algorithm> #include <string> -#include "arrow/vendored/datetime/date.h" +#include "arrow/vendored/datetime.h" #include "gandiva/date_utils.h" #include "gandiva/execution_context.h" @@ -64,8 +64,7 @@ Status ToDateHolder::Make(const FunctionNode& node, Status ToDateHolder::Make(const std::string& sql_pattern, int32_t suppress_errors, std::shared_ptr<ToDateHolder>* holder) { std::shared_ptr<std::string> transformed_pattern; - Status status = DateUtils::ToInternalFormat(sql_pattern, &transformed_pattern); - ARROW_RETURN_NOT_OK(status); + ARROW_RETURN_NOT_OK(DateUtils::ToInternalFormat(sql_pattern, &transformed_pattern)); auto lholder = std::shared_ptr<ToDateHolder>( new ToDateHolder(*(transformed_pattern.get()), suppress_errors)); *holder = lholder; @@ -82,22 +81,14 @@ int64_t ToDateHolder::operator()(ExecutionContext* context, const std::string& d // Issues // 1. processes date that do not match the format. // 2. does not process time in format +08:00 (or) id. 
- struct tm result = {}; - char* ret = strptime(data.c_str(), pattern_.c_str(), &result); - if (ret == nullptr) { + int64_t seconds_since_epoch = 0; + if (!internal::ParseTimestamp(data.c_str(), pattern_.c_str(), true, + &seconds_since_epoch)) { return_error(context, data); return 0; } + *out_valid = true; - // ignore the time part - arrow::util::date::sys_seconds secs = - arrow::util::date::sys_days(arrow::util::date::year(result.tm_year + 1900) / - (result.tm_mon + 1) / result.tm_mday); - int64_t seconds_since_epoch = secs.time_since_epoch().count(); - if (seconds_since_epoch == 0) { - return_error(context, data); - return 0; - } return seconds_since_epoch * 1000; } diff --git a/cpp/src/gandiva/to_date_holder.h b/cpp/src/gandiva/to_date_holder.h index 91133cc..c0c5afb 100644 --- a/cpp/src/gandiva/to_date_holder.h +++ b/cpp/src/gandiva/to_date_holder.h @@ -27,11 +27,12 @@ #include "gandiva/execution_context.h" #include "gandiva/function_holder.h" #include "gandiva/node.h" +#include "gandiva/visibility.h" namespace gandiva { /// Function Holder for SQL 'to_date' -class ToDateHolder : public FunctionHolder { +class GANDIVA_EXPORT ToDateHolder : public FunctionHolder { public: ~ToDateHolder() override = default; diff --git a/cpp/src/gandiva/to_date_holder_test.cc b/cpp/src/gandiva/to_date_holder_test.cc index 2a207b2..0effffb 100644 --- a/cpp/src/gandiva/to_date_holder_test.cc +++ b/cpp/src/gandiva/to_date_holder_test.cc @@ -18,6 +18,8 @@ #include <memory> #include <vector> +#include "arrow/test-util.h" + #include "gandiva/execution_context.h" #include "gandiva/to_date_holder.h" #include "precompiled/epoch_time_point.h" @@ -37,57 +39,68 @@ class TestToDateHolder : public ::testing::Test { return FunctionNode("to_date_utf8_utf8_int32", {field, pattern_node, suppres_error_node}, arrow::int64()); } + + protected: + ExecutionContext execution_context_; }; TEST_F(TestToDateHolder, TestSimpleDateTime) { std::shared_ptr<ToDateHolder> to_date_holder; + 
ASSERT_OK(ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 1, &to_date_holder)); - auto status = ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 1, &to_date_holder); - EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01.11", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01.11", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); millis_since_epoch = - to_date(&execution_context, "1986-12-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-12-01 01:01:01 +0800", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); +#if 0 + // TODO : this fails parsing with date::parse and strptime on linux + millis_since_epoch = + to_date(&execution_context_, "1886-12-01 00:00:00", true, &out_valid); + EXPECT_EQ(out_valid, true); + EXPECT_EQ(millis_since_epoch, -2621894400000); +#endif + millis_since_epoch = - to_date(&execution_context, "1986-12-11 01:30:00", true, &out_valid); + to_date(&execution_context_, "1886-12-01 01:01:01", true, &out_valid); + EXPECT_EQ(millis_since_epoch, -2621894400000); + + millis_since_epoch = + to_date(&execution_context_, "1986-12-11 01:30:00", true, &out_valid); EXPECT_EQ(millis_since_epoch, 534643200000); } TEST_F(TestToDateHolder, TestSimpleDate) { std::shared_ptr<ToDateHolder> to_date_holder; + ASSERT_OK(ToDateHolder::Make("YYYY-MM-DD", 1, &to_date_holder)); - auto status = ToDateHolder::Make("YYYY-MM-DD", 1, &to_date_holder); - EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, 
"1986-12-01", true, &out_valid); + to_date(&execution_context_, "1986-12-01", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); - millis_since_epoch = to_date(&execution_context, "1986-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "1986-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, 533779200000); - millis_since_epoch = to_date(&execution_context, "1886-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "1886-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, -2621894400000); - millis_since_epoch = to_date(&execution_context, "2012-12-1", true, &out_valid); + millis_since_epoch = to_date(&execution_context_, "2012-12-1", true, &out_valid); EXPECT_EQ(millis_since_epoch, 1354320000000); // wrong month. should return 0 since we are suppresing errors. millis_since_epoch = - to_date(&execution_context, "1986-21-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-21-01 01:01:01 +0800", true, &out_valid); EXPECT_EQ(millis_since_epoch, 0); } @@ -96,22 +109,22 @@ TEST_F(TestToDateHolder, TestSimpleDateTimeError) { auto status = ToDateHolder::Make("YYYY-MM-DD HH:MI:SS", 0, &to_date_holder); EXPECT_EQ(status.ok(), true) << status.message(); - ExecutionContext execution_context; auto& to_date = *to_date_holder; bool out_valid; int64_t millis_since_epoch = - to_date(&execution_context, "1986-21-01 01:01:01 +0800", true, &out_valid); + to_date(&execution_context_, "1986-01-40 01:01:01 +0800", true, &out_valid); + EXPECT_EQ(0, millis_since_epoch); std::string expected_error = - "Error parsing value 1986-21-01 01:01:01 +0800 for given format"; - EXPECT_TRUE(execution_context.get_error().find(expected_error) != std::string::npos) + "Error parsing value 1986-01-40 01:01:01 +0800 for given format"; + EXPECT_TRUE(execution_context_.get_error().find(expected_error) != std::string::npos) << status.message(); // not valid should not return error - execution_context.Reset(); - 
millis_since_epoch = to_date(&execution_context, "nullptr", false, &out_valid); + execution_context_.Reset(); + millis_since_epoch = to_date(&execution_context_, "nullptr", false, &out_valid); EXPECT_EQ(millis_since_epoch, 0); - EXPECT_TRUE(execution_context.has_error() == false); + EXPECT_TRUE(execution_context_.has_error() == false); } TEST_F(TestToDateHolder, TestSimpleDateTimeMakeError) { diff --git a/cpp/src/gandiva/tree_expr_builder.h b/cpp/src/gandiva/tree_expr_builder.h index 3d60b5b..4b2789a 100644 --- a/cpp/src/gandiva/tree_expr_builder.h +++ b/cpp/src/gandiva/tree_expr_builder.h @@ -27,11 +27,12 @@ #include "gandiva/condition.h" #include "gandiva/decimal_scalar.h" #include "gandiva/expression.h" +#include "gandiva/visibility.h" namespace gandiva { /// \brief Tree Builder for a nested expression. -class TreeExprBuilder { +class GANDIVA_EXPORT TreeExprBuilder { public: /// \brief create a node on a literal. static NodePtr MakeLiteral(bool value); diff --git a/cpp/src/gandiva/value_validity_pair.h b/cpp/src/gandiva/value_validity_pair.h index 1bcd5d6..0de525d 100644 --- a/cpp/src/gandiva/value_validity_pair.h +++ b/cpp/src/gandiva/value_validity_pair.h @@ -21,11 +21,12 @@ #include <vector> #include "gandiva/gandiva_aliases.h" +#include "gandiva/visibility.h" namespace gandiva { /// Pair of vector/validities generated after decomposing an expression tree/subtree. -class ValueValidityPair { +class GANDIVA_EXPORT ValueValidityPair { public: ValueValidityPair(const DexVector& validity_exprs, DexPtr value_expr) : validity_exprs_(validity_exprs), value_expr_(value_expr) {} diff --git a/cpp/src/gandiva/function_holder.h b/cpp/src/gandiva/visibility.h similarity index 54% copy from cpp/src/gandiva/function_holder.h copy to cpp/src/gandiva/visibility.h index 4d007d1..450b305 100644 --- a/cpp/src/gandiva/function_holder.h +++ b/cpp/src/gandiva/visibility.h @@ -15,21 +15,34 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef GANDIVA_FUNCTION_HOLDER_H -#define GANDIVA_FUNCTION_HOLDER_H +#pragma once -#include <memory> +#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#else +#pragma GCC diagnostic ignored "-Wattributes" +#endif -namespace gandiva { +#ifdef GANDIVA_STATIC +#define GANDIVA_EXPORT +#elif defined(GANDIVA_EXPORTING) +#define GANDIVA_EXPORT __declspec(dllexport) +#else +#define GANDIVA_EXPORT __declspec(dllimport) +#endif -/// Holder for a function that can be invoked from LLVM. -class FunctionHolder { - public: - virtual ~FunctionHolder() = default; -}; +#define GANDIVA_NO_EXPORT +#else // Not Windows +#ifndef GANDIVA_EXPORT +#define GANDIVA_EXPORT __attribute__((visibility("default"))) +#endif +#ifndef GANDIVA_NO_EXPORT +#define GANDIVA_NO_EXPORT __attribute__((visibility("hidden"))) +#endif +#endif // Non-Windows -using FunctionHolderPtr = std::shared_ptr<FunctionHolder>; - -} // namespace gandiva - -#endif // GANDIVA_FUNCTION_HOLDER_H +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 00cd31e..e62a37b 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -27,7 +27,7 @@ BOOST_VERSION=1.67.0 BROTLI_VERSION=v0.6.0 CARES_VERSION=1.15.0 DOUBLE_CONVERSION_VERSION=v3.1.1 -FLATBUFFERS_VERSION=02a7807dd8d26f5668ffbbec0360dc107bbfabd5 +FLATBUFFERS_VERSION=v1.10.0 GBENCHMARK_VERSION=v1.4.1 GFLAGS_VERSION=v2.2.0 GLOG_VERSION=v0.3.5