Repository: parquet-cpp Updated Branches: refs/heads/master d0646659c -> cf31e6d1b
PARQUET-947: Account for Arrow library consolidation in ARROW-795, API changes in ARROW-782 Author: Wes McKinney <[email protected]> Closes #292 from wesm/PARQUET-947 and squashes the following commits: 2d68d5b [Wes McKinney] Fix typo 35feebc [Wes McKinney] Update to Arrow HEAD 7fa2b1b [Wes McKinney] Account for API changes in ARROW-782 8d6c50d [Wes McKinney] Update Arrow version 7b2016f [Wes McKinney] Remove arrow_io library after ARROW-795 Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/cf31e6d1 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/cf31e6d1 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/cf31e6d1 Branch: refs/heads/master Commit: cf31e6d1bb27b807bd742cfb33179668c5afb2f3 Parents: d064665 Author: Wes McKinney <[email protected]> Authored: Mon Apr 10 11:05:24 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Mon Apr 10 11:05:24 2017 -0400 ---------------------------------------------------------------------- CMakeLists.txt | 6 ++---- cmake_modules/FindArrow.cmake | 15 ++------------- cmake_modules/ThirdpartyToolchain.cmake | 25 +++---------------------- src/parquet/arrow/CMakeLists.txt | 3 --- src/parquet/arrow/parquet-arrow.pc.in | 2 +- src/parquet/arrow/reader.cc | 28 ++++++++++++++-------------- src/parquet/arrow/schema.cc | 18 +++++++++--------- src/parquet/arrow/writer.cc | 12 ++++++------ 8 files changed, 37 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index ee31424..5c3d91b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -481,12 +481,10 @@ endif() if ("${PARQUET_ARROW_LINKAGE}" STREQUAL "shared") set(ARROW_LINK_LIBS - arrow - arrow_io) + arrow) else() set(ARROW_LINK_LIBS - arrow_static - arrow_io_static) + arrow_static) endif() ############################################################# http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/FindArrow.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/FindArrow.cmake b/cmake_modules/FindArrow.cmake index 0a3e7e2..c3f835b 100644 --- a/cmake_modules/FindArrow.cmake +++ b/cmake_modules/FindArrow.cmake @@ -46,30 +46,21 @@ find_library(ARROW_LIB_PATH NAMES arrow ${ARROW_SEARCH_LIB_PATH} NO_DEFAULT_PATH) -find_library(ARROW_IO_LIB_PATH NAMES arrow_io - PATHS - ${ARROW_SEARCH_LIB_PATH} - NO_DEFAULT_PATH) - if (ARROW_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ARROW_LIB_PATH)) set(ARROW_FOUND TRUE) set(ARROW_HEADER_NAME arrow/api.h) set(ARROW_HEADER ${ARROW_INCLUDE_DIR}/${ARROW_HEADER_NAME}) set(ARROW_LIB_NAME libarrow) - set(ARROW_IO_LIB_NAME libarrow_io) get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY) set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.a) set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_IO_STATIC_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}.a) - set(ARROW_IO_SHARED_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) if (NOT Arrow_FIND_QUIETLY) if (PARQUET_MINIMAL_DEPENDENCY) - message(STATUS "Found the Arrow core and IO header: ${ARROW_HEADER}") + message(STATUS "Found the Arrow header: ${ARROW_HEADER}") else () - message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}") - message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}") + message(STATUS "Found the Arrow library: ${ARROW_LIB_PATH}") endif () endif () else () @@ -92,6 +83,4 @@ mark_as_advanced( ARROW_LIBS ARROW_STATIC_LIB ARROW_SHARED_LIB - ARROW_IO_STATIC_LIB - ARROW_IO_SHARED_LIB ) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/cmake_modules/ThirdpartyToolchain.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index 0cb3ef7..1294f46 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0") # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd") -set(ARROW_VERSION "15b874e47e3975c5240290ec7ed105bf8d1b56bc") +set(ARROW_VERSION "c2f28cd07413e262fa0b741c286f86d5c7277c56") # find boost headers and libs set(Boost_DEBUG TRUE) @@ -359,36 +359,25 @@ endif() ## Apache Arrow pkg_check_modules(ARROW arrow) -pkg_check_modules(ARROW_IO arrow-io) -if (ARROW_FOUND AND ARROW_IO_FOUND) +if (ARROW_FOUND) set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS}) if (COMMAND pkg_get_variable) pkg_get_variable(ARROW_ABI_VERSION arrow abi_version) - pkg_get_variable(ARROW_IO_ABI_VERSION arrow-io abi_version) else() set(ARROW_ABI_VERSION "") - set(ARROW_IO_ABI_VERSION "") endif() if (ARROW_ABI_VERSION STREQUAL "") set(ARROW_SHARED_LIB_SUFFIX "") else() set(ARROW_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}") endif() - if (ARROW_IO_ABI_VERSION STREQUAL "") - set(ARROW_IO_SHARED_LIB_SUFFIX "") - else() - set(ARROW_IO_SHARED_LIB_SUFFIX ".${ARROW_ABI_VERSION}") - endif() set(ARROW_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow) - set(ARROW_IO_LIB_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_io) set(ARROW_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_SHARED_LIB_SUFFIX}) set(ARROW_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(ARROW_IO_SHARED_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}${ARROW_IO_SHARED_LIB_SUFFIX}) - set(ARROW_IO_STATIC_LIB ${ARROW_LIBDIR}/${ARROW_IO_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) else() find_package(Arrow) endif() @@ -397,9 +386,7 @@ if (NOT ARROW_FOUND) set(ARROW_HOME "${ARROW_PREFIX}") set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include") set(ARROW_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}") - set(ARROW_IO_SHARED_LIB "${ARROW_PREFIX}/lib/libarrow_io${CMAKE_SHARED_LIBRARY_SUFFIX}") set(ARROW_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow.a") - set(ARROW_IO_STATIC_LIB "${ARROW_PREFIX}/lib/libarrow_io.a") set(ARROW_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} @@ -412,7 +399,7 @@ if (NOT ARROW_FOUND) ExternalProject_Add(arrow_ep GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG ${ARROW_VERSION} - BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_IO_SHARED_LIB}" "${ARROW_IO_STATIC_LIB}" "${ARROW_STATIC_LIB}" + BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_STATIC_LIB}" # With CMake 3.7.0 there is a SOURCE_SUBDIR argument which we can use # to specify that the CMakeLists.txt of Arrow is located in cpp/ # @@ -434,16 +421,10 @@ endif() include_directories(SYSTEM ${ARROW_INCLUDE_DIR}) add_library(arrow SHARED IMPORTED) set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_SHARED_LIB}) -add_library(arrow_io SHARED IMPORTED) -set_target_properties(arrow_io PROPERTIES IMPORTED_LOCATION ${ARROW_IO_SHARED_LIB}) add_library(arrow_static STATIC IMPORTED) set_target_properties(arrow_static PROPERTIES IMPORTED_LOCATION ${ARROW_STATIC_LIB}) -add_library(arrow_io_static STATIC IMPORTED) -set_target_properties(arrow_io_static PROPERTIES IMPORTED_LOCATION ${ARROW_IO_STATIC_LIB}) if (ARROW_VENDORED) add_dependencies(arrow arrow_ep) - add_dependencies(arrow_io arrow_ep) add_dependencies(arrow_static arrow_ep) - add_dependencies(arrow_io_static arrow_ep) endif() http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/CMakeLists.txt b/src/parquet/arrow/CMakeLists.txt index 8bc6af7..c2fd901 100644 --- a/src/parquet/arrow/CMakeLists.txt +++ b/src/parquet/arrow/CMakeLists.txt @@ -31,7 +31,6 @@ add_library(parquet_arrow_objlib OBJECT # Add dependencies so ExternalProjects are built beforehand add_dependencies(parquet_arrow_objlib arrow_static - arrow_io_static parquet_static) # SET_TARGET_PROPERTIES(parquet_arrow PROPERTIES LINKER_LANGUAGE CXX) @@ -47,7 +46,6 @@ if (PARQUET_BUILD_SHARED) SOVERSION "${PARQUET_SO_VERSION}") target_link_libraries(parquet_arrow_shared arrow - arrow_io parquet_shared) if (PARQUET_RPATH_ORIGIN) if (APPLE) @@ -77,7 +75,6 @@ if (PARQUET_BUILD_STATIC) OUTPUT_NAME "parquet_arrow") target_link_libraries(parquet_arrow_static arrow_static - arrow_io_static parquet_static) install(TARGETS parquet_arrow_static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/parquet-arrow.pc.in ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/parquet-arrow.pc.in b/src/parquet/arrow/parquet-arrow.pc.in index 511e0b6..20056bc 100644 --- a/src/parquet/arrow/parquet-arrow.pc.in +++ b/src/parquet/arrow/parquet-arrow.pc.in @@ -24,4 +24,4 @@ Description: Apache Parquet Apache arrow adapter provides Arrow IPC modules for Version: @PARQUET_VERSION@ Libs: -L${libdir} -lparquet_arrow Cflags: -I${includedir} -Requires: parquet arrow-io +Requires: parquet arrow http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 823aea9..2ca9207 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -724,23 +724,23 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels, std::vector<bool> nullable; std::vector<std::shared_ptr<::arrow::Int32Builder>> offset_builders; std::vector<std::shared_ptr<::arrow::BooleanBuilder>> valid_bits_builders; - nullable.push_back(current_field->nullable); - while (current_field->type->num_children() > 0) { - if (current_field->type->num_children() > 1) { + nullable.push_back(current_field->nullable()); + while (current_field->type()->num_children() > 0) { + if (current_field->type()->num_children() > 1) { return Status::NotImplemented( "Fields with more than one child are not supported."); } else { - if (current_field->type->type != ::arrow::Type::LIST) { + if (current_field->type()->id() != ::arrow::Type::LIST) { return Status::NotImplemented( "Currently only nesting with Lists is supported."); } - current_field = current_field->type->child(0); + current_field = current_field->type()->child(0); } offset_builders.emplace_back( std::make_shared<::arrow::Int32Builder>(pool_, ::arrow::int32())); valid_bits_builders.emplace_back( std::make_shared<::arrow::BooleanBuilder>(pool_, ::arrow::boolean())); - nullable.push_back(current_field->nullable); + nullable.push_back(current_field->nullable()); } int64_t list_depth = offset_builders.size(); @@ -860,12 +860,12 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr<Array> ::arrow::BitUtil::CeilByte(valid_bits_idx_) / 8, false)); } *out = std::make_shared<ArrayType<ArrowType>>( - field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_); + field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_); // Relase the ownership as the Buffer is now part of a new Array valid_bits_buffer_.reset(); } else { *out = std::make_shared<ArrayType<ArrowType>>( - field_->type, valid_bits_idx_, data_buffer_); + field_->type(), valid_bits_idx_, data_buffer_); } // Relase the ownership as the Buffer is now part of a new Array data_buffer_.reset(); @@ -934,12 +934,12 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>( valid_bits_buffer_ = valid_bits_buffer; } *out = std::make_shared<BooleanArray>( - field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_); + field_->type(), valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_); // Relase the ownership data_buffer_.reset(); valid_bits_buffer_.reset(); } else { - *out = std::make_shared<BooleanArray>(field_->type, valid_bits_idx_, data_buffer_); + *out = std::make_shared<BooleanArray>(field_->type(), valid_bits_idx_, data_buffer_); data_buffer_.reset(); } @@ -1028,7 +1028,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out return Status::OK(); } - switch (field_->type->type) { + switch (field_->type()->id()) { TYPED_BATCH_CASE(BOOL, ::arrow::BooleanType, BooleanType) TYPED_BATCH_CASE(UINT8, ::arrow::UInt8Type, Int32Type) TYPED_BATCH_CASE(INT8, ::arrow::Int8Type, Int32Type) @@ -1045,8 +1045,8 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out TYPED_BATCH_CASE(BINARY, ::arrow::BinaryType, ByteArrayType) case ::arrow::Type::TIMESTAMP: { ::arrow::TimestampType* timestamp_type = - static_cast<::arrow::TimestampType*>(field_->type.get()); - switch (timestamp_type->unit) { + static_cast<::arrow::TimestampType*>(field_->type().get()); + switch (timestamp_type->unit()) { case ::arrow::TimeUnit::MILLI: return TypedReadBatch<::arrow::TimestampType, Int64Type>(batch_size, out); break; @@ -1060,7 +1060,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out } default: std::stringstream ss; - ss << "No support for reading columns of type " << field_->type->ToString(); + ss << "No support for reading columns of type " << field_->type()->ToString(); return Status::NotImplemented(ss.str()); } } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/schema.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc index f0d05fc..76b7f77 100644 --- a/src/parquet/arrow/schema.cc +++ b/src/parquet/arrow/schema.cc @@ -327,10 +327,10 @@ Status FieldToNode(const std::shared_ptr<Field>& field, LogicalType::type logical_type = LogicalType::NONE; ParquetType::type type; Repetition::type repetition = - field->nullable ? Repetition::OPTIONAL : Repetition::REQUIRED; + field->nullable() ? Repetition::OPTIONAL : Repetition::REQUIRED; int length = -1; - switch (field->type->type) { + switch (field->type()->id()) { // TODO: // case ArrowType::NA: // break; @@ -393,8 +393,8 @@ Status FieldToNode(const std::shared_ptr<Field>& field, logical_type = LogicalType::DATE; break; case ArrowType::TIMESTAMP: { - auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type.get()); - if (timestamp_type->unit != ::arrow::TimestampType::Unit::MILLI) { + auto timestamp_type = static_cast<::arrow::TimestampType*>(field->type().get()); + if (timestamp_type->unit() != ::arrow::TimestampType::Unit::MILLI) { return Status::NotImplemented( "Other timestamp units than millisecond are not yet support with parquet."); } @@ -410,18 +410,18 @@ Status FieldToNode(const std::shared_ptr<Field>& field, logical_type = LogicalType::TIME_MICROS; break; case ArrowType::STRUCT: { - auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type); - return StructToNode(struct_type, field->name, field->nullable, properties, out); + auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type()); + return StructToNode(struct_type, field->name(), field->nullable(), properties, out); } break; case ArrowType::LIST: { - auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type); - return ListToNode(list_type, field->name, field->nullable, properties, out); + auto list_type = std::static_pointer_cast<::arrow::ListType>(field->type()); + return ListToNode(list_type, field->name(), field->nullable(), properties, out); } break; default: // TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR return Status::NotImplemented("unhandled type"); } - *out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length); + *out = PrimitiveNode::Make(field->name(), repetition, type, logical_type, length); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cf31e6d1/src/parquet/arrow/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index a92537a..5933937 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -61,7 +61,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor { array_offsets_.push_back(array.offset()); \ valid_bitmaps_.push_back(array.null_bitmap_data()); \ null_counts_.push_back(array.null_count()); \ - values_type_ = array.type_enum(); \ + values_type_ = array.type_id(); \ values_array_ = &array; \ return Status::OK(); \ } @@ -125,15 +125,15 @@ class LevelBuilder : public ::arrow::ArrayVisitor { // Walk downwards to extract nullability std::shared_ptr<Field> current_field = field; - nullable_.push_back(current_field->nullable); - while (current_field->type->num_children() > 0) { - if (current_field->type->num_children() > 1) { + nullable_.push_back(current_field->nullable()); + while (current_field->type()->num_children() > 0) { + if (current_field->type()->num_children() > 1) { return Status::NotImplemented( "Fields with more than one child are not supported."); } else { - current_field = current_field->type->child(0); + current_field = current_field->type()->child(0); } - nullable_.push_back(current_field->nullable); + nullable_.push_back(current_field->nullable()); } // Generate the levels.
