Repository: parquet-cpp
Updated Branches:
  refs/heads/master d7d01d765 -> 02a9f0dbf
PARQUET-919: Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default Author: Wes McKinney <[email protected]> Closes #272 from wesm/PARQUET-919 and squashes the following commits: 8ece0e8 [Wes McKinney] Skip generated Thrift files in cpplint 4ad23ce [Wes McKinney] Update Arrow version 9006dd1 [Wes McKinney] Account for ARROW-683 changes, but make no functional changes. Set PARQUET_ARROW=on by default Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/02a9f0db Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/02a9f0db Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/02a9f0db Branch: refs/heads/master Commit: 02a9f0dbff0aa59da9c000148e866081419a4348 Parents: d7d01d7 Author: Wes McKinney <[email protected]> Authored: Thu Mar 23 09:46:53 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Thu Mar 23 09:46:53 2017 -0400 ---------------------------------------------------------------------- CMakeLists.txt | 4 ++-- cmake_modules/ThirdpartyToolchain.cmake | 2 +- src/parquet/arrow/arrow-reader-writer-test.cc | 6 +++--- src/parquet/arrow/arrow-schema-test.cc | 4 ++-- src/parquet/arrow/reader.cc | 6 +++--- src/parquet/arrow/schema.cc | 8 ++++++-- src/parquet/arrow/test-util.h | 2 +- src/parquet/arrow/writer.cc | 8 ++++---- 8 files changed, 22 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dc7866..75f855b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") ON) option(PARQUET_ARROW "Build the Arrow support" - OFF) + ON) option(PARQUET_ZLIB_VENDORED "Build our own zlib (some libz.a aren't 
configured for static linking)" ON) @@ -388,7 +388,7 @@ if (UNIX) --verbose=2 --linelength=90 --filter=-whitespace/comments,-readability/todo,-build/header_guard,-runtime/references,-readability/check,-build/c++11,-build/include_order - `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`) + `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/parquet_/g'`) endif (UNIX) ############################################################ http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/cmake_modules/ThirdpartyToolchain.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index ab25199..1e43308 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.10.0") # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd") -set(ARROW_VERSION "fa8d27f314b7c21c611d1c5caaa9b32ae0cb2b06") +set(ARROW_VERSION "e8f6a492d30d32cd67fe3a537b3aec4cbae566c9") # find boost headers and libs # Find shared Boost libraries. 
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-reader-writer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index ae3002d..4598cab 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -148,13 +148,13 @@ struct test_traits<::arrow::TimestampType> { const int64_t test_traits<::arrow::TimestampType>::value(14695634030000); template <> -struct test_traits<::arrow::DateType> { +struct test_traits<::arrow::Date64Type> { static constexpr ParquetType::type parquet_enum = ParquetType::INT32; static constexpr LogicalType::type logical_enum = LogicalType::DATE; static int64_t const value; }; -const int64_t test_traits<::arrow::DateType>::value(14688000000000); +const int64_t test_traits<::arrow::Date64Type>::value(14688000000000); template <> struct test_traits<::arrow::FloatType> { @@ -317,7 +317,7 @@ class TestParquetIO : public ::testing::Test { typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type, ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, - ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::DateType, ::arrow::FloatType, + ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::Date64Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType, ::arrow::BinaryType> TestTypes; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/arrow-schema-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index 8db792f..83100d3 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -100,7 +100,7 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) { 
parquet_fields.push_back(PrimitiveNode::Make( "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE)); - arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false)); + arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false)); parquet_fields.push_back( PrimitiveNode::Make("timestamp96", Repetition::REQUIRED, ParquetType::INT96)); @@ -397,7 +397,7 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) { parquet_fields.push_back(PrimitiveNode::Make( "date", Repetition::REQUIRED, ParquetType::INT32, LogicalType::DATE)); - arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date(), false)); + arrow_fields.push_back(std::make_shared<Field>("date", ::arrow::date64(), false)); parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED, ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS)); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 37683c1..d1bf38e 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -369,7 +369,7 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::TimestampType, Int96Typ } template <> -Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::DateType, Int32Type>( +Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>( TypedColumnReader<Int32Type>* reader, int64_t values_to_read, int64_t* levels_read) { RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data()); @@ -480,7 +480,7 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::TimestampType, Int96Type>( } template <> -Status ColumnReader::Impl::ReadNullableBatch<::arrow::DateType, Int32Type>( +Status ColumnReader::Impl::ReadNullableBatch<::arrow::Date64Type, 
Int32Type>( TypedColumnReader<Int32Type>* reader, int16_t* def_levels, int16_t* rep_levels, int64_t values_to_read, int64_t* levels_read, int64_t* values_read) { RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); @@ -883,7 +883,7 @@ Status ColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr<Array>* out TYPED_BATCH_CASE(INT16, ::arrow::Int16Type, Int32Type) TYPED_BATCH_CASE(UINT32, ::arrow::UInt32Type, Int32Type) TYPED_BATCH_CASE(INT32, ::arrow::Int32Type, Int32Type) - TYPED_BATCH_CASE(DATE, ::arrow::DateType, Int32Type) + TYPED_BATCH_CASE(DATE64, ::arrow::Date64Type, Int32Type) TYPED_BATCH_CASE(UINT64, ::arrow::UInt64Type, Int64Type) TYPED_BATCH_CASE(INT64, ::arrow::Int64Type, Int64Type) TYPED_BATCH_CASE(FLOAT, ::arrow::FloatType, FloatType) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/schema.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/schema.cc b/src/parquet/arrow/schema.cc index 0c336d9..ed989cb 100644 --- a/src/parquet/arrow/schema.cc +++ b/src/parquet/arrow/schema.cc @@ -108,7 +108,7 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) { *out = ::arrow::uint32(); break; case LogicalType::DATE: - *out = ::arrow::date(); + *out = ::arrow::date64(); break; case LogicalType::DECIMAL: *out = MakeDecimalType(node); @@ -378,7 +378,11 @@ Status FieldToNode(const std::shared_ptr<Field>& field, case ArrowType::BINARY: type = ParquetType::BYTE_ARRAY; break; - case ArrowType::DATE: + case ArrowType::DATE32: + type = ParquetType::INT32; + logical_type = LogicalType::DATE; + break; + case ArrowType::DATE64: type = ParquetType::INT32; logical_type = LogicalType::DATE; break; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 
07f1f28..1cf1376 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -34,7 +34,7 @@ template <typename ArrowType> using is_arrow_int = std::is_integral<typename ArrowType::c_type>; template <typename ArrowType> -using is_arrow_date = std::is_same<ArrowType, ::arrow::DateType>; +using is_arrow_date = std::is_same<ArrowType, ::arrow::Date64Type>; template <typename ArrowType> using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/02a9f0db/src/parquet/arrow/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index 6e927cd..f2ee734 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -80,7 +80,7 @@ class LevelBuilder : public ::arrow::ArrayVisitor { PRIMITIVE_VISIT(Double) PRIMITIVE_VISIT(String) PRIMITIVE_VISIT(Binary) - PRIMITIVE_VISIT(Date) + PRIMITIVE_VISIT(Date64) PRIMITIVE_VISIT(Time) PRIMITIVE_VISIT(Timestamp) PRIMITIVE_VISIT(Interval) @@ -332,7 +332,7 @@ Status FileWriter::Impl::WriteNonNullableBatch(TypedColumnWriter<ParquetType>* w } template <> -Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::DateType>( +Status FileWriter::Impl::WriteNonNullableBatch<Int32Type, ::arrow::Date64Type>( TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, const int64_t* data_ptr) { RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t))); @@ -384,7 +384,7 @@ Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter<ParquetType>* writ } template <> -Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::DateType>( +Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::Date64Type>( TypedColumnWriter<Int32Type>* writer, int64_t num_values, int64_t num_levels, const int16_t* def_levels, const int16_t* rep_levels, const uint8_t* 
valid_bits, int64_t valid_bits_offset, const int64_t* data_ptr) { @@ -555,7 +555,7 @@ Status FileWriter::Impl::WriteColumnChunk(const Array& data) { WRITE_BATCH_CASE(INT16, Int16Type, Int32Type) WRITE_BATCH_CASE(UINT16, UInt16Type, Int32Type) WRITE_BATCH_CASE(INT32, Int32Type, Int32Type) - WRITE_BATCH_CASE(DATE, DateType, Int32Type) + WRITE_BATCH_CASE(DATE64, Date64Type, Int32Type) WRITE_BATCH_CASE(INT64, Int64Type, Int64Type) WRITE_BATCH_CASE(TIMESTAMP, TimestampType, Int64Type) WRITE_BATCH_CASE(UINT64, UInt64Type, Int64Type)
