Repository: parquet-cpp Updated Branches: refs/heads/master 1580d56d4 -> a48bfaa7e
PARQUET-933: Account for API changes in ARROW-728 Requires https://github.com/apache/arrow/pull/457 Author: Wes McKinney <[email protected]> Closes #280 from wesm/PARQUET-933 and squashes the following commits: 5a4fdeb [Wes McKinney] Use EP_CXX_FLAGS d23acce [Wes McKinney] Upgrade to gbenchmark 1.1.0 8cb1191 [Wes McKinney] Fix benchmarks 29c48c5 [Wes McKinney] Update Arrow version e1af3f0 [Wes McKinney] Account for API changes in ARROW-728 Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/a48bfaa7 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/a48bfaa7 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/a48bfaa7 Branch: refs/heads/master Commit: a48bfaa7e9c649b120eb3c88e0234695042d5d4e Parents: 1580d56 Author: Wes McKinney <[email protected]> Authored: Thu Mar 30 17:32:00 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Thu Mar 30 17:32:00 2017 -0400 ---------------------------------------------------------------------- cmake_modules/ThirdpartyToolchain.cmake | 9 ++++-- .../arrow/arrow-reader-writer-benchmark.cc | 2 +- src/parquet/arrow/arrow-reader-writer-test.cc | 6 ++-- src/parquet/arrow/reader.cc | 3 +- src/parquet/arrow/test-util.h | 4 +-- src/parquet/column/column-io-benchmark.cc | 32 ++++++++++---------- src/parquet/column/level-benchmark.cc | 22 +++++++------- src/parquet/encoding-benchmark.cc | 24 +++++++-------- 8 files changed, 53 insertions(+), 49 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/cmake_modules/ThirdpartyToolchain.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index b4340d0..ecd9bca 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -16,13 +16,13 @@ # under the License. set(GTEST_VERSION "1.7.0") -set(GBENCHMARK_VERSION "1.0.0") +set(GBENCHMARK_VERSION "1.1.0") set(SNAPPY_VERSION "1.1.3") set(THRIFT_VERSION "0.10.0") # Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd") -set(ARROW_VERSION "c7947dc2d08a0a2295016d34db201cc38a38360c") +set(ARROW_VERSION "15b874e47e3975c5240290ec7ed105bf8d1b56bc") # find boost headers and libs # Find shared Boost libraries. @@ -311,6 +311,7 @@ endif() if(PARQUET_BUILD_BENCHMARKS) add_custom_target(runbenchmark ctest -L benchmark) + if("$ENV{GBENCHMARK_HOME}" STREQUAL "") set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install") set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") @@ -319,7 +320,11 @@ if(PARQUET_BUILD_BENCHMARKS) set(GBENCHMARK_CMAKE_ARGS "-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}" + "-DBENCHMARK_ENABLE_TESTING=OFF" "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}") + if (APPLE) + set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON") + endif() if (CMAKE_VERSION VERSION_GREATER "3.2") # BUILD_BYPRODUCTS is a 3.2+ feature ExternalProject_Add(gbenchmark_ep http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/arrow-reader-writer-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc index c4a4777..7d8c107 100644 --- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc +++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc @@ -98,7 +98,7 @@ std::shared_ptr<::arrow::Table> TableFromVector( std::vector<std::shared_ptr<::arrow::Field>>({field})); auto column = std::make_shared<::arrow::Column>(field, array); return std::make_shared<::arrow::Table>( - "table", schema, std::vector<std::shared_ptr<::arrow::Column>>({column})); + schema, std::vector<std::shared_ptr<::arrow::Column>>({column})); } template <bool nullable, typename ParquetType> http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/arrow-reader-writer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 4598cab..3b232f9 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -773,7 +773,7 @@ void MakeDoubleTable(int num_columns, int num_rows, std::shared_ptr<Table>* out) fields[i] = column->field(); } auto schema = std::make_shared<::arrow::Schema>(fields); - *out = std::make_shared<Table>("schema", schema, columns); + *out = std::make_shared<Table>(schema, columns); } void DoTableRoundtrip(const std::shared_ptr<Table>& table, int num_threads, @@ -810,7 +810,7 @@ TEST(TestArrowReadWrite, MultithreadedRead) { std::shared_ptr<Table> result; DoTableRoundtrip(table, num_threads, {}, &result); - ASSERT_TRUE(table->Equals(result)); + ASSERT_TRUE(table->Equals(*result)); } TEST(TestArrowReadWrite, ReadColumnSubset) { @@ -833,7 +833,7 @@ TEST(TestArrowReadWrite, ReadColumnSubset) { } auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields); - auto expected = std::make_shared<Table>("schema", ex_schema, ex_columns); + Table expected(ex_schema, ex_columns); ASSERT_TRUE(result->Equals(expected)); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index d1bf38e..53600b4 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -210,7 +210,6 @@ Status FileReader::Impl::ReadTable( const std::vector<int>& indices, std::shared_ptr<Table>* table) { auto descr = reader_->metadata()->schema(); - const std::string& name = descr->name(); std::shared_ptr<::arrow::Schema> schema; RETURN_NOT_OK(FromParquetSchema(descr, indices, &schema)); @@ -233,7 +232,7 @@ Status FileReader::Impl::ReadTable( RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc)); } - *table = std::make_shared<Table>(name, schema, columns); + *table = std::make_shared<Table>(schema, columns); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 1cf1376..2cfc60a 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -272,7 +272,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable( std::vector<std::shared_ptr<::arrow::Column>> columns({column}); std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()}); auto schema = std::make_shared<::arrow::Schema>(fields); - return std::make_shared<::arrow::Table>("table", schema, columns); + return std::make_shared<::arrow::Table>(schema, columns); } template <typename T> @@ -300,7 +300,7 @@ void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) { std::shared_ptr<Array> expected_array; EXPECT_OK(builder.Finish(&expected_array)); - EXPECT_TRUE(result->Equals(expected_array)); + EXPECT_TRUE(result->Equals(*expected_array)); } } // namespace arrow http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/column/column-io-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/column-io-benchmark.cc b/src/parquet/column/column-io-benchmark.cc index fb491b9..24afab2 100644 --- a/src/parquet/column/column-io-benchmark.cc +++ b/src/parquet/column/column-io-benchmark.cc @@ -45,22 +45,22 @@ std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) { } void SetBytesProcessed(::benchmark::State& state, Repetition::type repetition) { - int64_t bytes_processed = state.iterations() * state.range_x() * sizeof(int64_t); + int64_t bytes_processed = state.iterations() * state.range(0) * sizeof(int64_t); if (repetition != Repetition::REQUIRED) { - bytes_processed += state.iterations() * state.range_x() * sizeof(int16_t); + bytes_processed += state.iterations() * state.range(0) * sizeof(int16_t); } if (repetition == Repetition::REPEATED) { - bytes_processed += state.iterations() * state.range_x() * sizeof(int16_t); + bytes_processed += state.iterations() * state.range(0) * sizeof(int16_t); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t)); } template <Repetition::type repetition> static void BM_WriteInt64Column(::benchmark::State& state) { format::ColumnChunk thrift_metadata; - std::vector<int64_t> values(state.range_x(), 128); - std::vector<int16_t> definition_levels(state.range_x(), 1); - std::vector<int16_t> repetition_levels(state.range_x(), 0); + std::vector<int64_t> values(state.range(0), 128); + std::vector<int16_t> definition_levels(state.range(0), 1); + std::vector<int16_t> repetition_levels(state.range(0), 0); std::shared_ptr<ColumnDescriptor> schema = Int64Schema(repetition); std::shared_ptr<WriterProperties> properties = default_writer_properties(); auto metadata = ColumnChunkMetaDataBuilder::Make( @@ -69,7 +69,7 @@ static void BM_WriteInt64Column(::benchmark::State& state) { while (state.KeepRunning()) { InMemoryOutputStream stream; std::unique_ptr<Int64Writer> writer = BuildWriter( - state.range_x(), &stream, metadata.get(), schema.get(), properties.get()); + state.range(0), &stream, metadata.get(), schema.get(), properties.get()); writer->WriteBatch( values.size(), definition_levels.data(), repetition_levels.data(), values.data()); writer->Close(); @@ -94,9 +94,9 @@ std::unique_ptr<Int64Reader> BuildReader( template <Repetition::type repetition> static void BM_ReadInt64Column(::benchmark::State& state) { format::ColumnChunk thrift_metadata; - std::vector<int64_t> values(state.range_x(), 128); - std::vector<int16_t> definition_levels(state.range_x(), 1); - std::vector<int16_t> repetition_levels(state.range_x(), 0); + std::vector<int64_t> values(state.range(0), 128); + std::vector<int16_t> definition_levels(state.range(0), 1); + std::vector<int16_t> repetition_levels(state.range(0), 0); std::shared_ptr<ColumnDescriptor> schema = Int64Schema(repetition); std::shared_ptr<WriterProperties> properties = default_writer_properties(); auto metadata = ColumnChunkMetaDataBuilder::Make( @@ -104,17 +104,17 @@ static void BM_ReadInt64Column(::benchmark::State& state) { InMemoryOutputStream stream; std::unique_ptr<Int64Writer> writer = BuildWriter( - state.range_x(), &stream, metadata.get(), schema.get(), properties.get()); + state.range(0), &stream, metadata.get(), schema.get(), properties.get()); writer->WriteBatch( values.size(), definition_levels.data(), repetition_levels.data(), values.data()); writer->Close(); std::shared_ptr<Buffer> src = stream.GetBuffer(); - std::vector<int64_t> values_out(state.range_y()); - std::vector<int16_t> definition_levels_out(state.range_y()); - std::vector<int16_t> repetition_levels_out(state.range_y()); + std::vector<int64_t> values_out(state.range(1)); + std::vector<int16_t> definition_levels_out(state.range(1)); + std::vector<int16_t> repetition_levels_out(state.range(1)); while (state.KeepRunning()) { - std::unique_ptr<Int64Reader> reader = BuildReader(src, state.range_y(), schema.get()); + std::unique_ptr<Int64Reader> reader = BuildReader(src, state.range(1), schema.get()); int64_t values_read = 0; for (size_t i = 0; i < values.size(); i += values_read) { reader->ReadBatch(values_out.size(), definition_levels_out.data(), http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/column/level-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/level-benchmark.cc b/src/parquet/column/level-benchmark.cc index 036108f..34c7218 100644 --- a/src/parquet/column/level-benchmark.cc +++ b/src/parquet/column/level-benchmark.cc @@ -25,10 +25,10 @@ namespace parquet { namespace benchmark { static void BM_RleEncoding(::benchmark::State& state) { - std::vector<int16_t> levels(state.range_x(), 0); + std::vector<int16_t> levels(state.range(0), 0); int64_t n = 0; - std::generate(levels.begin(), levels.end(), - [&state, &n] { return (n++ % state.range_y()) == 0; }); + std::generate( + levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); auto buffer_rle = std::make_shared<PoolBuffer>(); @@ -40,18 +40,18 @@ static void BM_RleEncoding(::benchmark::State& state) { buffer_rle->mutable_data(), buffer_rle->size()); level_encoder.Encode(levels.size(), levels.data()); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t)); - state.SetItemsProcessed(state.iterations() * state.range_x()); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t)); + state.SetItemsProcessed(state.iterations() * state.range(0)); } BENCHMARK(BM_RleEncoding)->RangePair(1024, 65536, 1, 16); static void BM_RleDecoding(::benchmark::State& state) { LevelEncoder level_encoder; - std::vector<int16_t> levels(state.range_x(), 0); + std::vector<int16_t> levels(state.range(0), 0); int64_t n = 0; - std::generate(levels.begin(), levels.end(), - [&state, &n] { return (n++ % state.range_y()) == 0; }); + std::generate( + levels.begin(), levels.end(), [&state, &n] { return (n++ % state.range(1)) == 0; }); int16_t max_level = 1; int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, levels.size()); auto buffer_rle = std::make_shared<PoolBuffer>(); @@ -64,11 +64,11 @@ static void BM_RleDecoding(::benchmark::State& state) { while (state.KeepRunning()) { LevelDecoder level_decoder; level_decoder.SetData(Encoding::RLE, max_level, levels.size(), buffer_rle->data()); - level_decoder.Decode(state.range_x(), levels.data()); + level_decoder.Decode(state.range(0), levels.data()); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int16_t)); - state.SetItemsProcessed(state.iterations() * state.range_x()); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int16_t)); + state.SetItemsProcessed(state.iterations() * state.range(0)); } BENCHMARK(BM_RleDecoding)->RangePair(1024, 65536, 1, 16); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/a48bfaa7/src/parquet/encoding-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encoding-benchmark.cc b/src/parquet/encoding-benchmark.cc index 8ea684a..1e93ba7 100644 --- a/src/parquet/encoding-benchmark.cc +++ b/src/parquet/encoding-benchmark.cc @@ -38,21 +38,21 @@ std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) { } static void BM_PlainEncodingBoolean(::benchmark::State& state) { - std::vector<bool> values(state.range_x(), 64); + std::vector<bool> values(state.range(0), 64); PlainEncoder<BooleanType> encoder(nullptr); while (state.KeepRunning()) { encoder.Put(values, values.size()); encoder.FlushValues(); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); } BENCHMARK(BM_PlainEncodingBoolean)->Range(1024, 65536); static void BM_PlainDecodingBoolean(::benchmark::State& state) { - std::vector<bool> values(state.range_x(), 64); - bool* output = new bool[state.range_x()]; + std::vector<bool> values(state.range(0), 64); + bool* output = new bool[state.range(0)]; PlainEncoder<BooleanType> encoder(nullptr); encoder.Put(values, values.size()); std::shared_ptr<Buffer> buf = encoder.FlushValues(); @@ -63,27 +63,27 @@ static void BM_PlainDecodingBoolean(::benchmark::State& state) { decoder.Decode(output, values.size()); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); delete[] output; } BENCHMARK(BM_PlainDecodingBoolean)->Range(1024, 65536); static void BM_PlainEncodingInt64(::benchmark::State& state) { - std::vector<int64_t> values(state.range_x(), 64); + std::vector<int64_t> values(state.range(0), 64); PlainEncoder<Int64Type> encoder(nullptr); while (state.KeepRunning()) { encoder.Put(values.data(), values.size()); encoder.FlushValues(); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536); static void BM_PlainDecodingInt64(::benchmark::State& state) { - std::vector<int64_t> values(state.range_x(), 64); + std::vector<int64_t> values(state.range(0), 64); PlainEncoder<Int64Type> encoder(nullptr); encoder.Put(values.data(), values.size()); std::shared_ptr<Buffer> buf = encoder.FlushValues(); @@ -93,7 +93,7 @@ static void BM_PlainDecodingInt64(::benchmark::State& state) { decoder.SetData(values.size(), buf->data(), buf->size()); decoder.Decode(values.data(), values.size()); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); } BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536); @@ -133,14 +133,14 @@ static void DecodeDict( decoder.Decode(values.data(), num_values); } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(T)); + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T)); } static void BM_DictDecodingInt64_repeats(::benchmark::State& state) { typedef Int64Type Type; typedef typename Type::c_type T; - std::vector<T> values(state.range_x(), 64); + std::vector<T> values(state.range(0), 64); DecodeDict<Type>(values, state); } @@ -150,7 +150,7 @@ static void BM_DictDecodingInt64_literals(::benchmark::State& state) { typedef Int64Type Type; typedef typename Type::c_type T; - std::vector<T> values(state.range_x()); + std::vector<T> values(state.range(0)); for (size_t i = 0; i < values.size(); ++i) { values[i] = i; }
