Repository: parquet-cpp Updated Branches: refs/heads/master aba7c374c -> 82515fead
PARQUET-752: Account for upstream Arrow API changes As soon as ARROW-261/317 are merged, I'll update the thirdparty git SHA so we can get a green build Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #180 from wesm/PARQUET-752 and squashes the following commits: 0085d92 [Wes McKinney] Fix benchmark code for API changes. Remove conda builds e2ee9b3 [Wes McKinney] Update thirdparty build directions now that Arrow doesn't have the old scripts 92f6c35 [Wes McKinney] Bump thirdparty to arrow HEAD 3d49b50 [Wes McKinney] Fix for ARROW-317 d68d9d6 [Wes McKinney] Account for upstream API changes in ARROW-261 Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/82515fea Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/82515fea Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/82515fea Branch: refs/heads/master Commit: 82515feadd593482e509d2e6931cda29aba66cb0 Parents: aba7c37 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Tue Oct 18 08:09:33 2016 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Tue Oct 18 08:09:33 2016 -0400 ---------------------------------------------------------------------- .travis.yml | 16 ----- ci/travis_conda_build.sh | 46 ------------- .../arrow/arrow-reader-writer-benchmark.cc | 3 +- src/parquet/arrow/arrow-reader-writer-test.cc | 36 ++++++---- src/parquet/arrow/reader.cc | 6 +- src/parquet/arrow/test-util.h | 72 ++++++++++---------- src/parquet/arrow/writer.cc | 13 ++-- src/parquet/column/writer.cc | 4 +- thirdparty/build_thirdparty.sh | 20 +++--- thirdparty/versions.sh | 2 +- 10 files changed, 84 insertions(+), 134 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index 6dc994e..5ca6de4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,22 +38,6 @@ matrix: before_install: - mkdir $TRAVIS_BUILD_DIR/parquet-build - pushd $TRAVIS_BUILD_DIR/parquet-build - - compiler: gcc - env: PARQUET_TEST_GROUP=packaging - os: linux - before_script: - - export CC="gcc-4.9" - - export CXX="g++-4.9" - script: - - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh - - os: osx - env: PARQUET_TEST_GROUP=packaging - compiler: clang - addons: - before_script: - before_install: - script: - - $TRAVIS_BUILD_DIR/ci/travis_conda_build.sh language: cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/ci/travis_conda_build.sh ---------------------------------------------------------------------- diff --git a/ci/travis_conda_build.sh b/ci/travis_conda_build.sh deleted file mode 100755 index 4d9c03d..0000000 --- a/ci/travis_conda_build.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. See accompanying LICENSE file. - -set -e - -if [ $TRAVIS_OS_NAME == "linux" ]; then - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh" -else - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh" -fi - -wget -O miniconda.sh $MINICONDA_URL -MINICONDA=$HOME/miniconda -bash miniconda.sh -b -p $MINICONDA -export PATH="$MINICONDA/bin:$PATH" - -conda update -y -q conda -conda install -y -q conda-build -conda info -a - -conda config --set show_channel_urls yes -conda config --add channels conda-forge -conda config --add channels apache - -conda install --yes jinja2 anaconda-client - -cd $TRAVIS_BUILD_DIR - -conda build conda.recipe - -CONDA_PACKAGE=`conda build --output conda.recipe | grep bz2` - -if [ $TRAVIS_BRANCH == "master" ] && [ $TRAVIS_PULL_REQUEST == "false" ]; then - anaconda --token $ANACONDA_TOKEN upload $CONDA_PACKAGE --user apache --channel dev; -fi http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc index 9ce5f96..bbaefaa 100644 --- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc +++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc @@ -94,7 +94,8 @@ std::shared_ptr<::arrow::Table> TableFromVector( } else { builder.Append(vec.data(), vec.size(), nullptr); } - std::shared_ptr<::arrow::Array> array = builder.Finish(); + std::shared_ptr<::arrow::Array> array; + builder.Finish(&array); auto field = std::make_shared<::arrow::Field>("column", type, nullable); auto schema = std::make_shared<::arrow::Schema>( std::vector<std::shared_ptr<::arrow::Field>>({field})); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/arrow-reader-writer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index b1f1c52..1f28e5c 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -264,7 +264,8 @@ typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8 TYPED_TEST_CASE(TestParquetIO, TestTypes); TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) { - auto values = NonNullArray<TypeParam>(SMALL_SIZE); + std::shared_ptr<Array> values; + ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values)); std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED); this->WriteFlatColumn(schema, values); @@ -273,7 +274,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredWrite) { } TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) { - auto values = NonNullArray<TypeParam>(SMALL_SIZE); + std::shared_ptr<Array> values; + ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values)); std::shared_ptr<Table> table = MakeSimpleTable(values, false); this->sink_ = std::make_shared<InMemoryOutputStream>(); ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(), @@ -291,7 +293,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) { TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) { // This also tests max_definition_level = 1 - auto values = NullableArray<TypeParam>(SMALL_SIZE, 10); + std::shared_ptr<Array> values; + ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values)); std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL); this->WriteFlatColumn(schema, values); @@ -301,7 +304,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) { TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) { // This also tests max_definition_level = 1 - std::shared_ptr<Array> values = NullableArray<TypeParam>(SMALL_SIZE, 10); + std::shared_ptr<Array> values; + ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values)); std::shared_ptr<Table> table = MakeSimpleTable(values, true); this->sink_ = std::make_shared<InMemoryOutputStream>(); ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(), @@ -311,7 +315,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) { } TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) { - auto values = NonNullArray<TypeParam>(SMALL_SIZE); + std::shared_ptr<Array> values; + ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values)); int64_t chunk_size = values->length() / 4; std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::REQUIRED); @@ -327,7 +332,8 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) { } TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) { - auto values = NonNullArray<TypeParam>(LARGE_SIZE); + std::shared_ptr<Array> values; + ASSERT_OK(NonNullArray<TypeParam>(LARGE_SIZE, &values)); std::shared_ptr<Table> table = MakeSimpleTable(values, false); this->sink_ = std::make_shared<InMemoryOutputStream>(); ASSERT_OK_NO_THROW(WriteFlatTable( @@ -338,7 +344,8 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWrite) { TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) { int64_t chunk_size = SMALL_SIZE / 4; - auto values = NullableArray<TypeParam>(SMALL_SIZE, 10); + std::shared_ptr<Array> values; + ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, 10, &values)); std::shared_ptr<GroupNode> schema = this->MakeSchema(Repetition::OPTIONAL); FileWriter writer(::arrow::default_memory_pool(), this->MakeWriter(schema)); @@ -354,7 +361,8 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) { TYPED_TEST(TestParquetIO, SingleColumnTableOptionalChunkedWrite) { // This also tests max_definition_level = 1 - auto values = NullableArray<TypeParam>(LARGE_SIZE, 100); + std::shared_ptr<Array> values; + ASSERT_OK(NullableArray<TypeParam>(LARGE_SIZE, 100, &values)); std::shared_ptr<Table> table = MakeSimpleTable(values, true); this->sink_ = std::make_shared<InMemoryOutputStream>(); ASSERT_OK_NO_THROW(WriteFlatTable(table.get(), ::arrow::default_memory_pool(), @@ -367,8 +375,8 @@ using TestUInt32ParquetIO = TestParquetIO<::arrow::UInt32Type>; TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) { // This also tests max_definition_level = 1 - std::shared_ptr<PrimitiveArray> values = - NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100); + std::shared_ptr<Array> values; + ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &values)); std::shared_ptr<Table> table = MakeSimpleTable(values, true); // Parquet 2.0 roundtrip should yield an uint32_t column again @@ -384,8 +392,12 @@ TEST_F(TestUInt32ParquetIO, Parquet_2_0_Compability) { TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) { // This also tests max_definition_level = 1 - std::shared_ptr<PrimitiveArray> values = - NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100); + std::shared_ptr<Array> arr; + ASSERT_OK(NullableArray<::arrow::UInt32Type>(LARGE_SIZE, 100, &arr)); + + std::shared_ptr<::arrow::UInt32Array> values = + std::dynamic_pointer_cast<::arrow::UInt32Array>(arr); + std::shared_ptr<Table> table = MakeSimpleTable(values, true); // Parquet 1.0 returns an int64_t column as there is no way to tell a Parquet 1.0 http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 0e9f255..c581997 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -304,8 +304,7 @@ Status FlatColumnReader::Impl::TypedReadBatch( } if (!column_reader_->HasNext()) { NextRowGroup(); } } - *out = builder.Finish(); - return Status::OK(); + return builder.Finish(out); } template <> @@ -347,8 +346,7 @@ Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType } if (!column_reader_->HasNext()) { NextRowGroup(); } } - *out = builder.Finish(); - return Status::OK(); + return builder.Finish(out); } #define TYPED_BATCH_CASE(ENUM, ArrowType, ParquetType) \ http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index deac9f7..92798ff 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -23,9 +23,11 @@ #include "arrow/types/string.h" namespace parquet { - namespace arrow { +using ::arrow::Array; +using ::arrow::Status; + template <typename ArrowType> using is_arrow_float = std::is_floating_point<typename ArrowType::c_type>; @@ -36,56 +38,52 @@ template <typename ArrowType> using is_arrow_string = std::is_same<ArrowType, ::arrow::StringType>; template <class ArrowType> -typename std::enable_if<is_arrow_float<ArrowType>::value, - std::shared_ptr<::arrow::PrimitiveArray>>::type -NonNullArray(size_t size) { +typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NonNullArray( + size_t size, std::shared_ptr<Array>* out) { std::vector<typename ArrowType::c_type> values; ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values); ::arrow::NumericBuilder<ArrowType> builder( ::arrow::default_memory_pool(), std::make_shared<ArrowType>()); builder.Append(values.data(), values.size()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } template <class ArrowType> -typename std::enable_if<is_arrow_int<ArrowType>::value, - std::shared_ptr<::arrow::PrimitiveArray>>::type -NonNullArray(size_t size) { +typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NonNullArray( + size_t size, std::shared_ptr<Array>* out) { std::vector<typename ArrowType::c_type> values; ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values); ::arrow::NumericBuilder<ArrowType> builder( ::arrow::default_memory_pool(), std::make_shared<ArrowType>()); builder.Append(values.data(), values.size()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } template <class ArrowType> -typename std::enable_if<is_arrow_string<ArrowType>::value, - std::shared_ptr<::arrow::StringArray>>::type -NonNullArray(size_t size) { +typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NonNullArray( + size_t size, std::shared_ptr<Array>* out) { ::arrow::StringBuilder builder( ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>()); for (size_t i = 0; i < size; i++) { builder.Append("test-string"); } - return std::static_pointer_cast<::arrow::StringArray>(builder.Finish()); + return builder.Finish(out); } template <> -std::shared_ptr<::arrow::PrimitiveArray> NonNullArray<::arrow::BooleanType>(size_t size) { +Status NonNullArray<::arrow::BooleanType>(size_t size, std::shared_ptr<Array>* out) { std::vector<uint8_t> values; ::arrow::test::randint<uint8_t>(size, 0, 1, &values); ::arrow::BooleanBuilder builder( ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>()); builder.Append(values.data(), values.size()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } // This helper function only supports (size/2) nulls. template <typename ArrowType> -typename std::enable_if<is_arrow_float<ArrowType>::value, - std::shared_ptr<::arrow::PrimitiveArray>>::type -NullableArray(size_t size, size_t num_nulls) { +typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NullableArray( + size_t size, size_t num_nulls, std::shared_ptr<Array>* out) { std::vector<typename ArrowType::c_type> values; ::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values); std::vector<uint8_t> valid_bytes(size, 1); @@ -97,14 +95,13 @@ NullableArray(size_t size, size_t num_nulls) { ::arrow::NumericBuilder<ArrowType> builder( ::arrow::default_memory_pool(), std::make_shared<ArrowType>()); builder.Append(values.data(), values.size(), valid_bytes.data()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } // This helper function only supports (size/2) nulls. template <typename ArrowType> -typename std::enable_if<is_arrow_int<ArrowType>::value, - std::shared_ptr<::arrow::PrimitiveArray>>::type -NullableArray(size_t size, size_t num_nulls) { +typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NullableArray( + size_t size, size_t num_nulls, std::shared_ptr<Array>* out) { std::vector<typename ArrowType::c_type> values; ::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values); std::vector<uint8_t> valid_bytes(size, 1); @@ -116,14 +113,13 @@ NullableArray(size_t size, size_t num_nulls) { ::arrow::NumericBuilder<ArrowType> builder( ::arrow::default_memory_pool(), std::make_shared<ArrowType>()); builder.Append(values.data(), values.size(), valid_bytes.data()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } // This helper function only supports (size/2) nulls yet. template <typename ArrowType> -typename std::enable_if<is_arrow_string<ArrowType>::value, - std::shared_ptr<::arrow::StringArray>>::type -NullableArray(size_t size, size_t num_nulls) { +typename std::enable_if<is_arrow_string<ArrowType>::value, Status>::type NullableArray( + size_t size, size_t num_nulls, std::shared_ptr<::arrow::Array>* out) { std::vector<uint8_t> valid_bytes(size, 1); for (size_t i = 0; i < num_nulls; i++) { @@ -135,13 +131,13 @@ NullableArray(size_t size, size_t num_nulls) { for (size_t i = 0; i < size; i++) { builder.Append("test-string"); } - return std::static_pointer_cast<::arrow::StringArray>(builder.Finish()); + return builder.Finish(out); } // This helper function only supports (size/2) nulls yet. template <> -std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>( - size_t size, size_t num_nulls) { +Status NullableArray<::arrow::BooleanType>( + size_t size, size_t num_nulls, std::shared_ptr<Array>* out) { std::vector<uint8_t> values; ::arrow::test::randint<uint8_t>(size, 0, 1, &values); std::vector<uint8_t> valid_bytes(size, 1); @@ -153,17 +149,17 @@ std::shared_ptr<::arrow::PrimitiveArray> NullableArray<::arrow::BooleanType>( ::arrow::BooleanBuilder builder( ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>()); builder.Append(values.data(), values.size(), valid_bytes.data()); - return std::static_pointer_cast<::arrow::PrimitiveArray>(builder.Finish()); + return builder.Finish(out); } -std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name, - const std::shared_ptr<::arrow::Array>& array, bool nullable) { +std::shared_ptr<::arrow::Column> MakeColumn( + const std::string& name, const std::shared_ptr<Array>& array, bool nullable) { auto field = std::make_shared<::arrow::Field>(name, array->type(), nullable); return std::make_shared<::arrow::Column>(field, array); } std::shared_ptr<::arrow::Table> MakeSimpleTable( - const std::shared_ptr<::arrow::Array>& values, bool nullable) { + const std::shared_ptr<Array>& values, bool nullable) { std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, nullable); std::vector<std::shared_ptr<::arrow::Column>> columns({column}); std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()}); @@ -172,7 +168,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable( } template <typename T> -void ExpectArray(T* expected, ::arrow::Array* result) { +void ExpectArray(T* expected, Array* result) { auto p_array = static_cast<::arrow::PrimitiveArray*>(result); for (int i = 0; i < result->length(); i++) { EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->data()->data())[i]); @@ -180,7 +176,7 @@ void ExpectArray(T* expected, ::arrow::Array* result) { } template <typename ArrowType> -void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) { +void ExpectArray(typename ArrowType::c_type* expected, Array* result) { ::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result); for (int64_t i = 0; i < result->length(); i++) { EXPECT_EQ(expected[i], @@ -189,11 +185,13 @@ void ExpectArray(typename ArrowType::c_type* expected, ::arrow::Array* result) { } template <> -void ExpectArray<::arrow::BooleanType>(uint8_t* expected, ::arrow::Array* result) { +void ExpectArray<::arrow::BooleanType>(uint8_t* expected, Array* result) { ::arrow::BooleanBuilder builder( ::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>()); builder.Append(expected, result->length()); - std::shared_ptr<::arrow::Array> expected_array = builder.Finish(); + + std::shared_ptr<Array> expected_array; + EXPECT_OK(builder.Finish(&expected_array)); EXPECT_TRUE(result->Equals(expected_array)); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/arrow/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc index ff3707b..e75d4b7 100644 --- a/src/parquet/arrow/writer.cc +++ b/src/parquet/arrow/writer.cc @@ -45,6 +45,8 @@ using parquet::schema::GroupNode; namespace parquet { namespace arrow { +namespace BitUtil = ::arrow::BitUtil; + class FileWriter::Impl { public: Impl(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer); @@ -176,7 +178,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>( if (writer->descr()->max_definition_level() == 0) { // no nulls, just dump the data for (int64_t i = 0; i < length; i++) { - buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i); + buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i); } PARQUET_CATCH_NOT_OK(writer->WriteBatch(length, nullptr, nullptr, buffer_ptr)); } else if (writer->descr()->max_definition_level() == 1) { @@ -186,7 +188,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>( if (data->null_count() == 0) { std::fill(def_levels_ptr, def_levels_ptr + length, 1); for (int64_t i = 0; i < length; i++) { - buffer_ptr[i] = ::arrow::util::get_bit(data_ptr, offset + i); + buffer_ptr[i] = BitUtil::GetBit(data_ptr, offset + i); } // TODO(PARQUET-644): write boolean values as a packed bitmap PARQUET_CATCH_NOT_OK( @@ -198,7 +200,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>( def_levels_ptr[i] = 0; } else { def_levels_ptr[i] = 1; - buffer_ptr[buffer_idx++] = ::arrow::util::get_bit(data_ptr, offset + i); + buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i); } } PARQUET_CATCH_NOT_OK( @@ -260,9 +262,8 @@ Status FileWriter::Impl::WriteFlatColumnChunk( DCHECK((offset + length) <= data->length()); RETURN_NOT_OK(data_buffer_.Resize(length * sizeof(ByteArray))); auto buffer_ptr = reinterpret_cast<ByteArray*>(data_buffer_.mutable_data()); - auto values = std::dynamic_pointer_cast<PrimitiveArray>(data->values()); - auto data_ptr = reinterpret_cast<const uint8_t*>(values->data()->data()); - DCHECK(values != nullptr); + auto data_ptr = reinterpret_cast<const uint8_t*>(data->data()->data()); + DCHECK(data_ptr != nullptr); auto writer = reinterpret_cast<TypedColumnWriter<ByteArrayType>*>(column_writer); if (writer->descr()->max_definition_level() > 0) { RETURN_NOT_OK(def_levels_buffer_.Resize(length * sizeof(int16_t))); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/src/parquet/column/writer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc index b917945..d1c3fe2 100644 --- a/src/parquet/column/writer.cc +++ b/src/parquet/column/writer.cc @@ -353,8 +353,8 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values, } template <typename DType> -void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, - const int16_t* def_levels, const int16_t* rep_levels, const T* values) { +void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def_levels, + const int16_t* rep_levels, const T* values) { // We check for DataPage limits only after we have inserted the values. If a user // writes a large number of values, the DataPage size can be much above the limit. // The purpose of this chunking is to bound this. Even if a user writes large number http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/build_thirdparty.sh ---------------------------------------------------------------------- diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh index 6ebcd96..4a91516 100755 --- a/thirdparty/build_thirdparty.sh +++ b/thirdparty/build_thirdparty.sh @@ -75,15 +75,6 @@ fi STANDARD_DARWIN_FLAGS="-std=c++11 -stdlib=libc++" -# build arrow -if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then - cd $TP_DIR/$ARROW_BASEDIR/cpp - source ./setup_build_env.sh - cmake . -DARROW_PARQUET=OFF -DARROW_HDFS=ON -DCMAKE_INSTALL_PREFIX=$PREFIX - make -j$PARALLEL install - # : -fi - # build googletest GOOGLETEST_ERROR="failed for googletest!" if [ -n "$F_ALL" -o -n "$F_GTEST" ]; then @@ -142,5 +133,16 @@ if [ -n "$F_ALL" -o -n "$F_THRIFT" ]; then fi fi +# build arrow +if [ -n "$F_ALL" -o -n "$F_ARROW" ]; then + cd $TP_DIR/$ARROW_BASEDIR/cpp + cmake -DARROW_BUILD_TESTS=off \ + -DARROW_HDFS=ON \ + -DCMAKE_INSTALL_PREFIX=$PREFIX \ + . + make -j$PARALLEL install + # : +fi + echo "---------------------" echo "Thirdparty dependencies built and installed into $PREFIX successfully" http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/82515fea/thirdparty/versions.sh ---------------------------------------------------------------------- diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh index f058b79..87fe6b6 100755 --- a/thirdparty/versions.sh +++ b/thirdparty/versions.sh @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -ARROW_VERSION="7fb4d24a35269db99fa112c0512d4a32c372dd74" +ARROW_VERSION="676c32ccea6274c75b2750453c1ddbc5f645c037" ARROW_URL="https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz" ARROW_BASEDIR="arrow-${ARROW_VERSION}"