This is an automated email from the ASF dual-hosted git repository. uwe pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 933b32b ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append 933b32b is described below commit 933b32b612ab0935dab6fc0f670765444f0ab510 Author: Kouhei Sutou <k...@clear-code.com> AuthorDate: Wed Apr 4 14:21:00 2018 +0200 ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append Because Append of other builders use valid_bytes not null_bytes. Author: Kouhei Sutou <k...@clear-code.com> Closes #1833 from kou/cpp-string-array-builder-append-valid-bytes and squashes the following commits: 47b2158 <Kouhei Sutou> Fix format 5c03f85 <Kouhei Sutou> Use valid_bytes API for StringBuilder::Append --- cpp/src/arrow/array-test.cc | 10 +++++----- cpp/src/arrow/builder.cc | 20 +++++++++++++------- cpp/src/arrow/builder.h | 3 ++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 308bbcd..0b4342a 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -991,13 +991,13 @@ TEST_F(TestStringBuilder, TestScalarAppend) { TEST_F(TestStringBuilder, TestAppendVector) { vector<string> strings = {"", "bb", "a", "", "ccc"}; - vector<uint8_t> is_null = {0, 0, 0, 1, 0}; + vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1}; int N = static_cast<int>(strings.size()); int reps = 1000; for (int j = 0; j < reps; ++j) { - ASSERT_OK(builder_->Append(strings, is_null.data())); + ASSERT_OK(builder_->Append(strings, valid_bytes.data())); } Done(); @@ -1008,9 +1008,7 @@ TEST_F(TestStringBuilder, TestAppendVector) { int32_t length; int32_t pos = 0; for (int i = 0; i < N * reps; ++i) { - if (is_null[i % N]) { - ASSERT_TRUE(result_->IsNull(i)); - } else { + if (valid_bytes[i % N]) { ASSERT_FALSE(result_->IsNull(i)); result_->GetValue(i, &length); ASSERT_EQ(pos, result_->value_offset(i)); @@ -1018,6 +1016,8 @@ TEST_F(TestStringBuilder, TestAppendVector) { ASSERT_EQ(strings[i % N], result_->GetString(i)); pos += length; + } else { + ASSERT_TRUE(result_->IsNull(i)); } } } diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index ec48656..a502e1f 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -1386,7 +1386,7 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const { StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {} Status StringBuilder::Append(const std::vector<std::string>& values, - uint8_t* null_bytes) { + const uint8_t* valid_bytes) { std::size_t total_length = std::accumulate( values.begin(), values.end(), 0ULL, [](uint64_t sum, const std::string& str) { return sum + str.size(); }); @@ -1394,16 +1394,22 @@ Status StringBuilder::Append(const std::vector<std::string>& values, RETURN_NOT_OK(value_data_builder_.Reserve(total_length)); RETURN_NOT_OK(offsets_builder_.Reserve(values.size())); - for (std::size_t i = 0; i < values.size(); ++i) { - RETURN_NOT_OK(AppendNextOffset()); - if (null_bytes[i]) { - UnsafeAppendToBitmap(false); - } else { + if (valid_bytes) { + for (std::size_t i = 0; i < values.size(); ++i) { + RETURN_NOT_OK(AppendNextOffset()); + if (valid_bytes[i]) { + RETURN_NOT_OK(value_data_builder_.Append( + reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size())); + } + } + } else { + for (std::size_t i = 0; i < values.size(); ++i) { + RETURN_NOT_OK(AppendNextOffset()); RETURN_NOT_OK(value_data_builder_.Append( reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size())); - UnsafeAppendToBitmap(true); } } + UnsafeAppendToBitmap(valid_bytes, values.size()); return Status::OK(); } diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index cdcee80..32cfdd4 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -718,7 +718,8 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder { using BinaryBuilder::Append; - Status Append(const std::vector<std::string>& values, uint8_t* null_bytes); + Status Append(const std::vector<std::string>& values, + const uint8_t* valid_bytes = NULLPTR); }; // ---------------------------------------------------------------------- -- To stop receiving notification emails like this one, please contact u...@apache.org.