This is an automated email from the ASF dual-hosted git repository. pcmoritz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 5874af5 ARROW-3765: [Gandiva] Segfault when the validity bitmap has not been allocated 5874af5 is described below commit 5874af553f035244713b687b50e57dce81204433 Author: suquark <suqu...@gmail.com> AuthorDate: Thu Nov 15 22:10:27 2018 -0800 ARROW-3765: [Gandiva] Segfault when the validity bitmap has not been allocated https://issues.apache.org/jira/browse/ARROW-3765 Author: suquark <suqu...@gmail.com> Closes #2967 from suquark/gandiva and squashes the following commits: 6d09068d0 <suquark> lint 4b3ea9d32 <suquark> lint 76b7e7f1e <suquark> fix bug efff64a4c <suquark> combine tests to reduce build time 5e4dda518 <suquark> lint b509b0573 <suquark> rename test 4e2528bdb <suquark> lint bdf08f9ff <suquark> fix bugs & add new tests de2061330 <suquark> Gandiva null validity buffer support. --- cpp/src/gandiva/annotator.cc | 12 ++++++---- cpp/src/gandiva/bitmap_accumulator.h | 4 +++- cpp/src/gandiva/tests/filter_test.cc | 46 ++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc index 0fe9fc8..3c8585c 100644 --- a/cpp/src/gandiva/annotator.cc +++ b/cpp/src/gandiva/annotator.cc @@ -59,11 +59,13 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, EvalBatch* eval_batch) { int buffer_idx = 0; - // TODO: - // - validity is optional - - uint8_t* validity_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data()); - eval_batch->SetBuffer(desc.validity_idx(), validity_buf); + // The validity buffer is optional. Use nullptr if it does not have one. + if (array_data.buffers[buffer_idx]) { + uint8_t* validity_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data()); + eval_batch->SetBuffer(desc.validity_idx(), validity_buf); + } else { + eval_batch->SetBuffer(desc.validity_idx(), nullptr); + } ++buffer_idx; if (desc.HasOffsetsIdx()) { diff --git a/cpp/src/gandiva/bitmap_accumulator.h b/cpp/src/gandiva/bitmap_accumulator.h index 31b6609..157405d 100644 --- a/cpp/src/gandiva/bitmap_accumulator.h +++ b/cpp/src/gandiva/bitmap_accumulator.h @@ -20,6 +20,7 @@ #include <vector> +#include "arrow/util/macros.h" #include "gandiva/dex.h" #include "gandiva/dex_visitor.h" #include "gandiva/eval_batch.h" @@ -36,7 +37,8 @@ class BitMapAccumulator : public DexDefaultVisitor { void Visit(const VectorReadValidityDex& dex) { int idx = dex.ValidityIdx(); auto bitmap = eval_batch_.GetBuffer(idx); - src_maps_.push_back(bitmap); + // The bitmap could be null. Ignore it in this case. + if (bitmap != NULLPTR) src_maps_.push_back(bitmap); } void Visit(const LocalBitMapValidityDex& dex) { diff --git a/cpp/src/gandiva/tests/filter_test.cc b/cpp/src/gandiva/tests/filter_test.cc index f95cdcc..f63899a 100644 --- a/cpp/src/gandiva/tests/filter_test.cc +++ b/cpp/src/gandiva/tests/filter_test.cc @@ -290,4 +290,50 @@ TEST_F(TestFilter, TestSimpleSVInt32) { EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); } +TEST_F(TestFilter, TestNullValidityBuffer) { + // schema for input fields + auto field0 = field("f0", int32()); + auto field1 = field("f1", int32()); + auto schema = arrow::schema({field0, field1}); + + // Build condition f0 + f1 < 10 + auto node_f0 = TreeExprBuilder::MakeField(field0); + auto node_f1 = TreeExprBuilder::MakeField(field1); + auto sum_func = + TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32()); + auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10); + auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10}, + arrow::boolean()); + auto condition = TreeExprBuilder::MakeCondition(less_than_10); + + std::shared_ptr<Filter> filter; + Status status = Filter::Make(schema, condition, &filter); + EXPECT_TRUE(status.ok()); + + // Create a row-batch with some sample data + int num_records = 5; + + auto array_ = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true}); + // Create an array without a validity buffer. + auto array0 = + std::make_shared<arrow::Int32Array>(5, array_->data()->buffers[1], nullptr, 0); + auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, true, true}); + // expected output (indices for which condition matches) + auto exp = MakeArrowArrayUint16({0, 4}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); + + std::shared_ptr<SelectionVector> selection_vector; + status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); + EXPECT_TRUE(status.ok()); + + // Evaluate expression + status = filter->Evaluate(*in_batch, selection_vector); + EXPECT_TRUE(status.ok()); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); +} + } // namespace gandiva