This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5874af5  ARROW-3765: [Gandiva] Segfault when the validity bitmap has 
not been allocated
5874af5 is described below

commit 5874af553f035244713b687b50e57dce81204433
Author: suquark <suqu...@gmail.com>
AuthorDate: Thu Nov 15 22:10:27 2018 -0800

    ARROW-3765: [Gandiva] Segfault when the validity bitmap has not been 
allocated
    
    https://issues.apache.org/jira/browse/ARROW-3765
    
    Author: suquark <suqu...@gmail.com>
    
    Closes #2967 from suquark/gandiva and squashes the following commits:
    
    6d09068d0 <suquark> lint
    4b3ea9d32 <suquark> lint
    76b7e7f1e <suquark> fix bug
    efff64a4c <suquark> combine tests to reduce build time
    5e4dda518 <suquark> lint
    b509b0573 <suquark> rename test
    4e2528bdb <suquark> lint
    bdf08f9ff <suquark> fix bugs & add new tests
    de2061330 <suquark> Gandiva null validity buffer support.
---
 cpp/src/gandiva/annotator.cc         | 12 ++++++----
 cpp/src/gandiva/bitmap_accumulator.h |  4 +++-
 cpp/src/gandiva/tests/filter_test.cc | 46 ++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc
index 0fe9fc8..3c8585c 100644
--- a/cpp/src/gandiva/annotator.cc
+++ b/cpp/src/gandiva/annotator.cc
@@ -59,11 +59,13 @@ void Annotator::PrepareBuffersForField(const 
FieldDescriptor& desc,
                                        EvalBatch* eval_batch) {
   int buffer_idx = 0;
 
-  // TODO:
-  // - validity is optional
-
-  uint8_t* validity_buf = 
const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
-  eval_batch->SetBuffer(desc.validity_idx(), validity_buf);
+  // The validity buffer is optional. Use nullptr if it does not have one.
+  if (array_data.buffers[buffer_idx]) {
+    uint8_t* validity_buf = 
const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
+    eval_batch->SetBuffer(desc.validity_idx(), validity_buf);
+  } else {
+    eval_batch->SetBuffer(desc.validity_idx(), nullptr);
+  }
   ++buffer_idx;
 
   if (desc.HasOffsetsIdx()) {
diff --git a/cpp/src/gandiva/bitmap_accumulator.h 
b/cpp/src/gandiva/bitmap_accumulator.h
index 31b6609..157405d 100644
--- a/cpp/src/gandiva/bitmap_accumulator.h
+++ b/cpp/src/gandiva/bitmap_accumulator.h
@@ -20,6 +20,7 @@
 
 #include <vector>
 
+#include "arrow/util/macros.h"
 #include "gandiva/dex.h"
 #include "gandiva/dex_visitor.h"
 #include "gandiva/eval_batch.h"
@@ -36,7 +37,8 @@ class BitMapAccumulator : public DexDefaultVisitor {
   void Visit(const VectorReadValidityDex& dex) {
     int idx = dex.ValidityIdx();
     auto bitmap = eval_batch_.GetBuffer(idx);
-    src_maps_.push_back(bitmap);
+    // The bitmap could be null. Ignore it in this case.
+    if (bitmap != NULLPTR) src_maps_.push_back(bitmap);
   }
 
   void Visit(const LocalBitMapValidityDex& dex) {
diff --git a/cpp/src/gandiva/tests/filter_test.cc 
b/cpp/src/gandiva/tests/filter_test.cc
index f95cdcc..f63899a 100644
--- a/cpp/src/gandiva/tests/filter_test.cc
+++ b/cpp/src/gandiva/tests/filter_test.cc
@@ -290,4 +290,50 @@ TEST_F(TestFilter, TestSimpleSVInt32) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
 }
 
+TEST_F(TestFilter, TestNullValidityBuffer) {
+  // schema for input fields
+  auto field0 = field("f0", int32());
+  auto field1 = field("f1", int32());
+  auto schema = arrow::schema({field0, field1});
+
+  // Build condition f0 + f1 < 10
+  auto node_f0 = TreeExprBuilder::MakeField(field0);
+  auto node_f1 = TreeExprBuilder::MakeField(field1);
+  auto sum_func =
+      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
+  auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10);
+  auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, 
literal_10},
+                                                    arrow::boolean());
+  auto condition = TreeExprBuilder::MakeCondition(less_than_10);
+
+  std::shared_ptr<Filter> filter;
+  Status status = Filter::Make(schema, condition, &filter);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+
+  auto array_ = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, 
true});
+  // Create an array without a validity buffer.
+  auto array0 =
+      std::make_shared<arrow::Int32Array>(5, array_->data()->buffers[1], 
nullptr, 0);
+  auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 3}, {true, true, false, 
true, true});
+  // expected output (indices for which condition matches)
+  auto exp = MakeArrowArrayUint16({0, 4});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, 
array1});
+
+  std::shared_ptr<SelectionVector> selection_vector;
+  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Evaluate expression
+  status = filter->Evaluate(*in_batch, selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+}
+
 }  // namespace gandiva

Reply via email to