This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 67fa416d89 GH-34241: [C++] Fix ExecSpanIterator to properly initialize empty dictionary arrays (#34246)
67fa416d89 is described below

commit 67fa416d89eca6ec94ced5c03939e853cd0c1b0a
Author: Weston Pace <[email protected]>
AuthorDate: Wed Feb 22 11:31:04 2023 -0800

    GH-34241: [C++] Fix ExecSpanIterator to properly initialize empty dictionary arrays (#34246)
    
    
    * Closes: #34241
    
    Authored-by: Weston Pace <[email protected]>
    Signed-off-by: Weston Pace <[email protected]>
---
 cpp/src/arrow/array/data.cc        | 15 +++++++++++----
 cpp/src/arrow/compute/exec_test.cc | 25 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index cb0f8dc55f..b13022f9c3 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -233,10 +233,17 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
     span->buffers[i] = {};
   }
 
-  // Fill children
-  span->child_data.resize(type->num_fields());
-  for (int i = 0; i < type->num_fields(); ++i) {
-    FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]);
+  if (type->id() == Type::DICTIONARY) {
+    span->child_data.resize(1);
+    const std::shared_ptr<DataType>& value_type =
+        checked_cast<const DictionaryType*>(type)->value_type();
+    FillZeroLengthArray(value_type.get(), &span->child_data[0]);
+  } else {
+    // Fill children
+    span->child_data.resize(type->num_fields());
+    for (int i = 0; i < type->num_fields(); ++i) {
+      FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]);
+    }
   }
 }
 
diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc
index defc848c5e..24da4a2e8d 100644
--- a/cpp/src/arrow/compute/exec_test.cc
+++ b/cpp/src/arrow/compute/exec_test.cc
@@ -829,6 +829,10 @@ TEST_F(TestExecSpanIterator, ChunkedArrays) {
 
 TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
   auto carr = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
+  auto dict_arr =
+      std::make_shared<ChunkedArray>(ArrayVector{}, dictionary(int32(), utf8()));
+  auto nested_arr = std::make_shared<ChunkedArray>(
+      ArrayVector{}, struct_({field("x", int32()), field("y", int64())}));
 
   auto CheckArgs = [&](const ExecBatch& batch) {
     ExecSpanIterator iterator;
@@ -836,6 +840,19 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
     ExecSpan iter_span;
     ASSERT_TRUE(iterator.Next(&iter_span));
     ASSERT_EQ(0, iter_span.length);
+    for (int col_idx = 0; col_idx < iter_span.num_values(); col_idx++) {
+      const ExecValue& val = iter_span.values[col_idx];
+      ASSERT_TRUE(val.is_array());
+      const ArraySpan& span = val.array;
+      if (span.type->id() == Type::DICTIONARY) {
+        ASSERT_EQ(1, span.child_data.size());
+        ASSERT_EQ(0, span.dictionary().length);
+      } else {
+        for (const auto& child : span.child_data) {
+          ASSERT_EQ(0, child.length);
+        }
+      }
+    }
     ASSERT_FALSE(iterator.Next(&iter_span));
   };
 
@@ -846,6 +863,14 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
   input.values = {Datum(carr)};
   CheckArgs(input);
 
+  // Zero-length ChunkedArray with zero chunks, dictionary
+  input.values = {Datum(dict_arr)};
+  CheckArgs(input);
+
+  // Zero-length ChunkedArray with zero chunks, nested
+  input.values = {Datum(nested_arr)};
+  CheckArgs(input);
+
   // Zero-length array
   input.values = {Datum(GetInt32Array(0))};
   CheckArgs(input);

Reply via email to