This is an automated email from the ASF dual-hosted git repository.
westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 67fa416d89 GH-34241: [C++] Fix ExecSpanIterator to properly initialize
empty dictionary arrays (#34246)
67fa416d89 is described below
commit 67fa416d89eca6ec94ced5c03939e853cd0c1b0a
Author: Weston Pace <[email protected]>
AuthorDate: Wed Feb 22 11:31:04 2023 -0800
GH-34241: [C++] Fix ExecSpanIterator to properly initialize empty
dictionary arrays (#34246)
* Closes: #34241
Authored-by: Weston Pace <[email protected]>
Signed-off-by: Weston Pace <[email protected]>
---
cpp/src/arrow/array/data.cc | 15 +++++++++++----
cpp/src/arrow/compute/exec_test.cc | 25 +++++++++++++++++++++++++
2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index cb0f8dc55f..b13022f9c3 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -233,10 +233,17 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
span->buffers[i] = {};
}
- // Fill children
- span->child_data.resize(type->num_fields());
- for (int i = 0; i < type->num_fields(); ++i) {
- FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]);
+ if (type->id() == Type::DICTIONARY) {
+ span->child_data.resize(1);
+ const std::shared_ptr<DataType>& value_type =
+ checked_cast<const DictionaryType*>(type)->value_type();
+ FillZeroLengthArray(value_type.get(), &span->child_data[0]);
+ } else {
+ // Fill children
+ span->child_data.resize(type->num_fields());
+ for (int i = 0; i < type->num_fields(); ++i) {
+ FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]);
+ }
}
}
diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc
index defc848c5e..24da4a2e8d 100644
--- a/cpp/src/arrow/compute/exec_test.cc
+++ b/cpp/src/arrow/compute/exec_test.cc
@@ -829,6 +829,10 @@ TEST_F(TestExecSpanIterator, ChunkedArrays) {
TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
auto carr = std::make_shared<ChunkedArray>(ArrayVector{}, int32());
+ auto dict_arr =
+ std::make_shared<ChunkedArray>(ArrayVector{}, dictionary(int32(), utf8()));
+ auto nested_arr = std::make_shared<ChunkedArray>(
+ ArrayVector{}, struct_({field("x", int32()), field("y", int64())}));
auto CheckArgs = [&](const ExecBatch& batch) {
ExecSpanIterator iterator;
@@ -836,6 +840,19 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
ExecSpan iter_span;
ASSERT_TRUE(iterator.Next(&iter_span));
ASSERT_EQ(0, iter_span.length);
+ for (int col_idx = 0; col_idx < iter_span.num_values(); col_idx++) {
+ const ExecValue& val = iter_span.values[col_idx];
+ ASSERT_TRUE(val.is_array());
+ const ArraySpan& span = val.array;
+ if (span.type->id() == Type::DICTIONARY) {
+ ASSERT_EQ(1, span.child_data.size());
+ ASSERT_EQ(0, span.dictionary().length);
+ } else {
+ for (const auto& child : span.child_data) {
+ ASSERT_EQ(0, child.length);
+ }
+ }
+ }
ASSERT_FALSE(iterator.Next(&iter_span));
};
@@ -846,6 +863,14 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) {
input.values = {Datum(carr)};
CheckArgs(input);
+ // Zero-length ChunkedArray with zero chunks, dictionary
+ input.values = {Datum(dict_arr)};
+ CheckArgs(input);
+
+ // Zero-length ChunkedArray with zero chunks, nested
+ input.values = {Datum(nested_arr)};
+ CheckArgs(input);
+
// Zero-length array
input.values = {Datum(GetInt32Array(0))};
CheckArgs(input);