This is an automated email from the ASF dual-hosted git repository.
zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7b8e3e88f6 GH-47807: [C++][Compute] Fix the issue that null count is
not updated when setting slice on an array span (#47808)
7b8e3e88f6 is described below
commit 7b8e3e88f6e87437e821ccf15c3ff6f60a8d0ae1
Author: Rossi Sun <[email protected]>
AuthorDate: Tue Oct 14 11:09:51 2025 -0700
GH-47807: [C++][Compute] Fix the issue that null count is not updated when
setting slice on an array span (#47808)
### Rationale for this change
The null count is not updated when setting a slice on an array span after a
preceding set-slice call has seen a null count of 0. An incorrect null count
causes subsequent failures in null processing, such as #47807.
### What changes are included in this PR?
Change the null count update condition when setting a slice on an array span:
as long as the validity buffer is present (`buffers[0].data != NULLPTR`), the
null count is set to unknown.
### Are these changes tested?
Test included.
### Are there any user-facing changes?
None.
* GitHub Issue: #47807
Authored-by: Rossi Sun <[email protected]>
Signed-off-by: Rossi Sun <[email protected]>
---
cpp/src/arrow/array/CMakeLists.txt | 1 +
cpp/src/arrow/array/data.h | 2 +-
cpp/src/arrow/array/data_test.cc | 46 ++++++++++++++++++++++
.../arrow/compute/kernels/scalar_if_else_test.cc | 11 ++++++
4 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/array/CMakeLists.txt b/cpp/src/arrow/array/CMakeLists.txt
index d8dc83bb71..6a9c3cec02 100644
--- a/cpp/src/arrow/array/CMakeLists.txt
+++ b/cpp/src/arrow/array/CMakeLists.txt
@@ -16,6 +16,7 @@
# under the License.
add_arrow_test(concatenate_test)
+add_arrow_test(data_test)
add_arrow_test(diff_test)
# Headers: top level
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e921da86e1..c6636df9bb 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -641,7 +641,7 @@ struct ARROW_EXPORT ArraySpan {
this->length = length;
if (this->type->id() == Type::NA) {
this->null_count = this->length;
- } else if (this->MayHaveNulls()) {
+ } else if (buffers[0].data != NULLPTR) {
this->null_count = kUnknownNullCount;
} else {
this->null_count = 0;
diff --git a/cpp/src/arrow/array/data_test.cc b/cpp/src/arrow/array/data_test.cc
new file mode 100644
index 0000000000..011249c54e
--- /dev/null
+++ b/cpp/src/arrow/array/data_test.cc
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/array/data.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow {
+
+TEST(ArraySpan, SetSlice) {
+ auto arr = ArrayFromJSON(int32(), "[0, 1, 2, 3, 4, 5, 6, null, 7, 8, 9]");
+ ArraySpan span(*arr->data());
+ ASSERT_EQ(span.length, arr->length());
+ ASSERT_EQ(span.null_count, 1);
+ ASSERT_EQ(span.offset, 0);
+
+ span.SetSlice(0, 7);
+ ASSERT_EQ(span.length, 7);
+ ASSERT_EQ(span.null_count, kUnknownNullCount);
+ ASSERT_EQ(span.offset, 0);
+ ASSERT_EQ(span.GetNullCount(), 0);
+
+ span.SetSlice(7, 4);
+ ASSERT_EQ(span.length, 4);
+ ASSERT_EQ(span.null_count, kUnknownNullCount);
+ ASSERT_EQ(span.offset, 7);
+ ASSERT_EQ(span.GetNullCount(), 1);
+}
+
+} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 196912679b..b357a28d0f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -3720,6 +3720,17 @@ TEST(TestChoose, FixedSizeBinary) {
*MakeArrayOfNull(type, 5));
}
+// GH-47807: Null count in ArraySpan not updated correctly when executing chunked.
+TEST(TestChoose, WrongNullCountForChunked) {
+ auto indices = ArrayFromJSON(int64(), "[0, 1, 0, 1, 0, null]");
+ auto values1 = ArrayFromJSON(int64(), "[10, 11, 12, 13, 14, 15]");
+ auto values2 = ChunkedArrayFromJSON(int64(), {"[100, 101]", "[102, 103, 104, 105]"});
+ ASSERT_OK_AND_ASSIGN(auto result, CallFunction("choose", {indices, values1, values2}));
+ ASSERT_OK(result.chunked_array()->ValidateFull());
+ AssertDatumsEqual(ChunkedArrayFromJSON(int64(), {"[10, 101]", "[12, 103, 14, null]"}),
+ result);
+}
+
TEST(TestChooseKernel, DispatchBest) {
ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction("choose"));
auto Check = [&](std::vector<TypeHolder> original_values) {