This is an automated email from the ASF dual-hosted git repository.

zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7b8e3e88f6 GH-47807: [C++][Compute] Fix the issue that null count is 
not updated when setting slice on an array span (#47808)
7b8e3e88f6 is described below

commit 7b8e3e88f6e87437e821ccf15c3ff6f60a8d0ae1
Author: Rossi Sun <[email protected]>
AuthorDate: Tue Oct 14 11:09:51 2025 -0700

    GH-47807: [C++][Compute] Fix the issue that null count is not updated when 
setting slice on an array span (#47808)
    
    ### Rationale for this change
    
    The null count is not updated when setting a slice on an array span after a 
preceding set slice has seen a null count of 0. An incorrect null count will cause 
subsequent failures in null processing, such as #47807.
    
    ### What changes are included in this PR?
    
    Simplifying the null count update condition when setting a slice on an array 
span: as long as there is a validity buffer, we set the null count to unknown.
    
    ### Are these changes tested?
    
    Test included.
    
    ### Are there any user-facing changes?
    
    None.
    * GitHub Issue: #47807
    
    Authored-by: Rossi Sun <[email protected]>
    Signed-off-by: Rossi Sun <[email protected]>
---
 cpp/src/arrow/array/CMakeLists.txt                 |  1 +
 cpp/src/arrow/array/data.h                         |  2 +-
 cpp/src/arrow/array/data_test.cc                   | 46 ++++++++++++++++++++++
 .../arrow/compute/kernels/scalar_if_else_test.cc   | 11 ++++++
 4 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/array/CMakeLists.txt 
b/cpp/src/arrow/array/CMakeLists.txt
index d8dc83bb71..6a9c3cec02 100644
--- a/cpp/src/arrow/array/CMakeLists.txt
+++ b/cpp/src/arrow/array/CMakeLists.txt
@@ -16,6 +16,7 @@
 # under the License.
 
 add_arrow_test(concatenate_test)
+add_arrow_test(data_test)
 add_arrow_test(diff_test)
 
 # Headers: top level
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index e921da86e1..c6636df9bb 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -641,7 +641,7 @@ struct ARROW_EXPORT ArraySpan {
     this->length = length;
     if (this->type->id() == Type::NA) {
       this->null_count = this->length;
-    } else if (this->MayHaveNulls()) {
+    } else if (buffers[0].data != NULLPTR) {
       this->null_count = kUnknownNullCount;
     } else {
       this->null_count = 0;
diff --git a/cpp/src/arrow/array/data_test.cc b/cpp/src/arrow/array/data_test.cc
new file mode 100644
index 0000000000..011249c54e
--- /dev/null
+++ b/cpp/src/arrow/array/data_test.cc
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/array/data.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow {
+
+TEST(ArraySpan, SetSlice) {
+  auto arr = ArrayFromJSON(int32(), "[0, 1, 2, 3, 4, 5, 6, null, 7, 8, 9]");
+  ArraySpan span(*arr->data());
+  ASSERT_EQ(span.length, arr->length());
+  ASSERT_EQ(span.null_count, 1);
+  ASSERT_EQ(span.offset, 0);
+
+  span.SetSlice(0, 7);
+  ASSERT_EQ(span.length, 7);
+  ASSERT_EQ(span.null_count, kUnknownNullCount);
+  ASSERT_EQ(span.offset, 0);
+  ASSERT_EQ(span.GetNullCount(), 0);
+
+  span.SetSlice(7, 4);
+  ASSERT_EQ(span.length, 4);
+  ASSERT_EQ(span.null_count, kUnknownNullCount);
+  ASSERT_EQ(span.offset, 7);
+  ASSERT_EQ(span.GetNullCount(), 1);
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 196912679b..b357a28d0f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -3720,6 +3720,17 @@ TEST(TestChoose, FixedSizeBinary) {
               *MakeArrayOfNull(type, 5));
 }
 
+// GH-47807: Null count in ArraySpan not updated correctly when executing 
chunked.
+TEST(TestChoose, WrongNullCountForChunked) {
+  auto indices = ArrayFromJSON(int64(), "[0, 1, 0, 1, 0, null]");
+  auto values1 = ArrayFromJSON(int64(), "[10, 11, 12, 13, 14, 15]");
+  auto values2 = ChunkedArrayFromJSON(int64(), {"[100, 101]", "[102, 103, 104, 
105]"});
+  ASSERT_OK_AND_ASSIGN(auto result, CallFunction("choose", {indices, values1, 
values2}));
+  ASSERT_OK(result.chunked_array()->ValidateFull());
+  AssertDatumsEqual(ChunkedArrayFromJSON(int64(), {"[10, 101]", "[12, 103, 14, 
null]"}),
+                    result);
+}
+
 TEST(TestChooseKernel, DispatchBest) {
   ASSERT_OK_AND_ASSIGN(auto function, 
GetFunctionRegistry()->GetFunction("choose"));
   auto Check = [&](std::vector<TypeHolder> original_values) {

Reply via email to