This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d11916f154 GH-46856: [C++][Python] Add binary view comparison kernels 
(#49964)
d11916f154 is described below

commit d11916f1543f0c9847f83a6d06e2a0bf0f8d5d8a
Author: Roman Kvasnytskyi <[email protected]>
AuthorDate: Tue Jun 9 16:16:01 2026 +0200

    GH-46856: [C++][Python] Add binary view comparison kernels (#49964)
    
    ### Rationale for this change
     `pyarrow.compute.equal` fails for `pa.binary_view()` arrays because C++ 
compute has no registered comparison kernel for `(binary_view, binary_view)`.
    
    This fixes that missing kernel path and also enables the same comparisons 
for `utf8_view`.
    
    ### What changes are included in this PR?
    This adds comparison kernel support for `binary_view` and `utf8_view`.
    
      The following functions now work for same-type inputs:
    
      - `equal`
      - `not_equal`
      - `greater`
      - `greater_equal`
      - `less`
      - `less_equal`
    
    ### Are these changes tested?
      Added C++ tests covering:
    
      - inline and out-of-line values
      - nulls
      - sliced arrays
      - array-array comparisons
      - array-scalar and scalar-array comparisons
      - all six comparison functions
    
      Added Python regression tests for `pa.binary_view()` and 
`pa.string_view()`.
    
      Verified the same cases fail before this patch at 
`a0d2885b101acb439f7f79ec2237028974e74e64` with `ArrowNotImplementedError: no 
kernel matching input types`.
    
    ### Are there any user-facing changes?
    
    `pyarrow.compute` comparison functions now work for `pa.binary_view()` and 
`pa.string_view()` arrays where they previously failed with a missing kernel 
error.
    
    ### AI Usage
    Tests were generated by LLM agents, along with part of the PR summary
    
    Addresses: GH-46856
    Partially addresses: GH-44336
    
    * GitHub Issue: #46856
    
    Authored-by: Roman Kvasnytskyi <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/codegen_internal.h   | 16 +++++++
 cpp/src/arrow/compute/kernels/scalar_compare.cc    |  6 +++
 .../arrow/compute/kernels/scalar_compare_test.cc   | 49 ++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h 
b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 15a946fbdb..d33ffacb2f 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -350,6 +350,22 @@ struct ArrayIterator<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct ArrayIterator<Type, enable_if_binary_view_like<Type>> {
+  const BinaryViewType::c_type* views;
+  const std::shared_ptr<Buffer>* data_buffers;
+  int64_t position;
+
+  explicit ArrayIterator(const ArraySpan& arr)
+      : views(arr.GetValues<BinaryViewType::c_type>(1)),
+        data_buffers(arr.GetVariadicBuffers().data()),
+        position(0) {}
+
+  std::string_view operator()() {
+    return util::FromBinaryView(views[position++], data_buffers);
+  }
+};
+
 template <>
 struct ArrayIterator<FixedSizeBinaryType> {
   const ArraySpan& arr;
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc 
b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 773a3f684b..3dfd66655e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -433,6 +433,12 @@ std::shared_ptr<ScalarFunction> 
MakeCompareFunction(std::string name, FunctionDo
         GenerateVarBinaryBase<applicator::ScalarBinaryEqualTypes, BooleanType, 
Op>(*ty);
     DCHECK_OK(func->AddKernel({ty, ty}, boolean(), std::move(exec)));
   }
+  for (const auto& ty : BinaryViewTypes()) {
+    auto exec =
+        GenerateVarBinaryViewBase<applicator::ScalarBinaryEqualTypes, 
BooleanType, Op>(
+            *ty);
+    DCHECK_OK(func->AddKernel({ty, ty}, boolean(), std::move(exec)));
+  }
 
   for (const auto id : {Type::DECIMAL128, Type::DECIMAL256}) {
     auto exec = GenerateDecimal<applicator::ScalarBinaryEqualTypes, 
BooleanType, Op>(id);
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 23c7ab21bd..2aae5bf2ee 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -1196,6 +1196,55 @@ TEST_F(TestStringCompareKernel, RandomCompareArrayArray) 
{
   }
 }
 
+TEST(TestBinaryViewCompareKernel, ArrayArray) {
+  const auto cases = std::vector<std::shared_ptr<DataType>>{binary_view(), 
utf8_view()};
+  const auto expected = std::vector<std::pair<std::string, std::string>>{
+      {"equal", "[true, false, false, false, false, false, null]"},
+      {"not_equal", "[false, true, true, true, true, true, null]"},
+      {"greater", "[false, false, false, false, false, true, null]"},
+      {"greater_equal", "[true, false, false, false, false, true, null]"},
+      {"less", "[false, true, true, true, true, false, null]"},
+      {"less_equal", "[true, true, true, true, true, false, null]"}};
+
+  for (const auto& ty : cases) {
+    auto lhs =
+        ArrayFromJSON(ty, R"(["", "abc", "abcdefghijkl", "abcdefghijklm", 
"prefix_same_A",
+                "samepref_size", null])");
+    auto rhs = ArrayFromJSON(
+        ty, R"(["", "abd", "abcdefghijklm", "abcdefghijklz", "prefix_same_B",
+                "samepref", null])");
+
+    CheckScalarBinary("equal", ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty, 
R"([])"),
+                      ArrayFromJSON(boolean(), R"([])"));
+    CheckScalarBinary("equal", ArrayFromJSON(ty, R"([null])"),
+                      ArrayFromJSON(ty, R"([null])"),
+                      ArrayFromJSON(boolean(), R"([null])"));
+    for (const auto& function_and_expected : expected) {
+      CheckScalarBinary(function_and_expected.first, lhs, rhs,
+                        ArrayFromJSON(boolean(), 
function_and_expected.second));
+    }
+  }
+}
+
+TEST(TestBinaryViewCompareKernel, ArrayScalar) {
+  for (const auto& ty : {binary_view(), utf8_view()}) {
+    auto arr = ArrayFromJSON(ty, R"(["", "abc", "abcdefghijklmnop", null])");
+    auto scalar = ScalarFromJSON(ty, R"("abc")");
+    auto null_scalar = ScalarFromJSON(ty, "null");
+
+    CheckScalarBinary("equal", arr, scalar,
+                      ArrayFromJSON(boolean(), R"([false, true, false, 
null])"));
+    CheckScalarBinary("equal", scalar, arr,
+                      ArrayFromJSON(boolean(), R"([false, true, false, 
null])"));
+    CheckScalarBinary("greater", arr, scalar,
+                      ArrayFromJSON(boolean(), R"([false, false, true, 
null])"));
+    CheckScalarBinary("less", scalar, arr,
+                      ArrayFromJSON(boolean(), R"([false, false, true, 
null])"));
+    CheckScalarBinary("equal", arr, null_scalar,
+                      ArrayFromJSON(boolean(), R"([null, null, null, null])"));
+  }
+}
+
 template <typename T>
 class TestVarArgsCompare : public ::testing::Test {
  protected:

Reply via email to