wesm commented on a change in pull request #7755:
URL: https://github.com/apache/arrow/pull/7755#discussion_r454394631



##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -385,35 +371,34 @@ void TransformBinaryContainsExact(const uint8_t* pattern, 
int64_t pattern_length
   bitmap_writer.Finish();
 }
 
-using BinaryContainsExactState = OptionsWrapper<BinaryContainsExactOptions>;
+using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
 
 template <typename Type>
-struct BinaryContainsExact {
+struct MatchSubstring {
   using offset_type = typename Type::offset_type;
   static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    BinaryContainsExactOptions arg = BinaryContainsExactState::Get(ctx);
+    MatchSubstringOptions arg = MatchSubstringState::Get(ctx);
     const uint8_t* pat = reinterpret_cast<const uint8_t*>(arg.pattern.c_str());
     const int64_t pat_size = arg.pattern.length();
     StringBoolTransform<Type>(
         ctx, batch,
         [pat, pat_size](const void* offsets, const uint8_t* data, int64_t 
length,
                         int64_t output_offset, uint8_t* output) {
-          TransformBinaryContainsExact<offset_type>(
+          TransformMatchSubstring<offset_type>(
               pat, pat_size, reinterpret_cast<const offset_type*>(offsets), 
data, length,
               output_offset, output);
         },
         out);
   }
 };
 
-void AddBinaryContainsExact(FunctionRegistry* registry) {
-  auto func = std::make_shared<ScalarFunction>("binary_contains_exact", 
Arity::Unary());
-  auto exec_32 = BinaryContainsExact<StringType>::Exec;
-  auto exec_64 = BinaryContainsExact<LargeStringType>::Exec;
+void AddMatchSubstring(FunctionRegistry* registry) {

Review comment:
       I think this is fine; we can use `match_substring_case_insensitive` for
the case-insensitive version.
   
   cc @xhochy 

##########
File path: cpp/src/arrow/compute/kernels/test_util.cc
##########
@@ -44,6 +46,7 @@ void CheckScalarUnary(std::string func_name, 
std::shared_ptr<Array> input,
   }
 
   if (auto length = input->length() / 3) {
+    // XXX Is the recursive call intended?

Review comment:
       Nope, should fix this

##########
File path: cpp/src/arrow/compute/kernels/scalar_validity.cc
##########
@@ -37,23 +37,22 @@ struct IsValidOperator {
   static void Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
     DCHECK_EQ(out->offset, 0);
     DCHECK_LE(out->length, arr.length);
-    if (arr.buffers[0] != nullptr) {
-      out->buffers[1] = arr.offset == 0
-                            ? arr.buffers[0]
-                            : SliceBuffer(arr.buffers[0], arr.offset / 8, 
arr.length / 8);
+    if (arr.null_count != 0 && arr.buffers[0] != nullptr) {

Review comment:
       Note: I added a helper function `ArrayData::MayHaveNulls` to do this 
since it's such a common pattern

##########
File path: cpp/src/arrow/compute/kernels/vector_nested_test.cc
##########
@@ -30,7 +30,9 @@ TEST(TestVectorNested, ListFlatten) {
     auto input = ArrayFromJSON(ty, "[[0, null, 1], null, [2, 3], []]");
     auto expected = ArrayFromJSON(int32(), "[0, null, 1, 2, 3]");
     ASSERT_OK_AND_ASSIGN(Datum out, CallFunction("list_flatten", {input}));
-    AssertArraysEqual(*expected, *out.make_array());
+    std::shared_ptr<Array> actual = std::move(out).make_array();
+    ASSERT_OK(actual->ValidateFull());
+    AssertArraysEqual(*expected, *actual);

Review comment:
       Seems like this should be extracted into a helper in test_util.h




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to