This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 103489b  ARROW-10208: [C++] Fix split string kernels on sliced input
103489b is described below

commit 103489b727c37ec7d0ea7411a3a972dd8d2859af
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Oct 22 18:19:08 2020 +0200

    ARROW-10208: [C++] Fix split string kernels on sliced input
    
    Nulls were propagated incorrectly.  We can simply let the kernel machinery
    do this for us.
    
    Closes #8496 from pitrou/ARROW-10208-split-string-sliced
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/scalar_string.cc      | 4 ++--
 cpp/src/arrow/compute/kernels/scalar_string_test.cc | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc 
b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 00ab80b..0a1c9a0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -923,8 +923,6 @@ struct SplitBaseTransform {
       ListOffsetsBuilderType list_offsets_builder(ctx->memory_pool());
       KERNEL_RETURN_IF_ERROR(ctx, list_offsets_builder.Resize(input_nstrings));
       ArrayData* output_list = out->mutable_array();
-      // // we use the same null values
-      output_list->buffers[0] = input.buffers[0];
       // initial value
       KERNEL_RETURN_IF_ERROR(
           ctx, list_offsets_builder.Append(static_cast<list_offset_type>(0)));
@@ -986,6 +984,7 @@ struct SplitPatternTransform : SplitBaseTransform<Type, 
ListType, SplitPatternOp
     }
     return Status::OK();
   }
+
   static bool Find(const uint8_t* begin, const uint8_t* end,
                    const uint8_t** separator_begin, const uint8_t** 
separator_end,
                    const SplitPatternOptions& options) {
@@ -1004,6 +1003,7 @@ struct SplitPatternTransform : SplitBaseTransform<Type, 
ListType, SplitPatternOp
     }
     return false;
   }
+
   static bool FindReverse(const uint8_t* begin, const uint8_t* end,
                           const uint8_t** separator_begin, const uint8_t** 
separator_end,
                           const SplitPatternOptions& options) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index e77a4cc..c76b50e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -339,10 +339,8 @@ TYPED_TEST(TestStringKernels, SplitBasics) {
   // basics
   this->CheckUnary("split_pattern", R"(["foo bar", "foo"])", 
list(this->type()),
                    R"([["foo", "bar"], ["foo"]])", &options);
-  // TODO: enable test when the following issue is fixed:
-  // https://issues.apache.org/jira/browse/ARROW-10208
-  // this->CheckUnary("split_pattern", R"(["foo bar", "foo", null])", 
list(this->type()),
-  //                  R"([["foo", "bar"], ["foo"], null])", &options);
+  this->CheckUnary("split_pattern", R"(["foo bar", "foo", null])", 
list(this->type()),
+                   R"([["foo", "bar"], ["foo"], null])", &options);
   // edgy cases
   this->CheckUnary("split_pattern", R"(["f  o o "])", list(this->type()),
                    R"([["f", "", "o", "o", ""]])", &options);

Reply via email to