This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 103489b ARROW-10208: [C++] Fix split string kernels on sliced input
103489b is described below
commit 103489b727c37ec7d0ea7411a3a972dd8d2859af
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Oct 22 18:19:08 2020 +0200
ARROW-10208: [C++] Fix split string kernels on sliced input
Nulls were propagated incorrectly. We can simply let the kernel machinery
do this for us.
Closes #8496 from pitrou/ARROW-10208-split-string-sliced
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/kernels/scalar_string.cc | 4 ++--
cpp/src/arrow/compute/kernels/scalar_string_test.cc | 6 ++----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 00ab80b..0a1c9a0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -923,8 +923,6 @@ struct SplitBaseTransform {
ListOffsetsBuilderType list_offsets_builder(ctx->memory_pool());
KERNEL_RETURN_IF_ERROR(ctx, list_offsets_builder.Resize(input_nstrings));
ArrayData* output_list = out->mutable_array();
- // // we use the same null values
- output_list->buffers[0] = input.buffers[0];
// initial value
KERNEL_RETURN_IF_ERROR(
ctx, list_offsets_builder.Append(static_cast<list_offset_type>(0)));
@@ -986,6 +984,7 @@ struct SplitPatternTransform : SplitBaseTransform<Type, ListType, SplitPatternOp
}
return Status::OK();
}
+
static bool Find(const uint8_t* begin, const uint8_t* end,
const uint8_t** separator_begin, const uint8_t** separator_end,
const SplitPatternOptions& options) {
@@ -1004,6 +1003,7 @@ struct SplitPatternTransform : SplitBaseTransform<Type, ListType, SplitPatternOp
}
return false;
}
+
static bool FindReverse(const uint8_t* begin, const uint8_t* end,
const uint8_t** separator_begin, const uint8_t** separator_end,
const SplitPatternOptions& options) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index e77a4cc..c76b50e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -339,10 +339,8 @@ TYPED_TEST(TestStringKernels, SplitBasics) {
// basics
this->CheckUnary("split_pattern", R"(["foo bar", "foo"])", list(this->type()),
R"([["foo", "bar"], ["foo"]])", &options);
- // TODO: enable test when the following issue is fixed:
- // https://issues.apache.org/jira/browse/ARROW-10208
- // this->CheckUnary("split_pattern", R"(["foo bar", "foo", null])", list(this->type()),
- // R"([["foo", "bar"], ["foo"], null])", &options);
+ this->CheckUnary("split_pattern", R"(["foo bar", "foo", null])", list(this->type()),
+ R"([["foo", "bar"], ["foo"], null])", &options);
// edgy cases
this->CheckUnary("split_pattern", R"(["f o o "])", list(this->type()),
R"([["f", "", "o", "o", ""]])", &options);