pitrou commented on code in PR #39245:
URL: https://github.com/apache/arrow/pull/39245#discussion_r1449109183
##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -712,11 +712,139 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryLength) {
"[6, null, 6]");
}
+TEST_F(TestFixedSizeBinaryKernels, BinarySliceEmpty) {
+ SliceOptions options{2, 4};
+ CheckScalarUnary("binary_slice", ArrayFromJSON(fixed_size_binary(0),
R"([""])"),
+ ArrayFromJSON(fixed_size_binary(0), R"([""])"), &options);
+
+ CheckScalarUnary("binary_slice",
+ ArrayFromJSON(fixed_size_binary(0), R"(["", null, ""])"),
+ ArrayFromJSON(fixed_size_binary(0), R"(["", null, ""])"),
&options);
+
+ CheckUnary("binary_slice", R"([null, null])", fixed_size_binary(2),
R"([null, null])",
+ &options);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySliceBasic) {
+ SliceOptions options{2, 4};
+ CheckUnary("binary_slice", R"(["abcdef", null, "foobaz"])",
fixed_size_binary(2),
+ R"(["cd", null, "ob"])", &options);
+
+ SliceOptions options_edgecase_1{-3, 1};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(0),
+ R"(["", ""])", &options_edgecase_1);
+
+ SliceOptions options_edgecase_2{-10, -3};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz", null])",
fixed_size_binary(3),
+ R"(["abc", "foo", null])", &options_edgecase_2);
+
+ auto input = ArrayFromJSON(this->type(), R"(["foobaz"])");
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid,
+ testing::HasSubstr("Function 'binary_slice' cannot be called without
options"),
+ CallFunction("binary_slice", {input}));
+
+ SliceOptions options_invalid{2, 4, 0};
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid, testing::HasSubstr("Slice step cannot be zero"),
+ CallFunction("binary_slice", {input}, &options_invalid));
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySlicePosPos) {
+ SliceOptions options_step{1, 5, 2};
+ CheckUnary("binary_slice", R"([null, "abcdef", "foobaz"])",
fixed_size_binary(2),
+ R"([null, "bd", "ob"])", &options_step);
+
+ SliceOptions options_step_neg{5, 0, -2};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3),
+ R"(["fdb", "zbo"])", &options_step_neg);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySlicePosNeg) {
+ SliceOptions options{2, -1};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3),
+ R"(["cde", "oba"])", &options);
+
+ SliceOptions options_step{1, -1, 2};
+ CheckUnary("binary_slice", R"(["abcdef", null, "foobaz"])",
fixed_size_binary(2),
+ R"(["bd", null, "ob"])", &options_step);
+
+ SliceOptions options_step_neg{5, -4, -2};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2),
+ R"(["fd", "zb"])", &options_step_neg);
+
+ options_step_neg.stop = -6;
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3),
+ R"(["fdb", "zbo"])", &options_step_neg);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySliceNegNeg) {
+ SliceOptions options{-2, -1};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(1),
+ R"(["e", "a"])", &options);
+
+ SliceOptions options_step{-4, -1, 2};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz", null, null])",
fixed_size_binary(2),
+ R"(["ce", "oa", null, null])", &options_step);
+
+ SliceOptions options_step_neg{-1, -3, -2};
+ CheckUnary("binary_slice", R"([null, "abcdef", null, "foobaz"])",
fixed_size_binary(1),
+ R"([null, "f", null, "z"])", &options_step_neg);
+
+ options_step_neg.stop = -4;
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2),
+ R"(["fd", "zb"])", &options_step_neg);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySliceNegPos) {
+ SliceOptions options{-2, 4};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(0),
+ R"(["", ""])", &options);
+
+ SliceOptions options_step{-4, 5, 2};
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(2),
+ R"(["ce", "oa"])", &options_step);
+
+ SliceOptions options_step_neg{-1, 1, -2};
+ CheckUnary("binary_slice", R"([null, "abcdef", "foobaz", null])",
fixed_size_binary(2),
+ R"([null, "fd", "zb", null])", &options_step_neg);
+
+ options_step_neg.stop = 0;
+ CheckUnary("binary_slice", R"(["abcdef", "foobaz"])", fixed_size_binary(3),
+ R"(["fdb", "zbo"])", &options_step_neg);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinarySliceConsistentyWithVarLenBinary) {
+ std::string source_str = "abcdef";
+ for (size_t str_len = 0; str_len < source_str.size(); ++str_len) {
+ auto input_str = source_str.substr(0, str_len);
+ auto fixed_input =
ArrayFromJSON(fixed_size_binary(static_cast<int32_t>(str_len)),
+ R"([")" + input_str + R"("])");
+ auto varlen_input = ArrayFromJSON(binary(), R"([")" + input_str + R"("])");
+ for (auto start = -6; start <= 6; ++start) {
+ for (auto stop = -6; stop <= 6; ++stop) {
+ for (auto step = -3; step <= 4; ++step) {
+ if (step == 0) {
+ continue;
+ }
+ SliceOptions options{start, stop, step};
+ auto expected =
+ CallFunction("binary_slice", {varlen_input},
&options).ValueOrDie();
+ auto actual =
+ CallFunction("binary_slice", {fixed_input},
&options).ValueOrDie();
+ actual = Cast(actual, binary()).ValueOrDie();
+ AssertDatumsEqual(expected, actual);
Review Comment:
We should validate the output:
```suggestion
ASSERT_OK(actual.make_array()->ValidateFull());
AssertDatumsEqual(expected, actual);
```
##########
python/pyarrow/tests/test_compute.py:
##########
@@ -574,6 +575,16 @@ def test_binary_slice_compatibility():
assert expected.equals(result)
# Positional options
assert pc.binary_slice(arr, start, stop, step) == result
+ # Fixed size binary input / output
+ for item in data:
+ print(item)
+ print(start, stop, step)
+ fsb_scalar = pa.scalar(item, type=pa.binary(len(item)))
+ expected = item[start:stop:step]
+ print(expected)
Review Comment:
Remove the print statements here?
```suggestion
fsb_scalar = pa.scalar(item, type=pa.binary(len(item)))
expected = item[start:stop:step]
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]