pitrou commented on code in PR #39245:
URL: https://github.com/apache/arrow/pull/39245#discussion_r1434334074


##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -712,6 +712,95 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryLength) {
              "[6, null, 6]");
 }
 
+TEST_F(TestFixedSizeBinaryKernels, SliceBytesBasic) {
+  SliceOptions options{2, 4};
+  CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2),

Review Comment:
   Can we use less repeated values to make sure the substring selection is 
right?
   ```suggestion
     CheckUnary("binary_slice", R"(["abcdef", "ghijkl"])", fixed_size_binary(2),
   ```



##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -712,6 +712,95 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryLength) {
              "[6, null, 6]");
 }
 
+TEST_F(TestFixedSizeBinaryKernels, SliceBytesBasic) {

Review Comment:
   Nit
   ```suggestion
   TEST_F(TestFixedSizeBinaryKernels, BinarySliceBasic) {
   ```



##########
cpp/src/arrow/compute/kernels/scalar_string_test.cc:
##########
@@ -712,6 +712,95 @@ TEST_F(TestFixedSizeBinaryKernels, BinaryLength) {
              "[6, null, 6]");
 }
 
+TEST_F(TestFixedSizeBinaryKernels, SliceBytesBasic) {
+  SliceOptions options{2, 4};
+  CheckUnary("binary_slice", R"(["abcabc", "defdef"])", fixed_size_binary(2),

Review Comment:
   Also, can you add some null values at some point?



##########
cpp/src/arrow/compute/kernels/scalar_string_ascii.cc:
##########
@@ -2568,6 +2571,32 @@ struct SliceBytesTransform : StringSliceTransformBase {
 
     return dest - output;
   }
+
+  static Result<int32_t> FixedOutputSize(SliceOptions options, int32_t 
input_width_32) {
+    auto step = options.step;
+    if (step == 0) {
+      return Status::Invalid("Slice step cannot be zero");
+    }
+    auto start = options.start;
+    auto stop = options.stop;
+    auto input_width = static_cast<int64_t>(input_width_32);
+
+    if (start < 0) {
+      start = std::max(static_cast<int64_t>(0), start + input_width);
+    }
+    if (stop < 0) {
+      stop = std::max(static_cast<int64_t>(0), stop + input_width);
+    }
+    start = std::min(start, input_width);
+    stop = std::min(stop, input_width);
+
+    if ((start == stop) || ((start >= stop) && (step > 0)) ||
+        ((start <= stop) && (step < 0))) {
+      return 0;
+    }
+    return static_cast<int32_t>(std::max(
+        static_cast<int64_t>(0), (stop - start + (step - (step > 0 ? 1 : -1))) 
/ step));
+  }

Review Comment:
   Hmm, can you rephrase the code such that `step > 0` and `step < 0` have 
separate code paths? It would make things clearer.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to