jorisvandenbossche commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r1016263630


##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -116,6 +117,125 @@ TEST(TestScalarNested, ListElementInvalid) {
               Raises(StatusCode::Invalid));
 }
 
+TEST(TestScalarNested, ListSliceVariableOutput) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    /* Variable list size output required variable size list input. */
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6], 
null]")};
+    for (auto input : inputs) {
+      ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
+                            /*return_fixed_size_list=*/false);
+      auto expected = ArrayFromJSON(list(value_type), "[[1, 2], [4, 5], [6], 
null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 1;
+      expected = ArrayFromJSON(list(value_type), "[[2], [5], [], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 2;
+      args.stop = 4;
+      expected = ArrayFromJSON(list(value_type), "[[3], [], [], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+    }
+  }
+
+  // Verify passing `return_fixed_size_list=false` with fixed size input
+  // returns variable size even if stop is beyond list_size
+  ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
+                        /*return_fixed_size_list=*/false);
+  auto input = ArrayFromJSON(fixed_size_list(int32(), 1), "[[1]]");
+  auto expected = ArrayFromJSON(list(int32()), "[[1]]");
+  CheckScalarUnary("list_slice", input, expected, &args);
+}
+
+TEST(TestScalarNested, ListSliceFixedOutput) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6], 
null]"),
+                   ArrayFromJSON(fixed_size_list(value_type, 3),
+                                 "[[1, 2, 3], [4, 5, null], [6, null, null], 
null]")};
+    for (auto input : inputs) {
+      ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1,
+                            /*return_fixed_size_list=*/true);
+      auto expected = ArrayFromJSON(fixed_size_list(value_type, 2),
+                                    "[[1, 2], [4, 5], [6, null], null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 1;
+      expected =
+          ArrayFromJSON(fixed_size_list(value_type, 1), "[[2], [5], [null], 
null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+
+      args.start = 2;
+      args.stop = 4;
+      expected = ArrayFromJSON(fixed_size_list(value_type, 2),
+                               "[[3, null], [null, null], [null, null], 
null]");
+      CheckScalarUnary("list_slice", input, expected, &args);
+    }
+  }
+}
+
+TEST(TestScalarNested, ListSliceOutputEqualsInputType) {
+  // Default is to return same type as the one passed in.
+  auto inputs = {
+      ArrayFromJSON(list(int8()), "[[1, 2, 3], [4, 5], [6, null], null]"),
+      ArrayFromJSON(large_list(int8()), "[[1, 2, 3], [4, 5], [6, null], 
null]"),
+      ArrayFromJSON(fixed_size_list(int8(), 2), "[[1, 2], [4, 5], [6, null], 
null]")};
+  for (auto input : inputs) {
+    ListSliceOptions args(/*start=*/0, /*stop=*/2, /*step=*/1);
+    auto expected = ArrayFromJSON(input->type(), "[[1, 2], [4, 5], [6, null], 
null]");
+    CheckScalarUnary("list_slice", input, expected, &args);
+  }
+}
+
+TEST(TestScalarNested, ListSliceBadParameters) {
+  auto input = ArrayFromJSON(list(int32()), "[[1]]");
+
+  // negative start
+  ListSliceOptions args(/*start=*/-1, /*stop=*/1, /*step=*/1,
+                        /*return_fixed_size_list=*/true);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr(
+          "`start`(-1) should be greater than 0 and smaller than `stop`(1)"),
+      CallFunction("list_slice", {input}, &args));
+  // start greater than stop
+  args.start = 1;
+  args.stop = 0;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr(
+          "`start`(1) should be greater than 0 and smaller than `stop`(0)"),
+      CallFunction("list_slice", {input}, &args));
+  // start same as stop
+  args.stop = args.start;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr(
+          "`start`(1) should be greater than 0 and smaller than `stop`(1)"),
+      CallFunction("list_slice", {input}, &args));
+  // stop not set and FixedSizeList requested
+  args.stop = std::nullopt;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      NotImplemented,
+      ::testing::HasSubstr("NotImplemented: Unable to produce 
FixedSizeListArray without "
+                           "`stop` being set."),
+      CallFunction("list_slice", {input}, &args));
+  // stop not set and ListArray requested
+  args.stop = std::nullopt;
+  args.return_fixed_size_list = false;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      NotImplemented, ::testing::HasSubstr("Slicing to end not yet 
implemented"),
+      CallFunction("list_slice", {input}, &args));
+  // step other than `1` not implemented
+  args.stop = 2;
+  args.step = 2;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      NotImplemented,
+      ::testing::HasSubstr("Setting `step` to anything other than 1 is not 
supported"),
+      CallFunction("list_slice", {input}, &args));
+}
+

Review Comment:
   @milesgranger I think what you added is still a "normal" slice, where the 
parent list array is sliced. 
   
   Here is a Python snippet (I suppose something similar can be done in C++)
   showing that you can create a ListArray whose child array has its own
   offset, separate from that of the parent list array:
   
   ```python
   >>> arr = pa.array([[1], [2, 3], None, [4]])
   >>> arr.offsets.tolist()
   [0, 1, 3, 3, 4]
   >>> arr.values.tolist()
   [1, 2, 3, 4]
   
   # slicing a list array slices the offsets, but keeps the child array intact (not sliced)
   >>> arr2 = arr.slice(2) 
   >>> arr2.offset
   2
   >>> arr2.offsets.offset
   2
   >>> arr2.offsets.tolist()
   [3, 3, 4]
   >>> arr2.values.offset
   0
   >>> arr2.values.tolist()
   [1, 2, 3, 4]
   
   # manually creating a non-sliced ListArray (i.e. offsets are not sliced) but with a child array that has an offset
   >>> arr3 = pa.ListArray.from_arrays(pa.array([0, 1, 3]), pa.array([0, 1, 2, 3, 4]).slice(2))
   >>> arr3
   <pyarrow.lib.ListArray object at 0x7f0aed532ec0>
   [
     [
       2
     ],
     [
       3,
       4
     ]
   ]
   
   >>> arr3.offset
   0
   >>> arr3.values.tolist()
   [2, 3, 4]
   >>> arr3.values.offset
   2
   ```
   
   
   @lidavidm is this what you meant? (since the basic sliced arrays are already 
covered by `CheckScalarUnary`)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to