edponce commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r711600893



##########
File path: docs/source/python/api/compute.rst
##########
@@ -372,5 +372,6 @@ Structural Transforms
    is_null
    is_valid
    list_value_length
+   list_element
    list_flatten

Review comment:
       Since you are already making changes here, please organize this list 
alphabetically, `list_value_length` should not be the first one.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -43,6 +43,74 @@ TEST(TestScalarNested, ListValueLength) {
                    "[3, null, 3, 3]");
 }
 
+TEST(TestScalarNested, ListElementNonFixedListWithNulls) {
+  auto sample = "[[7, 5, 81], [6, null, 4, 7, 8], [3, 12, 2, 0], [1, 9], 
null]";
+  auto types = NumericTypes();
+  auto index_types = IntTypes();
+  for (auto ty : types) {
+    for (auto list_type : {list(ty), large_list(ty)}) {
+      auto input = ArrayFromJSON(list_type, sample);
+      auto null_input = ArrayFromJSON(list_type, "[null]");
+      for (auto index_type : index_types) {
+        auto index = ScalarFromJSON(index_type, "1");
+        auto expected = ArrayFromJSON(ty, "[5, null, 12, 9, null]");
+        auto expected_null = ArrayFromJSON(ty, "[null]");
+        CheckScalar("list_element", {input, index}, expected);
+        CheckScalar("list_element", {null_input, index}, expected_null);
+      }
+    }
+  }
+}
+
+TEST(TestScalarNested, ListElementFixedList) {
+  auto sample = "[[7, 5, 81], [6, 4, 8], [3, 12, 2], [1, 43, 87]]";
+  auto types = NumericTypes();
+  auto index_types = IntTypes();
+  for (auto ty : types) {
+    auto input = ArrayFromJSON(fixed_size_list(ty, 3), sample);
+    for (auto index_type : index_types) {
+      auto index = ScalarFromJSON(index_type, "0");
+      auto expected = ArrayFromJSON(ty, "[7, 6, 3, 1]");
+      CheckScalar("list_element", {input, index}, expected);
+    }
+  }
+}
+
+TEST(TestScalarNested, ListElementInvalid) {
+  auto input_array = ArrayFromJSON(list(float32()), "[[0.1, 1.1], [0.2, 
1.2]]");
+  auto input_scalar = ScalarFromJSON(list(float32()), "[0.1, 0.2]");
+
+  // invalid index: null
+  auto index = ScalarFromJSON(int32(), "null");
+  EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+              Raises(StatusCode::Invalid));
+  EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+              Raises(StatusCode::Invalid));
+
+  // invalid index: < 0
+  index = ScalarFromJSON(int32(), "-1");
+  EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+              Raises(StatusCode::Invalid));
+  EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+              Raises(StatusCode::Invalid));
+
+  // invalid index: >= list.length
+  index = ScalarFromJSON(int32(), "2");
+  EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+              Raises(StatusCode::Invalid));
+  EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+              Raises(StatusCode::Invalid));
+
+  // invalid input
+  input_array = ArrayFromJSON(list(float32()), "[[41, 6, 93], [], [2]]");
+  input_scalar = ScalarFromJSON(list(float32()), "[]");
+  index = ScalarFromJSON(int32(), "0");
+  EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+              Raises(StatusCode::Invalid));
+  EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+              Raises(StatusCode::Invalid));
+}

Review comment:
       For the invalid tests, I suggest to use 
`EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ...);` because in addition to 
validating the raised flag, it validates the expected message.
   For example,
   ```c++
   index = ScalarFromJSON(int32(), "-1");
   EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("must be 
non-negative") , CallFunction("list_element", {input_array, index}));
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  ListArrayType list_array(batch[0].array());
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal 
to 0");
+  }

Review comment:
       I would change the error message to match the check being performed:
   ```c++
   "Index ", index, " must be non-negative"
   ```
   The phrase "out of bounds" seems to imply that both sides were checked when 
in fact the positive bound is checked in the loop below.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1509,6 +1511,9 @@ Structural transforms
   are discarded.  Output type is Int32 for List and FixedSizeList, Int64 for
   LargeList.
 
+* \(3) Output is an array of the same length as the input list array. The

Review comment:
       Also, please add a statement describing how null entries are handled, 
similar to the long description used during function registration.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to