edponce commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r711600893
##########
File path: docs/source/python/api/compute.rst
##########
@@ -372,5 +372,6 @@ Structural Transforms
is_null
is_valid
list_value_length
+ list_element
list_flatten
Review comment:
Since you are already making changes here, please organize this list
alphabetically, `list_value_length` should not be the first one.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -43,6 +43,74 @@ TEST(TestScalarNested, ListValueLength) {
"[3, null, 3, 3]");
}
+TEST(TestScalarNested, ListElementNonFixedListWithNulls) {
+ auto sample = "[[7, 5, 81], [6, null, 4, 7, 8], [3, 12, 2, 0], [1, 9],
null]";
+ auto types = NumericTypes();
+ auto index_types = IntTypes();
+ for (auto ty : types) {
+ for (auto list_type : {list(ty), large_list(ty)}) {
+ auto input = ArrayFromJSON(list_type, sample);
+ auto null_input = ArrayFromJSON(list_type, "[null]");
+ for (auto index_type : index_types) {
+ auto index = ScalarFromJSON(index_type, "1");
+ auto expected = ArrayFromJSON(ty, "[5, null, 12, 9, null]");
+ auto expected_null = ArrayFromJSON(ty, "[null]");
+ CheckScalar("list_element", {input, index}, expected);
+ CheckScalar("list_element", {null_input, index}, expected_null);
+ }
+ }
+ }
+}
+
+TEST(TestScalarNested, ListElementFixedList) {
+ auto sample = "[[7, 5, 81], [6, 4, 8], [3, 12, 2], [1, 43, 87]]";
+ auto types = NumericTypes();
+ auto index_types = IntTypes();
+ for (auto ty : types) {
+ auto input = ArrayFromJSON(fixed_size_list(ty, 3), sample);
+ for (auto index_type : index_types) {
+ auto index = ScalarFromJSON(index_type, "0");
+ auto expected = ArrayFromJSON(ty, "[7, 6, 3, 1]");
+ CheckScalar("list_element", {input, index}, expected);
+ }
+ }
+}
+
+TEST(TestScalarNested, ListElementInvalid) {
+ auto input_array = ArrayFromJSON(list(float32()), "[[0.1, 1.1], [0.2,
1.2]]");
+ auto input_scalar = ScalarFromJSON(list(float32()), "[0.1, 0.2]");
+
+ // invalid index: null
+ auto index = ScalarFromJSON(int32(), "null");
+ EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+ Raises(StatusCode::Invalid));
+ EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+ Raises(StatusCode::Invalid));
+
+ // invalid index: < 0
+ index = ScalarFromJSON(int32(), "-1");
+ EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+ Raises(StatusCode::Invalid));
+ EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+ Raises(StatusCode::Invalid));
+
+ // invalid index: >= list.length
+ index = ScalarFromJSON(int32(), "2");
+ EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+ Raises(StatusCode::Invalid));
+ EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+ Raises(StatusCode::Invalid));
+
+ // invalid input
+ input_array = ArrayFromJSON(list(float32()), "[[41, 6, 93], [], [2]]");
+ input_scalar = ScalarFromJSON(list(float32()), "[]");
+ index = ScalarFromJSON(int32(), "0");
+ EXPECT_THAT(CallFunction("list_element", {input_array, index}),
+ Raises(StatusCode::Invalid));
+ EXPECT_THAT(CallFunction("list_element", {input_scalar, index}),
+ Raises(StatusCode::Invalid));
+}
Review comment:
For the invalid tests, I suggest to use
`EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ...);` because in addition to
validating the raised flag, it validates the expected message.
For example,
```c++
index = ScalarFromJSON(int32(), "-1");
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("must be
non-negative") , CallFunction("list_element", {input_array, index}));
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
"Null values emit a null in the output."),
{"lists"}};
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ if (ARROW_PREDICT_FALSE(index < 0)) {
+ return Status::Invalid("Index ", index,
+ " is out of bounds: should be greater than or equal
to 0");
+ }
Review comment:
I would change the error message to match the check being performed:
```c++
"Index ", index, " must be non-negative"
```
The phrase "out of bounds" seems to imply that both sides were checked when
in fact the positive bound is checked in the loop below.
##########
File path: docs/source/cpp/compute.rst
##########
@@ -1509,6 +1511,9 @@ Structural transforms
are discarded. Output type is Int32 for List and FixedSizeList, Int64 for
LargeList.
+* \(3) Output is an array of the same length as the input list array. The
Review comment:
Also, please add a statement describing how null entries are handled,
similar to the long description used during function registration.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]