lidavidm commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r710051474
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
"Null values emit a null in the output."),
{"lists"}};
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ if (ARROW_PREDICT_FALSE(index < 0)) {
+ return Status::Invalid("Index ", index,
+ " is out of bounds: should be greater than or equal
to 0");
+ }
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(),
&builder));
+ for (int i = 0; i < list_array.length(); ++i) {
+ if (list_array.IsNull(i)) {
+ RETURN_NOT_OK(builder->AppendNull());
+ continue;
+ }
+ std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+ auto len = value_array->length();
+ if (ARROW_PREDICT_FALSE(index > len)) {
+ return Status::Invalid("Index ", index, " is out of bounds: should be in
[0, ", len,
+ "]");
+ }
+ ARROW_ASSIGN_OR_RAISE(auto scalar, (value_array->GetScalar(index)));
+ RETURN_NOT_OK(builder->AppendScalar(*scalar));
Review comment:
You may actually prefer AppendArraySlice which lets you skip allocating
a scalar entirely:
https://github.com/apache/arrow/blob/012248a7aca9373a39d5ac8ce9e496b8df0f10e6/cpp/src/arrow/array/builder_base.h#L126-L132
##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.cc
##########
@@ -194,6 +194,11 @@ Result<ValueDescr> FirstType(KernelContext*, const
std::vector<ValueDescr>& desc
return result;
}
+Result<ValueDescr> ListValuesType(KernelContext*, const
std::vector<ValueDescr>& args) {
+ const auto& list_type = checked_cast<const BaseListType&>(*args[0].type);
+ return ValueDescr::Array(list_type.value_type());
Review comment:
It might be good to use GetBroadcastShape as FirstType does above so
that this can handle all-scalar inputs too.
##########
File path: docs/source/cpp/compute.rst
##########
@@ -1474,13 +1474,15 @@ value, but smaller than nulls.
Structural transforms
~~~~~~~~~~~~~~~~~~~~~
-+--------------------------+------------+--------------------+---------------------+---------+
-| Function name | Arity | Input types | Output type
| Notes |
-+==========================+============+====================+=====================+=========+
-| list_flatten | Unary | List-like | List value type
| \(1) |
-+--------------------------+------------+--------------------+---------------------+---------+
-| list_parent_indices | Unary | List-like | Int32 or Int64
| \(2) |
-+--------------------------+------------+--------------------+---------------------+---------+
++--------------------------+------------+--------------------------+---------------------+---------+
+| Function name | Arity | Input types | Output
type | Notes |
++==========================+============+==========================+=====================+=========+
+| list_flatten | Unary | List-like | List
value type | \(1) |
++--------------------------+------------+--------------------------+---------------------+---------+
+| list_parent_indices | Unary | List-like | Int32 or
Int64 | \(2) |
++--------------------------+------------+--------------------------+---------------------+---------+
+| list_element | Binary | List-like, Int32 or Int64| List
value type | \(3) |
Review comment:
Also try to keep things in alphabetical order, and reformat the table as
needed (Input types could have one more column of padding added to it).
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
}
}
+TEST(TestScalarNested, ListElement) {
+ for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+ auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+ auto out_ty = ty->id() == Type::LIST ? int16() : uint8();
+ auto expected = ArrayFromJSON(out_ty, "[5, 92, 4]");
+ auto index = ScalarFromJSON(int32(), "1");
+ CheckVectorBinary("list_element", input, Datum(index), expected);
Review comment:
We don't need CheckVectorBinary anymore since this is a scalar kernel.
##########
File path: cpp/src/arrow/compute/kernels/test_util.cc
##########
@@ -190,6 +190,13 @@ void CheckVectorUnary(std::string func_name, Datum input,
Datum expected,
AssertDatumsEqual(expected, actual, /*verbose=*/true);
}
+void CheckVectorBinary(std::string func_name, Datum v1, Datum v2, Datum
expected,
+ const FunctionOptions* options) {
Review comment:
This is a scalar kernel, no need for CheckVectorBinary.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
}
}
+TEST(TestScalarNested, ListElement) {
+ for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+ auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+ auto out_ty = ty->id() == Type::LIST ? int16() : uint8();
+ auto expected = ArrayFromJSON(out_ty, "[5, 92, 4]");
+ auto index = ScalarFromJSON(int32(), "1");
+ CheckVectorBinary("list_element", input, Datum(index), expected);
+ }
+
+ for (auto ty : {fixed_size_list(float32(), 2), large_list(float64())}) {
+ auto input = ArrayFromJSON(ty, "[[8.0, 5.0], [74.0, 92.0], [7.0, 4.0]]");
+ auto out_ty = ty->id() == Type::FIXED_SIZE_LIST ? float32() : float64();
+ auto expected = ArrayFromJSON(out_ty, "[8, 74.0, 7.0]");
+ auto index = ScalarFromJSON(int64(), "0");
+ CheckVectorBinary("list_element", input, Datum(index), expected);
+ }
+
+ // Construct a list with a non-empty null slot
+ auto input = ArrayFromJSON(list(int16()), "[[0, null, 1], [0, 0], [2, 3],
[4, 5]]");
+ TweakValidityBit(input, 1, false);
+ auto expected = ArrayFromJSON(int16(), "[null, 0, 3, 5]");
+ auto index = ScalarFromJSON(int32(), "1");
+ CheckVectorBinary("list_element", input, Datum(index), expected);
+}
+
+TEST(TestScalarNested, ListElementChunkedArray) {
Review comment:
CheckScalar will do this for you. It'll also check scalar inputs, which
you may want to handle.
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
"Null values emit a null in the output."),
{"lists"}};
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ if (ARROW_PREDICT_FALSE(index < 0)) {
+ return Status::Invalid("Index ", index,
+ " is out of bounds: should be greater than or equal
to 0");
+ }
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(),
&builder));
+ for (int i = 0; i < list_array.length(); ++i) {
+ if (list_array.IsNull(i)) {
+ RETURN_NOT_OK(builder->AppendNull());
+ continue;
+ }
+ std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+ auto len = value_array->length();
+ if (ARROW_PREDICT_FALSE(index > len)) {
+ return Status::Invalid("Index ", index, " is out of bounds: should be in
[0, ", len,
+ "]");
+ }
+ ARROW_ASSIGN_OR_RAISE(auto scalar, (value_array->GetScalar(index)));
+ RETURN_NOT_OK(builder->AppendScalar(*scalar));
+ }
+ ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+ out->value = result->data();
+ return Status::OK();
+}
+
+template <typename InListType, typename IndexType>
+void AddListElement(ScalarFunction* func) {
Review comment:
nit: AddListElementKernel?
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
}
}
+TEST(TestScalarNested, ListElement) {
+ for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+ auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+ auto out_ty = ty->id() == Type::LIST ? int16() : uint8();
Review comment:
Why not just keep the element type consistent?
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
}
}
+TEST(TestScalarNested, ListElement) {
+ for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
Review comment:
Why not test large list here as well?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]