lidavidm commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r708773278
##########
File path: cpp/src/arrow/compute/kernels/vector_nested.cc
##########
@@ -169,6 +206,22 @@ void RegisterVectorNested(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::move(flatten)));
DCHECK_OK(registry->AddFunction(std::make_shared<ListParentIndicesFunction>()));
+
+ auto list_element = std::make_shared<VectorFunction>("list_element",
Arity::Binary(),
+ &list_element_doc);
+ DCHECK_OK(list_element->AddKernel(
+ {InputType::Array(Type::LIST), InputType::Scalar(Type::INT32)},
+ OutputType(ListValuesType), ListElement<ListType, Int32Type>));
+ DCHECK_OK(list_element->AddKernel(
+ {InputType::Array(Type::LIST), InputType::Scalar(Type::INT64)},
+ OutputType(ListValuesType), ListElement<ListType, Int64Type>));
+ DCHECK_OK(list_element->AddKernel(
+ {InputType::Array(Type::LARGE_LIST), InputType::Scalar(Type::INT32)},
+ OutputType(ListValuesType), ListElement<LargeListType, Int32Type>));
+ DCHECK_OK(list_element->AddKernel(
+ {InputType::Array(Type::LARGE_LIST), InputType::Scalar(Type::INT64)},
+ OutputType(ListValuesType), ListElement<LargeListType, Int64Type>));
+ DCHECK_OK(registry->AddFunction(std::move(list_element)));
Review comment:
What about fixed-size list?
##########
File path: cpp/src/arrow/compute/kernels/vector_nested.cc
##########
@@ -157,6 +157,43 @@ class ListParentIndicesFunction : public MetaFunction {
}
};
+const FunctionDoc list_element_doc(
+ "Compute elements using of nested list values using an index",
+ ("`lists` must have a list-like type.\n"
+ "For each value in each list of `lists`, the element at `index`\n"
+ "is emitted."),
+ {"lists", "index"});
+
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ ScalarVector scalars;
+ for (int i = 0; i < list_array.length(); ++i) {
+ if (list_array.IsNull(i)) {
+ scalars.push_back(MakeNullScalar(list_array.value_type()));
+ continue;
+ }
+ std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+ auto len = value_array->length();
+ if (ARROW_PREDICT_FALSE(index < 0 || index > len)) {
+ return Status::Invalid("Index ", index, " is out of bounds: should be in
[0, ", len,
Review comment:
I wonder if we might also want this to be able to just append a null
(instead of returning an error) when the index is out of bounds.
##########
File path: cpp/src/arrow/compute/kernels/vector_nested.cc
##########
@@ -157,6 +157,43 @@ class ListParentIndicesFunction : public MetaFunction {
}
};
+const FunctionDoc list_element_doc(
+ "Compute elements using of nested list values using an index",
+ ("`lists` must have a list-like type.\n"
+ "For each value in each list of `lists`, the element at `index`\n"
+ "is emitted."),
+ {"lists", "index"});
+
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ ScalarVector scalars;
Review comment:
nit: you could `scalars.reserve(list_array.length())`
##########
File path: cpp/src/arrow/compute/kernels/vector_nested.cc
##########
@@ -157,6 +157,43 @@ class ListParentIndicesFunction : public MetaFunction {
}
};
+const FunctionDoc list_element_doc(
+ "Compute elements using of nested list values using an index",
+ ("`lists` must have a list-like type.\n"
+ "For each value in each list of `lists`, the element at `index`\n"
+ "is emitted."),
+ {"lists", "index"});
+
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ ScalarVector scalars;
+ for (int i = 0; i < list_array.length(); ++i) {
+ if (list_array.IsNull(i)) {
+ scalars.push_back(MakeNullScalar(list_array.value_type()));
+ continue;
+ }
+ std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+ auto len = value_array->length();
+ if (ARROW_PREDICT_FALSE(index < 0 || index > len)) {
Review comment:
For `index < 0`, maybe you could hoist the check outside the loop, since it does not depend on the individual list.
##########
File path: cpp/src/arrow/compute/kernels/vector_nested.cc
##########
@@ -157,6 +157,43 @@ class ListParentIndicesFunction : public MetaFunction {
}
};
+const FunctionDoc list_element_doc(
+ "Compute elements using of nested list values using an index",
+ ("`lists` must have a list-like type.\n"
+ "For each value in each list of `lists`, the element at `index`\n"
+ "is emitted."),
+ {"lists", "index"});
+
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ using ListArrayType = typename TypeTraits<Type>::ArrayType;
+ using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+ ListArrayType list_array(batch[0].array());
+ const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+ auto index = index_scalar.value;
+ ScalarVector scalars;
+ for (int i = 0; i < list_array.length(); ++i) {
+ if (list_array.IsNull(i)) {
+ scalars.push_back(MakeNullScalar(list_array.value_type()));
+ continue;
+ }
+ std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+ auto len = value_array->length();
+ if (ARROW_PREDICT_FALSE(index < 0 || index > len)) {
+ return Status::Invalid("Index ", index, " is out of bounds: should be in
[0, ", len,
+ "]");
+ }
+ ARROW_ASSIGN_OR_RAISE(auto scalar, (value_array->GetScalar(index)));
+ scalars.push_back(scalar);
+ }
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(),
&builder));
+ RETURN_NOT_OK(builder->AppendScalars(scalars));
Review comment:
For what it's worth, this isn't all that different from just having the
builder and appending each scalar individually in the first place. You could
then also call `AppendNull` instead of having to allocate a null scalar.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]