lidavidm commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r711614955
##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -80,6 +80,160 @@ const FunctionDoc list_value_length_doc{
"Null values emit a null in the output."),
{"lists"}};
+template <typename Type, typename IndexType>
+Status ListElementArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  ListArrayType list_array(batch[0].array());
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal to 0");
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(), &builder));
+  RETURN_NOT_OK(builder->Reserve(list_array.length()));
+  for (int i = 0; i < list_array.length(); ++i) {
+    if (list_array.IsNull(i)) {
+      RETURN_NOT_OK(builder->AppendNull());
+      continue;
+    }
+    std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+    auto len = value_array->length();
+    if (ARROW_PREDICT_FALSE(index >= static_cast<typename IndexType::c_type>(len))) {
+      return Status::Invalid("Index ", index, " is out of bounds: should be in [0, ", len,
+                             ")");
+    }
+    RETURN_NOT_OK(builder->AppendArraySlice(*value_array->data(), index, 1));
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+  out->value = result->data();
+  return Status::OK();
+}
+
+template <typename InListScalarType, typename IndexType>
+Status ListElementScalar(KernelContext* /*ctx*/, const ExecBatch& batch, Datum* out) {
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  const auto& list_scalar = batch[0].scalar_as<InListScalarType>();
+  if (ARROW_PREDICT_FALSE(!list_scalar.is_valid)) {
+    out->value =
+        MakeNullScalar(checked_cast<const BaseListType&>(*batch[0].type()).value_type());
+    return Status::OK();
+  }
+  auto list = list_scalar.value;
+  auto index = index_scalar.value;
+  auto len = list->length();
+  if (ARROW_PREDICT_FALSE(index < 0 ||
+                          index >= static_cast<typename IndexType::c_type>(len))) {
+    return Status::Invalid("Index ", index, " is out of bounds: should be in [0, ", len,
+                           ")");
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, list->GetScalar(index));
+  out->value = result;
+  return Status::OK();
+}
+
+template <typename InListType, typename IndexType>
+void AddListElementArrayKernel(ScalarFunction* func) {
+  auto inputs = {InputType::Array(InListType::type_id),
+                 InputType::Scalar(IndexType::type_id)};
+  auto output = OutputType{ListValuesType};
+  auto sig =
+      KernelSignature::Make(std::move(inputs), std::move(output), /*is_varargs=*/false);
+  auto array_exec = ListElementArray<InListType, IndexType>;
+  ScalarKernel kernel{std::move(sig), std::move(array_exec)};
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+
+template <typename InListScalarType, typename IndexType>
+void AddListElementScalarKernel(ScalarFunction* func) {
+  auto inputs = {InputType::Scalar(InListScalarType::TypeClass::type_id),
+                 InputType::Scalar(IndexType::type_id)};
+  auto output = OutputType{ListValuesType};
+  auto sig =
+      KernelSignature::Make(std::move(inputs), std::move(output), /*is_varargs=*/false);
+  auto scalar_exec = ListElementScalar<InListScalarType, IndexType>;
+  ScalarKernel kernel{std::move(sig), std::move(scalar_exec)};
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+
+void AddListElementArrayKernels(ScalarFunction* func) {
+  AddListElementArrayKernel<ListType, UInt8Type>(func);
+  AddListElementArrayKernel<ListType, Int8Type>(func);
+  AddListElementArrayKernel<ListType, UInt16Type>(func);
+  AddListElementArrayKernel<ListType, Int16Type>(func);
+  AddListElementArrayKernel<ListType, UInt32Type>(func);
+  AddListElementArrayKernel<ListType, Int32Type>(func);
+  AddListElementArrayKernel<ListType, UInt64Type>(func);
+  AddListElementArrayKernel<ListType, Int64Type>(func);
+
+  AddListElementArrayKernel<LargeListType, UInt8Type>(func);
+  AddListElementArrayKernel<LargeListType, Int8Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt16Type>(func);
+  AddListElementArrayKernel<LargeListType, Int16Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt32Type>(func);
+  AddListElementArrayKernel<LargeListType, Int32Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt64Type>(func);
+  AddListElementArrayKernel<LargeListType, Int64Type>(func);
+
+  AddListElementArrayKernel<FixedSizeListType, UInt8Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int8Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt16Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int16Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt32Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int32Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt64Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int64Type>(func);
+}
+
+void AddListElementScalarKernels(ScalarFunction* func) {
+  AddListElementScalarKernel<ListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<ListScalar, Int8Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<ListScalar, Int16Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<ListScalar, Int32Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<ListScalar, Int64Type>(func);
+
+  AddListElementScalarKernel<LargeListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int8Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int16Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int32Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int64Type>(func);
+
+  AddListElementScalarKernel<FixedSizeListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int8Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int16Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int32Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int64Type>(func);
Review comment:
I think we can do something like this then:
```cpp
void AddListElementScalarKernels(ScalarFunction* func) {
  for (const auto list_type_id : {Type::LIST, Type::LARGE_LIST, Type::FIXED_SIZE_LIST}) {
    for (const auto& index_type : IntTypes()) {
      auto inputs = {InputType::Scalar(list_type_id), InputType::Scalar(index_type)};
      auto output = OutputType{ListValuesType};
      auto sig =
          KernelSignature::Make(std::move(inputs), std::move(output), /*is_varargs=*/false);
      auto scalar_exec = GenerateInteger<ListElementScalar, void>(index_type);
      ScalarKernel kernel{std::move(sig), std::move(scalar_exec)};
      kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
      kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
      DCHECK_OK(func->AddKernel(std::move(kernel)));
    }
  }
}
```
This will register all the kernels in one go and save us some templating,
and is more in line with what we do when we generate a kernel for many
different types.
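For context, here is a rough sketch of how such a helper typically gets wired up when the function is registered. The function name, doc, and `RegisterScalarListElement` below are placeholders for illustration, not necessarily what this PR uses:
```cpp
// Illustrative sketch only: typical registration of the helper above.
// "list_element", list_element_doc, and RegisterScalarListElement are placeholders.
void RegisterScalarListElement(FunctionRegistry* registry) {
  static FunctionDoc list_element_doc{
      "Extract list elements at the given index", "", {"lists", "index"}};
  auto func = std::make_shared<ScalarFunction>("list_element", Arity::Binary(),
                                               &list_element_doc);
  // Register one kernel per (list type, index type) combination in one go.
  AddListElementScalarKernels(func.get());
  DCHECK_OK(registry->AddFunction(std::move(func)));
}
```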
GenerateInteger comes from codegen_internal.h:
https://github.com/apache/arrow/blob/e5f3e04b4b80c9b9c53f1f0f71f39d9f8308dced/cpp/src/arrow/compute/kernels/codegen_internal.h#L1018-L1022
IntTypes comes from type.h:
https://github.com/apache/arrow/blob/e5f3e04b4b80c9b9c53f1f0f71f39d9f8308dced/cpp/src/arrow/type.h#L1966-L1967
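`IntTypes()` returns the eight integer `DataType` instances, so the loop covers the same combinations as the hand-written list. The generator itself is essentially a switch on the index type id that picks the matching template instantiation; a simplified sketch of that pattern (not the exact signature in codegen_internal.h, which takes a `GetTypeId` and a generator functor exposing a static `Exec`):
```cpp
// Simplified sketch of the GenerateInteger-style dispatch, for illustration only.
// Given a runtime integer type id, return the exec function instantiated for it.
template <template <typename...> class Op, typename Type0>
ArrayKernelExec DispatchOnIntegerIndex(Type::type index_type_id) {
  switch (index_type_id) {
    case Type::UINT8:  return Op<Type0, UInt8Type>::Exec;
    case Type::INT8:   return Op<Type0, Int8Type>::Exec;
    case Type::UINT16: return Op<Type0, UInt16Type>::Exec;
    case Type::INT16:  return Op<Type0, Int16Type>::Exec;
    case Type::UINT32: return Op<Type0, UInt32Type>::Exec;
    case Type::INT32:  return Op<Type0, Int32Type>::Exec;
    case Type::UINT64: return Op<Type0, UInt64Type>::Exec;
    case Type::INT64:  return Op<Type0, Int64Type>::Exec;
    default:
      DCHECK(false) << "unsupported index type";
      return nullptr;
  }
}
```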
The kernel itself would look something like this:
```cpp
template <typename, typename IndexType>
Status ListElementScalar(KernelContext* /*ctx*/, const ExecBatch& batch, Datum* out) {
  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
    return Status::Invalid("Index must not be null");
  }
  // note the below - no need to cast to the concrete scalar type, the base class is enough
  const auto& list_scalar = batch[0].scalar_as<BaseListScalar>();
  // ...snip...
  return Status::OK();
}
```