lidavidm commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r711589236



##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -80,6 +80,160 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
+template <typename Type, typename IndexType>
+Status ListElementArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  ListArrayType list_array(batch[0].array());
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal 
to 0");
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(), 
&builder));
+  RETURN_NOT_OK(builder->Reserve(list_array.length()));
+  for (int i = 0; i < list_array.length(); ++i) {
+    if (list_array.IsNull(i)) {
+      RETURN_NOT_OK(builder->AppendNull());
+      continue;
+    }
+    std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+    auto len = value_array->length();
+    if (ARROW_PREDICT_FALSE(index >= static_cast<typename 
IndexType::c_type>(len))) {
+      return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                             ")");
+    }
+    RETURN_NOT_OK(builder->AppendArraySlice(*value_array->data(), index, 1));
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+  out->value = result->data();
+  return Status::OK();
+}
+
+template <typename InListScalarType, typename IndexType>
+Status ListElementScalar(KernelContext* /*ctx*/, const ExecBatch& batch, 
Datum* out) {
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  const auto& list_scalar = batch[0].scalar_as<InListScalarType>();
+  if (ARROW_PREDICT_FALSE(!list_scalar.is_valid)) {
+    out->value =
+        MakeNullScalar(checked_cast<const 
BaseListType&>(*batch[0].type()).value_type());
+    return Status::OK();
+  }
+  auto list = list_scalar.value;
+  auto index = index_scalar.value;
+  auto len = list->length();
+  if (ARROW_PREDICT_FALSE(index < 0 ||
+                          index >= static_cast<typename 
IndexType::c_type>(len))) {
+    return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                           ")");
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, list->GetScalar(index));
+  out->value = result;

Review comment:
       ```suggestion
     ARROW_ASSIGN_OR_RAISE(out->value, list->GetScalar(index));
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -43,6 +43,74 @@ TEST(TestScalarNested, ListValueLength) {
                    "[3, null, 3, 3]");
 }
 
+TEST(TestScalarNested, ListElementNonFixedListWithNulls) {
+  auto sample = "[[7, 5, 81], [6, null, 4, 7, 8], [3, 12, 2, 0], [1, 9], 
null]";
+  auto types = {uint8(), int32(), uint64(), float32(), float64()};
+  auto index_types = {uint8(), int8(), uint16(), uint32(), int32(), uint64()};

Review comment:
       FYI, we recently added some helpers you can use instead of having to 
write out this list: 
https://github.com/apache/arrow/blob/7bf5609d359076dc29f72dce665f11c829187122/cpp/src/arrow/type.h#L1966-L1972

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -80,6 +80,160 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
+template <typename Type, typename IndexType>
+Status ListElementArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  ListArrayType list_array(batch[0].array());
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal 
to 0");
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(), 
&builder));
+  RETURN_NOT_OK(builder->Reserve(list_array.length()));
+  for (int i = 0; i < list_array.length(); ++i) {
+    if (list_array.IsNull(i)) {
+      RETURN_NOT_OK(builder->AppendNull());
+      continue;
+    }
+    std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+    auto len = value_array->length();
+    if (ARROW_PREDICT_FALSE(index >= static_cast<typename 
IndexType::c_type>(len))) {
+      return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                             ")");
+    }
+    RETURN_NOT_OK(builder->AppendArraySlice(*value_array->data(), index, 1));
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+  out->value = result->data();
+  return Status::OK();
+}
+
+template <typename InListScalarType, typename IndexType>
+Status ListElementScalar(KernelContext* /*ctx*/, const ExecBatch& batch, 
Datum* out) {
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  if (ARROW_PREDICT_FALSE(!index_scalar.is_valid)) {
+    return Status::Invalid("Index must not be null");
+  }
+  const auto& list_scalar = batch[0].scalar_as<InListScalarType>();
+  if (ARROW_PREDICT_FALSE(!list_scalar.is_valid)) {
+    out->value =
+        MakeNullScalar(checked_cast<const 
BaseListType&>(*batch[0].type()).value_type());
+    return Status::OK();
+  }
+  auto list = list_scalar.value;
+  auto index = index_scalar.value;
+  auto len = list->length();
+  if (ARROW_PREDICT_FALSE(index < 0 ||
+                          index >= static_cast<typename 
IndexType::c_type>(len))) {
+    return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                           ")");
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, list->GetScalar(index));
+  out->value = result;
+  return Status::OK();
+}
+
+template <typename InListType, typename IndexType>
+void AddListElementArrayKernel(ScalarFunction* func) {
+  auto inputs = {InputType::Array(InListType::type_id),
+                 InputType::Scalar(IndexType::type_id)};
+  auto output = OutputType{ListValuesType};
+  auto sig =
+      KernelSignature::Make(std::move(inputs), std::move(output), 
/*is_varargs=*/false);
+  auto array_exec = ListElementArray<InListType, IndexType>;
+  ScalarKernel kernel{std::move(sig), std::move(array_exec)};
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+
+template <typename InListScalarType, typename IndexType>
+void AddListElementScalarKernel(ScalarFunction* func) {
+  auto inputs = {InputType::Scalar(InListScalarType::TypeClass::type_id),
+                 InputType::Scalar(IndexType::type_id)};
+  auto output = OutputType{ListValuesType};
+  auto sig =
+      KernelSignature::Make(std::move(inputs), std::move(output), 
/*is_varargs=*/false);
+  auto scalar_exec = ListElementScalar<InListScalarType, IndexType>;
+  ScalarKernel kernel{std::move(sig), std::move(scalar_exec)};
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+
+void AddListElementArrayKernels(ScalarFunction* func) {
+  AddListElementArrayKernel<ListType, UInt8Type>(func);
+  AddListElementArrayKernel<ListType, Int8Type>(func);
+  AddListElementArrayKernel<ListType, UInt16Type>(func);
+  AddListElementArrayKernel<ListType, Int16Type>(func);
+  AddListElementArrayKernel<ListType, UInt32Type>(func);
+  AddListElementArrayKernel<ListType, Int32Type>(func);
+  AddListElementArrayKernel<ListType, UInt64Type>(func);
+  AddListElementArrayKernel<ListType, Int64Type>(func);
+
+  AddListElementArrayKernel<LargeListType, UInt8Type>(func);
+  AddListElementArrayKernel<LargeListType, Int8Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt16Type>(func);
+  AddListElementArrayKernel<LargeListType, Int16Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt32Type>(func);
+  AddListElementArrayKernel<LargeListType, Int32Type>(func);
+  AddListElementArrayKernel<LargeListType, UInt64Type>(func);
+  AddListElementArrayKernel<LargeListType, Int64Type>(func);
+
+  AddListElementArrayKernel<FixedSizeListType, UInt8Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int8Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt16Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int16Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt32Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int32Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, UInt64Type>(func);
+  AddListElementArrayKernel<FixedSizeListType, Int64Type>(func);
+}
+
+void AddListElementScalarKernels(ScalarFunction* func) {
+  AddListElementScalarKernel<ListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<ListScalar, Int8Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<ListScalar, Int16Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<ListScalar, Int32Type>(func);
+  AddListElementScalarKernel<ListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<ListScalar, Int64Type>(func);
+
+  AddListElementScalarKernel<LargeListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int8Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int16Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int32Type>(func);
+  AddListElementScalarKernel<LargeListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<LargeListScalar, Int64Type>(func);
+
+  AddListElementScalarKernel<FixedSizeListScalar, UInt8Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int8Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt16Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int16Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt32Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int32Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, UInt64Type>(func);
+  AddListElementScalarKernel<FixedSizeListScalar, Int64Type>(func);

Review comment:
       Note that all three scalar types have a common base type, 
BaseListScalar. So you don't need to template this kernel on the list type, you 
can work with all of them identically.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1509,6 +1511,9 @@ Structural transforms
   are discarded.  Output type is Int32 for List and FixedSizeList, Int64 for
   LargeList.
 
+* \(3) Output is an array of the same length as the input list array. The

Review comment:
       Nit: we usually renumber these notes when we reorder the table (sorry, 
yes, this is kinda tedious)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to