lidavidm commented on a change in pull request #11159:
URL: https://github.com/apache/arrow/pull/11159#discussion_r710051474



##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  ListArrayType list_array(batch[0].array());
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal 
to 0");
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(), 
&builder));
+  for (int i = 0; i < list_array.length(); ++i) {
+    if (list_array.IsNull(i)) {
+      RETURN_NOT_OK(builder->AppendNull());
+      continue;
+    }
+    std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+    auto len = value_array->length();
+    if (ARROW_PREDICT_FALSE(index > len)) {
+      return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                             "]");
+    }
+    ARROW_ASSIGN_OR_RAISE(auto scalar, (value_array->GetScalar(index)));
+    RETURN_NOT_OK(builder->AppendScalar(*scalar));

Review comment:
       You may actually prefer AppendArraySlice which lets you skip allocating 
a scalar entirely: 
https://github.com/apache/arrow/blob/012248a7aca9373a39d5ac8ce9e496b8df0f10e6/cpp/src/arrow/array/builder_base.h#L126-L132

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.cc
##########
@@ -194,6 +194,11 @@ Result<ValueDescr> FirstType(KernelContext*, const 
std::vector<ValueDescr>& desc
   return result;
 }
 
+Result<ValueDescr> ListValuesType(KernelContext*, const 
std::vector<ValueDescr>& args) {
+  const auto& list_type = checked_cast<const BaseListType&>(*args[0].type);
+  return ValueDescr::Array(list_type.value_type());

Review comment:
       It might be good to use GetBroadcastShape as FirstType does above so 
that this can handle all-scalar inputs too.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1474,13 +1474,15 @@ value, but smaller than nulls.
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
 
-+--------------------------+------------+--------------------+---------------------+---------+
-| Function name            | Arity      | Input types        | Output type     
    | Notes   |
-+==========================+============+====================+=====================+=========+
-| list_flatten             | Unary      | List-like          | List value type 
    | \(1)    |
-+--------------------------+------------+--------------------+---------------------+---------+
-| list_parent_indices      | Unary      | List-like          | Int32 or Int64  
    | \(2)    |
-+--------------------------+------------+--------------------+---------------------+---------+
++--------------------------+------------+--------------------------+---------------------+---------+
+| Function name            | Arity      | Input types              | Output 
type         | Notes   |
++==========================+============+==========================+=====================+=========+
+| list_flatten             | Unary      | List-like                | List 
value type     | \(1)    |
++--------------------------+------------+--------------------------+---------------------+---------+
+| list_parent_indices      | Unary      | List-like                | Int32 or 
Int64      | \(2)    |
++--------------------------+------------+--------------------------+---------------------+---------+
+| list_element             | Binary     | List-like, Int32 or Int64| List 
value type     | \(3)    |

Review comment:
       Also try to keep things in alphabetical order, and reformat the table as 
needed (Input types could have one more column of padding added to it).

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
   }
 }
 
+TEST(TestScalarNested, ListElement) {
+  for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+    auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+    auto out_ty = ty->id() == Type::LIST ? int16() : uint8();
+    auto expected = ArrayFromJSON(out_ty, "[5, 92, 4]");
+    auto index = ScalarFromJSON(int32(), "1");
+    CheckVectorBinary("list_element", input, Datum(index), expected);

Review comment:
       We don't need CheckVectorBinary anymore since this is a scalar kernel.

##########
File path: cpp/src/arrow/compute/kernels/test_util.cc
##########
@@ -190,6 +190,13 @@ void CheckVectorUnary(std::string func_name, Datum input, 
Datum expected,
   AssertDatumsEqual(expected, actual, /*verbose=*/true);
 }
 
+void CheckVectorBinary(std::string func_name, Datum v1, Datum v2, Datum 
expected,
+                       const FunctionOptions* options) {

Review comment:
       This is a scalar kernel, no need for CheckVectorBinary.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
   }
 }
 
+TEST(TestScalarNested, ListElement) {
+  for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+    auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+    auto out_ty = ty->id() == Type::LIST ? int16() : uint8();
+    auto expected = ArrayFromJSON(out_ty, "[5, 92, 4]");
+    auto index = ScalarFromJSON(int32(), "1");
+    CheckVectorBinary("list_element", input, Datum(index), expected);
+  }
+
+  for (auto ty : {fixed_size_list(float32(), 2), large_list(float64())}) {
+    auto input = ArrayFromJSON(ty, "[[8.0, 5.0], [74.0, 92.0], [7.0, 4.0]]");
+    auto out_ty = ty->id() == Type::FIXED_SIZE_LIST ? float32() : float64();
+    auto expected = ArrayFromJSON(out_ty, "[8, 74.0, 7.0]");
+    auto index = ScalarFromJSON(int64(), "0");
+    CheckVectorBinary("list_element", input, Datum(index), expected);
+  }
+
+  // Construct a list with a non-empty null slot
+  auto input = ArrayFromJSON(list(int16()), "[[0, null, 1], [0, 0], [2, 3], 
[4, 5]]");
+  TweakValidityBit(input, 1, false);
+  auto expected = ArrayFromJSON(int16(), "[null, 0, 3, 5]");
+  auto index = ScalarFromJSON(int32(), "1");
+  CheckVectorBinary("list_element", input, Datum(index), expected);
+}
+
+TEST(TestScalarNested, ListElementChunkedArray) {

Review comment:
       CheckScalar will do this for you. It'll also check scalar inputs, which 
you may want to handle.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -62,6 +62,57 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
+template <typename Type, typename IndexType>
+Status ListElement(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using ListArrayType = typename TypeTraits<Type>::ArrayType;
+  using IndexScalarType = typename TypeTraits<IndexType>::ScalarType;
+  ListArrayType list_array(batch[0].array());
+  const auto& index_scalar = batch[1].scalar_as<IndexScalarType>();
+  auto index = index_scalar.value;
+  if (ARROW_PREDICT_FALSE(index < 0)) {
+    return Status::Invalid("Index ", index,
+                           " is out of bounds: should be greater than or equal 
to 0");
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list_array.value_type(), 
&builder));
+  for (int i = 0; i < list_array.length(); ++i) {
+    if (list_array.IsNull(i)) {
+      RETURN_NOT_OK(builder->AppendNull());
+      continue;
+    }
+    std::shared_ptr<arrow::Array> value_array = list_array.value_slice(i);
+    auto len = value_array->length();
+    if (ARROW_PREDICT_FALSE(index > len)) {
+      return Status::Invalid("Index ", index, " is out of bounds: should be in 
[0, ", len,
+                             "]");
+    }
+    ARROW_ASSIGN_OR_RAISE(auto scalar, (value_array->GetScalar(index)));
+    RETURN_NOT_OK(builder->AppendScalar(*scalar));
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+  out->value = result->data();
+  return Status::OK();
+}
+
+template <typename InListType, typename IndexType>
+void AddListElement(ScalarFunction* func) {

Review comment:
       nit: AddListElementKernel?

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
   }
 }
 
+TEST(TestScalarNested, ListElement) {
+  for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {
+    auto input = ArrayFromJSON(ty, "[[8, 5], [74, 92], [7, 4]]");
+    auto out_ty = ty->id() == Type::LIST ? int16() : uint8();

Review comment:
       Why not just keep the element type consistent?

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested_test.cc
##########
@@ -39,6 +39,46 @@ TEST(TestScalarNested, ListValueLength) {
   }
 }
 
+TEST(TestScalarNested, ListElement) {
+  for (auto ty : {list(int16()), fixed_size_list(uint8(), 2)}) {

Review comment:
       Why not test large list here as well?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to