jorisvandenbossche commented on code in PR #14495:
URL: https://github.com/apache/arrow/pull/14495#discussion_r1004539770


##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -269,14 +312,34 @@ struct StructFieldFunctor {
   }
 };
 
+Result<const DataType*> RecursiveResolveStructFieldType(const FieldRef& 
field_ref,

Review Comment:
   This could maybe also be replaced or simplified with one of the existing 
FieldRef / FieldPath utilities?



##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2691,16 +2691,28 @@ def test_struct_fields_options():
     b = pa.array(["bar", None, ""])
     c = pa.StructArray.from_arrays([a, b], ["a", "b"])
     arr = pa.StructArray.from_arrays([a, c], ["a", "c"])
-
-    assert pc.struct_field(arr,
-                           indices=[1, 1]) == pa.array(["bar", None, ""])
-    assert pc.struct_field(arr, [1, 1]) == pa.array(["bar", None, ""])
-    assert pc.struct_field(arr, [0]) == pa.array([4, 5, 6], type=pa.int64())
+    assert pc.struct_field(arr, '.c.b', is_dot_path=True) == b
+    assert pc.struct_field(arr, '.a', is_dot_path=True) == a
+    assert pc.struct_field(arr, 'a') == a
+    assert pc.struct_field(arr, indices=[1, 1]) == b
+    assert pc.struct_field(arr, [1, 1]) == b

Review Comment:
   I think it should also be possible to pass the names as a list? (`['b', 
'c']`)



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -252,6 +266,35 @@ struct StructFieldFunctor {
     return Status::OK();
   }
 
+  static Result<std::shared_ptr<Array>> ApplyFieldRef(KernelContext* ctx,
+                                                      const FieldRef& 
field_ref,
+                                                      std::shared_ptr<Array> 
current) {
+    if (current->type_id() != Type::STRUCT) {
+      return Status::Invalid("Not a StructArray: ", current->ToString(),
+                             "\nMaybe a bad FieldRef? ", field_ref.ToString());
+    }
+
+    if (field_ref.IsName()) {
+      const auto& array = checked_cast<const StructArray&>(*current);
+      current = array.GetFieldByName(*field_ref.name());
+      if (current == nullptr) {
+        return Status::Invalid("Field not found in struct: '", 
*field_ref.name(), "'");
+      }
+    } else if (field_ref.IsFieldPath()) {
+      for (const auto& idx : field_ref.field_path()->indices()) {
+        ARROW_RETURN_NOT_OK(CheckIndex(idx, *current->type()));
+        const auto& array = checked_cast<const StructArray&>(*current);
+        ARROW_ASSIGN_OR_RAISE(current, array.GetFlattenedField(idx, 
ctx->memory_pool()));
+      }

Review Comment:
   There is a `FieldPath::Get()` for arrays, could that be used here instead of 
this loop?



##########
cpp/src/arrow/compute/api_scalar.cc:
##########
@@ -560,8 +560,12 @@ StrptimeOptions::StrptimeOptions(std::string format, 
TimeUnit::type unit,
 StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::MICRO, 
false) {}
 constexpr char StrptimeOptions::kTypeName[];
 
+StructFieldOptions::StructFieldOptions(FieldRef field_ref)
+    : FunctionOptions(internal::kStructFieldOptionsType), field_ref(field_ref) 
{}
 StructFieldOptions::StructFieldOptions(std::vector<int> indices)
-    : FunctionOptions(internal::kStructFieldOptionsType), 
indices(std::move(indices)) {}
+    : StructFieldOptions(FieldRef(FieldPath(std::move(indices)))) {}

Review Comment:
   Should we set both FieldRef and indices here? (for backwards compatibility 
of the indices field in the options class)



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -252,6 +266,35 @@ struct StructFieldFunctor {
     return Status::OK();
   }
 
+  static Result<std::shared_ptr<Array>> ApplyFieldRef(KernelContext* ctx,
+                                                      const FieldRef& 
field_ref,
+                                                      std::shared_ptr<Array> 
current) {
+    if (current->type_id() != Type::STRUCT) {
+      return Status::Invalid("Not a StructArray: ", current->ToString(),
+                             "\nMaybe a bad FieldRef? ", field_ref.ToString());
+    }
+
+    if (field_ref.IsName()) {
+      const auto& array = checked_cast<const StructArray&>(*current);
+      current = array.GetFieldByName(*field_ref.name());
+      if (current == nullptr) {
+        return Status::Invalid("Field not found in struct: '", 
*field_ref.name(), "'");
+      }
+    } else if (field_ref.IsFieldPath()) {
+      for (const auto& idx : field_ref.field_path()->indices()) {
+        ARROW_RETURN_NOT_OK(CheckIndex(idx, *current->type()));
+        const auto& array = checked_cast<const StructArray&>(*current);
+        ARROW_ASSIGN_OR_RAISE(current, array.GetFlattenedField(idx, 
ctx->memory_pool()));
+      }
+    } else {
+      DCHECK(field_ref.IsNested());
+      for (const auto& ref : *field_ref.nested_refs()) {
+        ARROW_ASSIGN_OR_RAISE(current, ApplyFieldRef(ctx, ref, current));
+      }

Review Comment:
   Possible alternative: convert the nested field ref to a FieldPath (eg with 
FieldRef::FindOne?) before the code to handle a FieldPath, and then it can use 
the same code.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to