jorisvandenbossche commented on code in PR #14495:
URL: https://github.com/apache/arrow/pull/14495#discussion_r1016318695


##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -124,6 +124,13 @@ TEST(TestScalarNested, StructField) {
   StructFieldOptions invalid2({2, 4});
   StructFieldOptions invalid3({3});
   StructFieldOptions invalid4({0, 1});
+
+  // Test using FieldRefs
+  StructFieldOptions extract0_field_ref_path(FieldRef(FieldPath({0})));
+  StructFieldOptions extract0_field_ref_name(FieldRef("a"));
+  ASSERT_OK_AND_ASSIGN(auto dotted_path_ref, FieldRef::FromDotPath(".c.d"));
+  StructFieldOptions extract20_field_ref_nest(dotted_path_ref);

Review Comment:
   ```suggestion
     StructFieldOptions extract20_field_ref_nest(FieldRef::FromDotPath(".c.d"));
   ```



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
                 &extract0);
     CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, 
null]"),
                 &extract20);
+
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, 
null]"),
+                &extract0_field_ref_path);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, 
null]"),
+                &extract0_field_ref_name);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, 
null]"),
+                &extract20_field_ref_nest);
+
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field 
reference"),
                                     CallFunction("struct_field", {arr}, 
&invalid1));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
-                                    ::testing::HasSubstr("out-of-bounds field 
reference"),
+                                    ::testing::HasSubstr("No match for 
FieldRef"),

Review Comment:
   The original error message ("out-of-bounds field reference") seems more 
informative here than "No match for FieldRef". Would there be a way to 
preserve it? Although I suppose that would need to be done in 
`FieldRef::FindOne`?



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
                 &extract0);
     CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, 
null]"),
                 &extract20);
+
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, 
null]"),
+                &extract0_field_ref_path);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, 
null]"),
+                &extract0_field_ref_name);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, 
null]"),
+                &extract20_field_ref_nest);
+
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field 
reference"),
                                     CallFunction("struct_field", {arr}, 
&invalid1));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
-                                    ::testing::HasSubstr("out-of-bounds field 
reference"),
+                                    ::testing::HasSubstr("No match for 
FieldRef"),
                                     CallFunction("struct_field", {arr}, 
&invalid2));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field 
reference"),
                                     CallFunction("struct_field", {arr}, 
&invalid3));
-    EXPECT_RAISES_WITH_MESSAGE_THAT(TypeError, ::testing::HasSubstr("cannot 
subscript"),
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                    ::testing::HasSubstr("No match for 
FieldRef"),

Review Comment:
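   Similarly here: the original `TypeError` ("cannot subscript") seems more 
informative than the generic "No match for FieldRef" error.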



##########
python/pyarrow/_compute.pyx:
##########
@@ -1322,7 +1322,39 @@ class MakeStructOptions(_MakeStructOptions):
 
 cdef class _StructFieldOptions(FunctionOptions):
     def _set_options(self, indices):
-        self.wrapped.reset(new CStructFieldOptions(indices))
+        cdef:
+            CFieldRef field_ref
+            const CFieldRef* field_ref_ptr
+
+        # List[str]/List[bytes] converted to '.a.dotted.path'
+        if isinstance(indices, list) and len(indices):
+            if isinstance(indices[0], str):
+                indices = '.' + '.'.join(indices)
+            elif isinstance(indices[0], bytes):
+                indices = b'.' + b'.'.join(indices)

Review Comment:
   Doesn't this disallow a mix of strings and ints? Alternatively, I would 
maybe avoid converting a list to a dotted path, and instead add a generic 
list-handling case.
   
   More generally, I think we should align this more closely with the existing 
way of creating a FieldRef in Python/Cython (e.g. the 
`FieldRef._nested_field` constructor). If we have a list input here, we can 
just pass it through, and then handle the resulting Expression as you already 
do here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to