jorisvandenbossche commented on code in PR #14495:
URL: https://github.com/apache/arrow/pull/14495#discussion_r1016318695
##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -124,6 +124,13 @@ TEST(TestScalarNested, StructField) {
StructFieldOptions invalid2({2, 4});
StructFieldOptions invalid3({3});
StructFieldOptions invalid4({0, 1});
+
+ // Test using FieldRefs
+ StructFieldOptions extract0_field_ref_path(FieldRef(FieldPath({0})));
+ StructFieldOptions extract0_field_ref_name(FieldRef("a"));
+ ASSERT_OK_AND_ASSIGN(auto dotted_path_ref, FieldRef::FromDotPath(".c.d"));
+ StructFieldOptions extract20_field_ref_nest(dotted_path_ref);
Review Comment:
```suggestion
StructFieldOptions extract20_field_ref_nest(FieldRef::FromDotPath(".c.d"));
```
##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
&extract0);
CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12,
null]"),
&extract20);
+
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3,
null]"),
+ &extract0_field_ref_path);
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3,
null]"),
+ &extract0_field_ref_name);
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12,
null]"),
+ &extract20_field_ref_nest);
+
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
::testing::HasSubstr("out-of-bounds field
reference"),
CallFunction("struct_field", {arr},
&invalid1));
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
- ::testing::HasSubstr("out-of-bounds field
reference"),
+ ::testing::HasSubstr("No match for
FieldRef"),
Review Comment:
The original error message seems more informative here. Would there be a way to
preserve it? I suppose that would need to be done in
`FieldRef::FindOne`, though.
##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
&extract0);
CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12,
null]"),
&extract20);
+
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3,
null]"),
+ &extract0_field_ref_path);
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3,
null]"),
+ &extract0_field_ref_name);
+ CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12,
null]"),
+ &extract20_field_ref_nest);
+
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
::testing::HasSubstr("out-of-bounds field
reference"),
CallFunction("struct_field", {arr},
&invalid1));
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
- ::testing::HasSubstr("out-of-bounds field
reference"),
+ ::testing::HasSubstr("No match for
FieldRef"),
CallFunction("struct_field", {arr},
&invalid2));
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
::testing::HasSubstr("out-of-bounds field
reference"),
CallFunction("struct_field", {arr},
&invalid3));
- EXPECT_RAISES_WITH_MESSAGE_THAT(TypeError, ::testing::HasSubstr("cannot
subscript"),
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+ ::testing::HasSubstr("No match for
FieldRef"),
Review Comment:
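Similarly here: the original `TypeError` ("cannot subscript") seems more
informative than the new "No match for FieldRef" error.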
##########
python/pyarrow/_compute.pyx:
##########
@@ -1322,7 +1322,39 @@ class MakeStructOptions(_MakeStructOptions):
cdef class _StructFieldOptions(FunctionOptions):
def _set_options(self, indices):
- self.wrapped.reset(new CStructFieldOptions(indices))
+ cdef:
+ CFieldRef field_ref
+ const CFieldRef* field_ref_ptr
+
+ # List[str]/List[bytes] converted to '.a.dotted.path'
+ if isinstance(indices, list) and len(indices):
+ if isinstance(indices[0], str):
+ indices = '.' + '.'.join(indices)
+ elif isinstance(indices[0], bytes):
+ indices = b'.' + b'.'.join(indices)
Review Comment:
Doesn't this disallow mixing strings and ints in the same list? Alternatively,
I would maybe avoid converting a list to a DottedPath, and instead add a generic
list-handling case.
More generally, I think we should align this more closely with the existing way
of creating a FieldRef in Python/Cython (e.g. the `FieldRef._nested_field`
constructor). If we have a list input here, we can just pass it through, and
then handle the resulting Expression as you already do here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]