lidavidm commented on a change in pull request #12724:
URL: https://github.com/apache/arrow/pull/12724#discussion_r837407292
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) {
struct CastStruct {
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const CastOptions& options = CastState::Get(ctx);
- const StructType& in_type = checked_cast<const
StructType&>(*batch[0].type());
- const StructType& out_type = checked_cast<const StructType&>(*out->type());
- const auto in_field_count = in_type.num_fields();
-
- if (in_field_count != out_type.num_fields()) {
- return Status::TypeError("struct field sizes do not match: ",
in_type.ToString(),
- " ", out_type.ToString());
- }
-
- for (int i = 0; i < in_field_count; ++i) {
- const auto in_field = in_type.field(i);
- const auto out_field = out_type.field(i);
- if (in_field->name() != out_field->name()) {
- return Status::TypeError("struct field names do not match: ",
in_type.ToString(),
- " ", out_type.ToString());
+ const auto& in_type = checked_cast<const StructType&>(*batch[0].type());
+ const auto& out_type = checked_cast<const StructType&>(*out->type());
+ const int in_field_count = in_type.num_fields();
+ const int out_field_count = out_type.num_fields();
+
+ std::vector<int> fields_to_select(out_field_count, -1);
+
+ int out_field_index = 0;
+ for (int in_field_index = 0;
+ in_field_index < in_field_count && out_field_index < out_field_count;
+ ++in_field_index) {
+ const auto in_field = in_type.field(in_field_index);
+ const auto out_field = out_type.field(out_field_index);
Review comment:
```suggestion
const auto& in_field = in_type.field(in_field_index);
const auto& out_field = out_type.field(out_field_index);
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -204,9 +210,11 @@ struct CastStruct {
in_array.offset, in_array.length));
}
- for (int i = 0; i < in_field_count; ++i) {
- auto values = in_array.child_data[i]->Slice(in_array.offset,
in_array.length);
- auto target_type = out->type()->field(i)->type();
+ out_field_index = 0;
+ for (int field_index : fields_to_select) {
+ auto values =
+ in_array.child_data[field_index]->Slice(in_array.offset,
in_array.length);
+ auto target_type = out->type()->field(out_field_index++)->type();
Review comment:
```suggestion
const auto& values =
in_array.child_data[field_index]->Slice(in_array.offset,
in_array.length);
const auto& target_type =
out->type()->field(out_field_index++)->type();
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,8 +2245,182 @@ static void CheckStructToStruct(
}
}
-TEST(Cast, StructToSameSizedAndNamedStruct) {
- CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+ const std::vector<std::shared_ptr<DataType>>& value_types) {
+ for (const auto& src_value_type : value_types) {
+ ARROW_SCOPED_TRACE("From type: ", src_value_type->ToString());
+ for (const auto& dest_value_type : value_types) {
+ ARROW_SCOPED_TRACE("To type: ", dest_value_type->ToString());
+
+ std::vector<std::string> field_names = {"a", "b", "c", "d", "e"};
+
+ std::shared_ptr<Array> a1, b1, c1, d1, e1;
+ a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]");
+ b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]");
+ c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]");
+ d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]");
+ e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]");
+
+ std::shared_ptr<Array> a2, b2, c2, d2, e2;
+ a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]");
+ b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]");
+ c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]");
+ d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]");
+ e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]");
+
+ ASSERT_OK_AND_ASSIGN(auto src,
+ StructArray::Make({a1, b1, c1, d1, e1},
field_names));
+ ASSERT_OK_AND_ASSIGN(auto dest1,
+ StructArray::Make({a2},
std::vector<std::string>{"a"}));
+ CheckCast(src, dest1);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest2, StructArray::Make({b2, c2},
std::vector<std::string>{"b", "c"}));
+ CheckCast(src, dest2);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest3,
+ StructArray::Make({c2, d2, e2}, std::vector<std::string>{"c", "d",
"e"}));
+ CheckCast(src, dest3);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest4, StructArray::Make({a2, b2, c2, e2},
+ std::vector<std::string>{"a", "b",
"c", "e"}));
+ CheckCast(src, dest4);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest5, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c",
"d", "e"}));
+ CheckCast(src, dest5);
+
+ // field does not exist
+ const auto dest6 = arrow::struct_({std::make_shared<Field>("a", int8()),
+ std::make_shared<Field>("d", int16()),
+ std::make_shared<Field>("f",
int64())});
+ const auto options6 = CastOptions::Safe(dest6);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ TypeError,
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
+ Cast(src, options6));
+
+ // fields in wrong order
+ const auto dest7 = arrow::struct_({std::make_shared<Field>("a", int8()),
+ std::make_shared<Field>("c", int16()),
+ std::make_shared<Field>("b",
int64())});
+ const auto options7 = CastOptions::Safe(dest7);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ TypeError,
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
+ Cast(src, options7));
+
+ // duplicate field names
+ const auto dest8 = arrow::struct_(
+ {std::make_shared<Field>("a", int8()), std::make_shared<Field>("c",
int16()),
+ std::make_shared<Field>("d", int32()), std::make_shared<Field>("a",
int64())});
+ const auto options8 = CastOptions::Safe(dest8);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ TypeError,
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
+ Cast(src, options8));
+ }
+ }
+}
+
+static void CheckStructToStructSubsetWithNulls(
Review comment:
I would expect "with nulls" to have (some) children with nulls
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2281,52 +2454,102 @@ TEST(Cast, StructToDifferentSizeStruct) {
EXPECT_RAISES_WITH_MESSAGE_THAT(
TypeError,
- ::testing::HasSubstr("Type error: struct field sizes do not match:
struct<a: int8, "
- "b: int8> struct<a: int8, b: int8, c: int8>"),
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
Cast(src, options));
}
-TEST(Cast, StructToSameSizedButDifferentNullabilityStruct) {
- // OK to go from not-nullable to nullable...
- std::vector<std::shared_ptr<Field>> fields1 = {
- std::make_shared<Field>("a", int8(), false),
- std::make_shared<Field>("b", int8(), false)};
- std::shared_ptr<Array> a1, b1;
- a1 = ArrayFromJSON(int8(), "[1, 2]");
- b1 = ArrayFromJSON(int8(), "[3, 4]");
- ASSERT_OK_AND_ASSIGN(auto src1, StructArray::Make({a1, b1}, fields1));
-
- std::vector<std::shared_ptr<Field>> fields2 = {
- std::make_shared<Field>("a", int8(), true),
- std::make_shared<Field>("b", int8(), true)};
- std::shared_ptr<Array> a2, b2;
- a2 = ArrayFromJSON(int8(), "[1, 2]");
- b2 = ArrayFromJSON(int8(), "[3, 4]");
- ASSERT_OK_AND_ASSIGN(auto dest1, StructArray::Make({a2, b2}, fields2));
-
- CheckCast(src1, dest1);
-
- // But not the other way around
- std::vector<std::shared_ptr<Field>> fields3 = {
- std::make_shared<Field>("a", int8(), true),
- std::make_shared<Field>("b", int8(), true)};
- std::shared_ptr<Array> a3, b3;
- a3 = ArrayFromJSON(int8(), "[1, null]");
- b3 = ArrayFromJSON(int8(), "[3, 4]");
- ASSERT_OK_AND_ASSIGN(auto src2, StructArray::Make({a3, b3}, fields3));
-
- std::vector<std::shared_ptr<Field>> fields4 = {
- std::make_shared<Field>("a", int8(), false),
- std::make_shared<Field>("b", int8(), false)};
- const auto dest2 = arrow::struct_(fields4);
- const auto options = CastOptions::Safe(dest2);
-
- EXPECT_RAISES_WITH_MESSAGE_THAT(
- TypeError,
- ::testing::HasSubstr(
- "Type error: cannot cast nullable struct to non-nullable "
- "struct: struct<a: int8, b: int8> struct<a: int8 not null, b: int8
not null>"),
- Cast(src2, options));
+TEST(Cast, StructToDifferentNullabilityStruct) {
+ {
+ // OK to go from non-nullable to nullable...
+ ARROW_SCOPED_TRACE("From non-nullable to nullable");
Review comment:
nit, but this is really only useful for loops or templated tests, or
when the line number is otherwise insufficient. For something like this it
doesn't bring much.
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -153,27 +153,32 @@ void AddListCast(CastFunction* func) {
struct CastStruct {
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const CastOptions& options = CastState::Get(ctx);
- const StructType& in_type = checked_cast<const
StructType&>(*batch[0].type());
- const StructType& out_type = checked_cast<const StructType&>(*out->type());
- const auto in_field_count = in_type.num_fields();
-
- if (in_field_count != out_type.num_fields()) {
- return Status::TypeError("struct field sizes do not match: ",
in_type.ToString(),
- " ", out_type.ToString());
- }
-
- for (int i = 0; i < in_field_count; ++i) {
- const auto in_field = in_type.field(i);
- const auto out_field = out_type.field(i);
- if (in_field->name() != out_field->name()) {
- return Status::TypeError("struct field names do not match: ",
in_type.ToString(),
- " ", out_type.ToString());
+ const auto& in_type = checked_cast<const StructType&>(*batch[0].type());
+ const auto& out_type = checked_cast<const StructType&>(*out->type());
+ const int in_field_count = in_type.num_fields();
+ const int out_field_count = out_type.num_fields();
+
+ std::vector<int> fields_to_select(out_field_count, -1);
+
+ int out_field_index = 0;
+ for (int in_field_index = 0;
+ in_field_index < in_field_count && out_field_index < out_field_count;
+ ++in_field_index) {
+ const auto in_field = in_type.field(in_field_index);
+ const auto out_field = out_type.field(out_field_index);
Review comment:
(nit, to avoid a copy)
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -182,9 +187,10 @@ struct CastStruct {
DCHECK(!out_scalar->is_valid);
if (in_scalar.is_valid) {
- for (int i = 0; i < in_field_count; i++) {
- auto values = in_scalar.value[i];
- auto target_type = out->type()->field(i)->type();
+ out_field_index = 0;
+ for (int field_index : fields_to_select) {
+ auto values = in_scalar.value[field_index];
+ auto target_type = out->type()->field(out_field_index++)->type();
Review comment:
```suggestion
const auto& values = in_scalar.value[field_index];
const auto& target_type =
out->type()->field(out_field_index++)->type();
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -2245,8 +2245,182 @@ static void CheckStructToStruct(
}
}
-TEST(Cast, StructToSameSizedAndNamedStruct) {
- CheckStructToStruct({int32(), float32(), int64()});
+static void CheckStructToStructSubset(
+ const std::vector<std::shared_ptr<DataType>>& value_types) {
+ for (const auto& src_value_type : value_types) {
+ ARROW_SCOPED_TRACE("From type: ", src_value_type->ToString());
+ for (const auto& dest_value_type : value_types) {
+ ARROW_SCOPED_TRACE("To type: ", dest_value_type->ToString());
+
+ std::vector<std::string> field_names = {"a", "b", "c", "d", "e"};
+
+ std::shared_ptr<Array> a1, b1, c1, d1, e1;
+ a1 = ArrayFromJSON(src_value_type, "[1, 2, 5]");
+ b1 = ArrayFromJSON(src_value_type, "[3, 4, 7]");
+ c1 = ArrayFromJSON(src_value_type, "[9, 11, 44]");
+ d1 = ArrayFromJSON(src_value_type, "[6, 51, 49]");
+ e1 = ArrayFromJSON(src_value_type, "[19, 17, 74]");
+
+ std::shared_ptr<Array> a2, b2, c2, d2, e2;
+ a2 = ArrayFromJSON(dest_value_type, "[1, 2, 5]");
+ b2 = ArrayFromJSON(dest_value_type, "[3, 4, 7]");
+ c2 = ArrayFromJSON(dest_value_type, "[9, 11, 44]");
+ d2 = ArrayFromJSON(dest_value_type, "[6, 51, 49]");
+ e2 = ArrayFromJSON(dest_value_type, "[19, 17, 74]");
+
+ ASSERT_OK_AND_ASSIGN(auto src,
+ StructArray::Make({a1, b1, c1, d1, e1},
field_names));
+ ASSERT_OK_AND_ASSIGN(auto dest1,
+ StructArray::Make({a2},
std::vector<std::string>{"a"}));
+ CheckCast(src, dest1);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest2, StructArray::Make({b2, c2},
std::vector<std::string>{"b", "c"}));
+ CheckCast(src, dest2);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest3,
+ StructArray::Make({c2, d2, e2}, std::vector<std::string>{"c", "d",
"e"}));
+ CheckCast(src, dest3);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest4, StructArray::Make({a2, b2, c2, e2},
+ std::vector<std::string>{"a", "b",
"c", "e"}));
+ CheckCast(src, dest4);
+
+ ASSERT_OK_AND_ASSIGN(
+ auto dest5, StructArray::Make({a2, b2, c2, d2, e2}, {"a", "b", "c",
"d", "e"}));
+ CheckCast(src, dest5);
+
+ // field does not exist
+ const auto dest6 = arrow::struct_({std::make_shared<Field>("a", int8()),
+ std::make_shared<Field>("d", int16()),
+ std::make_shared<Field>("f",
int64())});
+ const auto options6 = CastOptions::Safe(dest6);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ TypeError,
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
+ Cast(src, options6));
+
+ // fields in wrong order
+ const auto dest7 = arrow::struct_({std::make_shared<Field>("a", int8()),
+ std::make_shared<Field>("c", int16()),
+ std::make_shared<Field>("b",
int64())});
+ const auto options7 = CastOptions::Safe(dest7);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ TypeError,
+ ::testing::HasSubstr("struct fields don't match or are in the wrong
order"),
+ Cast(src, options7));
+
+ // duplicate field names
Review comment:
What I meant is that I would expect you to be able to cast `struct<a:
int8(), a: int8()>` to `struct<a: int64()>` or to `struct<a: int64(), a:
int64()>` successfully (though this is also a sensible case to have)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]