dinimar commented on code in PR #34537:
URL: https://github.com/apache/arrow/pull/34537#discussion_r1150129085
##########
cpp/src/arrow/type_test.cc:
##########
@@ -379,6 +382,168 @@ TEST(TestFieldPath, Basics) {
FieldPath({s.num_fields() * 2}).Get(s));
}
+TEST(TestFieldPath, GetForTable) {
+ using testing::HasSubstr;
+
+ const int length = 4;
+ auto f0 = field("a", int32());
+ auto f1 = field("b", int32());
+ auto f2 = field("c", struct_({f1}));
+ auto f3 = field("d", struct_({f0, f2}));
+ auto table_schema = schema({f0, f1, f2, f3});
+
+ // Each column has a different chunking
+ ChunkedArrayVector columns(4);
+ columns[0] = ChunkedArrayFromJSON(f0->type(), {"[0,1,2,3]"});
+ columns[1] = ChunkedArrayFromJSON(f1->type(), {"[3,2,1]", "[0]"});
+ columns[2] =
+ ChunkedArrayFromJSON(f2->type(), {R"([{"b":3},{"b":2}])",
R"([{"b":1},{"b":0}])"});
+ columns[3] = ChunkedArrayFromJSON(
+ f3->type(), {R"([{"a":0,"c":{"b":3}},{"a":1,"c":{"b":2}}])",
+ R"([{"a":2,"c":{"b":1}}])", R"([{"a":3,"c":{"b":0}}])"});
+ auto table = Table::Make(table_schema, columns, length);
+ ASSERT_OK(table->ValidateFull());
+
+ ASSERT_OK_AND_ASSIGN(auto v0, FieldPath({0}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v1, FieldPath({1}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v2, FieldPath({2}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v2_0, FieldPath({2, 0}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v3, FieldPath({3}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v3_0, FieldPath({3, 0}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v3_1, FieldPath({3, 1}).Get(*table));
+ ASSERT_OK_AND_ASSIGN(auto v3_1_0, FieldPath({3, 1, 0}).Get(*table));
+
+ EXPECT_EQ(v0->num_chunks(), columns[0]->num_chunks());
+ EXPECT_EQ(v1->num_chunks(), columns[1]->num_chunks());
+ EXPECT_EQ(v2->num_chunks(), columns[2]->num_chunks());
+ EXPECT_EQ(v2_0->num_chunks(), columns[2]->num_chunks());
+ EXPECT_EQ(v3->num_chunks(), columns[3]->num_chunks());
+ EXPECT_EQ(v3_0->num_chunks(), columns[3]->num_chunks());
+ EXPECT_EQ(v3_1->num_chunks(), columns[3]->num_chunks());
+ EXPECT_EQ(v3_1_0->num_chunks(), columns[3]->num_chunks());
+
+ EXPECT_TRUE(columns[0]->Equals(v0));
+ EXPECT_TRUE(columns[0]->Equals(v3_0));
+
+ EXPECT_TRUE(columns[1]->Equals(v1));
+ EXPECT_TRUE(columns[1]->Equals(v2_0));
+ EXPECT_TRUE(columns[1]->Equals(v3_1_0));
+
+ EXPECT_TRUE(columns[2]->Equals(v2));
+ EXPECT_TRUE(columns[2]->Equals(v3_1));
+
+ EXPECT_TRUE(columns[3]->Equals(v3));
+
+ for (const auto& path :
+ {FieldPath({4, 1, 0}), FieldPath({3, 2, 0}), FieldPath{3, 1, 1}}) {
+ EXPECT_RAISES_WITH_MESSAGE_THAT(IndexError, HasSubstr("index out of
range"),
+ path.Get(*table));
+ }
+ EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("empty indices cannot be
traversed"),
+ FieldPath().Get(*table));
+}
+
+TEST(TestFieldPath, GetForChunkedArray) {
+ using testing::HasSubstr;
+
+ auto f0 = field("a", int32());
+ auto f1 = field("b", int32());
+ auto f2 = field("c", struct_({f1}));
+ auto f3 = field("d", struct_({f0, f2}));
+ auto type = struct_({f0, f1, f3});
+
+ auto column0 = ChunkedArrayFromJSON(f0->type(), {"[0,1,2,3]"});
+ auto column1 = ChunkedArrayFromJSON(f1->type(), {"[3,2,1,0]"});
+ auto column2_1 =
+ ChunkedArrayFromJSON(f2->type(),
{R"([{"b":3},{"b":2},{"b":1},{"b":0}])"});
+ auto chunked_array = ChunkedArrayFromJSON(
+ type,
+ {
+ R"([{"a":0,"b":3,"d":{"a":0,"c":{"b":3}}}])",
+
R"([{"a":1,"b":2,"d":{"a":1,"c":{"b":2}}},{"a":2,"b":1,"d":{"a":2,"c":{"b":1}}}])",
+ R"([{"a":3,"b":0,"d":{"a":3,"c":{"b":0}}}])",
+ });
+ ASSERT_OK(chunked_array->ValidateFull());
+
+ ASSERT_OK_AND_ASSIGN(auto v0, FieldPath({0}).Get(*chunked_array));
+ ASSERT_OK_AND_ASSIGN(auto v1, FieldPath({1}).Get(*chunked_array));
+ ASSERT_OK_AND_ASSIGN(auto v2_0, FieldPath({2, 0}).Get(*chunked_array));
+ ASSERT_OK_AND_ASSIGN(auto v2_1, FieldPath({2, 1}).Get(*chunked_array));
+ ASSERT_OK_AND_ASSIGN(auto v2_1_0, FieldPath({2, 1, 0}).Get(*chunked_array));
+
+ for (const auto& v : {v0, v1, v2_0, v2_1, v2_1_0}) {
+ EXPECT_EQ(v->num_chunks(), chunked_array->num_chunks());
+ }
+
+ EXPECT_TRUE(column0->Equals(v0));
+ EXPECT_TRUE(column0->Equals(v2_0));
+
+ EXPECT_TRUE(column1->Equals(v1));
+ EXPECT_TRUE(column1->Equals(v2_1_0));
+ EXPECT_FALSE(column1->Equals(v2_1));
+
+ EXPECT_TRUE(column2_1->Equals(v2_1));
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+ HasSubstr("Get child data of non-struct
chunked array"),
+ FieldPath({0}).Get(*column0));
+}
+
+TEST(TestFieldPath, GetForChunkedArrayWithNulls) {
+ auto int_field = field("i", int32());
+ auto int_chunked_array =
+ ChunkedArrayFromJSON(int_field->type(), {"[0,1]", "[2,null]", "[3,4]"});
+
+ ASSERT_OK_AND_ASSIGN(auto null_bitmap, AllocateEmptyBitmap(2));
+ ArrayVector struct_chunks;
+ for (const auto& int_chunk : int_chunked_array->chunks()) {
+ ASSERT_OK_AND_ASSIGN(auto chunk,
+ StructArray::Make({int_chunk}, {int_field},
null_bitmap, 2));
+ struct_chunks.push_back(chunk);
+ }
+
+ ASSERT_OK_AND_ASSIGN(auto struct_chunked_array,
ChunkedArray::Make(struct_chunks));
+ ASSERT_OK(struct_chunked_array->ValidateFull());
+ // The top-level null bitmap shouldn't affect the validity of the returned
child field.
+ ASSERT_OK_AND_ASSIGN(auto int_child,
FieldPath({0}).Get(*struct_chunked_array));
+ ASSERT_TRUE(int_chunked_array->Equals(int_child));
+}
+
+TEST(TestFieldPath, GetForRecordBatch) {
Review Comment:
Agree. Need to create issue about generalizing `FieldPath::Get` test cases
and adding missing tests for other types.
But for now, I think we can push the current test cases for `FieldPath::Get`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]