rok commented on code in PR #37533: URL: https://github.com/apache/arrow/pull/37533#discussion_r1426998811
########## cpp/src/arrow/extension/fixed_shape_tensor_test.cc: ########## @@ -321,6 +324,70 @@ TEST_F(TestExtensionType, TestFromTensorType) { } } +void CheckToTensor(const std::vector<int64_t> values, const int64_t cell_size, + const std::vector<int64_t> cell_shape, + const std::vector<int64_t> cell_permutation, + const std::vector<std::string> cell_dim_names, + const std::vector<int64_t> tensor_shape, + const std::vector<std::string> tensor_dim_names, + const std::vector<int64_t> tensor_strides) { + auto buffer = Buffer::Wrap(values); + const std::shared_ptr<DataType> cell_type = fixed_size_list(int64(), cell_size); + std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, buffer}; + auto arr_data = std::make_shared<ArrayData>(int64(), values.size(), buffers); + auto arr = std::make_shared<Int64Array>(arr_data); + ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, cell_type)); + + ASSERT_OK_AND_ASSIGN( + auto expected_tensor, + Tensor::Make(int64(), buffer, tensor_shape, tensor_strides, tensor_dim_names)); + const auto ext_type = + fixed_shape_tensor(int64(), cell_shape, cell_permutation, cell_dim_names); + + auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr); + const auto tensor_array = std::static_pointer_cast<FixedShapeTensorArray>(ext_arr); + ASSERT_OK_AND_ASSIGN(const auto actual_tensor, tensor_array->ToTensor()); + + ASSERT_EQ(actual_tensor->type(), expected_tensor->type()); + ASSERT_EQ(actual_tensor->shape(), expected_tensor->shape()); + ASSERT_EQ(actual_tensor->strides(), expected_tensor->strides()); + ASSERT_EQ(actual_tensor->dim_names(), expected_tensor->dim_names()); + ASSERT_TRUE(actual_tensor->data()->Equals(*expected_tensor->data())); + ASSERT_TRUE(actual_tensor->Equals(*expected_tensor)); +} + +TEST_F(TestExtensionType, ToTensor) { + auto cell_sizes = std::vector<int64_t>{12, 12, 12, 12, 6, 6, 18, 18, 18, 18}; + + auto cell_shapes = + std::vector<std::vector<int64_t>>{{3, 4}, {4, 3}, {4, 3}, {3, 4}, {2, 3}, + {3, 2}, 
{3, 6}, {6, 3}, {3, 2, 3}, {3, 2, 3}}; + auto tensor_shapes = std::vector<std::vector<int64_t>>{ + {3, 3, 4}, {3, 4, 3}, {3, 4, 3}, {3, 3, 4}, {6, 2, 3}, + {6, 3, 2}, {2, 3, 6}, {2, 6, 3}, {2, 3, 2, 3}, {2, 3, 2, 3}}; + + auto cell_permutations = + std::vector<std::vector<int64_t>>{{0, 1}, {1, 0}, {0, 1}, {1, 0}, {0, 1}, + {1, 0}, {0, 1}, {1, 0}, {0, 1, 2}, {2, 1, 0}}; + auto tensor_strides = std::vector<std::vector<int64_t>>{ + {96, 32, 8}, {96, 8, 32}, {96, 24, 8}, {96, 8, 24}, {48, 24, 8}, + {48, 8, 24}, {144, 48, 8}, {144, 8, 48}, {144, 48, 24, 8}, {144, 8, 24, 48}}; + + auto cell_dim_names = std::vector<std::vector<std::string>>{ + {"y", "z"}, {"y", "z"}, {"y", "z"}, {"y", "z"}, {"y", "z"}, + {"y", "z"}, {"y", "z"}, {"y", "z"}, {"H", "W", "C"}, {"H", "W", "C"}}; + auto tensor_dim_names = std::vector<std::vector<std::string>>{ + {"", "y", "z"}, {"", "y", "z"}, {"", "y", "z"}, {"", "y", "z"}, + {"", "y", "z"}, {"", "y", "z"}, {"", "y", "z"}, {"", "y", "z"}, + {"", "H", "W", "C"}, {"", "H", "W", "C"}}; Review Comment: > So the behaviour of ToTensor changed regarding the returned shape? (i.e. the implementation in main is wrong?) > Can you show a before/after comparison with an example? The implementation was wrong for cases where `ext_type->permutation()` was empty but `ext_type->dim_names()` was not, because it did not correctly assemble `dim_names` for the new `Tensor`. The other changes are implementation details, e.g. using `internal::Permute<std::string>(permutation, &dim_names);` instead of a for loop. The way strides and shapes are permuted in `ToTensor` is (currently) unchanged from main. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org