westonpace commented on code in PR #8510:
URL: https://github.com/apache/arrow/pull/8510#discussion_r1109176783


##########
cpp/src/arrow/extension_type_test.cc:
##########
@@ -333,4 +337,146 @@ TEST_F(TestExtensionType, ValidateExtensionArray) {
   ASSERT_OK(ext_arr4->ValidateFull());
 }
 
+TEST_F(TestExtensionType, TensorArrayType) {
+  using TensorArrayType = extension::TensorArrayType;
+
+  std::vector<int64_t> shape = {3, 3, 4};
+  std::vector<int64_t> cell_shape = {3, 4};
+  auto value_type = int64();
+  std::shared_ptr<DataType> cell_type = fixed_size_list(value_type, 12);
+
+  std::vector<std::string> dim_names = {"x", "y"};
+  std::vector<int64_t> strides = {96, 32, 8};
+  std::vector<int64_t> column_major_strides = {8, 24, 72};
+  std::vector<int64_t> neither_major_strides = {96, 8, 32};
+  std::vector<int64_t> cell_strides = {32, 8};
+  std::vector<int64_t> values = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 
11,
+                                 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23,
+                                 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
35};
+  std::vector<int64_t> values_partial = {0,  1,  2,  3,  4,  5,  6,  7,  8,  
9,  10, 11,
+                                         12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23};
+  std::vector<int64_t> shape_partial = {2, 3, 4};
+  std::string serialized =
+      
R"({"shape":[3,4],"dim_names":["x","y"],"metadata":{"key1":"metadata1"}})";
+  std::string metadata = R"({"key1":"metadata1"})";
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       Tensor::Make(value_type, Buffer::Wrap(values), shape));
+  ASSERT_OK_AND_ASSIGN(
+      auto tensor_partial,
+      Tensor::Make(value_type, Buffer::Wrap(values_partial), shape_partial));
+
+  std::shared_ptr<ExtensionType> ext_type =
+      extension::tensor_array(value_type, cell_shape, {}, dim_names, metadata);
+  auto exact_ext_type = 
internal::checked_pointer_cast<TensorArrayType>(ext_type);
+  ASSERT_OK_AND_ASSIGN(auto ds,
+                       ext_type->Deserialize(ext_type->storage_type(), 
serialized));
+  std::shared_ptr<ExtensionType> deserialized =
+      std::reinterpret_pointer_cast<ExtensionType>(ds);
+
+  ASSERT_TRUE(tensor->is_row_major());
+  ASSERT_EQ(tensor->strides(), strides);
+  ASSERT_EQ(tensor_partial->strides(), strides);
+
+  // Test ExtensionType methods
+  ASSERT_EQ(ext_type->extension_name(), "arrow.fixed_shape_tensor");
+  ASSERT_TRUE(ext_type->ExtensionEquals(*exact_ext_type));
+  ASSERT_TRUE(ext_type->storage_type()->Equals(*cell_type));
+  ASSERT_EQ(ext_type->Serialize(), serialized);
+  ASSERT_TRUE(deserialized->ExtensionEquals(*ext_type));
+  
ASSERT_EQ(internal::checked_pointer_cast<TensorArrayType>(deserialized)->metadata(),
+            metadata);
+  ASSERT_EQ(exact_ext_type->id(), Type::EXTENSION);
+
+  // Test TensorArrayType methods
+  ASSERT_EQ(exact_ext_type->ndim(), cell_shape.size());
+  ASSERT_EQ(exact_ext_type->shape(), cell_shape);
+  ASSERT_EQ(exact_ext_type->strides(), cell_strides);
+  ASSERT_EQ(exact_ext_type->dim_names(), dim_names);
+  ASSERT_EQ(exact_ext_type->metadata(), metadata);
+
+  // Test MakeArray(std::shared_ptr<ArrayData> data)
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, 
Buffer::Wrap(values)};
+  auto arr_data = std::make_shared<ArrayData>(value_type, values.size(), 
buffers, 0, 0);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  EXPECT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type));
+  auto data = fsla_arr->data();
+  data->type = ext_type;
+  auto ext_arr = exact_ext_type->MakeArray(data);
+  ASSERT_EQ(ext_arr->length(), shape[0]);
+  ASSERT_EQ(ext_arr->null_count(), 0);
+
+  // Test MakeArray(std::shared_ptr<Tensor> tensor)
+  EXPECT_OK_AND_ASSIGN(auto ext_arr_partial, 
exact_ext_type->MakeArray(tensor_partial));
+  ASSERT_OK(ext_arr->ValidateFull());
+  ASSERT_OK(ext_arr_partial->ValidateFull());
+
+  // Test ToTensor(std::shared_ptr<Array> array)
+  EXPECT_OK_AND_ASSIGN(auto t, exact_ext_type->ToTensor(ext_arr));
+  ASSERT_EQ(t->shape(), tensor->shape());
+  ASSERT_EQ(t->strides(), tensor->strides());
+  ASSERT_TRUE(tensor->Equals(*t));
+
+  // Test slicing
+  auto sliced = 
internal::checked_pointer_cast<ExtensionArray>(ext_arr->Slice(0, 2));
+  auto partial = 
internal::checked_pointer_cast<ExtensionArray>(ext_arr_partial);
+  ASSERT_OK(sliced->ValidateFull());
+  ASSERT_TRUE(sliced->storage()->Equals(*partial->storage()));
+  ASSERT_EQ(sliced->length(), partial->length());
+
+  // TODO: Where should canonical types be registered?
+  ASSERT_OK(RegisterExtensionType(exact_ext_type));

Review Comment:
   How about registering the canonical extension types in `arrow::internal::CreateGlobalRegistry`, rather than in the test?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to