[GitHub] [arrow] jorisvandenbossche commented on a diff in pull request #8510: GH-15483: [C++] Add a Fixed Shape Tensor canonical ExtensionType

via GitHub Tue, 28 Mar 2023 04:15:47 -0700


jorisvandenbossche commented on code in PR #8510:
URL: https://github.com/apache/arrow/pull/8510#discussion_r1150393293



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -0,0 +1,425 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/fixed_shape_tensor.h"
+
+#include "arrow/testing/matchers.h"
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/tensor.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+namespace arrow {
+
+using FixedShapeTensorType = extension::FixedShapeTensorType;
+using extension::fixed_shape_tensor;
+using extension::FixedShapeTensorArray;
+
+class TestExtensionType : public ::testing::Test {
+ public:
+  void SetUp() override {
+    shape_ = {3, 3, 4};
+    cell_shape_ = {3, 4};
+    value_type_ = int64();
+    cell_type_ = fixed_size_list(value_type_, 12);
+    dim_names_ = {"x", "y"};
+    ext_type_ = internal::checked_pointer_cast<ExtensionType>(
+        fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_));
+    values_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 
16, 17,
+               18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35};
+    values_partial_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+    shape_partial_ = {2, 3, 4};
+    tensor_strides_ = {96, 32, 8};
+    cell_strides_ = {32, 8};
+    serialized_ = R"({"shape":[3,4],"dim_names":["x","y"]})";
+  }
+
+ protected:
+  std::vector<int64_t> shape_;
+  std::vector<int64_t> shape_partial_;
+  std::vector<int64_t> cell_shape_;
+  std::shared_ptr<DataType> value_type_;
+  std::shared_ptr<DataType> cell_type_;
+  std::vector<std::string> dim_names_;
+  std::shared_ptr<ExtensionType> ext_type_;
+  std::vector<int64_t> values_;
+  std::vector<int64_t> values_partial_;
+  std::vector<int64_t> tensor_strides_;
+  std::vector<int64_t> cell_strides_;
+  std::string serialized_;
+};
+
+auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
+                         std::shared_ptr<RecordBatch>* out) {
+  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
+  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
+                                        out_stream.get()));
+
+  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
+
+  io::BufferReader reader(complete_ipc_stream);
+  std::shared_ptr<RecordBatchReader> batch_reader;
+  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
+  ASSERT_OK(batch_reader->ReadNext(out));
+};
+
+TEST_F(TestExtensionType, CheckDummyRegistration) {
+  // We need a registered dummy type at runtime to allow for IPC 
deserialization
+  auto registered_type = GetExtensionType("arrow.fixed_shape_tensor");
+  ASSERT_TRUE(registered_type->type_id == Type::EXTENSION);
+}
+
+TEST_F(TestExtensionType, CreateExtensionType) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  // Test ExtensionType methods
+  ASSERT_EQ(ext_type_->extension_name(), "arrow.fixed_shape_tensor");
+  ASSERT_TRUE(ext_type_->Equals(*exact_ext_type));
+  ASSERT_FALSE(ext_type_->Equals(*cell_type_));
+  ASSERT_TRUE(ext_type_->storage_type()->Equals(*cell_type_));
+  ASSERT_EQ(ext_type_->Serialize(), serialized_);
+  ASSERT_OK_AND_ASSIGN(auto ds,
+                       ext_type_->Deserialize(ext_type_->storage_type(), 
serialized_));
+  auto deserialized = std::reinterpret_pointer_cast<ExtensionType>(ds);
+  ASSERT_TRUE(deserialized->Equals(*ext_type_));
+
+  // Test FixedShapeTensorType methods
+  ASSERT_EQ(exact_ext_type->id(), Type::EXTENSION);
+  ASSERT_EQ(exact_ext_type->ndim(), cell_shape_.size());
+  ASSERT_EQ(exact_ext_type->shape(), cell_shape_);
+  ASSERT_EQ(exact_ext_type->strides(), cell_strides_);
+  ASSERT_EQ(exact_ext_type->dim_names(), dim_names_);
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: permutation size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {0}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: dim_names size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {}, {"x"}));
+}
+
+TEST_F(TestExtensionType, EqualsCases) {
+  auto ext_type_permutation_1 = fixed_shape_tensor(int64(), {3, 4}, {0, 1}, 
{"x", "y"});
+  auto ext_type_permutation_2 = fixed_shape_tensor(int64(), {3, 4}, {1, 0}, 
{"x", "y"});
+  auto ext_type_no_permutation = fixed_shape_tensor(int64(), {3, 4}, {}, {"x", 
"y"});
+
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_permutation_1));
+
+  ASSERT_FALSE(fixed_shape_tensor(int32(), {3, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {2, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {3, 4}, {}, {"H", "W"})
+                   ->Equals(ext_type_no_permutation));
+
+  ASSERT_TRUE(ext_type_no_permutation->Equals(ext_type_permutation_1));
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_no_permutation));

Review Comment:
   ```suggestion
     ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_no_permutation));
     ASSERT_FALSE(ext_type_no_permutation->Equals(ext_type_permutation_2));
     ASSERT_FALSE(ext_type_permutation_2->Equals(ext_type_no_permutation));
   ```
   
   To also cover the case where the trivial-permutation check should return 
   false (an empty permutation compared against a non-trivial one)?



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+#include <sstream>
+
+#include "arrow/extension_type.h"
+
+namespace arrow {
+namespace extension {
+
+const std::shared_ptr<DataType> GetStorageType(
+    const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& 
shape);
+
+class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+
+  /// \brief Create a FixedShapeTensorArray from a Tensor
+  ///
+  /// This function will create a FixedShapeTensorArray from a Tensor, taking 
its
+  /// first dimension as the "element dimension" and the remaining dimensions 
as the
+  /// "tensor dimensions". If Tensor provides strides, they will be used to 
determine
+  /// dimension permutation. Otherwise, row-major permutation will be assumed.

Review Comment:
   ```suggestion
     /// dimension permutation. Otherwise, row-major layout (i.e. no permutation) will be assumed.
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/fixed_shape_tensor.h"
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/json/rapidjson_defs.h"  // IWYU pragma: keep
+#include "arrow/tensor.h"
+#include "arrow/util/int_util_overflow.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/sort.h"
+
+#include <rapidjson/document.h>
+#include <rapidjson/writer.h>
+
+namespace rj = arrow::rapidjson;
+
+namespace arrow {
+namespace extension {
+
+bool FixedShapeTensorType::ExtensionEquals(const ExtensionType& other) const {
+  if (extension_name() != other.extension_name()) {
+    return false;
+  }
+  const auto& other_ext = static_cast<const FixedShapeTensorType&>(other);
+
+  auto is_permutation_trivial = [](const std::vector<int64_t>& permutation) {
+    for (size_t i = 1; i < permutation.size(); ++i) {
+      if (permutation[i - 1] + 1 != permutation[i]) {
+        return false;
+      }
+    }
+    return true;
+  };
+  const bool permutation_equivalent =
+      ((permutation_ == other_ext.permutation()) ||
+       (permutation_.empty() && 
is_permutation_trivial(other_ext.permutation())) ||
+       (is_permutation_trivial(permutation_) && 
other_ext.permutation().empty()));
+
+  return (storage_type()->Equals(other_ext.storage_type())) &&
+         (this->shape() == other_ext.shape()) && (dim_names_ == 
other_ext.dim_names()) &&
+         permutation_equivalent;
+}
+
+std::string FixedShapeTensorType::Serialize() const {
+  rj::Document document;
+  document.SetObject();
+  rj::Document::AllocatorType& allocator = document.GetAllocator();
+
+  rj::Value shape(rj::kArrayType);
+  for (auto v : shape_) {
+    shape.PushBack(v, allocator);
+  }
+  document.AddMember(rj::Value("shape", allocator), shape, allocator);
+
+  if (!permutation_.empty()) {
+    rj::Value permutation(rj::kArrayType);
+    for (auto v : permutation_) {
+      permutation.PushBack(v, allocator);
+    }
+    document.AddMember(rj::Value("permutation", allocator), permutation, 
allocator);
+  }
+
+  if (!dim_names_.empty()) {
+    rj::Value dim_names(rj::kArrayType);
+    for (std::string v : dim_names_) {
+      dim_names.PushBack(rj::Value{}.SetString(v.c_str(), allocator), 
allocator);
+    }
+    document.AddMember(rj::Value("dim_names", allocator), dim_names, 
allocator);
+  }
+
+  rj::StringBuffer buffer;
+  rj::Writer<rj::StringBuffer> writer(buffer);
+  document.Accept(writer);
+  return buffer.GetString();
+}
+
+Result<std::shared_ptr<DataType>> FixedShapeTensorType::Deserialize(
+    std::shared_ptr<DataType> storage_type, const std::string& 
serialized_data) const {
+  if (storage_type->id() != Type::FIXED_SIZE_LIST) {
+    return Status::Invalid("Expected FixedSizeList storage type, got ",
+                           storage_type->ToString());
+  }
+  auto value_type =
+      
internal::checked_pointer_cast<FixedSizeListType>(storage_type)->value_type();
+  rj::Document document;
+  if (document.Parse(serialized_data.data(), 
serialized_data.length()).HasParseError() ||
+      !document.HasMember("shape") || !document["shape"].IsArray()) {
+    return Status::Invalid("Invalid serialized JSON data: ", serialized_data);
+  }
+
+  std::vector<int64_t> shape;
+  for (auto& x : document["shape"].GetArray()) {
+    shape.emplace_back(x.GetInt64());
+  }
+  std::vector<int64_t> permutation;
+  if (document.HasMember("permutation")) {
+    for (auto& x : document["permutation"].GetArray()) {
+      permutation.emplace_back(x.GetInt64());
+    }
+    if (shape.size() != permutation.size()) {
+      return Status::Invalid("Invalid permutation");
+    }
+  }
+  std::vector<std::string> dim_names;
+  if (document.HasMember("dim_names")) {
+    for (auto& x : document["dim_names"].GetArray()) {
+      dim_names.emplace_back(x.GetString());
+    }
+    if (shape.size() != dim_names.size()) {
+      return Status::Invalid("Invalid dim_names");
+    }
+  }
+
+  return fixed_shape_tensor(value_type, shape, permutation, dim_names);
+}
+
+std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
+    std::shared_ptr<ArrayData> data) const {
+  return std::make_shared<ExtensionArray>(data);
+}
+
+Result<std::shared_ptr<Array>> FixedShapeTensorArray::FromTensor(
+    const std::shared_ptr<Tensor>& tensor) {
+  auto cell_shape = tensor->shape();
+  cell_shape.erase(cell_shape.begin());
+
+  std::vector<std::string> dim_names;
+  for (size_t i = 1; i < tensor->dim_names().size(); ++i) {
+    dim_names.emplace_back(tensor->dim_names()[i]);
+  }
+
+  auto permutation = internal::ArgSort(tensor->strides());
+  std::reverse(permutation.begin(), permutation.end());
+  if (permutation[0] != 0) {
+    return Status::Invalid(
+        "Only first-major tensors can be zero-copy converted to arrays");
+  }
+  permutation.erase(permutation.begin());
+  for (auto& x : permutation) {
+    x--;
+  }
+
+  auto ext_type = internal::checked_pointer_cast<ExtensionType>(
+      fixed_shape_tensor(tensor->type(), cell_shape, permutation, dim_names));
+
+  std::shared_ptr<FixedSizeListArray> arr;
+  std::shared_ptr<Array> value_array;
+  switch (tensor->type_id()) {
+    case Type::UINT8: {
+      value_array = std::make_shared<UInt8Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT8: {
+      value_array = std::make_shared<Int8Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT16: {
+      value_array = std::make_shared<UInt16Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT16: {
+      value_array = std::make_shared<Int16Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT32: {
+      value_array = std::make_shared<UInt32Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT32: {
+      value_array = std::make_shared<Int32Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT64: {
+      value_array = std::make_shared<Int64Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT64: {
+      value_array = std::make_shared<Int64Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::HALF_FLOAT: {
+      value_array = std::make_shared<HalfFloatArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::FLOAT: {
+      value_array = std::make_shared<FloatArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::DOUBLE: {
+      value_array = std::make_shared<DoubleArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    default: {
+      return Status::NotImplemented("Unsupported tensor type: ",
+                                    tensor->type()->ToString());
+    }
+  }
+  arr = std::make_shared<FixedSizeListArray>(ext_type->storage_type(), 
tensor->shape()[0],
+                                             value_array);
+  auto ext_data = arr->data();
+  ext_data->type = ext_type;
+  return ext_type->MakeArray(ext_data);
+}
+
+Result<std::shared_ptr<Tensor>> FixedShapeTensorType::ToTensor(
+    std::shared_ptr<Array> arr) {
+  // To convert an array of n dimensional tensors to a n+1 dimensional tensor 
we
+  // interpret the array's length as the first dimension the new tensor. 
Further, we
+  // define n+1 dimensional tensor's strides by front appending a new stride 
to the n
+  // dimensional tensor's strides.
+
+  ARROW_RETURN_IF(arr->null_count() > 0,
+                  Status::Invalid("Null values not supported in tensors."));
+
+  auto ext_arr = internal::checked_pointer_cast<FixedSizeListArray>(
+      internal::checked_pointer_cast<ExtensionArray>(arr)->storage());
+  ARROW_RETURN_IF(!is_fixed_width(ext_arr->value_type()->id()),
+                  Status::Invalid(ext_arr->value_type()->ToString(),
+                                  " is not valid data type for a tensor"));
+
+  std::vector<int64_t> shape = this->shape();
+  shape.insert(shape.begin(), 1, arr->length());
+
+  int64_t major_stride;
+  std::vector<int64_t> tensor_strides = this->strides();
+  if (internal::MultiplyWithOverflow(ext_arr->value_type()->byte_width(),
+                                     ext_arr->list_type()->list_size(), 
&major_stride)) {
+    return Status::Invalid("Overflow in tensor strides");
+  }
+  if (!this->permutation().empty()) {
+    internal::Permute(this->permutation(), &tensor_strides);

Review Comment:
   If you permute the strides, shouldn't you also permute the shape? (Or were 
   the values from `this->strides()` already permuted, so that you are 
   permuting them "back" here?)



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+#include <sstream>
+
+#include "arrow/extension_type.h"
+
+namespace arrow {
+namespace extension {
+
+const std::shared_ptr<DataType> GetStorageType(

Review Comment:
   If we keep this in the public `arrow::extension` namespace, should it get a 
   more specific name? (Or should we move it into an internal namespace?)



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -0,0 +1,425 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/fixed_shape_tensor.h"
+
+#include "arrow/testing/matchers.h"
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/tensor.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+namespace arrow {
+
+using FixedShapeTensorType = extension::FixedShapeTensorType;
+using extension::fixed_shape_tensor;
+using extension::FixedShapeTensorArray;
+
+class TestExtensionType : public ::testing::Test {
+ public:
+  void SetUp() override {
+    shape_ = {3, 3, 4};
+    cell_shape_ = {3, 4};
+    value_type_ = int64();
+    cell_type_ = fixed_size_list(value_type_, 12);
+    dim_names_ = {"x", "y"};
+    ext_type_ = internal::checked_pointer_cast<ExtensionType>(
+        fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_));
+    values_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 
16, 17,
+               18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35};
+    values_partial_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+    shape_partial_ = {2, 3, 4};
+    tensor_strides_ = {96, 32, 8};
+    cell_strides_ = {32, 8};
+    serialized_ = R"({"shape":[3,4],"dim_names":["x","y"]})";
+  }
+
+ protected:
+  std::vector<int64_t> shape_;
+  std::vector<int64_t> shape_partial_;
+  std::vector<int64_t> cell_shape_;
+  std::shared_ptr<DataType> value_type_;
+  std::shared_ptr<DataType> cell_type_;
+  std::vector<std::string> dim_names_;
+  std::shared_ptr<ExtensionType> ext_type_;
+  std::vector<int64_t> values_;
+  std::vector<int64_t> values_partial_;
+  std::vector<int64_t> tensor_strides_;
+  std::vector<int64_t> cell_strides_;
+  std::string serialized_;
+};
+
+auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
+                         std::shared_ptr<RecordBatch>* out) {
+  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
+  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
+                                        out_stream.get()));
+
+  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
+
+  io::BufferReader reader(complete_ipc_stream);
+  std::shared_ptr<RecordBatchReader> batch_reader;
+  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
+  ASSERT_OK(batch_reader->ReadNext(out));
+};
+
+TEST_F(TestExtensionType, CheckDummyRegistration) {
+  // We need a registered dummy type at runtime to allow for IPC 
deserialization
+  auto registered_type = GetExtensionType("arrow.fixed_shape_tensor");
+  ASSERT_TRUE(registered_type->type_id == Type::EXTENSION);
+}
+
+TEST_F(TestExtensionType, CreateExtensionType) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  // Test ExtensionType methods
+  ASSERT_EQ(ext_type_->extension_name(), "arrow.fixed_shape_tensor");
+  ASSERT_TRUE(ext_type_->Equals(*exact_ext_type));
+  ASSERT_FALSE(ext_type_->Equals(*cell_type_));
+  ASSERT_TRUE(ext_type_->storage_type()->Equals(*cell_type_));
+  ASSERT_EQ(ext_type_->Serialize(), serialized_);
+  ASSERT_OK_AND_ASSIGN(auto ds,
+                       ext_type_->Deserialize(ext_type_->storage_type(), 
serialized_));
+  auto deserialized = std::reinterpret_pointer_cast<ExtensionType>(ds);
+  ASSERT_TRUE(deserialized->Equals(*ext_type_));
+
+  // Test FixedShapeTensorType methods
+  ASSERT_EQ(exact_ext_type->id(), Type::EXTENSION);
+  ASSERT_EQ(exact_ext_type->ndim(), cell_shape_.size());
+  ASSERT_EQ(exact_ext_type->shape(), cell_shape_);
+  ASSERT_EQ(exact_ext_type->strides(), cell_strides_);
+  ASSERT_EQ(exact_ext_type->dim_names(), dim_names_);
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: permutation size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {0}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: dim_names size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {}, {"x"}));
+}
+
+TEST_F(TestExtensionType, EqualsCases) {
+  auto ext_type_permutation_1 = fixed_shape_tensor(int64(), {3, 4}, {0, 1}, 
{"x", "y"});
+  auto ext_type_permutation_2 = fixed_shape_tensor(int64(), {3, 4}, {1, 0}, 
{"x", "y"});
+  auto ext_type_no_permutation = fixed_shape_tensor(int64(), {3, 4}, {}, {"x", 
"y"});
+
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_permutation_1));
+
+  ASSERT_FALSE(fixed_shape_tensor(int32(), {3, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {2, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {3, 4}, {}, {"H", "W"})
+                   ->Equals(ext_type_no_permutation));
+
+  ASSERT_TRUE(ext_type_no_permutation->Equals(ext_type_permutation_1));
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(ext_type_permutation_1->Equals(ext_type_permutation_2));
+  ASSERT_FALSE(ext_type_permutation_2->Equals(ext_type_permutation_1));
+}
+
+TEST_F(TestExtensionType, CreateFromArray) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, 
Buffer::Wrap(values_)};
+  auto arr_data = std::make_shared<ArrayData>(value_type_, values_.size(), 
buffers, 0, 0);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type_));
+  auto data = fsla_arr->data();
+  data->type = ext_type_;
+  auto ext_arr = exact_ext_type->MakeArray(data);
+  ASSERT_EQ(ext_arr->length(), shape_[0]);
+  ASSERT_EQ(ext_arr->null_count(), 0);
+}
+
+TEST_F(TestExtensionType, CreateFromTensor) {
+  std::vector<int64_t> column_major_strides = {8, 24, 72};
+  std::vector<int64_t> neither_major_strides = {96, 8, 32};
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       Tensor::Make(value_type_, Buffer::Wrap(values_), 
shape_));
+
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+
+  ASSERT_OK(ext_arr->ValidateFull());
+  ASSERT_TRUE(tensor->is_row_major());
+  ASSERT_EQ(tensor->strides(), tensor_strides_);
+  ASSERT_EQ(ext_arr->length(), shape_[0]);
+
+  auto ext_type_2 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {0, 1}));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr_2, 
FixedShapeTensorArray::FromTensor(tensor));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto column_major_tensor,
+      Tensor::Make(value_type_, Buffer::Wrap(values_), shape_, 
column_major_strides));
+  auto ext_type_3 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {0, 1}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      testing::HasSubstr(
+          "Invalid: Only first-major tensors can be zero-copy converted to 
arrays"),
+      FixedShapeTensorArray::FromTensor(column_major_tensor));
+  ASSERT_THAT(FixedShapeTensorArray::FromTensor(column_major_tensor),
+              Raises(StatusCode::Invalid));
+
+  auto neither_major_tensor = std::make_shared<Tensor>(value_type_, 
Buffer::Wrap(values_),
+                                                       shape_, 
neither_major_strides);
+  auto ext_type_4 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {1, 0}));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr_4,
+                       
FixedShapeTensorArray::FromTensor(neither_major_tensor));
+
+  auto ext_type_5 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(binary(), {1, 2}));
+  auto arr = ArrayFromJSON(binary(), R"(["abc", "def"])");
+
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr,
+                       FixedSizeListArray::FromArrays(arr, 
fixed_size_list(binary(), 1)));
+  auto data = fsla_arr->data();
+  data->type = ext_type_5;
+  auto ext_arr_5 = ext_type_5->MakeArray(data);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("binary is not valid data type for a 
tensor"),
+      exact_ext_type->ToTensor(ext_arr_5));
+
+  auto ext_type_6 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {1, 2}));
+  auto arr_with_null = ArrayFromJSON(int64(), "[0, null]");
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr_6, FixedSizeListArray::FromArrays(
+                                            arr_with_null, 
fixed_size_list(int64(), 1)));
+  auto data6 = fsla_arr_6->data();
+  data6->type = ext_type_6;
+  data6->null_count = 1;
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Null values not supported in tensors."),
+      ext_type_6->ToTensor(ext_type_6->MakeArray(data6)));
+}
+
+void CheckTensorRoundtrip(const std::shared_ptr<Tensor>& tensor,
+                          std::shared_ptr<DataType> expected_ext_type) {
+  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(expected_ext_type);
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+  auto generated_ext_type =
+      internal::checked_pointer_cast<FixedShapeTensorType>(ext_arr->type());
+
+  // Check that generated type is equal to the expected type
+  ASSERT_EQ(generated_ext_type->type_name(), ext_type->type_name());
+  ASSERT_EQ(generated_ext_type->shape(), ext_type->shape());
+  ASSERT_EQ(generated_ext_type->dim_names(), ext_type->dim_names());
+  ASSERT_EQ(generated_ext_type->permutation(), ext_type->permutation());
+  
ASSERT_TRUE(generated_ext_type->storage_type()->Equals(*ext_type->storage_type()));
+  ASSERT_TRUE(generated_ext_type->Equals(ext_type));
+
+  // Check Tensor roundtrip
+  ASSERT_OK_AND_ASSIGN(auto tensor_from_array, 
generated_ext_type->ToTensor(ext_arr));
+  ASSERT_EQ(tensor->type(), tensor_from_array->type());
+  ASSERT_EQ(tensor->shape(), tensor_from_array->shape());
+  for (size_t i = 1; i < tensor->dim_names().size(); i++) {
+    ASSERT_EQ(tensor->dim_names()[i], tensor_from_array->dim_names()[i]);
+  }
+  ASSERT_EQ(tensor->strides(), tensor_from_array->strides());
+  ASSERT_TRUE(tensor->data()->Equals(*tensor_from_array->data()));
+  ASSERT_TRUE(tensor->Equals(*tensor_from_array));
+}
+
+TEST_F(TestExtensionType, RoundtripTensor) {
+  auto values = Buffer::Wrap(values_);
+  ASSERT_OK_AND_ASSIGN(auto tensor1, Tensor::Make(value_type_, values, {3, 3, 
4},
+                                                  {96, 32, 8}, {"", "y", 
"z"}));
+  ASSERT_OK_AND_ASSIGN(auto tensor2,
+                       Tensor::Make(value_type_, values, {3, 3, 4}, {96, 8, 
24}));
+  ASSERT_OK_AND_ASSIGN(auto tensor3,
+                       Tensor::Make(value_type_, values, {3, 4, 3}, {96, 24, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor4,
+                       Tensor::Make(value_type_, values, {3, 4, 3}, {96, 8, 
32}));
+  ASSERT_OK_AND_ASSIGN(auto tensor5,
+                       Tensor::Make(value_type_, values, {6, 2, 3}, {48, 24, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor6,
+                       Tensor::Make(value_type_, values, {6, 2, 3}, {48, 8, 
16}));
+  ASSERT_OK_AND_ASSIGN(auto tensor7,
+                       Tensor::Make(value_type_, values, {2, 3, 6}, {144, 48, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor8,
+                       Tensor::Make(value_type_, values, {2, 3, 6}, {144, 8, 
24}));
+  ASSERT_OK_AND_ASSIGN(auto tensor9,
+                       Tensor::Make(value_type_, values, {2, 3, 2, 3}, {144, 
48, 24, 8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor10,
+                       Tensor::Make(value_type_, values, {2, 3, 2, 3}, {144, 
8, 24, 48}));
+
+  CheckTensorRoundtrip(tensor1,
+                       fixed_shape_tensor(value_type_, {3, 4}, {0, 1}, {"y", 
"z"}));
+  CheckTensorRoundtrip(tensor2, fixed_shape_tensor(value_type_, {3, 4}, {1, 
0}, {}));
+  CheckTensorRoundtrip(tensor3, fixed_shape_tensor(value_type_, {4, 3}, {0, 
1}));
+  CheckTensorRoundtrip(tensor4, fixed_shape_tensor(value_type_, {4, 3}, {1, 
0}));
+  CheckTensorRoundtrip(tensor5, fixed_shape_tensor(value_type_, {2, 3}, {0, 
1}));
+  CheckTensorRoundtrip(tensor6, fixed_shape_tensor(value_type_, {2, 3}, {1, 
0}));
+  CheckTensorRoundtrip(tensor7, fixed_shape_tensor(value_type_, {3, 6}, {0, 
1}));
+  CheckTensorRoundtrip(tensor8, fixed_shape_tensor(value_type_, {3, 6}, {1, 
0}));
+  CheckTensorRoundtrip(tensor9, fixed_shape_tensor(value_type_, {3, 2, 3}, {0, 
1, 2}));
+  CheckTensorRoundtrip(tensor10, fixed_shape_tensor(value_type_, {3, 2, 3}, 
{2, 1, 0}));
+}
+
+TEST_F(TestExtensionType, SliceTensor) {
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       Tensor::Make(value_type_, Buffer::Wrap(values_), 
shape_));
+  ASSERT_OK_AND_ASSIGN(
+      auto tensor_partial,
+      Tensor::Make(value_type_, Buffer::Wrap(values_partial_), 
shape_partial_));
+  ASSERT_EQ(tensor->strides(), tensor_strides_);
+  ASSERT_EQ(tensor_partial->strides(), tensor_strides_);
+  auto ext_type = fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_);
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr_partial,
+                       FixedShapeTensorArray::FromTensor(tensor_partial));
+  ASSERT_OK(ext_arr->ValidateFull());
+  ASSERT_OK(ext_arr_partial->ValidateFull());
+
+  auto sliced = 
internal::checked_pointer_cast<ExtensionArray>(ext_arr->Slice(0, 2));
+  auto partial = 
internal::checked_pointer_cast<ExtensionArray>(ext_arr_partial);
+
+  ASSERT_TRUE(sliced->Equals(*partial));
+  ASSERT_OK(sliced->ValidateFull());
+  ASSERT_OK(partial->ValidateFull());
+  ASSERT_TRUE(sliced->storage()->Equals(*partial->storage()));
+  ASSERT_EQ(sliced->length(), partial->length());
+}
+
+void CheckSerializationRoundtrip(const std::shared_ptr<DataType>& ext_type) {
+  auto fst_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type);
+  auto serialized = fst_type->Serialize();
+  ASSERT_OK_AND_ASSIGN(auto deserialized,
+                       fst_type->Deserialize(fst_type->storage_type(), 
serialized));
+  ASSERT_TRUE(fst_type->Equals(*deserialized));
+}
+
+void CheckDeserializationRaises(const std::shared_ptr<DataType>& storage_type,
+                                const std::string& serialized,
+                                const std::string& expected_message) {
+  auto fst_type = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, 
testing::HasSubstr(expected_message),
+                                  fst_type->Deserialize(storage_type, 
serialized));
+}
+
+TEST_F(TestExtensionType, MetadataSerializationRoundtrip) {
+  CheckSerializationRoundtrip(ext_type_);
+  CheckSerializationRoundtrip(fixed_shape_tensor(value_type_, {}, {}, {}));
+  CheckSerializationRoundtrip(fixed_shape_tensor(value_type_, {0}, {}, {}));
+  CheckSerializationRoundtrip(fixed_shape_tensor(value_type_, {1}, {0}, 
{"x"}));
+  CheckSerializationRoundtrip(
+      fixed_shape_tensor(value_type_, {256, 256, 3}, {0, 1, 2}, {"H", "W", 
"C"}));
+  CheckSerializationRoundtrip(
+      fixed_shape_tensor(value_type_, {256, 256, 3}, {2, 0, 1}, {"C", "H", 
"W"}));
+
+  auto storage_type = fixed_size_list(int64(), 12);
+  CheckDeserializationRaises(boolean(), R"({"shape":[3,4]})",
+                             "Expected FixedSizeList storage type, got bool");
+  CheckDeserializationRaises(storage_type, R"({"dim_names":["x","y"]})",
+                             "Invalid serialized JSON data");
+  CheckDeserializationRaises(storage_type, R"({"shape":(3,4)})",
+                             "Invalid serialized JSON data");
+  CheckDeserializationRaises(storage_type, 
R"({"shape":[3,4],"permutation":[1,0,2]})",
+                             "Invalid permutation");
+  CheckDeserializationRaises(storage_type, 
R"({"shape":[3],"dim_names":["x","y"]})",
+                             "Invalid dim_names");
+}
+
+TEST_F(TestExtensionType, RoudtripBatch) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, 
Buffer::Wrap(values_)};
+  auto arr_data = std::make_shared<ArrayData>(value_type_, values_.size(), 
buffers, 0, 0);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type_));
+  auto data = fsla_arr->data();
+  data->type = ext_type_;
+  auto ext_arr = exact_ext_type->MakeArray(data);
+
+  auto ext_metadata =
+      key_value_metadata({{"ARROW:extension:name", 
exact_ext_type->extension_name()},
+                          {"ARROW:extension:metadata", serialized_}});
+  auto ext_field = field("f0", exact_ext_type, true, ext_metadata);
+  auto batch = RecordBatch::Make(schema({ext_field}), ext_arr->length(), 
{ext_arr});
+  std::shared_ptr<RecordBatch> read_batch;
+  RoundtripBatch(batch, &read_batch);
+  CompareBatch(*batch, *read_batch, /*compare_metadata=*/true);
+}
+
+TEST_F(TestExtensionType, RoudtripBatchFromTensor) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+  ASSERT_OK_AND_ASSIGN(auto tensor, Tensor::Make(value_type_, 
Buffer::Wrap(values_),
+                                                 shape_, {}, {"n", "x", "y"}));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+  ext_arr->data()->type = exact_ext_type;
+
+  auto ext_metadata =
+      key_value_metadata({{"ARROW:extension:name", 
ext_type_->extension_name()},
+                          {"ARROW:extension:metadata", serialized_}});
+  auto ext_field = field("f0", ext_type_, true, ext_metadata);
+  auto batch = RecordBatch::Make(schema({ext_field}), ext_arr->length(), 
{ext_arr});
+  std::shared_ptr<RecordBatch> read_batch;
+  RoundtripBatch(batch, &read_batch);
+  CompareBatch(*batch, *read_batch, /*compare_metadata=*/true);
+}
+
+TEST_F(TestExtensionType, ComputeStrides) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  auto ext_type_1 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), cell_shape_, {}, dim_names_));
+  auto ext_type_2 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), cell_shape_, {}, dim_names_));
+  auto ext_type_3 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int32(), cell_shape_, {}, dim_names_));
+  ASSERT_TRUE(ext_type_1->Equals(*ext_type_2));
+  ASSERT_FALSE(ext_type_1->Equals(*ext_type_3));
+
+  auto ext_type_4 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4, 7}, {}, {"x", "y", "z"}));
+  ASSERT_EQ(ext_type_4->strides(), (std::vector<int64_t>{224, 56, 8}));
+  ext_type_4 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4, 7}, {0, 1, 2}, {"x", "y", "z"}));
+  ASSERT_EQ(ext_type_4->strides(), (std::vector<int64_t>{224, 56, 8}));
+
+  auto ext_type_5 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4, 7}, {1, 0, 2}));
+  ASSERT_EQ(ext_type_5->strides(), (std::vector<int64_t>{168, 56, 8}));
+  ASSERT_EQ(ext_type_5->Serialize(), 
R"({"shape":[3,4,7],"permutation":[1,0,2]})");
+
+  auto ext_type_6 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4, 7}, {1, 2, 0}, {}));
+  ASSERT_EQ(ext_type_6->strides(), (std::vector<int64_t>{168, 24, 8}));

Review Comment:
   Can you explain why we get these strides? (Just to help my understanding of 
why this is the expected result; I don't directly see it.)
   (Those "look" like row-major strides, since they are consistently 
decreasing, but I expected non-row-major strides given the permutation.)



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+#include <sstream>
+
+#include "arrow/extension_type.h"
+
+namespace arrow {
+namespace extension {
+
+const std::shared_ptr<DataType> GetStorageType(
+    const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& 
shape);
+
+class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+
+  /// \brief Create a FixedShapeTensorArray from a Tensor
+  ///
+  /// This function will create a FixedShapeTensorArray from a Tensor, taking 
its
+  /// first dimension as the "element dimension" and the remaining dimensions 
as the

Review Comment:
   I don't find "element dimension" very clear ("element" could also refer to 
the individual elements, i.e. the tensors, of the array). 
   Maybe something like "taking its first dimension as the number of elements 
in the resulting array and the remaining dimensions as the shape of the 
individual tensors"?



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/fixed_shape_tensor.h"
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/json/rapidjson_defs.h"  // IWYU pragma: keep
+#include "arrow/tensor.h"
+#include "arrow/util/int_util_overflow.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/sort.h"
+
+#include <rapidjson/document.h>
+#include <rapidjson/writer.h>
+
+namespace rj = arrow::rapidjson;
+
+namespace arrow {
+namespace extension {
+
+bool FixedShapeTensorType::ExtensionEquals(const ExtensionType& other) const {
+  if (extension_name() != other.extension_name()) {
+    return false;
+  }
+  const auto& other_ext = static_cast<const FixedShapeTensorType&>(other);
+
+  auto is_permutation_trivial = [](const std::vector<int64_t>& permutation) {
+    for (size_t i = 1; i < permutation.size(); ++i) {
+      if (permutation[i - 1] + 1 != permutation[i]) {
+        return false;
+      }
+    }
+    return true;
+  };
+  const bool permutation_equivalent =
+      ((permutation_ == other_ext.permutation()) ||
+       (permutation_.empty() && 
is_permutation_trivial(other_ext.permutation())) ||
+       (is_permutation_trivial(permutation_) && 
other_ext.permutation().empty()));
+
+  return (storage_type()->Equals(other_ext.storage_type())) &&
+         (this->shape() == other_ext.shape()) && (dim_names_ == 
other_ext.dim_names()) &&
+         permutation_equivalent;
+}
+
+std::string FixedShapeTensorType::Serialize() const {
+  rj::Document document;
+  document.SetObject();
+  rj::Document::AllocatorType& allocator = document.GetAllocator();
+
+  rj::Value shape(rj::kArrayType);
+  for (auto v : shape_) {
+    shape.PushBack(v, allocator);
+  }
+  document.AddMember(rj::Value("shape", allocator), shape, allocator);
+
+  if (!permutation_.empty()) {
+    rj::Value permutation(rj::kArrayType);
+    for (auto v : permutation_) {
+      permutation.PushBack(v, allocator);
+    }
+    document.AddMember(rj::Value("permutation", allocator), permutation, 
allocator);
+  }
+
+  if (!dim_names_.empty()) {
+    rj::Value dim_names(rj::kArrayType);
+    for (std::string v : dim_names_) {
+      dim_names.PushBack(rj::Value{}.SetString(v.c_str(), allocator), 
allocator);
+    }
+    document.AddMember(rj::Value("dim_names", allocator), dim_names, 
allocator);
+  }
+
+  rj::StringBuffer buffer;
+  rj::Writer<rj::StringBuffer> writer(buffer);
+  document.Accept(writer);
+  return buffer.GetString();
+}
+
+Result<std::shared_ptr<DataType>> FixedShapeTensorType::Deserialize(
+    std::shared_ptr<DataType> storage_type, const std::string& 
serialized_data) const {
+  if (storage_type->id() != Type::FIXED_SIZE_LIST) {
+    return Status::Invalid("Expected FixedSizeList storage type, got ",
+                           storage_type->ToString());
+  }
+  auto value_type =
+      
internal::checked_pointer_cast<FixedSizeListType>(storage_type)->value_type();
+  rj::Document document;
+  if (document.Parse(serialized_data.data(), 
serialized_data.length()).HasParseError() ||
+      !document.HasMember("shape") || !document["shape"].IsArray()) {
+    return Status::Invalid("Invalid serialized JSON data: ", serialized_data);
+  }
+
+  std::vector<int64_t> shape;
+  for (auto& x : document["shape"].GetArray()) {
+    shape.emplace_back(x.GetInt64());
+  }
+  std::vector<int64_t> permutation;
+  if (document.HasMember("permutation")) {
+    for (auto& x : document["permutation"].GetArray()) {
+      permutation.emplace_back(x.GetInt64());
+    }
+    if (shape.size() != permutation.size()) {
+      return Status::Invalid("Invalid permutation");
+    }
+  }
+  std::vector<std::string> dim_names;
+  if (document.HasMember("dim_names")) {
+    for (auto& x : document["dim_names"].GetArray()) {
+      dim_names.emplace_back(x.GetString());
+    }
+    if (shape.size() != dim_names.size()) {
+      return Status::Invalid("Invalid dim_names");
+    }
+  }
+
+  return fixed_shape_tensor(value_type, shape, permutation, dim_names);
+}
+
+std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
+    std::shared_ptr<ArrayData> data) const {
+  return std::make_shared<ExtensionArray>(data);
+}
+
+Result<std::shared_ptr<Array>> FixedShapeTensorArray::FromTensor(
+    const std::shared_ptr<Tensor>& tensor) {
+  auto cell_shape = tensor->shape();
+  cell_shape.erase(cell_shape.begin());
+
+  std::vector<std::string> dim_names;
+  for (size_t i = 1; i < tensor->dim_names().size(); ++i) {
+    dim_names.emplace_back(tensor->dim_names()[i]);
+  }
+
+  auto permutation = internal::ArgSort(tensor->strides());
+  std::reverse(permutation.begin(), permutation.end());
+  if (permutation[0] != 0) {
+    return Status::Invalid(
+        "Only first-major tensors can be zero-copy converted to arrays");
+  }
+  permutation.erase(permutation.begin());
+  for (auto& x : permutation) {
+    x--;
+  }
+
+  auto ext_type = internal::checked_pointer_cast<ExtensionType>(
+      fixed_shape_tensor(tensor->type(), cell_shape, permutation, dim_names));
+
+  std::shared_ptr<FixedSizeListArray> arr;
+  std::shared_ptr<Array> value_array;
+  switch (tensor->type_id()) {
+    case Type::UINT8: {
+      value_array = std::make_shared<UInt8Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT8: {
+      value_array = std::make_shared<Int8Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT16: {
+      value_array = std::make_shared<UInt16Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT16: {
+      value_array = std::make_shared<Int16Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT32: {
+      value_array = std::make_shared<UInt32Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT32: {
+      value_array = std::make_shared<Int32Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::UINT64: {
+      value_array = std::make_shared<Int64Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::INT64: {
+      value_array = std::make_shared<Int64Array>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::HALF_FLOAT: {
+      value_array = std::make_shared<HalfFloatArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::FLOAT: {
+      value_array = std::make_shared<FloatArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    case Type::DOUBLE: {
+      value_array = std::make_shared<DoubleArray>(tensor->size(), 
tensor->data());
+      break;
+    }
+    default: {
+      return Status::NotImplemented("Unsupported tensor type: ",
+                                    tensor->type()->ToString());
+    }
+  }
+  arr = std::make_shared<FixedSizeListArray>(ext_type->storage_type(), 
tensor->shape()[0],
+                                             value_array);
+  auto ext_data = arr->data();
+  ext_data->type = ext_type;
+  return ext_type->MakeArray(ext_data);
+}
+
+Result<std::shared_ptr<Tensor>> FixedShapeTensorType::ToTensor(
+    std::shared_ptr<Array> arr) {
+  // To convert an array of n dimensional tensors to a n+1 dimensional tensor 
we
+  // interpret the array's length as the first dimension the new tensor. 
Further, we
+  // define n+1 dimensional tensor's strides by front appending a new stride 
to the n
+  // dimensional tensor's strides.
+
+  ARROW_RETURN_IF(arr->null_count() > 0,
+                  Status::Invalid("Null values not supported in tensors."));

Review Comment:
   ```suggestion
     if (arr->null_count() > 0) {
       return Status::Invalid("Null values not supported in tensors.");
     }
   ```
   
   Very much a nitpick, but given that this is only one line longer, and that 
you use this pattern below as well (good to be consistent), I find this 
explicit version a bit easier to read compared to the macro.



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+#include <sstream>
+
+#include "arrow/extension_type.h"
+
+namespace arrow {
+namespace extension {
+
+const std::shared_ptr<DataType> GetStorageType(
+    const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& 
shape);
+
+class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+
+  /// \brief Create a FixedShapeTensorArray from a Tensor
+  ///
+  /// This function will create a FixedShapeTensorArray from a Tensor, taking 
its
+  /// first dimension as the "element dimension" and the remaining dimensions 
as the
+  /// "tensor dimensions". If Tensor provides strides, they will be used to 
determine
+  /// dimension permutation. Otherwise, row-major permutation will be assumed.
+  ///
+  /// \param[in] tensor The Tensor to convert to a FixedShapeTensorArray
+  static Result<std::shared_ptr<Array>> FromTensor(const 
std::shared_ptr<Tensor>& tensor);
+};
+
+/// \brief Concrete type class for constant-size Tensor data.
+class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
+ public:
+  FixedShapeTensorType(const std::shared_ptr<DataType>& value_type,
+                       const std::vector<int64_t>& shape,
+                       const std::vector<int64_t>& permutation = {},
+                       const std::vector<std::string>& dim_names = {})
+      : ExtensionType(GetStorageType(value_type, shape)),
+        value_type_(value_type),
+        shape_(shape),
+        permutation_(permutation),
+        dim_names_(dim_names) {}
+
+  std::string extension_name() const override { return 
"arrow.fixed_shape_tensor"; }
+
+  /// Number of dimensions of tensor elements
+  size_t ndim() { return shape_.size(); }
+
+  /// Shape of tensor elements
+  const std::vector<int64_t>& shape() const { return shape_; }
+
+  /// Strides of tensor elements. Strides state offset in bytes between 
adjacent
+  /// elements along each dimension. In case permutation is non-empty strides 
are
+  /// computed from permuted tensor element's shape.
+  const std::vector<int64_t>& strides();
+
+  /// Permutation mapping from logical to physical memory layout of tensor 
elements
+  const std::vector<int64_t>& permutation() const { return permutation_; }
+
+  /// Dimension names of tensor elements. Dimensions are ordered physically.
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  std::string Serialize() const override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized_data) const override;
+
+  /// Create a FixedShapeTensorArray from ArrayData
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  /// \brief Create a Tensor from FixedShapeTensorArray
+  ///
+  /// This function will create a Tensor from a FixedShapeTensorArray, setting 
its
+  /// first dimension as length equal to the FixedShapeTensorArray's length 
and the
+  /// remaining dimensions as the FixedShapeTensorType's element shape.
+  ///
+  /// \param[in] arr The FixedShapeTensorArray to convert to a Tensor
+  Result<std::shared_ptr<Tensor>> ToTensor(std::shared_ptr<Array> arr);

Review Comment:
   Since this is a method to convert an array to a tensor, should we move it 
to the array class (`FixedShapeTensorArray`)?



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <numeric>
+#include <sstream>
+
+#include "arrow/extension_type.h"
+
+namespace arrow {
+namespace extension {
+
+const std::shared_ptr<DataType> GetStorageType(
+    const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& 
shape);
+
+class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+
+  /// \brief Create a FixedShapeTensorArray from a Tensor
+  ///
+  /// This function will create a FixedShapeTensorArray from a Tensor, taking 
its
+  /// first dimension as the "element dimension" and the remaining dimensions 
as the
+  /// "tensor dimensions". If Tensor provides strides, they will be used to 
determine
+  /// dimension permutation. Otherwise, row-major permutation will be assumed.
+  ///
+  /// \param[in] tensor The Tensor to convert to a FixedShapeTensorArray
+  static Result<std::shared_ptr<Array>> FromTensor(const 
std::shared_ptr<Tensor>& tensor);
+};
+
+/// \brief Concrete type class for constant-size Tensor data.
+class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
+ public:
+  FixedShapeTensorType(const std::shared_ptr<DataType>& value_type,
+                       const std::vector<int64_t>& shape,
+                       const std::vector<int64_t>& permutation = {},
+                       const std::vector<std::string>& dim_names = {})
+      : ExtensionType(GetStorageType(value_type, shape)),
+        value_type_(value_type),
+        shape_(shape),
+        permutation_(permutation),
+        dim_names_(dim_names) {}
+
+  std::string extension_name() const override { return 
"arrow.fixed_shape_tensor"; }
+
+  /// Number of dimensions of tensor elements
+  size_t ndim() { return shape_.size(); }
+
+  /// Shape of tensor elements
+  const std::vector<int64_t>& shape() const { return shape_; }
+
+  /// Strides of tensor elements. Strides state offset in bytes between 
adjacent
+  /// elements along each dimension. In case permutation is non-empty strides 
are
+  /// computed from permuted tensor element's shape.
+  const std::vector<int64_t>& strides();
+
+  /// Permutation mapping from logical to physical memory layout of tensor 
elements
+  const std::vector<int64_t>& permutation() const { return permutation_; }
+
+  /// Dimension names of tensor elements. Dimensions are ordered physically.
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  std::string Serialize() const override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized_data) const override;
+
+  /// Create a FixedShapeTensorArray from ArrayData
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  /// \brief Create a Tensor from FixedShapeTensorArray
+  ///
+  /// This function will create a Tensor from a FixedShapeTensorArray, setting 
its
+  /// first dimension as length equal to the FixedShapeTensorArray's length 
and the
+  /// remaining dimensions as the FixedShapeTensorType's element shape.
+  ///
+  /// \param[in] arr The FixedShapeTensorArray to convert to a Tensor
+  Result<std::shared_ptr<Tensor>> ToTensor(std::shared_ptr<Array> arr);
+
+  /// \brief Create a FixedShapeTensorType instance
+  static Result<std::shared_ptr<DataType>> Make(
+      const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& 
shape,
+      const std::vector<int64_t>& permutation = {},
+      const std::vector<std::string>& dim_names = {});
+
+  /// \brief Compute strides of FixedShapeTensorType
+  static Result<std::vector<int64_t>> ComputeStrides(const 
FixedShapeTensorType& type);

Review Comment:
   Given that this is exposed as `strides()` (and the result of that is 
cached), is there a reason to expose this publicly? (Could it be a private 
method on the class instead?)



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -0,0 +1,425 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/fixed_shape_tensor.h"
+
+#include "arrow/testing/matchers.h"
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/tensor.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+namespace arrow {
+
+using FixedShapeTensorType = extension::FixedShapeTensorType;
+using extension::fixed_shape_tensor;
+using extension::FixedShapeTensorArray;
+
+class TestExtensionType : public ::testing::Test {
+ public:
+  void SetUp() override {
+    shape_ = {3, 3, 4};
+    cell_shape_ = {3, 4};
+    value_type_ = int64();
+    cell_type_ = fixed_size_list(value_type_, 12);
+    dim_names_ = {"x", "y"};
+    ext_type_ = internal::checked_pointer_cast<ExtensionType>(
+        fixed_shape_tensor(value_type_, cell_shape_, {}, dim_names_));
+    values_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 
16, 17,
+               18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35};
+    values_partial_ = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+    shape_partial_ = {2, 3, 4};
+    tensor_strides_ = {96, 32, 8};
+    cell_strides_ = {32, 8};
+    serialized_ = R"({"shape":[3,4],"dim_names":["x","y"]})";
+  }
+
+ protected:
+  std::vector<int64_t> shape_;
+  std::vector<int64_t> shape_partial_;
+  std::vector<int64_t> cell_shape_;
+  std::shared_ptr<DataType> value_type_;
+  std::shared_ptr<DataType> cell_type_;
+  std::vector<std::string> dim_names_;
+  std::shared_ptr<ExtensionType> ext_type_;
+  std::vector<int64_t> values_;
+  std::vector<int64_t> values_partial_;
+  std::vector<int64_t> tensor_strides_;
+  std::vector<int64_t> cell_strides_;
+  std::string serialized_;
+};
+
+auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
+                         std::shared_ptr<RecordBatch>* out) {
+  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
+  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
+                                        out_stream.get()));
+
+  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
+
+  io::BufferReader reader(complete_ipc_stream);
+  std::shared_ptr<RecordBatchReader> batch_reader;
+  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
+  ASSERT_OK(batch_reader->ReadNext(out));
+};
+
+TEST_F(TestExtensionType, CheckDummyRegistration) {
+  // We need a registered dummy type at runtime to allow for IPC 
deserialization
+  auto registered_type = GetExtensionType("arrow.fixed_shape_tensor");
+  ASSERT_TRUE(registered_type->type_id == Type::EXTENSION);
+}
+
+TEST_F(TestExtensionType, CreateExtensionType) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  // Test ExtensionType methods
+  ASSERT_EQ(ext_type_->extension_name(), "arrow.fixed_shape_tensor");
+  ASSERT_TRUE(ext_type_->Equals(*exact_ext_type));
+  ASSERT_FALSE(ext_type_->Equals(*cell_type_));
+  ASSERT_TRUE(ext_type_->storage_type()->Equals(*cell_type_));
+  ASSERT_EQ(ext_type_->Serialize(), serialized_);
+  ASSERT_OK_AND_ASSIGN(auto ds,
+                       ext_type_->Deserialize(ext_type_->storage_type(), 
serialized_));
+  auto deserialized = std::reinterpret_pointer_cast<ExtensionType>(ds);
+  ASSERT_TRUE(deserialized->Equals(*ext_type_));
+
+  // Test FixedShapeTensorType methods
+  ASSERT_EQ(exact_ext_type->id(), Type::EXTENSION);
+  ASSERT_EQ(exact_ext_type->ndim(), cell_shape_.size());
+  ASSERT_EQ(exact_ext_type->shape(), cell_shape_);
+  ASSERT_EQ(exact_ext_type->strides(), cell_strides_);
+  ASSERT_EQ(exact_ext_type->dim_names(), dim_names_);
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: permutation size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {0}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Invalid: dim_names size must match shape 
size."),
+      FixedShapeTensorType::Make(value_type_, cell_shape_, {}, {"x"}));
+}
+
+TEST_F(TestExtensionType, EqualsCases) {
+  auto ext_type_permutation_1 = fixed_shape_tensor(int64(), {3, 4}, {0, 1}, 
{"x", "y"});
+  auto ext_type_permutation_2 = fixed_shape_tensor(int64(), {3, 4}, {1, 0}, 
{"x", "y"});
+  auto ext_type_no_permutation = fixed_shape_tensor(int64(), {3, 4}, {}, {"x", 
"y"});
+
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_permutation_1));
+
+  ASSERT_FALSE(fixed_shape_tensor(int32(), {3, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {2, 4}, {}, {"x", "y"})
+                   ->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(fixed_shape_tensor(int64(), {3, 4}, {}, {"H", "W"})
+                   ->Equals(ext_type_no_permutation));
+
+  ASSERT_TRUE(ext_type_no_permutation->Equals(ext_type_permutation_1));
+  ASSERT_TRUE(ext_type_permutation_1->Equals(ext_type_no_permutation));
+  ASSERT_FALSE(ext_type_permutation_1->Equals(ext_type_permutation_2));
+  ASSERT_FALSE(ext_type_permutation_2->Equals(ext_type_permutation_1));
+}
+
+TEST_F(TestExtensionType, CreateFromArray) {
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, 
Buffer::Wrap(values_)};
+  auto arr_data = std::make_shared<ArrayData>(value_type_, values_.size(), 
buffers, 0, 0);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type_));
+  auto data = fsla_arr->data();
+  data->type = ext_type_;
+  auto ext_arr = exact_ext_type->MakeArray(data);
+  ASSERT_EQ(ext_arr->length(), shape_[0]);
+  ASSERT_EQ(ext_arr->null_count(), 0);
+}
+
+TEST_F(TestExtensionType, CreateFromTensor) {
+  std::vector<int64_t> column_major_strides = {8, 24, 72};
+  std::vector<int64_t> neither_major_strides = {96, 8, 32};
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       Tensor::Make(value_type_, Buffer::Wrap(values_), 
shape_));
+
+  auto exact_ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(ext_type_);
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+
+  ASSERT_OK(ext_arr->ValidateFull());
+  ASSERT_TRUE(tensor->is_row_major());
+  ASSERT_EQ(tensor->strides(), tensor_strides_);
+  ASSERT_EQ(ext_arr->length(), shape_[0]);
+
+  auto ext_type_2 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {0, 1}));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr_2, 
FixedShapeTensorArray::FromTensor(tensor));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto column_major_tensor,
+      Tensor::Make(value_type_, Buffer::Wrap(values_), shape_, 
column_major_strides));
+  auto ext_type_3 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {0, 1}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      testing::HasSubstr(
+          "Invalid: Only first-major tensors can be zero-copy converted to 
arrays"),
+      FixedShapeTensorArray::FromTensor(column_major_tensor));
+  ASSERT_THAT(FixedShapeTensorArray::FromTensor(column_major_tensor),
+              Raises(StatusCode::Invalid));
+
+  auto neither_major_tensor = std::make_shared<Tensor>(value_type_, 
Buffer::Wrap(values_),
+                                                       shape_, 
neither_major_strides);
+  auto ext_type_4 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {3, 4}, {1, 0}));
+  ASSERT_OK_AND_ASSIGN(auto ext_arr_4,
+                       
FixedShapeTensorArray::FromTensor(neither_major_tensor));
+
+  auto ext_type_5 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(binary(), {1, 2}));
+  auto arr = ArrayFromJSON(binary(), R"(["abc", "def"])");
+
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr,
+                       FixedSizeListArray::FromArrays(arr, 
fixed_size_list(binary(), 1)));
+  auto data = fsla_arr->data();
+  data->type = ext_type_5;
+  auto ext_arr_5 = ext_type_5->MakeArray(data);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("binary is not valid data type for a 
tensor"),
+      exact_ext_type->ToTensor(ext_arr_5));
+
+  auto ext_type_6 = internal::checked_pointer_cast<FixedShapeTensorType>(
+      fixed_shape_tensor(int64(), {1, 2}));
+  auto arr_with_null = ArrayFromJSON(int64(), "[0, null]");
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr_6, FixedSizeListArray::FromArrays(
+                                            arr_with_null, 
fixed_size_list(int64(), 1)));
+  auto data6 = fsla_arr_6->data();
+  data6->type = ext_type_6;
+  data6->null_count = 1;
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Null values not supported in tensors."),
+      ext_type_6->ToTensor(ext_type_6->MakeArray(data6)));
+}
+
+void CheckTensorRoundtrip(const std::shared_ptr<Tensor>& tensor,
+                          std::shared_ptr<DataType> expected_ext_type) {
+  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(expected_ext_type);
+  ASSERT_OK_AND_ASSIGN(auto ext_arr, 
FixedShapeTensorArray::FromTensor(tensor));
+  auto generated_ext_type =
+      internal::checked_pointer_cast<FixedShapeTensorType>(ext_arr->type());
+
+  // Check that generated type is equal to the expected type
+  ASSERT_EQ(generated_ext_type->type_name(), ext_type->type_name());
+  ASSERT_EQ(generated_ext_type->shape(), ext_type->shape());
+  ASSERT_EQ(generated_ext_type->dim_names(), ext_type->dim_names());
+  ASSERT_EQ(generated_ext_type->permutation(), ext_type->permutation());
+  
ASSERT_TRUE(generated_ext_type->storage_type()->Equals(*ext_type->storage_type()));
+  ASSERT_TRUE(generated_ext_type->Equals(ext_type));
+
+  // Check Tensor roundtrip
+  ASSERT_OK_AND_ASSIGN(auto tensor_from_array, 
generated_ext_type->ToTensor(ext_arr));
+  ASSERT_EQ(tensor->type(), tensor_from_array->type());
+  ASSERT_EQ(tensor->shape(), tensor_from_array->shape());
+  for (size_t i = 1; i < tensor->dim_names().size(); i++) {
+    ASSERT_EQ(tensor->dim_names()[i], tensor_from_array->dim_names()[i]);
+  }
+  ASSERT_EQ(tensor->strides(), tensor_from_array->strides());
+  ASSERT_TRUE(tensor->data()->Equals(*tensor_from_array->data()));
+  ASSERT_TRUE(tensor->Equals(*tensor_from_array));
+}
+
+TEST_F(TestExtensionType, RoundtripTensor) {
+  auto values = Buffer::Wrap(values_);
+  ASSERT_OK_AND_ASSIGN(auto tensor1, Tensor::Make(value_type_, values, {3, 3, 
4},
+                                                  {96, 32, 8}, {"", "y", 
"z"}));
+  ASSERT_OK_AND_ASSIGN(auto tensor2,
+                       Tensor::Make(value_type_, values, {3, 3, 4}, {96, 8, 
24}));
+  ASSERT_OK_AND_ASSIGN(auto tensor3,
+                       Tensor::Make(value_type_, values, {3, 4, 3}, {96, 24, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor4,
+                       Tensor::Make(value_type_, values, {3, 4, 3}, {96, 8, 
32}));
+  ASSERT_OK_AND_ASSIGN(auto tensor5,
+                       Tensor::Make(value_type_, values, {6, 2, 3}, {48, 24, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor6,
+                       Tensor::Make(value_type_, values, {6, 2, 3}, {48, 8, 
16}));
+  ASSERT_OK_AND_ASSIGN(auto tensor7,
+                       Tensor::Make(value_type_, values, {2, 3, 6}, {144, 48, 
8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor8,
+                       Tensor::Make(value_type_, values, {2, 3, 6}, {144, 8, 
24}));
+  ASSERT_OK_AND_ASSIGN(auto tensor9,
+                       Tensor::Make(value_type_, values, {2, 3, 2, 3}, {144, 
48, 24, 8}));
+  ASSERT_OK_AND_ASSIGN(auto tensor10,
+                       Tensor::Make(value_type_, values, {2, 3, 2, 3}, {144, 
8, 24, 48}));
+
+  CheckTensorRoundtrip(tensor1,
+                       fixed_shape_tensor(value_type_, {3, 4}, {0, 1}, {"y", 
"z"}));
+  CheckTensorRoundtrip(tensor2, fixed_shape_tensor(value_type_, {3, 4}, {1, 
0}, {}));

Review Comment:
   I think I commented on this line before (but couldn't find a reply or a 
corresponding change): shouldn't the shape of the expected `fixed_shape_tensor` 
type be {4, 3}? 
   The tensor itself has shape {3, 3, 4} (so {3, 4} for each individual tensor 
element), but it also has a permutation, so shouldn't the expected shape be {4, 3}?



##########
cpp/src/arrow/CMakeLists.txt:
##########
@@ -541,8 +541,10 @@ if(ARROW_IPC)
 endif()
 
 if(ARROW_JSON)
+  add_definitions(-DARROW_WITH_JSON)

Review Comment:
   I'm no CMake expert (and sorry for only asking now), so just wondering: could 
this definition also be named "ARROW_JSON"? I see that in other cases we reuse 
the same name as the CMake option.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] jorisvandenbossche commented on a diff in pull request #8510: GH-15483: [C++] Add a Fixed Shape Tensor canonical ExtensionType

Reply via email to