This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2328b6ee39 GH-15058: [C++][Python] Native support for UUID (#37298)
2328b6ee39 is described below

commit 2328b6ee39b497d9f48e6d342db9f7d0c34d9791
Author: Rok Mihevc <[email protected]>
AuthorDate: Mon Aug 26 16:34:18 2024 +0200

    GH-15058: [C++][Python] Native support for UUID (#37298)
    
    ### Rationale for this change
    
    See #15058.
    UUID datatype is common in throughout the ecosystem and Arrow as supporting 
it as a native type would reduce friction.
    
    ### What changes are included in this PR?
    
    This PR implements logic for Arrow canonical extension type in C++ and a 
Python wrapper.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes, new extension type is added.
    * Closes: #15058
    
    Authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt                       |   3 +-
 cpp/src/arrow/acero/hash_join_node_test.cc         |   1 +
 cpp/src/arrow/extension/CMakeLists.txt             |   2 +-
 cpp/src/arrow/extension/fixed_shape_tensor_test.cc |  17 +---
 cpp/src/arrow/extension/uuid.cc                    |  58 ++++++++++++
 cpp/src/arrow/extension/uuid.h                     |  61 ++++++++++++
 cpp/src/arrow/extension/uuid_test.cc               |  72 ++++++++++++++
 cpp/src/arrow/extension_type.cc                    |   4 +-
 cpp/src/arrow/extension_type_test.cc               |  19 +---
 cpp/src/arrow/integration/json_integration_test.cc |   2 +-
 cpp/src/arrow/ipc/test_common.cc                   |  35 +++++--
 cpp/src/arrow/ipc/test_common.h                    |   3 +
 cpp/src/arrow/scalar_test.cc                       |   5 +-
 cpp/src/arrow/testing/extension_type.h             |   6 +-
 cpp/src/arrow/testing/gtest_util.cc                |  16 ++--
 dev/archery/archery/integration/datagen.py         |   2 +-
 docs/source/format/CanonicalExtensions.rst         |   2 +
 docs/source/status.rst                             |   2 +-
 python/pyarrow/__init__.py                         |  18 ++--
 python/pyarrow/array.pxi                           |   6 ++
 python/pyarrow/includes/libarrow.pxd               |  10 ++
 python/pyarrow/lib.pxd                             |   3 +
 python/pyarrow/public-api.pxi                      |  11 ++-
 python/pyarrow/scalar.pxi                          |  10 ++
 python/pyarrow/src/arrow/python/gdb.cc             |  27 +-----
 python/pyarrow/tests/extensions.pyx                |   2 +-
 python/pyarrow/tests/test_extension_type.py        | 105 ++++++++++++++-------
 python/pyarrow/tests/test_gdb.py                   |   8 +-
 python/pyarrow/types.pxi                           |  34 +++++++
 29 files changed, 412 insertions(+), 132 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 89f28ee416..6b0ac8c23c 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -375,6 +375,7 @@ set(ARROW_SRCS
     device.cc
     extension_type.cc
     extension/bool8.cc
+    extension/uuid.cc
     pretty_print.cc
     record_batch.cc
     result.cc
@@ -1225,6 +1226,7 @@ add_subdirectory(testing)
 add_subdirectory(array)
 add_subdirectory(c)
 add_subdirectory(compute)
+add_subdirectory(extension)
 add_subdirectory(io)
 add_subdirectory(tensor)
 add_subdirectory(util)
@@ -1267,7 +1269,6 @@ endif()
 
 if(ARROW_JSON)
   add_subdirectory(json)
-  add_subdirectory(extension)
 endif()
 
 if(ARROW_ORC)
diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc 
b/cpp/src/arrow/acero/hash_join_node_test.cc
index 9065e286a2..76ad9c7d65 100644
--- a/cpp/src/arrow/acero/hash_join_node_test.cc
+++ b/cpp/src/arrow/acero/hash_join_node_test.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/row_encoder_internal.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/testing/extension_type.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
diff --git a/cpp/src/arrow/extension/CMakeLists.txt 
b/cpp/src/arrow/extension/CMakeLists.txt
index 5cb4bc77af..065ea3f1dd 100644
--- a/cpp/src/arrow/extension/CMakeLists.txt
+++ b/cpp/src/arrow/extension/CMakeLists.txt
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set(CANONICAL_EXTENSION_TESTS bool8_test.cc)
+set(CANONICAL_EXTENSION_TESTS bool8_test.cc uuid_test.cc)
 
 if(ARROW_JSON)
   list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc 
opaque_test.cc)
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc 
b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
index 3fd39a11ff..842a78e1a4 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
+++ b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc
@@ -23,7 +23,7 @@
 #include "arrow/array/array_primitive.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
-#include "arrow/ipc/writer.h"
+#include "arrow/ipc/test_common.h"
 #include "arrow/record_batch.h"
 #include "arrow/tensor.h"
 #include "arrow/testing/gtest_util.h"
@@ -33,6 +33,7 @@
 namespace arrow {
 
 using FixedShapeTensorType = extension::FixedShapeTensorType;
+using arrow::ipc::test::RoundtripBatch;
 using extension::fixed_shape_tensor;
 using extension::FixedShapeTensorArray;
 
@@ -71,20 +72,6 @@ class TestExtensionType : public ::testing::Test {
   std::string serialized_;
 };
 
-auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
-                         std::shared_ptr<RecordBatch>* out) {
-  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
-  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
-                                        out_stream.get()));
-
-  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
-
-  io::BufferReader reader(complete_ipc_stream);
-  std::shared_ptr<RecordBatchReader> batch_reader;
-  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
-  ASSERT_OK(batch_reader->ReadNext(out));
-};
-
 TEST_F(TestExtensionType, CheckDummyRegistration) {
   // We need a registered dummy type at runtime to allow for IPC 
deserialization
   auto registered_type = GetExtensionType("arrow.fixed_shape_tensor");
diff --git a/cpp/src/arrow/extension/uuid.cc b/cpp/src/arrow/extension/uuid.cc
new file mode 100644
index 0000000000..43b917a17f
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+
+#include "arrow/extension_type.h"
+#include "arrow/util/logging.h"
+
+#include "arrow/extension/uuid.h"
+
+namespace arrow::extension {
+
+bool UuidType::ExtensionEquals(const ExtensionType& other) const {
+  return (other.extension_name() == this->extension_name());
+}
+
+std::shared_ptr<Array> UuidType::MakeArray(std::shared_ptr<ArrayData> data) 
const {
+  DCHECK_EQ(data->type->id(), Type::EXTENSION);
+  DCHECK_EQ("arrow.uuid",
+            static_cast<const ExtensionType&>(*data->type).extension_name());
+  return std::make_shared<UuidArray>(data);
+}
+
+Result<std::shared_ptr<DataType>> UuidType::Deserialize(
+    std::shared_ptr<DataType> storage_type, const std::string& serialized) 
const {
+  if (!serialized.empty()) {
+    return Status::Invalid("Unexpected serialized metadata: '", serialized, 
"'");
+  }
+  if (!storage_type->Equals(*fixed_size_binary(16))) {
+    return Status::Invalid("Invalid storage type for UuidType: ",
+                           storage_type->ToString());
+  }
+  return std::make_shared<UuidType>();
+}
+
+std::string UuidType::ToString(bool show_metadata) const {
+  std::stringstream ss;
+  ss << "extension<" << this->extension_name() << ">";
+  return ss.str();
+}
+
+std::shared_ptr<DataType> uuid() { return std::make_shared<UuidType>(); }
+
+}  // namespace arrow::extension
diff --git a/cpp/src/arrow/extension/uuid.h b/cpp/src/arrow/extension/uuid.h
new file mode 100644
index 0000000000..42bb21cf0b
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid.h
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/extension_type.h"
+
+namespace arrow::extension {
+
+/// \brief UuidArray stores array of UUIDs. Underlying storage type is
+/// FixedSizeBinary(16).
+class ARROW_EXPORT UuidArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;
+};
+
+/// \brief UuidType is a canonical arrow extension type for UUIDs.
+/// UUIDs are stored as FixedSizeBinary(16) with big-endian notation and this
+/// does not interpret the bytes in any way. Specific UUID version is not
+/// required or guaranteed.
+class ARROW_EXPORT UuidType : public ExtensionType {
+ public:
+  /// \brief Construct a UuidType.
+  UuidType() : ExtensionType(fixed_size_binary(16)) {}
+
+  std::string extension_name() const override { return "arrow.uuid"; }
+  std::string ToString(bool show_metadata = false) const override;
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  /// Create a UuidArray from ArrayData
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized) const override;
+
+  std::string Serialize() const override { return ""; }
+
+  /// \brief Create a UuidType instance
+  static Result<std::shared_ptr<DataType>> Make() { return 
std::make_shared<UuidType>(); }
+};
+
+/// \brief Return a UuidType instance.
+ARROW_EXPORT std::shared_ptr<DataType> uuid();
+
+}  // namespace arrow::extension
diff --git a/cpp/src/arrow/extension/uuid_test.cc 
b/cpp/src/arrow/extension/uuid_test.cc
new file mode 100644
index 0000000000..3bbb6eeb4a
--- /dev/null
+++ b/cpp/src/arrow/extension/uuid_test.cc
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/extension/uuid.h"
+
+#include "arrow/testing/matchers.h"
+
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/test_common.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+#include "arrow/testing/extension_type.h"
+
+namespace arrow {
+
+using arrow::ipc::test::RoundtripBatch;
+
+TEST(TestUuuidExtensionType, ExtensionTypeTest) {
+  auto type = uuid();
+  ASSERT_EQ(type->id(), Type::EXTENSION);
+
+  const auto& ext_type = static_cast<const ExtensionType&>(*type);
+  std::string serialized = ext_type.Serialize();
+
+  ASSERT_OK_AND_ASSIGN(auto deserialized,
+                       ext_type.Deserialize(fixed_size_binary(16), 
serialized));
+  ASSERT_TRUE(deserialized->Equals(*type));
+  ASSERT_FALSE(deserialized->Equals(*fixed_size_binary(16)));
+}
+
+TEST(TestUuuidExtensionType, RoundtripBatch) {
+  auto ext_type = extension::uuid();
+  auto exact_ext_type = 
internal::checked_pointer_cast<extension::UuidType>(ext_type);
+  auto arr = ArrayFromJSON(fixed_size_binary(16), R"(["abcdefghijklmnop", 
null])");
+  auto ext_arr = ExtensionType::WrapArray(ext_type, arr);
+
+  // Pass extension array, expect getting back extension array
+  std::shared_ptr<RecordBatch> read_batch;
+  auto ext_field = field(/*name=*/"f0", /*type=*/ext_type);
+  auto batch = RecordBatch::Make(schema({ext_field}), ext_arr->length(), 
{ext_arr});
+  RoundtripBatch(batch, &read_batch);
+  CompareBatch(*batch, *read_batch, /*compare_metadata=*/true);
+
+  // Pass extension metadata and storage array, expect getting back extension 
array
+  std::shared_ptr<RecordBatch> read_batch2;
+  auto ext_metadata =
+      key_value_metadata({{"ARROW:extension:name", 
exact_ext_type->extension_name()},
+                          {"ARROW:extension:metadata", ""}});
+  ext_field = field(/*name=*/"f0", /*type=*/exact_ext_type->storage_type(),
+                    /*nullable=*/true, /*metadata=*/ext_metadata);
+  auto batch2 = RecordBatch::Make(schema({ext_field}), arr->length(), {arr});
+  RoundtripBatch(batch2, &read_batch2);
+  CompareBatch(*batch, *read_batch2, /*compare_metadata=*/true);
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 83c7ebed4f..fc220f73a6 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -32,6 +32,7 @@
 #include "arrow/extension/fixed_shape_tensor.h"
 #include "arrow/extension/opaque.h"
 #endif
+#include "arrow/extension/uuid.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
@@ -147,14 +148,13 @@ static void CreateGlobalRegistry() {
   // Register canonical extension types
 
   g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
-  std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8()};
+  std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8(), 
extension::uuid()};
 
 #ifdef ARROW_JSON
   ext_types.push_back(extension::fixed_shape_tensor(int64(), {}));
   ext_types.push_back(extension::opaque(null(), "", ""));
 #endif
 
-  // Register canonical extension types
   for (const auto& ext_type : ext_types) {
     ARROW_CHECK_OK(
         
g_registry->RegisterType(checked_pointer_cast<ExtensionType>(ext_type)));
diff --git a/cpp/src/arrow/extension_type_test.cc 
b/cpp/src/arrow/extension_type_test.cc
index f104c984a6..f49ffc5cba 100644
--- a/cpp/src/arrow/extension_type_test.cc
+++ b/cpp/src/arrow/extension_type_test.cc
@@ -30,6 +30,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/ipc/options.h"
 #include "arrow/ipc/reader.h"
+#include "arrow/ipc/test_common.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
@@ -41,6 +42,8 @@
 
 namespace arrow {
 
+using arrow::ipc::test::RoundtripBatch;
+
 class Parametric1Array : public ExtensionArray {
  public:
   using ExtensionArray::ExtensionArray;
@@ -178,7 +181,7 @@ class ExtStructType : public ExtensionType {
 
 class TestExtensionType : public ::testing::Test {
  public:
-  void SetUp() { 
ASSERT_OK(RegisterExtensionType(std::make_shared<UuidType>())); }
+  void SetUp() { 
ASSERT_OK(RegisterExtensionType(std::make_shared<ExampleUuidType>())); }
 
   void TearDown() {
     if (GetExtensionType("uuid")) {
@@ -211,20 +214,6 @@ TEST_F(TestExtensionType, ExtensionTypeTest) {
   ASSERT_EQ(deserialized->byte_width(), 16);
 }
 
-auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
-                         std::shared_ptr<RecordBatch>* out) {
-  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
-  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
-                                        out_stream.get()));
-
-  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
-
-  io::BufferReader reader(complete_ipc_stream);
-  std::shared_ptr<RecordBatchReader> batch_reader;
-  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
-  ASSERT_OK(batch_reader->ReadNext(out));
-};
-
 TEST_F(TestExtensionType, IpcRoundtrip) {
   auto ext_arr = ExampleUuid();
   auto batch = RecordBatch::Make(schema({field("f0", uuid())}), 4, {ext_arr});
diff --git a/cpp/src/arrow/integration/json_integration_test.cc 
b/cpp/src/arrow/integration/json_integration_test.cc
index 9b56928c68..0e84ea6124 100644
--- a/cpp/src/arrow/integration/json_integration_test.cc
+++ b/cpp/src/arrow/integration/json_integration_test.cc
@@ -1046,7 +1046,7 @@ TEST(TestJsonFileReadWrite, JsonExample2) {
 
     auto storage_array =
         ArrayFromJSON(fixed_size_binary(16), R"(["0123456789abcdef", null])");
-    AssertArraysEqual(*batch->column(0), UuidArray(uuid_type, storage_array));
+    AssertArraysEqual(*batch->column(0), ExampleUuidArray(uuid_type, 
storage_array));
 
     AssertArraysEqual(*batch->column(1), NullArray(2));
   }
diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc
index 87c02e2d87..fb4f6bd8ea 100644
--- a/cpp/src/arrow/ipc/test_common.cc
+++ b/cpp/src/arrow/ipc/test_common.cc
@@ -27,8 +27,10 @@
 #include "arrow/array.h"
 #include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_primitive.h"
-#include "arrow/array/builder_time.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
 #include "arrow/ipc/test_common.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/pretty_print.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
@@ -242,11 +244,11 @@ Status MakeRandomBooleanArray(const int length, bool 
include_nulls,
                               std::shared_ptr<Array>* out) {
   std::vector<uint8_t> values(length);
   random_null_bytes(length, 0.5, values.data());
-  ARROW_ASSIGN_OR_RAISE(auto data, internal::BytesToBits(values));
+  ARROW_ASSIGN_OR_RAISE(auto data, arrow::internal::BytesToBits(values));
 
   if (include_nulls) {
     std::vector<uint8_t> valid_bytes(length);
-    ARROW_ASSIGN_OR_RAISE(auto null_bitmap, 
internal::BytesToBits(valid_bytes));
+    ARROW_ASSIGN_OR_RAISE(auto null_bitmap, 
arrow::internal::BytesToBits(valid_bytes));
     random_null_bytes(length, 0.1, valid_bytes.data());
     *out = std::make_shared<BooleanArray>(length, data, null_bitmap, -1);
   } else {
@@ -596,7 +598,7 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
   std::shared_ptr<Array> no_nulls(new StructArray(type, 
list_batch->num_rows(), columns));
   std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
   null_bytes[0] = 0;
-  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, internal::BytesToBits(null_bytes));
+  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, 
arrow::internal::BytesToBits(null_bytes));
   std::shared_ptr<Array> with_nulls(
       new StructArray(type, list_batch->num_rows(), columns, null_bitmap, 1));
 
@@ -1088,9 +1090,9 @@ Status MakeUuid(std::shared_ptr<RecordBatch>* out) {
   auto f1 = field("f1", uuid_type, /*nullable=*/false);
   auto schema = ::arrow::schema({f0, f1});
 
-  auto a0 = std::make_shared<UuidArray>(
+  auto a0 = std::make_shared<ExampleUuidArray>(
       uuid_type, ArrayFromJSON(storage_type, R"(["0123456789abcdef", null])"));
-  auto a1 = std::make_shared<UuidArray>(
+  auto a1 = std::make_shared<ExampleUuidArray>(
       uuid_type,
       ArrayFromJSON(storage_type, R"(["ZYXWVUTSRQPONMLK", 
"JIHGFEDBA9876543"])"));
 
@@ -1176,12 +1178,13 @@ enable_if_t<std::is_floating_point<CValueType>::value, 
void> FillRandomData(
 Status MakeRandomTensor(const std::shared_ptr<DataType>& type,
                         const std::vector<int64_t>& shape, bool row_major_p,
                         std::shared_ptr<Tensor>* out, uint32_t seed) {
-  const auto& element_type = internal::checked_cast<const 
FixedWidthType&>(*type);
+  const auto& element_type = arrow::internal::checked_cast<const 
FixedWidthType&>(*type);
   std::vector<int64_t> strides;
   if (row_major_p) {
-    RETURN_NOT_OK(internal::ComputeRowMajorStrides(element_type, shape, 
&strides));
+    RETURN_NOT_OK(arrow::internal::ComputeRowMajorStrides(element_type, shape, 
&strides));
   } else {
-    RETURN_NOT_OK(internal::ComputeColumnMajorStrides(element_type, shape, 
&strides));
+    RETURN_NOT_OK(
+        arrow::internal::ComputeColumnMajorStrides(element_type, shape, 
&strides));
   }
 
   const int64_t element_size = element_type.bit_width() / CHAR_BIT;
@@ -1233,6 +1236,20 @@ Status MakeRandomTensor(const std::shared_ptr<DataType>& 
type,
   return Tensor::Make(type, buf, shape, strides).Value(out);
 }
 
+void RoundtripBatch(const std::shared_ptr<RecordBatch>& batch,
+                    std::shared_ptr<RecordBatch>* out) {
+  ASSERT_OK_AND_ASSIGN(auto out_stream, io::BufferOutputStream::Create());
+  ASSERT_OK(ipc::WriteRecordBatchStream({batch}, 
ipc::IpcWriteOptions::Defaults(),
+                                        out_stream.get()));
+
+  ASSERT_OK_AND_ASSIGN(auto complete_ipc_stream, out_stream->Finish());
+
+  io::BufferReader reader(complete_ipc_stream);
+  std::shared_ptr<RecordBatchReader> batch_reader;
+  ASSERT_OK_AND_ASSIGN(batch_reader, 
ipc::RecordBatchStreamReader::Open(&reader));
+  ASSERT_OK(batch_reader->ReadNext(out));
+}
+
 }  // namespace test
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/test_common.h b/cpp/src/arrow/ipc/test_common.h
index db8613cbb1..9b7e7f13e3 100644
--- a/cpp/src/arrow/ipc/test_common.h
+++ b/cpp/src/arrow/ipc/test_common.h
@@ -184,6 +184,9 @@ Status MakeRandomTensor(const std::shared_ptr<DataType>& 
type,
                         const std::vector<int64_t>& shape, bool row_major_p,
                         std::shared_ptr<Tensor>* out, uint32_t seed = 0);
 
+ARROW_TESTING_EXPORT void RoundtripBatch(const std::shared_ptr<RecordBatch>& 
batch,
+                                         std::shared_ptr<RecordBatch>* out);
+
 }  // namespace test
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index 104a5697b5..e9ec13e98b 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -43,7 +43,6 @@ namespace arrow {
 
 using compute::Cast;
 using compute::CastOptions;
-
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
@@ -2038,7 +2037,7 @@ class TestExtensionScalar : public ::testing::Test {
   void SetUp() {
     type_ = uuid();
     storage_type_ = fixed_size_binary(16);
-    uuid_type_ = checked_cast<const UuidType*>(type_.get());
+    uuid_type_ = checked_cast<const ExampleUuidType*>(type_.get());
   }
 
  protected:
@@ -2049,7 +2048,7 @@ class TestExtensionScalar : public ::testing::Test {
   }
 
   std::shared_ptr<DataType> type_, storage_type_;
-  const UuidType* uuid_type_{nullptr};
+  const ExampleUuidType* uuid_type_{nullptr};
 
   const std::string_view uuid_string1_{UUID_STRING1};
   const std::string_view uuid_string2_{UUID_STRING2};
diff --git a/cpp/src/arrow/testing/extension_type.h 
b/cpp/src/arrow/testing/extension_type.h
index 6515631f20..a4526e31c2 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -27,14 +27,14 @@
 
 namespace arrow {
 
-class ARROW_TESTING_EXPORT UuidArray : public ExtensionArray {
+class ARROW_TESTING_EXPORT ExampleUuidArray : public ExtensionArray {
  public:
   using ExtensionArray::ExtensionArray;
 };
 
-class ARROW_TESTING_EXPORT UuidType : public ExtensionType {
+class ARROW_TESTING_EXPORT ExampleUuidType : public ExtensionType {
  public:
-  UuidType() : ExtensionType(fixed_size_binary(16)) {}
+  ExampleUuidType() : ExtensionType(fixed_size_binary(16)) {}
 
   std::string extension_name() const override { return "uuid"; }
 
diff --git a/cpp/src/arrow/testing/gtest_util.cc 
b/cpp/src/arrow/testing/gtest_util.cc
index 95de16c715..ae2e53b30a 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -49,9 +49,13 @@
 #include "arrow/buffer.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/datum.h"
+#include "arrow/io/memory.h"
 #include "arrow/ipc/json_simple.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/json/rapidjson_defs.h"  // IWYU pragma: keep
 #include "arrow/pretty_print.h"
+#include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/tensor.h"
@@ -847,17 +851,17 @@ Future<> SleepABitAsync() {
 ///////////////////////////////////////////////////////////////////////////
 // Extension types
 
-bool UuidType::ExtensionEquals(const ExtensionType& other) const {
+bool ExampleUuidType::ExtensionEquals(const ExtensionType& other) const {
   return (other.extension_name() == this->extension_name());
 }
 
-std::shared_ptr<Array> UuidType::MakeArray(std::shared_ptr<ArrayData> data) 
const {
+std::shared_ptr<Array> ExampleUuidType::MakeArray(std::shared_ptr<ArrayData> 
data) const {
   DCHECK_EQ(data->type->id(), Type::EXTENSION);
   DCHECK_EQ("uuid", static_cast<const 
ExtensionType&>(*data->type).extension_name());
-  return std::make_shared<UuidArray>(data);
+  return std::make_shared<ExampleUuidArray>(data);
 }
 
-Result<std::shared_ptr<DataType>> UuidType::Deserialize(
+Result<std::shared_ptr<DataType>> ExampleUuidType::Deserialize(
     std::shared_ptr<DataType> storage_type, const std::string& serialized) 
const {
   if (serialized != "uuid-serialized") {
     return Status::Invalid("Type identifier did not match: '", serialized, 
"'");
@@ -866,7 +870,7 @@ Result<std::shared_ptr<DataType>> UuidType::Deserialize(
     return Status::Invalid("Invalid storage type for UuidType: ",
                            storage_type->ToString());
   }
-  return std::make_shared<UuidType>();
+  return std::make_shared<ExampleUuidType>();
 }
 
 bool SmallintType::ExtensionEquals(const ExtensionType& other) const {
@@ -982,7 +986,7 @@ Result<std::shared_ptr<DataType>> 
Complex128Type::Deserialize(
   return std::make_shared<Complex128Type>();
 }
 
-std::shared_ptr<DataType> uuid() { return std::make_shared<UuidType>(); }
+std::shared_ptr<DataType> uuid() { return std::make_shared<ExampleUuidType>(); 
}
 
 std::shared_ptr<DataType> smallint() { return 
std::make_shared<SmallintType>(); }
 
diff --git a/dev/archery/archery/integration/datagen.py 
b/dev/archery/archery/integration/datagen.py
index d395d26cb7..f63aa0d95a 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1845,7 +1845,7 @@ def generate_nested_dictionary_case():
 def generate_extension_case():
     dict0 = Dictionary(0, StringField('dictionary0'), size=5, name='DICT0')
 
-    uuid_type = ExtensionType('uuid', 'uuid-serialized',
+    uuid_type = ExtensionType('arrow.uuid', '',
                               FixedSizeBinaryField('', 16))
     dict_ext_type = ExtensionType(
         'dict-extension', 'dict-extension-serialized',
diff --git a/docs/source/format/CanonicalExtensions.rst 
b/docs/source/format/CanonicalExtensions.rst
index 5658f949ce..1106f8aaff 100644
--- a/docs/source/format/CanonicalExtensions.rst
+++ b/docs/source/format/CanonicalExtensions.rst
@@ -272,6 +272,8 @@ JSON
   In the future, additional fields may be added, but they are not required
   to interpret the array.
 
+.. _uuid_extension:
+
 UUID
 ====
 
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 5e2c2cc19c..b685d4bbf8 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -121,7 +121,7 @@ Data Types
 
+-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | JSON                  |       |       | ✓     |            |       |       | 
      |       |
 
+-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| UUID                  |       |       | ✓     |            |       |       | 
      |       |
+| UUID                  | ✓     |       | ✓     |            |       |       | 
      |       |
 
+-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | 8-bit Boolean         | ✓     |       | ✓     |            |       |       | 
      |       |
 
+-----------------------+-------+-------+-------+------------+-------+-------+-------+-------+
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 807bcdc315..d31c93119b 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -172,9 +172,7 @@ from pyarrow.lib import (null, bool_,
                          union, sparse_union, dense_union,
                          dictionary,
                          run_end_encoded,
-                         fixed_shape_tensor,
-                         opaque,
-                         bool8,
+                         bool8, fixed_shape_tensor, opaque, uuid,
                          field,
                          type_for_alias,
                          DataType, DictionaryType, StructType,
@@ -184,8 +182,9 @@ from pyarrow.lib import (null, bool_,
                          TimestampType, Time32Type, Time64Type, DurationType,
                          FixedSizeBinaryType, Decimal128Type, Decimal256Type,
                          BaseExtensionType, ExtensionType,
-                         RunEndEncodedType, FixedShapeTensorType, OpaqueType,
-                         Bool8Type, PyExtensionType, UnknownExtensionType,
+                         RunEndEncodedType, Bool8Type, FixedShapeTensorType,
+                         OpaqueType, UuidType,
+                         PyExtensionType, UnknownExtensionType,
                          register_extension_type, unregister_extension_type,
                          DictionaryMemo,
                          KeyValueMetadata,
@@ -218,8 +217,9 @@ from pyarrow.lib import (null, bool_,
                          Time32Array, Time64Array, DurationArray,
                          MonthDayNanoIntervalArray,
                          Decimal128Array, Decimal256Array, StructArray, 
ExtensionArray,
-                         RunEndEncodedArray, FixedShapeTensorArray, 
OpaqueArray,
-                         Bool8Array, scalar, NA, _NULL as NULL, Scalar,
+                         RunEndEncodedArray, Bool8Array, FixedShapeTensorArray,
+                         OpaqueArray, UuidArray,
+                         scalar, NA, _NULL as NULL, Scalar,
                          NullScalar, BooleanScalar,
                          Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
                          UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
@@ -235,8 +235,8 @@ from pyarrow.lib import (null, bool_,
                          StringScalar, LargeStringScalar, StringViewScalar,
                          FixedSizeBinaryScalar, DictionaryScalar,
                          MapScalar, StructScalar, UnionScalar,
-                         RunEndEncodedScalar, ExtensionScalar,
-                         FixedShapeTensorScalar, OpaqueScalar, Bool8Scalar)
+                         RunEndEncodedScalar, Bool8Scalar, ExtensionScalar,
+                         FixedShapeTensorScalar, OpaqueScalar, UuidScalar)
 
 # Buffers, allocation
 from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 77d6c9c06d..1587de0e6b 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -4338,6 +4338,12 @@ cdef class ExtensionArray(Array):
         return result
 
 
+class UuidArray(ExtensionArray):
+    """
+    Concrete class for Arrow arrays of UUID data type.
+    """
+
+
 cdef class FixedShapeTensorArray(ExtensionArray):
     """
     Concrete class for fixed shape tensor extension arrays.
diff --git a/python/pyarrow/includes/libarrow.pxd 
b/python/pyarrow/includes/libarrow.pxd
index 6f510cfc0c..c2346750a1 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2865,6 +2865,16 @@ cdef extern from "arrow/extension_type.h" namespace 
"arrow":
         shared_ptr[CArray] storage()
 
 
+cdef extern from "arrow/extension/uuid.h" namespace "arrow::extension" nogil:
+    cdef cppclass CUuidType" arrow::extension::UuidType"(CExtensionType):
+
+        @staticmethod
+        CResult[shared_ptr[CDataType]] Make()
+
+    cdef cppclass CUuidArray" arrow::extension::UuidArray"(CExtensionArray):
+        pass
+
+
 cdef extern from "arrow/extension/fixed_shape_tensor.h" namespace 
"arrow::extension" nogil:
     cdef cppclass CFixedShapeTensorType \
             " arrow::extension::FixedShapeTensorType"(CExtensionType):
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index a7c3b496a0..5c3d981c3a 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -222,6 +222,9 @@ cdef class OpaqueType(BaseExtensionType):
     cdef:
         const COpaqueType* opaque_ext_type
 
+cdef class UuidType(BaseExtensionType):
+    cdef:
+        const CUuidType* uuid_ext_type
 
 cdef class PyExtensionType(ExtensionType):
     pass
diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi
index 19a26bd6c6..d3e2ff2e99 100644
--- a/python/pyarrow/public-api.pxi
+++ b/python/pyarrow/public-api.pxi
@@ -120,14 +120,17 @@ cdef api object pyarrow_wrap_data_type(
     elif type.get().id() == _Type_EXTENSION:
         ext_type = <const CExtensionType*> type.get()
         cpy_ext_type = dynamic_cast[_CPyExtensionTypePtr](ext_type)
+        extension_name = ext_type.extension_name()
         if cpy_ext_type != nullptr:
             return cpy_ext_type.GetInstance()
-        elif ext_type.extension_name() == b"arrow.fixed_shape_tensor":
+        elif extension_name == b"arrow.bool8":
+            out = Bool8Type.__new__(Bool8Type)
+        elif extension_name == b"arrow.fixed_shape_tensor":
             out = FixedShapeTensorType.__new__(FixedShapeTensorType)
-        elif ext_type.extension_name() == b"arrow.opaque":
+        elif extension_name == b"arrow.opaque":
             out = OpaqueType.__new__(OpaqueType)
-        elif ext_type.extension_name() == b"arrow.bool8":
-            out = Bool8Type.__new__(Bool8Type)
+        elif extension_name == b"arrow.uuid":
+            out = UuidType.__new__(UuidType)
         else:
             out = BaseExtensionType.__new__(BaseExtensionType)
     else:
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 72ae2aee5f..68f77832c4 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -17,6 +17,7 @@
 
 import collections
 from cython cimport binding
+from uuid import UUID
 
 
 cdef class Scalar(_Weakrefable):
@@ -1043,6 +1044,15 @@ cdef class ExtensionScalar(Scalar):
         return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
 
 
+class UuidScalar(ExtensionScalar):
+    """
+    Concrete class for Uuid extension scalar.
+    """
+
+    def as_py(self):
+        return None if self.value is None else UUID(bytes=self.value.as_py())
+
+
 cdef class FixedShapeTensorScalar(ExtensionScalar):
     """
     Concrete class for fixed shape tensor extension scalar.
diff --git a/python/pyarrow/src/arrow/python/gdb.cc 
b/python/pyarrow/src/arrow/python/gdb.cc
index 6941769e4e..7c58bae334 100644
--- a/python/pyarrow/src/arrow/python/gdb.cc
+++ b/python/pyarrow/src/arrow/python/gdb.cc
@@ -22,7 +22,7 @@
 #include "arrow/array.h"
 #include "arrow/chunked_array.h"
 #include "arrow/datum.h"
-#include "arrow/extension_type.h"
+#include "arrow/extension/uuid.h"
 #include "arrow/ipc/json_simple.h"
 #include "arrow/python/gdb.h"
 #include "arrow/record_batch.h"
@@ -37,6 +37,8 @@
 
 namespace arrow {
 
+using extension::uuid;
+using extension::UuidType;
 using ipc::internal::json::ArrayFromJSON;
 using ipc::internal::json::ChunkedArrayFromJSON;
 using ipc::internal::json::ScalarFromJSON;
@@ -56,29 +58,6 @@ class CustomStatusDetail : public StatusDetail {
   std::string ToString() const override { return "This is a detail"; }
 };
 
-class UuidType : public ExtensionType {
- public:
-  UuidType() : ExtensionType(fixed_size_binary(16)) {}
-
-  std::string extension_name() const override { return "uuid"; }
-
-  bool ExtensionEquals(const ExtensionType& other) const override {
-    return (other.extension_name() == this->extension_name());
-  }
-
-  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override {
-    return std::make_shared<ExtensionArray>(data);
-  }
-
-  Result<std::shared_ptr<DataType>> Deserialize(
-      std::shared_ptr<DataType> storage_type,
-      const std::string& serialized) const override {
-    return Status::NotImplemented("");
-  }
-
-  std::string Serialize() const override { return "uuid-serialized"; }
-};
-
 std::shared_ptr<Array> SliceArrayFromJSON(const std::shared_ptr<DataType>& ty,
                                           std::string_view json, int64_t 
offset = 0,
                                           int64_t length = -1) {
diff --git a/python/pyarrow/tests/extensions.pyx 
b/python/pyarrow/tests/extensions.pyx
index c1bf9aae1e..309b574dc0 100644
--- a/python/pyarrow/tests/extensions.pyx
+++ b/python/pyarrow/tests/extensions.pyx
@@ -37,7 +37,7 @@ cdef extern from * namespace "arrow::py" nogil:
     class UuidType : public ExtensionType {
     public:
         UuidType() : ExtensionType(fixed_size_binary(16)) {}
-        std::string extension_name() const override { return "uuid"; }
+        std::string extension_name() const override { return "example-uuid"; }
 
         bool ExtensionEquals(const ExtensionType& other) const override {
             return other.extension_name() == this->extension_name();
diff --git a/python/pyarrow/tests/test_extension_type.py 
b/python/pyarrow/tests/test_extension_type.py
index 0d50c467e9..aacbd2cb6e 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -95,18 +95,21 @@ class IntegerEmbeddedType(pa.ExtensionType):
         return cls()
 
 
-class UuidScalarType(pa.ExtensionScalar):
+class ExampleUuidScalarType(pa.ExtensionScalar):
     def as_py(self):
         return None if self.value is None else UUID(bytes=self.value.as_py())
 
 
-class UuidType(pa.ExtensionType):
+class ExampleUuidType(pa.ExtensionType):
 
     def __init__(self):
-        super().__init__(pa.binary(16), 'pyarrow.tests.UuidType')
+        super().__init__(pa.binary(16), 'pyarrow.tests.ExampleUuidType')
+
+    def __reduce__(self):
+        return ExampleUuidType, ()
 
     def __arrow_ext_scalar_class__(self):
-        return UuidScalarType
+        return ExampleUuidScalarType
 
     def __arrow_ext_serialize__(self):
         return b''
@@ -116,10 +119,10 @@ class UuidType(pa.ExtensionType):
         return cls()
 
 
-class UuidType2(pa.ExtensionType):
+class ExampleUuidType2(pa.ExtensionType):
 
     def __init__(self):
-        super().__init__(pa.binary(16), 'pyarrow.tests.UuidType2')
+        super().__init__(pa.binary(16), 'pyarrow.tests.ExampleUuidType2')
 
     def __arrow_ext_serialize__(self):
         return b''
@@ -250,8 +253,8 @@ def ipc_read_batch(buf):
 
 
 def test_ext_type_basics():
-    ty = UuidType()
-    assert ty.extension_name == "pyarrow.tests.UuidType"
+    ty = ExampleUuidType()
+    assert ty.extension_name == "pyarrow.tests.ExampleUuidType"
 
 
 def test_ext_type_str():
@@ -267,16 +270,16 @@ def test_ext_type_repr():
 
 
 def test_ext_type_lifetime():
-    ty = UuidType()
+    ty = ExampleUuidType()
     wr = weakref.ref(ty)
     del ty
     assert wr() is None
 
 
 def test_ext_type_storage_type():
-    ty = UuidType()
+    ty = ExampleUuidType()
     assert ty.storage_type == pa.binary(16)
-    assert ty.__class__ is UuidType
+    assert ty.__class__ is ExampleUuidType
     ty = ParamExtType(5)
     assert ty.storage_type == pa.binary(5)
     assert ty.__class__ is ParamExtType
@@ -284,7 +287,7 @@ def test_ext_type_storage_type():
 
 def test_ext_type_byte_width():
     # Test for fixed-size binary types
-    ty = UuidType()
+    ty = pa.uuid()
     assert ty.byte_width == 16
     ty = ParamExtType(5)
     assert ty.byte_width == 5
@@ -297,7 +300,7 @@ def test_ext_type_byte_width():
 
 def test_ext_type_bit_width():
     # Test for fixed-size binary types
-    ty = UuidType()
+    ty = pa.uuid()
     assert ty.bit_width == 128
     ty = ParamExtType(5)
     assert ty.bit_width == 40
@@ -309,7 +312,7 @@ def test_ext_type_bit_width():
 
 
 def test_ext_type_as_py():
-    ty = UuidType()
+    ty = ExampleUuidType()
     expected = uuid4()
     scalar = pa.ExtensionScalar.from_storage(ty, expected.bytes)
     assert scalar.as_py() == expected
@@ -342,12 +345,22 @@ def test_ext_type_as_py():
 
 def test_uuid_type_pickle(pickle_module):
     for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
-        ty = UuidType()
+        ty = ExampleUuidType()
         ser = pickle_module.dumps(ty, protocol=proto)
         del ty
         ty = pickle_module.loads(ser)
         wr = weakref.ref(ty)
-        assert ty.extension_name == "pyarrow.tests.UuidType"
+        assert ty.extension_name == "pyarrow.tests.ExampleUuidType"
+        del ty
+        assert wr() is None
+
+    for proto in range(0, pickle_module.HIGHEST_PROTOCOL + 1):
+        ty = pa.uuid()
+        ser = pickle_module.dumps(ty, protocol=proto)
+        del ty
+        ty = pickle_module.loads(ser)
+        wr = weakref.ref(ty)
+        assert ty.extension_name == "arrow.uuid"
         del ty
         assert wr() is None
 
@@ -358,8 +371,8 @@ def test_ext_type_equality():
     c = ParamExtType(6)
     assert a != b
     assert b == c
-    d = UuidType()
-    e = UuidType()
+    d = ExampleUuidType()
+    e = ExampleUuidType()
     assert a != d
     assert d == e
 
@@ -403,7 +416,7 @@ def test_ext_array_equality():
     storage1 = pa.array([b"0123456789abcdef"], type=pa.binary(16))
     storage2 = pa.array([b"0123456789abcdef"], type=pa.binary(16))
     storage3 = pa.array([], type=pa.binary(16))
-    ty1 = UuidType()
+    ty1 = ExampleUuidType()
     ty2 = ParamExtType(16)
 
     a = pa.ExtensionArray.from_storage(ty1, storage1)
@@ -451,9 +464,9 @@ def test_ext_scalar_from_array():
     data = [b"0123456789abcdef", b"0123456789abcdef",
             b"zyxwvutsrqponmlk", None]
     storage = pa.array(data, type=pa.binary(16))
-    ty1 = UuidType()
+    ty1 = ExampleUuidType()
     ty2 = ParamExtType(16)
-    ty3 = UuidType2()
+    ty3 = ExampleUuidType2()
 
     a = pa.ExtensionArray.from_storage(ty1, storage)
     b = pa.ExtensionArray.from_storage(ty2, storage)
@@ -462,9 +475,9 @@ def test_ext_scalar_from_array():
     scalars_a = list(a)
     assert len(scalars_a) == 4
 
-    assert ty1.__arrow_ext_scalar_class__() == UuidScalarType
-    assert isinstance(a[0], UuidScalarType)
-    assert isinstance(scalars_a[0], UuidScalarType)
+    assert ty1.__arrow_ext_scalar_class__() == ExampleUuidScalarType
+    assert isinstance(a[0], ExampleUuidScalarType)
+    assert isinstance(scalars_a[0], ExampleUuidScalarType)
 
     for s, val in zip(scalars_a, data):
         assert isinstance(s, pa.ExtensionScalar)
@@ -505,7 +518,7 @@ def test_ext_scalar_from_array():
 
 
 def test_ext_scalar_from_storage():
-    ty = UuidType()
+    ty = ExampleUuidType()
 
     s = pa.ExtensionScalar.from_storage(ty, None)
     assert isinstance(s, pa.ExtensionScalar)
@@ -706,14 +719,14 @@ def test_cast_between_extension_types():
     tiny_int_arr.cast(pa.int64()).cast(IntegerType())
 
     # Between the same extension types is okay
-    array = pa.array([b'1' * 16, b'2' * 16], pa.binary(16)).cast(UuidType())
-    out = array.cast(UuidType())
-    assert out.type == UuidType()
+    array = pa.array([b'1' * 16, b'2' * 16], 
pa.binary(16)).cast(ExampleUuidType())
+    out = array.cast(ExampleUuidType())
+    assert out.type == ExampleUuidType()
 
     # Will still fail casting between extensions who share storage type,
     # can only cast between exactly the same extension types.
     with pytest.raises(TypeError, match='Casting from *'):
-        array.cast(UuidType2())
+        array.cast(ExampleUuidType2())
 
 
 def test_cast_to_extension_with_extension_storage():
@@ -744,10 +757,10 @@ def test_cast_nested_extension_types(data, type_factory):
 
 def test_casting_dict_array_to_extension_type():
     storage = pa.array([b"0123456789abcdef"], type=pa.binary(16))
-    arr = pa.ExtensionArray.from_storage(UuidType(), storage)
+    arr = pa.ExtensionArray.from_storage(ExampleUuidType(), storage)
     dict_arr = pa.DictionaryArray.from_arrays(pa.array([0, 0], pa.int32()),
                                               arr)
-    out = dict_arr.cast(UuidType())
+    out = dict_arr.cast(ExampleUuidType())
     assert isinstance(out, pa.ExtensionArray)
     assert out.to_pylist() == [UUID('30313233-3435-3637-3839-616263646566'),
                                UUID('30313233-3435-3637-3839-616263646566')]
@@ -1347,7 +1360,7 @@ def test_cpp_extension_in_python(tmpdir):
     mod = __import__('extensions')
 
     uuid_type = mod._make_uuid_type()
-    assert uuid_type.extension_name == "uuid"
+    assert uuid_type.extension_name == "example-uuid"
     assert uuid_type.storage_type == pa.binary(16)
 
     array = mod._make_uuid_array()
@@ -1356,6 +1369,31 @@ def test_cpp_extension_in_python(tmpdir):
     assert array[0].as_py() == b'abcdefghijklmno0'
     assert array[1].as_py() == b'0onmlkjihgfedcba'
 
+    buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], 
["example-uuid"]))
+
+    batch = ipc_read_batch(buf)
+    reconstructed_array = batch.column(0)
+    assert reconstructed_array.type == uuid_type
+    assert reconstructed_array == array
+
+
+def test_uuid_extension():
+    data = [b"0123456789abcdef", b"0123456789abcdef",
+            b"zyxwvutsrqponmlk", None]
+
+    uuid_type = pa.uuid()
+    assert uuid_type.extension_name == "arrow.uuid"
+    assert uuid_type.storage_type == pa.binary(16)
+    assert uuid_type.__class__ is pa.UuidType
+
+    storage = pa.array(data, pa.binary(16))
+    array = pa.ExtensionArray.from_storage(uuid_type, storage)
+    assert array.type == uuid_type
+
+    assert array.to_pylist() == [x if x is None else UUID(bytes=x) for x in 
data]
+    assert array[0].as_py() == UUID(bytes=data[0])
+    assert array[3].as_py() is None
+
     buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"]))
 
     batch = ipc_read_batch(buf)
@@ -1363,6 +1401,9 @@ def test_cpp_extension_in_python(tmpdir):
     assert reconstructed_array.type == uuid_type
     assert reconstructed_array == array
 
+    assert uuid_type.__arrow_ext_scalar_class__() == pa.UuidScalar
+    assert isinstance(array[0], pa.UuidScalar)
+
 
 def test_tensor_type():
     tensor_type = pa.fixed_shape_tensor(pa.int8(), [2, 3])
diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py
index 0d12d710dc..2ac2f55754 100644
--- a/python/pyarrow/tests/test_gdb.py
+++ b/python/pyarrow/tests/test_gdb.py
@@ -409,7 +409,7 @@ def test_types_stack(gdb_arrow):
 
     check_stack_repr(
         gdb_arrow, "uuid_type",
-        ('arrow::ExtensionType "extension<uuid>" '
+        ('arrow::ExtensionType "extension<arrow.uuid>" '
          'with storage type arrow::fixed_size_binary(16)'))
 
 
@@ -447,7 +447,7 @@ def test_types_heap(gdb_arrow):
 
     check_heap_repr(
         gdb_arrow, "heap_uuid_type",
-        ('arrow::ExtensionType "extension<uuid>" '
+        ('arrow::ExtensionType "extension<arrow.uuid>" '
          'with storage type arrow::fixed_size_binary(16)'))
 
 
@@ -716,12 +716,12 @@ def test_scalars_stack(gdb_arrow):
 
     check_stack_repr(
         gdb_arrow, "extension_scalar",
-        ('arrow::ExtensionScalar of type "extension<uuid>", '
+        ('arrow::ExtensionScalar of type "extension<arrow.uuid>", '
          'value arrow::FixedSizeBinaryScalar of size 16, '
          'value "0123456789abcdef"'))
     check_stack_repr(
         gdb_arrow, "extension_scalar_null",
-        'arrow::ExtensionScalar of type "extension<uuid>", null value')
+        'arrow::ExtensionScalar of type "extension<arrow.uuid>", null value')
 
 
 def test_scalars_heap(gdb_arrow):
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 563782f0c2..f83ecc3aa4 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1765,6 +1765,25 @@ cdef class ExtensionType(BaseExtensionType):
         return ExtensionScalar
 
 
+cdef class UuidType(BaseExtensionType):
+    """
+    Concrete class for UUID extension type.
+    """
+
+    cdef void init(self, const shared_ptr[CDataType]& type) except *:
+        BaseExtensionType.init(self, type)
+        self.uuid_ext_type = <const CUuidType*> type.get()
+
+    def __arrow_ext_class__(self):
+        return UuidArray
+
+    def __reduce__(self):
+        return uuid, ()
+
+    def __arrow_ext_scalar_class__(self):
+        return UuidScalar
+
+
 cdef class FixedShapeTensorType(BaseExtensionType):
     """
     Concrete class for fixed shape tensor extension type.
@@ -5208,6 +5227,21 @@ def run_end_encoded(run_end_type, value_type):
     return pyarrow_wrap_data_type(ree_type)
 
 
+def uuid():
+    """
+    Create UuidType instance.
+
+    Returns
+    -------
+    type : UuidType
+    """
+
+    cdef UuidType out = UuidType.__new__(UuidType)
+    c_uuid_ext_type = GetResultValue(CUuidType.Make())
+    out.init(c_uuid_ext_type)
+    return out
+
+
 def fixed_shape_tensor(DataType value_type, shape, dim_names=None, 
permutation=None):
     """
     Create instance of fixed shape tensor extension type with shape and 
optional

Reply via email to