This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a82edf90ce GH-49081: [C++][Parquet] Correct variant's extension name 
(#49082)
a82edf90ce is described below

commit a82edf90ce66eb9a9a9e3bbac514e5d51f531c1f
Author: Zehua Zou <[email protected]>
AuthorDate: Mon Feb 9 11:09:04 2026 +0800

    GH-49081: [C++][Parquet] Correct variant's extension name (#49082)
    
    ### Rationale for this change
    
    Correct variant extension according to arrow's specification.
    
    ### What changes are included in this PR?
    
    Modified variant's hardcoded extension name.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #49081
    
    Authored-by: Zehua Zou <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt                       |  1 +
 .../extension/parquet_variant.cc}                  | 30 +++++----------
 .../extension/parquet_variant.h}                   | 43 ++++++++++------------
 cpp/src/parquet/CMakeLists.txt                     |  1 -
 cpp/src/parquet/arrow/arrow_schema_test.cc         |  4 +-
 cpp/src/parquet/arrow/schema.cc                    | 23 +++++++-----
 cpp/src/parquet/arrow/variant_test.cc              | 21 ++++++-----
 7 files changed, 57 insertions(+), 66 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d9f04a627b..6e9d76a61e 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -366,6 +366,7 @@ set(ARROW_SRCS
     extension_type.cc
     extension/bool8.cc
     extension/json.cc
+    extension/parquet_variant.cc
     extension/uuid.cc
     pretty_print.cc
     record_batch.cc
diff --git a/cpp/src/parquet/arrow/variant_internal.cc 
b/cpp/src/arrow/extension/parquet_variant.cc
similarity index 84%
rename from cpp/src/parquet/arrow/variant_internal.cc
rename to cpp/src/arrow/extension/parquet_variant.cc
index 87f88efaac..95aa5a0eb6 100644
--- a/cpp/src/parquet/arrow/variant_internal.cc
+++ b/cpp/src/arrow/extension/parquet_variant.cc
@@ -15,28 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "parquet/arrow/variant_internal.h"
+#include "arrow/extension/parquet_variant.h"
 
 #include <string>
 
 #include "arrow/extension_type.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
-#include "arrow/type_fwd.h"
 #include "arrow/util/logging_internal.h"
 
-namespace parquet::arrow {
+namespace arrow::extension {
 
-using ::arrow::Array;
-using ::arrow::ArrayData;
-using ::arrow::DataType;
-using ::arrow::ExtensionType;
-using ::arrow::Result;
-using ::arrow::Type;
-
-VariantExtensionType::VariantExtensionType(
-    const std::shared_ptr<::arrow::DataType>& storage_type)
-    : ::arrow::ExtensionType(storage_type) {
+VariantExtensionType::VariantExtensionType(const std::shared_ptr<DataType>& 
storage_type)
+    : ExtensionType(storage_type) {
   // GH-45948: Shredded variants will need to handle an optional 
shredded_value as
   // well as value_ becoming optional.
 
@@ -66,14 +57,13 @@ std::string VariantExtensionType::Serialize() const { 
return ""; }
 std::shared_ptr<Array> VariantExtensionType::MakeArray(
     std::shared_ptr<ArrayData> data) const {
   DCHECK_EQ(data->type->id(), Type::EXTENSION);
-  DCHECK_EQ("parquet.variant",
-            ::arrow::internal::checked_cast<const ExtensionType&>(*data->type)
-                .extension_name());
+  DCHECK_EQ("arrow.parquet.variant",
+            internal::checked_cast<const 
ExtensionType&>(*data->type).extension_name());
   return std::make_shared<VariantArray>(data);
 }
 
 namespace {
-bool IsBinaryField(const std::shared_ptr<::arrow::Field> field) {
+bool IsBinaryField(const std::shared_ptr<Field> field) {
   return field->type()->storage_id() == Type::BINARY ||
          field->type()->storage_id() == Type::LARGE_BINARY;
 }
@@ -116,8 +106,8 @@ bool VariantExtensionType::IsSupportedStorageType(
 Result<std::shared_ptr<DataType>> VariantExtensionType::Make(
     std::shared_ptr<DataType> storage_type) {
   if (!IsSupportedStorageType(storage_type)) {
-    return ::arrow::Status::Invalid("Invalid storage type for 
VariantExtensionType: ",
-                                    storage_type->ToString());
+    return Status::Invalid("Invalid storage type for VariantExtensionType: ",
+                           storage_type->ToString());
   }
 
   return std::make_shared<VariantExtensionType>(std::move(storage_type));
@@ -130,4 +120,4 @@ std::shared_ptr<DataType> variant(std::shared_ptr<DataType> 
storage_type) {
   return VariantExtensionType::Make(std::move(storage_type)).ValueOrDie();
 }
 
-}  // namespace parquet::arrow
+}  // namespace arrow::extension
diff --git a/cpp/src/parquet/arrow/variant_internal.h 
b/cpp/src/arrow/extension/parquet_variant.h
similarity index 56%
rename from cpp/src/parquet/arrow/variant_internal.h
rename to cpp/src/arrow/extension/parquet_variant.h
index d0b77c72c6..be90923f14 100644
--- a/cpp/src/parquet/arrow/variant_internal.h
+++ b/cpp/src/arrow/extension/parquet_variant.h
@@ -17,17 +17,16 @@
 
 #pragma once
 
-#include <stdexcept>
 #include <string>
 
 #include "arrow/extension_type.h"
-#include "parquet/platform.h"
+#include "arrow/util/visibility.h"
 
-namespace parquet::arrow {
+namespace arrow::extension {
 
-class PARQUET_EXPORT VariantArray : public ::arrow::ExtensionArray {
+class ARROW_EXPORT VariantArray : public ExtensionArray {
  public:
-  using ::arrow::ExtensionArray::ExtensionArray;
+  using ExtensionArray::ExtensionArray;
 };
 
 /// EXPERIMENTAL: Variant is not yet fully supported.
@@ -46,41 +45,37 @@ class PARQUET_EXPORT VariantArray : public 
::arrow::ExtensionArray {
 ///
 /// To read more about variant shredding, see the variant shredding spec at
 /// https://github.com/apache/parquet-format/blob/master/VariantShredding.md
-class PARQUET_EXPORT VariantExtensionType : public ::arrow::ExtensionType {
+class ARROW_EXPORT VariantExtensionType : public ExtensionType {
  public:
-  explicit VariantExtensionType(const std::shared_ptr<::arrow::DataType>& 
storage_type);
+  explicit VariantExtensionType(const std::shared_ptr<DataType>& storage_type);
 
-  std::string extension_name() const override { return "parquet.variant"; }
+  std::string extension_name() const override { return 
"arrow.parquet.variant"; }
 
-  bool ExtensionEquals(const ::arrow::ExtensionType& other) const override;
+  bool ExtensionEquals(const ExtensionType& other) const override;
 
-  ::arrow::Result<std::shared_ptr<::arrow::DataType>> Deserialize(
-      std::shared_ptr<::arrow::DataType> storage_type,
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
       const std::string& serialized_data) const override;
 
   std::string Serialize() const override;
 
-  std::shared_ptr<::arrow::Array> MakeArray(
-      std::shared_ptr<::arrow::ArrayData> data) const override;
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
 
-  static ::arrow::Result<std::shared_ptr<::arrow::DataType>> Make(
-      std::shared_ptr<::arrow::DataType> storage_type);
+  static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<DataType> 
storage_type);
 
-  static bool IsSupportedStorageType(
-      const std::shared_ptr<::arrow::DataType>& storage_type);
+  static bool IsSupportedStorageType(const std::shared_ptr<DataType>& 
storage_type);
 
-  std::shared_ptr<::arrow::Field> metadata() const { return metadata_; }
+  std::shared_ptr<Field> metadata() const { return metadata_; }
 
-  std::shared_ptr<::arrow::Field> value() const { return value_; }
+  std::shared_ptr<Field> value() const { return value_; }
 
  private:
   // TODO GH-45948 added shredded_value
-  std::shared_ptr<::arrow::Field> metadata_;
-  std::shared_ptr<::arrow::Field> value_;
+  std::shared_ptr<Field> metadata_;
+  std::shared_ptr<Field> value_;
 };
 
 /// \brief Return a VariantExtensionType instance.
-PARQUET_EXPORT std::shared_ptr<::arrow::DataType> variant(
-    std::shared_ptr<::arrow::DataType> storage_type);
+ARROW_EXPORT std::shared_ptr<DataType> variant(std::shared_ptr<DataType> 
storage_type);
 
-}  // namespace parquet::arrow
+}  // namespace arrow::extension
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index feeb1805f6..6c1550dcc2 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -157,7 +157,6 @@ set(PARQUET_SRCS
     arrow/reader_internal.cc
     arrow/schema.cc
     arrow/schema_internal.cc
-    arrow/variant_internal.cc
     arrow/writer.cc
     bloom_filter.cc
     bloom_filter_reader.cc
diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc 
b/cpp/src/parquet/arrow/arrow_schema_test.cc
index 73ce8ea69e..f930d3d7bd 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -25,7 +25,6 @@
 #include "parquet/arrow/reader.h"
 #include "parquet/arrow/reader_internal.h"
 #include "parquet/arrow/schema.h"
-#include "parquet/arrow/variant_internal.h"
 #include "parquet/file_reader.h"
 #include "parquet/schema.h"
 #include "parquet/schema_internal.h"
@@ -34,6 +33,7 @@
 
 #include "arrow/array.h"
 #include "arrow/extension/json.h"
+#include "arrow/extension/parquet_variant.h"
 #include "arrow/extension/uuid.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/testing/extension_type.h"
@@ -950,7 +950,7 @@ TEST_F(TestConvertParquetSchema, ParquetVariant) {
   auto arrow_metadata = ::arrow::field("metadata", ::arrow::binary(), 
/*nullable=*/false);
   auto arrow_value = ::arrow::field("value", ::arrow::binary(), 
/*nullable=*/false);
   auto arrow_variant = ::arrow::struct_({arrow_metadata, arrow_value});
-  auto variant_extension = 
std::make_shared<VariantExtensionType>(arrow_variant);
+  auto variant_extension = ::arrow::extension::variant(arrow_variant);
 
   {
     // Parquet file does not contain Arrow schema.
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 266215a810..9c0db1d533 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "arrow/extension/json.h"
+#include "arrow/extension/parquet_variant.h"
 #include "arrow/extension/uuid.h"
 #include "arrow/extension_type.h"
 #include "arrow/io/memory.h"
@@ -36,7 +37,6 @@
 #include "arrow/util/value_parsing.h"
 
 #include "parquet/arrow/schema_internal.h"
-#include "parquet/arrow/variant_internal.h"
 #include "parquet/exception.h"
 #include "parquet/geospatial/util_json_internal.h"
 #include "parquet/metadata.h"
@@ -129,10 +129,11 @@ Status MapToNode(const std::shared_ptr<::arrow::MapType>& 
type, const std::strin
   return Status::OK();
 }
 
-Status VariantToNode(const std::shared_ptr<VariantExtensionType>& type,
-                     const std::string& name, bool nullable, int field_id,
-                     const WriterProperties& properties,
-                     const ArrowWriterProperties& arrow_properties, NodePtr* 
out) {
+Status VariantToNode(
+    const std::shared_ptr<::arrow::extension::VariantExtensionType>& type,
+    const std::string& name, bool nullable, int field_id,
+    const WriterProperties& properties, const ArrowWriterProperties& 
arrow_properties,
+    NodePtr* out) {
   NodePtr metadata_node;
   RETURN_NOT_OK(FieldToNode("metadata", type->metadata(), properties, 
arrow_properties,
                             &metadata_node));
@@ -485,8 +486,10 @@ Status FieldToNode(const std::string& name, const 
std::shared_ptr<Field>& field,
         ARROW_ASSIGN_OR_RAISE(logical_type,
                               
LogicalTypeFromGeoArrowMetadata(ext_type->Serialize()));
         break;
-      } else if (ext_type->extension_name() == std::string("parquet.variant")) 
{
-        auto variant_type = 
std::static_pointer_cast<VariantExtensionType>(field->type());
+      } else if (ext_type->extension_name() == 
std::string("arrow.parquet.variant")) {
+        auto variant_type =
+            std::static_pointer_cast<::arrow::extension::VariantExtensionType>(
+                field->type());
 
         return VariantToNode(variant_type, name, field->nullable(), field_id, 
properties,
                              arrow_properties, out);
@@ -597,7 +600,7 @@ Status GroupToStruct(const GroupNode& node, LevelInfo 
current_levels,
   auto struct_type = ::arrow::struct_(arrow_fields);
   if (ctx->properties.get_arrow_extensions_enabled() &&
       node.logical_type()->is_variant()) {
-    auto extension_type = ::arrow::GetExtensionType("parquet.variant");
+    auto extension_type = ::arrow::GetExtensionType("arrow.parquet.variant");
     if (extension_type) {
       ARROW_ASSIGN_OR_RAISE(
           struct_type,
@@ -1147,10 +1150,10 @@ Result<bool> ApplyOriginalMetadata(const Field& 
origin_field, SchemaField* infer
       extension_supports_inferred_storage =
           arrow_extension_inferred ||
           ::arrow::extension::UuidType::IsSupportedStorageType(inferred_type);
-    } else if (origin_extension_name == "parquet.variant") {
+    } else if (origin_extension_name == "arrow.parquet.variant") {
       extension_supports_inferred_storage =
           arrow_extension_inferred ||
-          VariantExtensionType::IsSupportedStorageType(inferred_type);
+          
::arrow::extension::VariantExtensionType::IsSupportedStorageType(inferred_type);
     } else {
       extension_supports_inferred_storage =
           origin_extension_type.storage_type()->Equals(*inferred_type);
diff --git a/cpp/src/parquet/arrow/variant_test.cc 
b/cpp/src/parquet/arrow/variant_test.cc
index caf63d8e3d..04f46d2e44 100644
--- a/cpp/src/parquet/arrow/variant_test.cc
+++ b/cpp/src/parquet/arrow/variant_test.cc
@@ -15,9 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "parquet/arrow/variant_internal.h"
-
 #include "arrow/array/validate.h"
+#include "arrow/extension/parquet_variant.h"
 #include "arrow/ipc/test_common.h"
 #include "arrow/record_batch.h"
 #include "arrow/testing/gtest_util.h"
@@ -29,16 +28,20 @@ using ::arrow::binary;
 using ::arrow::struct_;
 
 TEST(TestVariantExtensionType, StorageTypeValidation) {
-  auto variant1 = variant(struct_({field("metadata", binary(), 
/*nullable=*/false),
-                                   field("value", binary(), 
/*nullable=*/false)}));
-  auto variant2 = variant(struct_({field("metadata", binary(), 
/*nullable=*/false),
-                                   field("value", binary(), 
/*nullable=*/false)}));
+  auto variant1 = ::arrow::extension::variant(
+      struct_({field("metadata", binary(), /*nullable=*/false),
+               field("value", binary(), /*nullable=*/false)}));
+  auto variant2 = ::arrow::extension::variant(
+      struct_({field("metadata", binary(), /*nullable=*/false),
+               field("value", binary(), /*nullable=*/false)}));
 
   ASSERT_TRUE(variant1->Equals(variant2));
 
   // Metadata and value fields can be provided in either order
-  auto variantFieldsFlipped = std::dynamic_pointer_cast<VariantExtensionType>(
-      variant(struct_({field("value", binary(), /*nullable=*/false),
+  auto variantFieldsFlipped =
+      std::dynamic_pointer_cast<::arrow::extension::VariantExtensionType>(
+          ::arrow::extension::variant(
+              struct_({field("value", binary(), /*nullable=*/false),
                        field("metadata", binary(), /*nullable=*/false)})));
 
   ASSERT_EQ("metadata", variantFieldsFlipped->metadata()->name());
@@ -62,7 +65,7 @@ TEST(TestVariantExtensionType, StorageTypeValidation) {
         Invalid,
         "Invalid: Invalid storage type for VariantExtensionType: " +
             storage_type->ToString(),
-        VariantExtensionType::Make(storage_type));
+        ::arrow::extension::VariantExtensionType::Make(storage_type));
   }
 }
 

Reply via email to