mbrobbel commented on code in PR #8392:
URL: https://github.com/apache/arrow-rs/pull/8392#discussion_r2367568364


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -18,36 +18,191 @@
 //! [`VariantArray`] implementation
 
 use crate::type_conversion::primitive_conversion_single_value;
-use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, 
StructArray};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
 use arrow::buffer::NullBuffer;
+use arrow::compute::cast;
 use arrow::datatypes::{
     Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
     UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
+use arrow_schema::extension::ExtensionType;
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
 use parquet_variant::Uuid;
 use parquet_variant::Variant;
-use std::any::Any;
 use std::sync::Arc;
 
+/// Arrow Variant [`ExtensionType`].
+///
+/// Represents the canonical Arrow Extension Type for storing variants.
+/// See [`VariantArray`] for more examples of using this extension type.
+pub struct VariantType;
+
+impl ExtensionType for VariantType {
+    const NAME: &'static str = "parquet.variant";

Review Comment:
   
   The canonical extension name carries the `arrow.` prefix, see
   https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L440:
   ```suggestion
       const NAME: &'static str = "arrow.parquet.variant";
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -18,36 +18,191 @@
 //! [`VariantArray`] implementation
 
 use crate::type_conversion::primitive_conversion_single_value;
-use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, 
StructArray};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
 use arrow::buffer::NullBuffer;
+use arrow::compute::cast;
 use arrow::datatypes::{
     Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
     UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
+use arrow_schema::extension::ExtensionType;
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
 use parquet_variant::Uuid;
 use parquet_variant::Variant;
-use std::any::Any;
 use std::sync::Arc;
 
+/// Arrow Variant [`ExtensionType`].
+///
+/// Represents the canonical Arrow Extension Type for storing variants.
+/// See [`VariantArray`] for more examples of using this extension type.
+pub struct VariantType;
+
+impl ExtensionType for VariantType {
+    const NAME: &'static str = "parquet.variant";
+
+    // Variants have no extension metadata
+    type Metadata = ();

Review Comment:
   The metadata is an empty string:
   
   
https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L473



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -18,36 +18,191 @@
 //! [`VariantArray`] implementation
 
 use crate::type_conversion::primitive_conversion_single_value;
-use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, 
StructArray};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
 use arrow::buffer::NullBuffer;
+use arrow::compute::cast;
 use arrow::datatypes::{
     Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
     UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
+use arrow_schema::extension::ExtensionType;
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
 use parquet_variant::Uuid;
 use parquet_variant::Variant;
-use std::any::Any;
 use std::sync::Arc;
 
+/// Arrow Variant [`ExtensionType`].
+///
+/// Represents the canonical Arrow Extension Type for storing variants.
+/// See [`VariantArray`] for more examples of using this extension type.
+pub struct VariantType;
+
+impl ExtensionType for VariantType {
+    const NAME: &'static str = "parquet.variant";
+
+    // Variants have no extension metadata
+    type Metadata = ();
+
+    fn metadata(&self) -> &Self::Metadata {
+        &()
+    }
+
+    fn serialize_metadata(&self) -> Option<String> {
+        None
+    }
+
+    fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, 
ArrowError> {
+        Ok(())
+    }
+
+    fn supports_data_type(&self, data_type: &DataType) -> Result<(), 
ArrowError> {
+        if matches!(data_type, DataType::Struct(_)) {

Review Comment:
   I assume we want to validate the rules from the spec here? See
   https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L442-L465



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -18,36 +18,191 @@
 //! [`VariantArray`] implementation
 
 use crate::type_conversion::primitive_conversion_single_value;
-use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, 
StructArray};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
 use arrow::buffer::NullBuffer;
+use arrow::compute::cast;
 use arrow::datatypes::{
     Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, 
Int64Type, Int8Type,
     UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
+use arrow_schema::extension::ExtensionType;
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
 use parquet_variant::Uuid;
 use parquet_variant::Variant;
-use std::any::Any;
 use std::sync::Arc;
 
+/// Arrow Variant [`ExtensionType`].
+///
+/// Represents the canonical Arrow Extension Type for storing variants.
+/// See [`VariantArray`] for more examples of using this extension type.
+pub struct VariantType;
+
+impl ExtensionType for VariantType {
+    const NAME: &'static str = "parquet.variant";
+
+    // Variants have no extension metadata
+    type Metadata = ();
+
+    fn metadata(&self) -> &Self::Metadata {
+        &()
+    }
+
+    fn serialize_metadata(&self) -> Option<String> {
+        None
+    }
+
+    fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, 
ArrowError> {
+        Ok(())
+    }
+
+    fn supports_data_type(&self, data_type: &DataType) -> Result<(), 
ArrowError> {
+        if matches!(data_type, DataType::Struct(_)) {
+            Ok(())
+        } else {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "VariantType only supports StructArray, got {}",
+                data_type

Review Comment:
   ```suggestion
                   "VariantType only supports StructArray, got {data_type}"
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to