klion26 commented on code in PR #8392: URL: https://github.com/apache/arrow-rs/pull/8392#discussion_r2366902225
########## parquet-variant-compute/src/variant_array.rs: ########## @@ -18,36 +18,194 @@ //! [`VariantArray`] implementation use crate::type_conversion::primitive_conversion_single_value; -use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; +use arrow::compute::cast; use arrow::datatypes::{ Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; use parquet_variant::Variant; -use std::any::Any; use std::sync::Arc; +/// Arrow Variant [`ExtensionType`]. +/// +/// Represents the canonical Arrow Extension Type for storing variants. +/// See [`VariantArray`] for more examples of using this extension type. +pub struct VariantType; + +impl ExtensionType for VariantType { + const NAME: &'static str = "parquet.variant"; + + // Variants have no extension metadata + type Metadata = (); + + fn metadata(&self) -> &Self::Metadata { + &() + } + + fn serialize_metadata(&self) -> Option<String> { + None + } + + fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> { + Ok(()) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + // Note don't check for metadata/value fields here because they may be + // absent in shredded variants + if matches!(data_type, DataType::Struct(_)) { + Ok(()) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "VariantType only supports StructArray, got {}", + data_type + ))) + } + } + + fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> { + let new_self = Self; + new_self.supports_data_type(data_type)?; + Ok(new_self) + } +} + /// An array of Parquet [`Variant`] values /// Review Comment: The doc here is very helpful! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org