mbrobbel commented on code in PR #8392: URL: https://github.com/apache/arrow-rs/pull/8392#discussion_r2367568364
########## parquet-variant-compute/src/variant_array.rs: ########## @@ -18,36 +18,191 @@ //! [`VariantArray`] implementation use crate::type_conversion::primitive_conversion_single_value; -use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; +use arrow::compute::cast; use arrow::datatypes::{ Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; use parquet_variant::Variant; -use std::any::Any; use std::sync::Arc; +/// Arrow Variant [`ExtensionType`]. +/// +/// Represents the canonical Arrow Extension Type for storing variants. +/// See [`VariantArray`] for more examples of using this extension type. +pub struct VariantType; + +impl ExtensionType for VariantType { + const NAME: &'static str = "parquet.variant"; Review Comment: https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L440: ```suggestion const NAME: &'static str = "arrow.parquet.variant"; ``` ########## parquet-variant-compute/src/variant_array.rs: ########## @@ -18,36 +18,191 @@ //! 
[`VariantArray`] implementation use crate::type_conversion::primitive_conversion_single_value; -use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; +use arrow::compute::cast; use arrow::datatypes::{ Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; use parquet_variant::Variant; -use std::any::Any; use std::sync::Arc; +/// Arrow Variant [`ExtensionType`]. +/// +/// Represents the canonical Arrow Extension Type for storing variants. +/// See [`VariantArray`] for more examples of using this extension type. +pub struct VariantType; + +impl ExtensionType for VariantType { + const NAME: &'static str = "parquet.variant"; + + // Variants have no extension metadata + type Metadata = (); Review Comment: The metadata is an empty string: https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L473 ########## parquet-variant-compute/src/variant_array.rs: ########## @@ -18,36 +18,191 @@ //! 
[`VariantArray`] implementation use crate::type_conversion::primitive_conversion_single_value; -use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; +use arrow::compute::cast; use arrow::datatypes::{ Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; use parquet_variant::Variant; -use std::any::Any; use std::sync::Arc; +/// Arrow Variant [`ExtensionType`]. +/// +/// Represents the canonical Arrow Extension Type for storing variants. +/// See [`VariantArray`] for more examples of using this extension type. +pub struct VariantType; + +impl ExtensionType for VariantType { + const NAME: &'static str = "parquet.variant"; + + // Variants have no extension metadata + type Metadata = (); + + fn metadata(&self) -> &Self::Metadata { + &() + } + + fn serialize_metadata(&self) -> Option<String> { + None + } + + fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> { + Ok(()) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + if matches!(data_type, DataType::Struct(_)) { Review Comment: I assume we want to validate the rules from https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L442-L465 here? ########## parquet-variant-compute/src/variant_array.rs: ########## @@ -18,36 +18,191 @@ //! 
[`VariantArray`] implementation use crate::type_conversion::primitive_conversion_single_value; -use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; +use arrow::compute::cast; use arrow::datatypes::{ Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_schema::extension::ExtensionType; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; use parquet_variant::Variant; -use std::any::Any; use std::sync::Arc; +/// Arrow Variant [`ExtensionType`]. +/// +/// Represents the canonical Arrow Extension Type for storing variants. +/// See [`VariantArray`] for more examples of using this extension type. +pub struct VariantType; + +impl ExtensionType for VariantType { + const NAME: &'static str = "parquet.variant"; + + // Variants have no extension metadata + type Metadata = (); + + fn metadata(&self) -> &Self::Metadata { + &() + } + + fn serialize_metadata(&self) -> Option<String> { + None + } + + fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> { + Ok(()) + } + + fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { + if matches!(data_type, DataType::Struct(_)) { + Ok(()) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "VariantType only supports StructArray, got {}", + data_type Review Comment: ```suggestion "VariantType only supports StructArray, got {data_type}" ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org