klion26 commented on code in PR #8796:
URL: https://github.com/apache/arrow-rs/pull/8796#discussion_r2498459579


##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -123,13 +123,39 @@ pub(crate) fn 
make_variant_to_shredded_variant_arrow_row_builder<'a>(
                 "Shredding variant array values as arrow lists".to_string(),
             ));
         }
-        _ => {
+        // Supported shredded primitive types, see Variant shredding spec:
+        // 
https://github.com/apache/parquet-format/blob/master/VariantShredding.md#shredded-value-types
+        DataType::Boolean

Review Comment:
   Does this mean we can't do the type cast after this? Not sure if this is OK.
   
   Currently, we do the type check in 
`make_primitive_variant_to_arrow_row_builder` with the match arms, so maybe we 
don't need to add it here; adding the type check in two places seems like it 
will add maintenance burden.



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -210,129 +210,111 @@ impl<'a> VariantToArrowRowBuilder<'a> {
     }
 }
 
-/// Creates a primitive row builder, returning Err if the requested data type 
is not primitive.
+/// Creates a row builder that converts primitive `Variant` values into the 
requested Arrow data type.
 pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
     data_type: &'a DataType,
     cast_options: &'a CastOptions,
     capacity: usize,
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, 
capacity)),
-        DataType::Boolean => 
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Decimal32(precision, scale) => 
Decimal32(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal64(precision, scale) => 
Decimal64(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal128(precision, scale) => 
Decimal128(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal256(precision, scale) => 
Decimal256(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Time64(TimeUnit::Microsecond) => 
Time(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::FixedSizeBinary(16) => {
-            Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
-        }
-        DataType::FixedSizeBinary(size) => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinary({size}) is not a valid variant shredding 
type. Only FixedSizeBinary(16) for UUID is supported."
-            )));
-        }
-        DataType::Utf8 => 
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
-        DataType::LargeUtf8 => {
-            LargeString(VariantToStringArrowBuilder::new(cast_options, 
capacity))
-        }
-        DataType::Utf8View => 
StringView(VariantToStringArrowBuilder::new(cast_options, capacity)),
-        _ if data_type.is_primitive() => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Primitive data_type {data_type:?} not yet implemented"
-            )));
-        }
-        _ => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Not a primitive type: {data_type:?}"
-            )));
-        }
-    };
+    let builder =
+        match data_type {
+            DataType::Null => 
Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
+            DataType::Boolean => {
+                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, 
capacity))
+            }
+            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Decimal32(precision, scale) => Decimal32(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
+            ),
+            DataType::Decimal64(precision, scale) => Decimal64(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
+            ),
+            DataType::Decimal128(precision, scale) => Decimal128(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
+            ),
+            DataType::Decimal256(precision, scale) => Decimal256(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,
+            ),
+            DataType::Timestamp(TimeUnit::Microsecond, None) => 
TimestampMicroNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, None) => 
TimestampNanoNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, 
capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
+            ),
+            DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Time64(TimeUnit::Microsecond) => Time(
+                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::FixedSizeBinary(16) => {
+                Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+            }
+            DataType::Utf8 => 
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
+            DataType::LargeUtf8 => {
+                LargeString(VariantToStringArrowBuilder::new(cast_options, 
capacity))
+            }
+            DataType::Utf8View => {
+                StringView(VariantToStringArrowBuilder::new(cast_options, 
capacity))
+            }
+            _ => {
+                return Err(ArrowError::NotYetImplemented(format!(

Review Comment:
   Maybe we can't remove the `_ if data_type.is_primitive()` arm for now; it 
seems this arm is for types we _may implement but have not yet_, and the `_` 
match arm is for invalid types.
   
   With #8768 merged, we complete the 1-1 mapping (and some transforms for some 
types) for all `Variant` primitive types, but we _may_ support some `DataType`s 
here which are not valid variant primitives (e.g. Timestamp with a different 
unit). We could keep the `_ if data_type.is_primitive()` arm so that we know we 
_may_ support the requested type, and remove it after we reach a conclusion.
   
   I am sorting out possible conversions and will create an issue to discuss 
them after the work is done.



##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -123,13 +123,39 @@ pub(crate) fn 
make_variant_to_shredded_variant_arrow_row_builder<'a>(
                 "Shredding variant array values as arrow lists".to_string(),
             ));
         }
-        _ => {
+        // Supported shredded primitive types, see Variant shredding spec:
+        // 
https://github.com/apache/parquet-format/blob/master/VariantShredding.md#shredded-value-types
+        DataType::Boolean
+        | DataType::Int8
+        | DataType::Int16
+        | DataType::Int32
+        | DataType::Int64
+        | DataType::Float32
+        | DataType::Float64
+        | DataType::Decimal32(..)
+        | DataType::Decimal64(..)
+        | DataType::Decimal128(..)
+        | DataType::Date32
+        | DataType::Time64(TimeUnit::Microsecond)
+        | DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _)
+        | DataType::Binary
+        | DataType::BinaryView
+        | DataType::Utf8
+        | DataType::Utf8View
+        | DataType::FixedSizeBinary(16) // UUID
+        => {
             let builder =
                 make_primitive_variant_to_arrow_row_builder(data_type, 
cast_options, capacity)?;
             let typed_value_builder =
                 VariantToShreddedPrimitiveVariantRowBuilder::new(builder, 
capacity, top_level);
             VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder)
         }
+        DataType::FixedSizeBinary(_) => {
+            return Err(ArrowError::InvalidArgumentError(format!("{data_type} 
is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is 
supported.")))

Review Comment:
   Do we need to distinguish this from the `_` match arm?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to