liamzwbao commented on code in PR #8796:
URL: https://github.com/apache/arrow-rs/pull/8796#discussion_r2496685416


##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -210,129 +210,111 @@ impl<'a> VariantToArrowRowBuilder<'a> {
     }
 }
 
-/// Creates a primitive row builder, returning Err if the requested data type 
is not primitive.
+/// Creates a row builder that converts primitive `Variant` values into the 
requested Arrow data type.
 pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
     data_type: &'a DataType,
     cast_options: &'a CastOptions,
     capacity: usize,
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, 
capacity)),
-        DataType::Boolean => 
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Decimal32(precision, scale) => 
Decimal32(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal64(precision, scale) => 
Decimal64(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal128(precision, scale) => 
Decimal128(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal256(precision, scale) => 
Decimal256(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Time64(TimeUnit::Microsecond) => 
Time(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::FixedSizeBinary(16) => {
-            Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
-        }
-        DataType::FixedSizeBinary(size) => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinary({size}) is not a valid variant shredding 
type. Only FixedSizeBinary(16) for UUID is supported."
-            )));
-        }
-        DataType::Utf8 => 
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
-        DataType::LargeUtf8 => {
-            LargeString(VariantToStringArrowBuilder::new(cast_options, 
capacity))
-        }
-        DataType::Utf8View => 
StringView(VariantToStringArrowBuilder::new(cast_options, capacity)),
-        _ if data_type.is_primitive() => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Primitive data_type {data_type:?} not yet implemented"
-            )));

Review Comment:
   Here, *primitive* refers to **Variant** primitive, not Arrow primitive. So 
we shouldn’t throw an error about “not an Arrow primitive.” Ideally this 
function should accept any Arrow type, therefore changed this to 
`NotYetImplemented` instead.
   



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -210,129 +210,111 @@ impl<'a> VariantToArrowRowBuilder<'a> {
     }
 }
 
-/// Creates a primitive row builder, returning Err if the requested data type 
is not primitive.
+/// Creates a row builder that converts primitive `Variant` values into the 
requested Arrow data type.
 pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
     data_type: &'a DataType,
     cast_options: &'a CastOptions,
     capacity: usize,
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, 
capacity)),
-        DataType::Boolean => 
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Decimal32(precision, scale) => 
Decimal32(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal64(precision, scale) => 
Decimal64(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal128(precision, scale) => 
Decimal128(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Decimal256(precision, scale) => 
Decimal256(VariantToDecimalArrowRowBuilder::new(
-            cast_options,
-            capacity,
-            *precision,
-            *scale,
-        )?),
-        DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
-            VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
-        ),
-        DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
-            VariantToTimestampArrowRowBuilder::new(cast_options, capacity, 
tz.clone()),
-        ),
-        DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Time64(TimeUnit::Microsecond) => 
Time(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::FixedSizeBinary(16) => {
-            Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
-        }
-        DataType::FixedSizeBinary(size) => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "FixedSizeBinary({size}) is not a valid variant shredding 
type. Only FixedSizeBinary(16) for UUID is supported."
-            )));
-        }

Review Comment:
   Moved to `shred_variant`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to