klion26 commented on code in PR #8796:
URL: https://github.com/apache/arrow-rs/pull/8796#discussion_r2502064599
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -210,129 +210,111 @@ impl<'a> VariantToArrowRowBuilder<'a> {
}
}
-/// Creates a primitive row builder, returning Err if the requested data type
is not primitive.
+/// Creates a row builder that converts primitive `Variant` values into the
requested Arrow data type.
pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
data_type: &'a DataType,
cast_options: &'a CastOptions,
capacity: usize,
) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
use PrimitiveVariantToArrowRowBuilder::*;
- let builder = match data_type {
- DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options,
capacity)),
- DataType::Boolean =>
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
- DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Decimal32(precision, scale) =>
Decimal32(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal64(precision, scale) =>
Decimal64(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal128(precision, scale) =>
Decimal128(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal256(precision, scale) =>
Decimal256(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
- VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
- ),
- DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
- VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
- ),
- DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
- VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
- ),
- DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
- VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
- ),
- DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Time64(TimeUnit::Microsecond) =>
Time(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::FixedSizeBinary(16) => {
- Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
- }
- DataType::FixedSizeBinary(size) => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "FixedSizeBinary({size}) is not a valid variant shredding
type. Only FixedSizeBinary(16) for UUID is supported."
- )));
- }
- DataType::Utf8 =>
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
- DataType::LargeUtf8 => {
- LargeString(VariantToStringArrowBuilder::new(cast_options,
capacity))
- }
- DataType::Utf8View =>
StringView(VariantToStringArrowBuilder::new(cast_options, capacity)),
- _ if data_type.is_primitive() => {
- return Err(ArrowError::NotYetImplemented(format!(
- "Primitive data_type {data_type:?} not yet implemented"
- )));
- }
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Not a primitive type: {data_type:?}"
- )));
- }
- };
+ let builder =
+ match data_type {
+ DataType::Null =>
Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
+ DataType::Boolean => {
+ Boolean(VariantToBooleanArrowRowBuilder::new(cast_options,
capacity))
+ }
+ DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float16 =>
Float16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float32 =>
Float32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float64 =>
Float64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Decimal32(precision, scale) => Decimal32(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal64(precision, scale) => Decimal64(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal128(precision, scale) => Decimal128(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal256(precision, scale) => Decimal256(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Timestamp(TimeUnit::Microsecond, None) =>
TimestampMicroNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options,
capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, None) =>
TimestampNanoNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options,
capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Time64(TimeUnit::Microsecond) => Time(
+ VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+ ),
+ DataType::FixedSizeBinary(16) => {
+ Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+ }
+ DataType::Utf8 =>
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
+ DataType::LargeUtf8 => {
+ LargeString(VariantToStringArrowBuilder::new(cast_options,
capacity))
+ }
+ DataType::Utf8View => {
+ StringView(VariantToStringArrowBuilder::new(cast_options,
capacity))
+ }
+ _ => {
+ return Err(ArrowError::NotYetImplemented(format!(
Review Comment:
My understanding is that
- All Parquet Variant primitive types will map to Arrow DataTypes
- Some Parquet Variant primitive types _can_ be converted to other Arrow
DataTypes(with cast)
- Here we want to support all parquet variant primitive types, and the
target type here is _Arrow DataType_, we check the _Arrow DataTypes_ here
- The `dataype.is_primitive()` check(my guess) is that we can support
**all** Arrow DataTypes, but has not done(when this code written), so add a
`NotYetImplement` here to mark this.
However, it is true that we might need an expert to confirm our
understanding here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]