liamzwbao commented on code in PR #8796:
URL: https://github.com/apache/arrow-rs/pull/8796#discussion_r2511645962
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -210,129 +210,111 @@ impl<'a> VariantToArrowRowBuilder<'a> {
}
}
-/// Creates a primitive row builder, returning Err if the requested data type
is not primitive.
+/// Creates a row builder that converts primitive `Variant` values into the
requested Arrow data type.
pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
data_type: &'a DataType,
cast_options: &'a CastOptions,
capacity: usize,
) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
use PrimitiveVariantToArrowRowBuilder::*;
- let builder = match data_type {
- DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options,
capacity)),
- DataType::Boolean =>
Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity)),
- DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Decimal32(precision, scale) =>
Decimal32(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal64(precision, scale) =>
Decimal64(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal128(precision, scale) =>
Decimal128(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Decimal256(precision, scale) =>
Decimal256(VariantToDecimalArrowRowBuilder::new(
- cast_options,
- capacity,
- *precision,
- *scale,
- )?),
- DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
- VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
- ),
- DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
- VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
- ),
- DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
- VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
- ),
- DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
- VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
- ),
- DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Time64(TimeUnit::Microsecond) =>
Time(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::FixedSizeBinary(16) => {
- Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
- }
- DataType::FixedSizeBinary(size) => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "FixedSizeBinary({size}) is not a valid variant shredding
type. Only FixedSizeBinary(16) for UUID is supported."
- )));
- }
- DataType::Utf8 =>
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
- DataType::LargeUtf8 => {
- LargeString(VariantToStringArrowBuilder::new(cast_options,
capacity))
- }
- DataType::Utf8View =>
StringView(VariantToStringArrowBuilder::new(cast_options, capacity)),
- _ if data_type.is_primitive() => {
- return Err(ArrowError::NotYetImplemented(format!(
- "Primitive data_type {data_type:?} not yet implemented"
- )));
- }
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Not a primitive type: {data_type:?}"
- )));
- }
- };
+ let builder =
+ match data_type {
+ DataType::Null =>
Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
+ DataType::Boolean => {
+ Boolean(VariantToBooleanArrowRowBuilder::new(cast_options,
capacity))
+ }
+ DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float16 =>
Float16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float32 =>
Float32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float64 =>
Float64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Decimal32(precision, scale) => Decimal32(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal64(precision, scale) => Decimal64(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal128(precision, scale) => Decimal128(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Decimal256(precision, scale) => Decimal256(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
+ ),
+ DataType::Timestamp(TimeUnit::Microsecond, None) =>
TimestampMicroNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options,
capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, None) =>
TimestampNanoNtz(
+ VariantToTimestampNtzArrowRowBuilder::new(cast_options,
capacity),
+ ),
+ DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+ VariantToTimestampArrowRowBuilder::new(cast_options, capacity,
tz.clone()),
+ ),
+ DataType::Date32 => Date(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Time64(TimeUnit::Microsecond) => Time(
+ VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+ ),
+ DataType::FixedSizeBinary(16) => {
+ Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+ }
+ DataType::Utf8 =>
String(VariantToStringArrowBuilder::new(cast_options, capacity)),
+ DataType::LargeUtf8 => {
+ LargeString(VariantToStringArrowBuilder::new(cast_options,
capacity))
+ }
+ DataType::Utf8View => {
+ StringView(VariantToStringArrowBuilder::new(cast_options,
capacity))
+ }
+ _ => {
+ return Err(ArrowError::NotYetImplemented(format!(
+ "Data_type {data_type:?} not yet implemented"
Review Comment:
Makes sense to me, done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]