alamb commented on code in PR #8366: URL: https://github.com/apache/arrow-rs/pull/8366#discussion_r2364374103
########## parquet-variant-compute/src/variant_to_arrow.rs: ########## @@ -102,77 +104,142 @@ impl<'a> VariantToArrowRowBuilder<'a> { Float16(b) => b.finish(), Float32(b) => b.finish(), Float64(b) => b.finish(), + } + } +} + +impl<'a> VariantToArrowRowBuilder<'a> { + pub fn append_null(&mut self) -> Result<()> { + use VariantToArrowRowBuilder::*; + match self { + Primitive(b) => b.append_null(), + BinaryVariant(b) => b.append_null(), + WithPath(path_builder) => path_builder.append_null(), + } + } + + pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> { + use VariantToArrowRowBuilder::*; + match self { + Primitive(b) => b.append_value(&value), + BinaryVariant(b) => b.append_value(value), + WithPath(path_builder) => path_builder.append_value(value), + } + } + + pub fn finish(self) -> Result<ArrayRef> { + use VariantToArrowRowBuilder::*; + match self { + Primitive(b) => b.finish(), BinaryVariant(b) => b.finish(), WithPath(path_builder) => path_builder.finish(), } } } -pub(crate) fn make_variant_to_arrow_row_builder<'a>( - metadata: &BinaryViewArray, - path: VariantPath<'a>, - data_type: Option<&'a DataType>, +/// Creates a primitive row builder, returning Err if the requested data type is not primitive. +pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>( + data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, -) -> Result<VariantToArrowRowBuilder<'a>> { - use VariantToArrowRowBuilder::*; +) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> { + use PrimitiveVariantToArrowRowBuilder::*; - let mut builder = match data_type { - // If no data type was requested, build an unshredded VariantArray. - None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new( - metadata.clone(), - capacity, - )), - Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new( + let builder = match data_type { + DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new( + DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new( + DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new( + DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Float16) => Float16(VariantToPrimitiveArrowRowBuilder::new( + DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Float32) => Float32(VariantToPrimitiveArrowRowBuilder::new( + DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::Float64) => Float64(VariantToPrimitiveArrowRowBuilder::new( + DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::UInt8) => UInt8(VariantToPrimitiveArrowRowBuilder::new( + DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::UInt16) => UInt16(VariantToPrimitiveArrowRowBuilder::new( + DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::UInt32) => UInt32(VariantToPrimitiveArrowRowBuilder::new( + DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - Some(DataType::UInt64) => UInt64(VariantToPrimitiveArrowRowBuilder::new( + DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new( cast_options, capacity, )), - _ => { + _ if data_type.is_primitive() => { return Err(ArrowError::NotYetImplemented(format!( - "variant_get with path={:?} and data_type={:?} not yet implemented", - path, data_type + "Primitive data_type {data_type:?} not yet implemented" ))); } + _ => { + return Err(ArrowError::InvalidArgumentError(format!( Review Comment: I tried to write a benchmark for shredding and I hit the fact that I can't shred Utf8 columns (which is fine, we'll do it as a follow on PR) but I will file a ticket to track ########## parquet-variant-compute/src/variant_array.rs: ########## @@ -456,16 +473,13 @@ pub enum ShreddingState { } impl ShreddingState { - /// try to create a new `ShreddingState` from the given fields - pub fn try_new( - value: Option<BinaryViewArray>, - typed_value: Option<ArrayRef>, - ) -> Result<Self, ArrowError> { + /// Create a new `ShreddingState` from the given fields + pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) -> Self { Review Comment: Amusingly, I did this exact change in https://github.com/apache/arrow-rs/pull/8392 too. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org