alamb commented on code in PR #8021: URL: https://github.com/apache/arrow-rs/pull/8021#discussion_r2254732843
########## parquet-variant-compute/src/variant_array.rs: ########## @@ -135,36 +238,87 @@ impl VariantArray { self.inner } + /// Return the shredding state of this `VariantArray` + pub fn shredding_state(&self) -> &ShreddingState { + &self.shredding_state + } + /// Return the [`Variant`] instance stored at the given row /// - /// Panics if the index is out of bounds. + /// Consistently with other Arrow arrays types, this API requires you to + /// check for nulls first using [`Self::is_valid`]. + /// + /// # Panics + /// * if the index is out of bounds + /// * if the array value is null + /// + /// If this is a shredded variant but has no value at the shredded location, it + /// will return [`Variant::Null`]. + /// + /// + /// # Performance Note + /// + /// This is certainly not the most efficient way to access values in a + /// `VariantArray`, but it is useful for testing and debugging. /// /// Note: Does not do deep validation of the [`Variant`], so it is up to the /// caller to ensure that the metadata and value were constructed correctly. pub fn value(&self, index: usize) -> Variant { - let metadata = self.metadata_field().as_binary_view().value(index); - let value = self.value_field().as_binary_view().value(index); - Variant::new(metadata, value) + match &self.shredding_state { + ShreddingState::Unshredded { metadata, value } => { + Variant::new(metadata.value(index), value.value(index)) + } + ShreddingState::FullyShredded { + metadata: _, + typed_value, + } => { + if typed_value.is_null(index) { + Variant::Null + } else { + typed_value_to_variant(typed_value, index) + } + } + ShreddingState::PartiallyShredded { + metadata, + value, + typed_value, + } => { + if typed_value.is_null(index) { + Variant::new(metadata.value(index), value.value(index)) + } else { + typed_value_to_variant(typed_value, index) + } + } + } } - fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> { - array.column_by_name("metadata").cloned() + /// Return a reference to the metadata field of the [`StructArray`] + pub fn metadata_field(&self) -> &BinaryViewArray { + self.shredding_state.metadata_field() } - fn find_value_field(array: &StructArray) -> Option<ArrayRef> { - array.column_by_name("value").cloned() + /// Return a reference to the value field of the `StructArray` + pub fn value_field(&self) -> Option<&BinaryViewArray> { + self.shredding_state.value_field() } - /// Return a reference to the metadata field of the [`StructArray`] - pub fn metadata_field(&self) -> &ArrayRef { - // spec says fields order is not guaranteed, so we search by name - &self.metadata_ref + /// Return a reference to the typed_value field of the `StructArray`, if present + pub fn typed_value_field(&self) -> Option<&ArrayRef> { + self.shredding_state.typed_value_field() } +} - /// Return a reference to the value field of the `StructArray` - pub fn value_field(&self) -> &ArrayRef { - // spec says fields order is not guaranteed, so we search by name - &self.value_ref +/// returns the non-null element at index as a Variant +fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant { + match typed_value.data_type() { + DataType::Int32 => { + let typed_value = typed_value.as_primitive::<Int32Type>(); + Variant::from(typed_value.value(index)) + } + // todo other types here + _ => { + todo!(); // Unsupported typed_value type + } } } Review Comment: New PR - https://github.com/apache/arrow-rs/pull/8044 ########## parquet-variant-compute/src/variant_array.rs: ########## @@ -135,36 +238,87 @@ impl VariantArray { self.inner } + /// Return the shredding state of this `VariantArray` + pub fn shredding_state(&self) -> &ShreddingState { + &self.shredding_state + } + /// Return the [`Variant`] instance stored at the given row /// - /// Panics if the index is out of bounds. + /// Consistently with other Arrow arrays types, this API requires you to + /// check for nulls first using [`Self::is_valid`]. + /// + /// # Panics + /// * if the index is out of bounds + /// * if the array value is null + /// + /// If this is a shredded variant but has no value at the shredded location, it + /// will return [`Variant::Null`]. + /// + /// + /// # Performance Note + /// + /// This is certainly not the most efficient way to access values in a + /// `VariantArray`, but it is useful for testing and debugging. /// /// Note: Does not do deep validation of the [`Variant`], so it is up to the /// caller to ensure that the metadata and value were constructed correctly. pub fn value(&self, index: usize) -> Variant { - let metadata = self.metadata_field().as_binary_view().value(index); - let value = self.value_field().as_binary_view().value(index); - Variant::new(metadata, value) + match &self.shredding_state { + ShreddingState::Unshredded { metadata, value } => { + Variant::new(metadata.value(index), value.value(index)) + } + ShreddingState::FullyShredded { + metadata: _, + typed_value, + } => { + if typed_value.is_null(index) { + Variant::Null + } else { + typed_value_to_variant(typed_value, index) + } + } + ShreddingState::PartiallyShredded { + metadata, + value, + typed_value, + } => { + if typed_value.is_null(index) { + Variant::new(metadata.value(index), value.value(index)) + } else { + typed_value_to_variant(typed_value, index) + } + } + } } - fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> { - array.column_by_name("metadata").cloned() + /// Return a reference to the metadata field of the [`StructArray`] + pub fn metadata_field(&self) -> &BinaryViewArray { + self.shredding_state.metadata_field() } - fn find_value_field(array: &StructArray) -> Option<ArrayRef> { - array.column_by_name("value").cloned() + /// Return a reference to the value field of the `StructArray` + pub fn value_field(&self) -> Option<&BinaryViewArray> { + self.shredding_state.value_field() } - /// Return a reference to the metadata field of the [`StructArray`] - pub fn metadata_field(&self) -> &ArrayRef { - // spec says fields order is not guaranteed, so we search by name - &self.metadata_ref + /// Return a reference to the typed_value field of the `StructArray`, if present + pub fn typed_value_field(&self) -> Option<&ArrayRef> { + self.shredding_state.typed_value_field() } +} - /// Return a reference to the value field of the `StructArray` - pub fn value_field(&self) -> &ArrayRef { - // spec says fields order is not guaranteed, so we search by name - &self.value_ref +/// returns the non-null element at index as a Variant +fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant { + match typed_value.data_type() { + DataType::Int32 => { + let typed_value = typed_value.as_primitive::<Int32Type>(); + Variant::from(typed_value.value(index)) + } + // todo other types here + _ => { + todo!(); // Unsupported typed_value type + } } } Review Comment: Note I couldn't quite figure out how to reuse that kernel code in this PR -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org