friendlymatthew commented on code in PR #7871: URL: https://github.com/apache/arrow-rs/pull/7871#discussion_r2187359442
########## parquet-variant/src/variant/list.rs: ########## @@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> { /// [invalid]: Self#Validation pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> { (index < self.num_elements).then(|| { - self.try_get_impl(index) - .and_then(Variant::validate) + self.try_get_with_shallow_validation(index) + .and_then(Variant::with_deep_validation) .expect("Invalid variant array element") }) } /// Fallible version of `get`. Returns element by index, capturing validation errors pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { - self.try_get_impl(index)?.validate() + self.try_get_with_shallow_validation(index)? + .with_deep_validation() } - /// Fallible iteration over the elements of this list. - pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { - self.iter_try_impl().map(|result| result?.validate()) + // Fallible version of `get`, performing only basic (constant-time) validation. + fn try_get_with_shallow_validation(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { + // Fetch the value bytes between the two offsets for this index, from the value array region + // of the byte buffer + let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?; + let value_bytes = + slice_from_slice_at_offset(self.value, self.first_value_byte, byte_range)?; + + Variant::try_new_with_metadata_and_shallow_validation(self.metadata, value_bytes) } - // Fallible iteration that only performs basic (constant-time) validation. - fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { - (0..self.len()).map(move |i| self.try_get_impl(i)) + /// Fallible iteration over the elements of this list. + pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { Review Comment: Makes sense to me, I reverted it back to `iter_try`. ########## parquet-variant/src/variant.rs: ########## @@ -407,16 +412,16 @@ impl<'m, 'v> Variant<'m, 'v> { /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be /// constructed in unvalidated (and potentially invalid) state. /// - /// If [`Self::is_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)` + /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)` /// where `m` and `v` are the sizes of metadata and value buffers, respectively. /// /// [objects]: VariantObject#Validation /// [arrays]: VariantList#Validation - pub fn validate(self) -> Result<Self, ArrowError> { + pub fn with_deep_validation(self) -> Result<Self, ArrowError> { Review Comment: Using `with_full_validation` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org