This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 3126dad034 [Variant] Remove superflous validate call and rename
methods (#7871)
3126dad034 is described below
commit 3126dad0348035bc5fadc8ec61b7150b9ce6aad5
Author: Matthew Kim <[email protected]>
AuthorDate: Mon Jul 7 14:09:51 2025 -0400
[Variant] Remove superflous validate call and rename methods (#7871)
# Rationale for this change
I was investigating https://github.com/apache/arrow-rs/issues/7869, when
I found we were performing deep validation in areas where we only want
shallow validation
For example:
`try_get_impl` is aimed to perform shallow validation
https://github.com/apache/arrow-rs/blob/13d79b35884bf1fb2b761dc8e70b39bb24ae6c6b/parquet-variant/src/variant/list.rs#L272-L280
However, `Variant::try_new_with_metadata` _will_ perform deep
validation, which is undesired.
https://github.com/apache/arrow-rs/blob/13d79b35884bf1fb2b761dc8e70b39bb24ae6c6b/parquet-variant/src/variant.rs#L322-L327
Also fallible versions like `try_get` and `iter_try` will call (1)
`validate` through `try_get_impl` -> `Variant::try_new_with_metadata`
and then (2) manually call `validate` again. This is also a bit
undesired, but it doesn't hurt us perf-wise, since we set a flag to make
sure the full validation is run only once.
https://github.com/apache/arrow-rs/blob/13d79b35884bf1fb2b761dc8e70b39bb24ae6c6b/parquet-variant/src/variant/list.rs#L241-L249
I personally found the `_impl` convention a bit hard to reason about.
From what I understand, `_impl` functions should only perform shallow
validation.
Here are my proposed name changes:
- `iter_try` -> `try_iter` to follow other `try_..` methods
- `_impl` -> `with_shallow_validation` to make it clear to the reader
that this function does basic validation
- `validate` -> `with_deep_validation`, the builder method will perform
linear validation
- `is_validated` -> `is_fully_validated`, both shallow and deep
validation has been done
---
parquet-variant/benches/variant_builder.rs | 2 +-
parquet-variant/src/variant.rs | 45 +++++++++++----------
parquet-variant/src/variant/list.rs | 63 ++++++++++++++++--------------
parquet-variant/src/variant/metadata.rs | 14 +++----
parquet-variant/src/variant/object.rs | 43 +++++++++++---------
parquet-variant/tests/variant_interop.rs | 4 +-
6 files changed, 93 insertions(+), 78 deletions(-)
diff --git a/parquet-variant/benches/variant_builder.rs
b/parquet-variant/benches/variant_builder.rs
index 8481ca9c8f..8e24a63c3a 100644
--- a/parquet-variant/benches/variant_builder.rs
+++ b/parquet-variant/benches/variant_builder.rs
@@ -441,7 +441,7 @@ fn bench_validation_validated_vs_unvalidated(c: &mut
Criterion) {
b.iter(|| {
for variant in &unvalidated {
- let validated = variant.clone().validate().unwrap();
+ let validated =
variant.clone().with_full_validation().unwrap();
hint::black_box(validated);
}
})
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 96cdb53c15..6bcf61c036 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -1,5 +1,3 @@
-use std::ops::Deref;
-
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -16,6 +14,7 @@ use std::ops::Deref;
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+
pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
pub use self::list::VariantList;
pub use self::metadata::VariantMetadata;
@@ -24,6 +23,7 @@ use crate::decoder::{
self, get_basic_type, get_primitive_type, VariantBasicType,
VariantPrimitiveType,
};
use crate::utils::{first_byte_from_slice, slice_from_slice};
+use std::ops::Deref;
use arrow_schema::ArrowError;
use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
@@ -184,7 +184,7 @@ impl Deref for ShortString<'_> {
/// Every instance of variant is either _valid_ or _invalid_. depending on
whether the
/// underlying bytes are a valid encoding of a variant value (see below).
///
-/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`],
or [`Self::validate`]
+/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`],
or [`Self::with_full_validation`]
/// are fully _validated_. They always contain _valid_ data, and infallible
accesses such as
/// iteration and indexing are panic-free. The validation cost is `O(m + v)`
where `m` and
/// `v` are the number of bytes in the metadata and value buffers,
respectively.
@@ -192,7 +192,7 @@ impl Deref for ShortString<'_> {
/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are
_unvalidated_ and so
/// they may contain either _valid_ or _invalid_ data. Infallible accesses to
variant objects and
/// arrays, such as iteration and indexing will panic if the underlying bytes
are _invalid_, and
-/// fallible alternatives are provided as panic-free alternatives.
[`Self::validate`] can also be
+/// fallible alternatives are provided as panic-free alternatives.
[`Self::with_full_validation`] can also be
/// used to _validate_ an _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. This can be
useful if the caller
@@ -297,8 +297,10 @@ impl<'m, 'v> Variant<'m, 'v> {
///
/// [unvalidated]: Self#Validation
pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
- let metadata = VariantMetadata::try_new_impl(metadata).expect("Invalid
variant metadata");
- Self::try_new_with_metadata_impl(metadata, value).expect("Invalid
variant data")
+ let metadata =
VariantMetadata::try_new_with_shallow_validation(metadata)
+ .expect("Invalid variant metadata");
+ Self::try_new_with_metadata_and_shallow_validation(metadata, value)
+ .expect("Invalid variant data")
}
/// Create a new variant with existing metadata.
@@ -323,18 +325,19 @@ impl<'m, 'v> Variant<'m, 'v> {
metadata: VariantMetadata<'m>,
value: &'v [u8],
) -> Result<Self, ArrowError> {
- Self::try_new_with_metadata_impl(metadata, value)?.validate()
+ Self::try_new_with_metadata_and_shallow_validation(metadata,
value)?.with_full_validation()
}
/// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
///
/// [unvalidated]: Self#Validation
pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8])
-> Self {
- Self::try_new_with_metadata_impl(metadata, value).expect("Invalid
variant")
+ Self::try_new_with_metadata_and_shallow_validation(metadata, value)
+ .expect("Invalid variant")
}
// The actual constructor, which only performs shallow (constant-time)
validation.
- fn try_new_with_metadata_impl(
+ fn try_new_with_metadata_and_shallow_validation(
metadata: VariantMetadata<'m>,
value: &'v [u8],
) -> Result<Self, ArrowError> {
@@ -382,10 +385,12 @@ impl<'m, 'v> Variant<'m, 'v> {
VariantBasicType::ShortString => {
Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
}
- VariantBasicType::Object => {
- Variant::Object(VariantObject::try_new_impl(metadata, value)?)
- }
- VariantBasicType::Array =>
Variant::List(VariantList::try_new_impl(metadata, value)?),
+ VariantBasicType::Object => Variant::Object(
+ VariantObject::try_new_with_shallow_validation(metadata,
value)?,
+ ),
+ VariantBasicType::Array =>
Variant::List(VariantList::try_new_with_shallow_validation(
+ metadata, value,
+ )?),
};
Ok(new_self)
}
@@ -393,10 +398,10 @@ impl<'m, 'v> Variant<'m, 'v> {
/// True if this variant instance has already been [validated].
///
/// [validated]: Self#Validation
- pub fn is_validated(&self) -> bool {
+ pub fn is_fully_validated(&self) -> bool {
match self {
- Variant::List(list) => list.is_validated(),
- Variant::Object(obj) => obj.is_validated(),
+ Variant::List(list) => list.is_fully_validated(),
+ Variant::Object(obj) => obj.is_fully_validated(),
_ => true,
}
}
@@ -407,16 +412,16 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Variant leaf values are always valid by construction, but [objects]
and [arrays] can be
/// constructed in unvalidated (and potentially invalid) state.
///
- /// If [`Self::is_validated`] is true, validation is a no-op. Otherwise,
the cost is `O(m + v)`
+ /// If [`Self::is_fully_validated`] is true, validation is a no-op.
Otherwise, the cost is `O(m + v)`
/// where `m` and `v` are the sizes of metadata and value buffers,
respectively.
///
/// [objects]: VariantObject#Validation
/// [arrays]: VariantList#Validation
- pub fn validate(self) -> Result<Self, ArrowError> {
+ pub fn with_full_validation(self) -> Result<Self, ArrowError> {
use Variant::*;
match self {
- List(list) => list.validate().map(List),
- Object(obj) => obj.validate().map(Object),
+ List(list) => list.with_full_validation().map(List),
+ Object(obj) => obj.with_full_validation().map(Object),
_ => Ok(self),
}
}
diff --git a/parquet-variant/src/variant/list.rs
b/parquet-variant/src/variant/list.rs
index f9a50e7ef8..5257ec6a02 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -82,14 +82,14 @@ impl VariantListHeader {
/// Every instance of variant list is either _valid_ or _invalid_. depending
on whether the
/// underlying bytes are a valid encoding of a variant array (see below).
///
-/// Instances produced by [`Self::try_new`] or [`Self::validate`] are fully
_validated_. They always
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`]
are fully _validated_. They always
/// contain _valid_ data, and infallible accesses such as iteration and
indexing are panic-free. The
/// validation cost is linear in the number of underlying bytes.
///
/// Instances produced by [`Self::new`] are _unvalidated_ and so they may
contain either _valid_ or
/// _invalid_ data. Infallible accesses such as iteration and indexing will
panic if the underlying
/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`]
and [`Self::get`] are
-/// provided as panic-free alternatives. [`Self::validate`] can also be used
to _validate_ an
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can
also be used to _validate_ an
/// _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. This can be
useful if the caller
@@ -136,18 +136,18 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// This constructor verifies that `value` points to a valid variant array
value. In particular,
/// that all offsets are in-bounds and point to valid (recursively
validated) objects.
pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) ->
Result<Self, ArrowError> {
- Self::try_new_impl(metadata, value)?.validate()
+ Self::try_new_with_shallow_validation(metadata,
value)?.with_full_validation()
}
pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
- Self::try_new_impl(metadata, value).expect("Invalid variant list
value")
+ Self::try_new_with_shallow_validation(metadata, value).expect("Invalid
variant list value")
}
/// Attempts to interpet `metadata` and `value` as a variant array,
performing only basic
/// (constant-cost) [validation].
///
/// [validation]: Self#Validation
- pub(crate) fn try_new_impl(
+ pub(crate) fn try_new_with_shallow_validation(
metadata: VariantMetadata<'m>,
value: &'v [u8],
) -> Result<Self, ArrowError> {
@@ -196,18 +196,18 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// True if this instance is fully [validated] for panic-free infallible
accesses.
///
/// [validated]: Self#Validation
- pub fn is_validated(&self) -> bool {
+ pub fn is_fully_validated(&self) -> bool {
self.validated
}
/// Performs a full [validation] of this variant array and returns the
result.
///
/// [validation]: Self#Validation
- pub fn validate(mut self) -> Result<Self, ArrowError> {
+ pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
if !self.validated {
// Validate the metadata dictionary first, if not already
validated, because we pass it
// by value to all the children (who would otherwise re-validate
it repeatedly).
- self.metadata = self.metadata.validate()?;
+ self.metadata = self.metadata.with_full_validation()?;
// Iterate over all string keys in this dictionary in order to
prove that the offset
// array is valid, all offsets are in bounds, and all string bytes
are valid utf-8.
@@ -232,25 +232,25 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// [invalid]: Self#Validation
pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
(index < self.num_elements).then(|| {
- self.try_get_impl(index)
- .and_then(Variant::validate)
+ self.try_get_with_shallow_validation(index)
.expect("Invalid variant array element")
})
}
/// Fallible version of `get`. Returns element by index, capturing
validation errors
pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError>
{
- self.try_get_impl(index)?.validate()
+ self.try_get_with_shallow_validation(index)?
+ .with_full_validation()
}
- /// Fallible iteration over the elements of this list.
- pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- self.iter_try_impl().map(|result| result?.validate())
- }
-
- // Fallible iteration that only performs basic (constant-time) validation.
- fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- (0..self.len()).map(move |i| self.try_get_impl(i))
+ // Fallible version of `get`, performing only basic (constant-time)
validation.
+ fn try_get_with_shallow_validation(&self, index: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
+ // Fetch the value bytes between the two offsets for this index, from
the value array region
+ // of the byte buffer
+ let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+ let value_bytes =
+ slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
/// Iterates over the values of this list. When working with [unvalidated]
input, consider
@@ -258,26 +258,29 @@ impl<'m, 'v> VariantList<'m, 'v> {
///
/// [unvalidated]: Self#Validation
pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
- self.iter_try_impl()
+ self.iter_try_with_shallow_validation()
.map(|result| result.expect("Invalid variant list entry"))
}
+ /// Fallible iteration over the elements of this list.
+ pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
+ self.iter_try_with_shallow_validation()
+ .map(|result| result?.with_full_validation())
+ }
+
+ // Fallible iteration that only performs basic (constant-time) validation.
+ fn iter_try_with_shallow_validation(
+ &self,
+ ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+ (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
+ }
+
// Attempts to retrieve the ith offset from the offset array region of the
byte buffer.
fn get_offset(&self, index: usize) -> Result<usize, ArrowError> {
let byte_range =
self.header.first_offset_byte()..self.first_value_byte;
let offset_bytes = slice_from_slice(self.value, byte_range)?;
self.header.offset_size.unpack_usize(offset_bytes, index)
}
-
- // Fallible version of `get`, performing only basic (constant-time)
validation.
- fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>,
ArrowError> {
- // Fetch the value bytes between the two offsets for this index, from
the value array region
- // of the byte buffer
- let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
- let value_bytes =
- slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
- Variant::try_new_with_metadata(self.metadata, value_bytes)
- }
}
#[cfg(test)]
diff --git a/parquet-variant/src/variant/metadata.rs
b/parquet-variant/src/variant/metadata.rs
index 742f586fb3..0aad22ea72 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -93,14 +93,14 @@ impl VariantMetadataHeader {
/// Every instance of variant metadata is either _valid_ or _invalid_.
depending on whether the
/// underlying bytes are a valid encoding of variant metadata (see below).
///
-/// Instances produced by [`Self::try_new`] or [`Self::validate`] are fully
_validated_. They always
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`]
are fully _validated_. They always
/// contain _valid_ data, and infallible accesses such as iteration and
indexing are panic-free. The
/// validation cost is linear in the number of underlying bytes.
///
/// Instances produced by [`Self::new`] are _unvalidated_ and so they may
contain either _valid_ or
/// _invalid_ data. Infallible accesses such as iteration and indexing will
panic if the underlying
/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`]
and [`Self::get`] are
-/// provided as panic-free alternatives. [`Self::validate`] can also be used
to _validate_ an
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can
also be used to _validate_ an
/// _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. This can be
useful if the caller
@@ -143,7 +143,7 @@ impl<'m> VariantMetadata<'m> {
///
/// [validation]: Self#Validation
pub fn try_new(bytes: &'m [u8]) -> Result<Self, ArrowError> {
- Self::try_new_impl(bytes)?.validate()
+ Self::try_new_with_shallow_validation(bytes)?.with_full_validation()
}
/// Interprets `bytes` as a variant metadata instance, without attempting
to [validate] dictionary
@@ -157,11 +157,11 @@ impl<'m> VariantMetadata<'m> {
///
/// [validate]: Self#Validation
pub fn new(bytes: &'m [u8]) -> Self {
- Self::try_new_impl(bytes).expect("Invalid variant metadata")
+ Self::try_new_with_shallow_validation(bytes).expect("Invalid variant
metadata")
}
// The actual constructor, which performs only basic (constant-const)
validation.
- pub(crate) fn try_new_impl(bytes: &'m [u8]) -> Result<Self, ArrowError> {
+ pub(crate) fn try_new_with_shallow_validation(bytes: &'m [u8]) ->
Result<Self, ArrowError> {
let header_byte = first_byte_from_slice(bytes)?;
let header = VariantMetadataHeader::try_new(header_byte)?;
@@ -219,14 +219,14 @@ impl<'m> VariantMetadata<'m> {
/// True if this instance is fully [validated] for panic-free infallible
accesses.
///
/// [validated]: Self#Validation
- pub fn is_validated(&self) -> bool {
+ pub fn is_fully_validated(&self) -> bool {
self.validated
}
/// Performs a full [validation] of this metadata dictionary and returns
the result.
///
/// [validation]: Self#Validation
- pub fn validate(mut self) -> Result<Self, ArrowError> {
+ pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
if !self.validated {
// Iterate over all string keys in this dictionary in order to
prove that the offset
// array is valid, all offsets are in bounds, and all string bytes
are valid utf-8.
diff --git a/parquet-variant/src/variant/object.rs
b/parquet-variant/src/variant/object.rs
index 15c67c9796..3991f76e95 100644
--- a/parquet-variant/src/variant/object.rs
+++ b/parquet-variant/src/variant/object.rs
@@ -78,14 +78,14 @@ impl VariantObjectHeader {
/// Every instance of variant object is either _valid_ or _invalid_. depending
on whether the
/// underlying bytes are a valid encoding of a variant object subtype (see
below).
///
-/// Instances produced by [`Self::try_new`] or [`Self::validate`] are fully
(and recursively)
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`]
are fully (and recursively)
/// _validated_. They always contain _valid_ data, and infallible accesses
such as iteration and
/// indexing are panic-free. The validation cost is linear in the number of
underlying bytes.
///
/// Instances produced by [`Self::new`] are _unvalidated_ and so they may
contain either _valid_ or
/// _invalid_ data. Infallible accesses such as iteration and indexing will
panic if the underlying
/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`]
and [`Self::get`] are
-/// provided as panic-free alternatives. [`Self::validate`] can also be used
to _validate_ an
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can
also be used to _validate_ an
/// _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. They can be
useful if the caller
@@ -128,7 +128,7 @@ pub struct VariantObject<'m, 'v> {
impl<'m, 'v> VariantObject<'m, 'v> {
pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
- Self::try_new_impl(metadata, value).expect("Invalid variant object")
+ Self::try_new_with_shallow_validation(metadata, value).expect("Invalid
variant object")
}
/// Attempts to interpet `metadata` and `value` as a variant object.
@@ -139,14 +139,14 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// particular, that all field ids exist in `metadata`, and all offsets
are in-bounds and point
/// to valid objects.
pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) ->
Result<Self, ArrowError> {
- Self::try_new_impl(metadata, value)?.validate()
+ Self::try_new_with_shallow_validation(metadata,
value)?.with_full_validation()
}
/// Attempts to interpet `metadata` and `value` as a variant object,
performing only basic
/// (constant-cost) [validation].
///
/// [validation]: Self#Validation
- pub(crate) fn try_new_impl(
+ pub(crate) fn try_new_with_shallow_validation(
metadata: VariantMetadata<'m>,
value: &'v [u8],
) -> Result<Self, ArrowError> {
@@ -197,22 +197,22 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// True if this instance is fully [validated] for panic-free infallible
accesses.
///
/// [validated]: Self#Validation
- pub fn is_validated(&self) -> bool {
+ pub fn is_fully_validated(&self) -> bool {
self.validated
}
/// Performs a full [validation] of this variant object.
///
/// [validation]: Self#Validation
- pub fn validate(mut self) -> Result<Self, ArrowError> {
+ pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
if !self.validated {
// Validate the metadata dictionary first, if not already
validated, because we pass it
// by value to all the children (who would otherwise re-validate
it repeatedly).
- self.metadata = self.metadata.validate()?;
+ self.metadata = self.metadata.with_full_validation()?;
// Iterate over all string keys in this dictionary in order to
prove that the offset
// array is valid, all offsets are in bounds, and all string bytes
are valid utf-8.
- validate_fallible_iterator(self.iter_try_impl())?;
+ validate_fallible_iterator(self.iter_try())?;
self.validated = true;
}
Ok(self)
@@ -236,20 +236,24 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// field IDs). The latter can only happen when working with an
unvalidated object produced by
/// [`Self::new`].
pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
- (i < self.len()).then(|| self.try_field_impl(i).expect("Invalid object
field value"))
+ (i < self.len()).then(|| {
+ self.try_field_with_shallow_validation(i)
+ .expect("Invalid object field value")
+ })
}
/// Fallible version of `field`. Returns field value by index, capturing
validation errors
pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
- self.try_field_impl(i)?.validate()
+ self.try_field_with_shallow_validation(i)?
+ .with_full_validation()
}
// Attempts to retrieve the ith field value from the value region of the
byte buffer; it
// performs only basic (constant-cost) validation.
- fn try_field_impl(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+ fn try_field_with_shallow_validation(&self, i: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
let value_bytes = slice_from_slice(self.value,
self.first_value_byte..)?;
let value_bytes = slice_from_slice(value_bytes,
self.get_offset(i)?..)?;
- Variant::try_new_with_metadata(self.metadata, value_bytes)
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
// Attempts to retrieve the ith offset from the field offset region of the
byte buffer.
@@ -281,7 +285,7 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// Returns an iterator of (name, value) pairs over the fields of this
object.
pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> +
'_ {
- self.iter_try_impl()
+ self.iter_try_with_shallow_validation()
.map(|result| result.expect("Invalid variant object field value"))
}
@@ -289,18 +293,21 @@ impl<'m, 'v> VariantObject<'m, 'v> {
pub fn iter_try(
&self,
) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>>
+ '_ {
- self.iter_try_impl().map(|result| {
+ self.iter_try_with_shallow_validation().map(|result| {
let (name, value) = result?;
- Ok((name, value.validate()?))
+ Ok((name, value.with_full_validation()?))
})
}
// Fallible iteration over the fields of this object that performs only
shallow (constant-cost)
// validation of field values.
- fn iter_try_impl(
+ fn iter_try_with_shallow_validation(
&self,
) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>>
+ '_ {
- (0..self.num_elements).map(move |i| Ok((self.try_field_name(i)?,
self.try_field(i)?)))
+ (0..self.num_elements).map(move |i| {
+ let field = self.try_field_with_shallow_validation(i)?;
+ Ok((self.try_field_name(i)?, field))
+ })
}
/// Returns the value of the field with the specified name, if any.
diff --git a/parquet-variant/tests/variant_interop.rs
b/parquet-variant/tests/variant_interop.rs
index c95d81a3e9..e37172a7d5 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -417,7 +417,7 @@ fn test_validation_workflow(metadata: &[u8], value: &[u8]) {
};
// Step 2: Try validation
- let validation_result = std::panic::catch_unwind(||
variant.clone().validate());
+ let validation_result = std::panic::catch_unwind(||
variant.clone().with_full_validation());
match validation_result {
Ok(Ok(validated)) => {
@@ -515,7 +515,7 @@ fn test_validation_workflow_simple(metadata: &[u8], value:
&[u8]) {
};
// Step 2: Try validation
- let validation_result = std::panic::catch_unwind(||
variant.clone().validate());
+ let validation_result = std::panic::catch_unwind(||
variant.clone().with_full_validation());
match validation_result {
Ok(Ok(validated)) => {