scovich commented on code in PR #7871:
URL: https://github.com/apache/arrow-rs/pull/7871#discussion_r2186470644
##########
parquet-variant/src/variant.rs:
##########
@@ -407,16 +412,16 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Variant leaf values are always valid by construction, but [objects]
and [arrays] can be
/// constructed in unvalidated (and potentially invalid) state.
///
- /// If [`Self::is_validated`] is true, validation is a no-op. Otherwise,
the cost is `O(m + v)`
+ /// If [`Self::is_fully_validated`] is true, validation is a no-op.
Otherwise, the cost is `O(m + v)`
/// where `m` and `v` are the sizes of metadata and value buffers,
respectively.
///
/// [objects]: VariantObject#Validation
/// [arrays]: VariantList#Validation
- pub fn validate(self) -> Result<Self, ArrowError> {
+ pub fn with_deep_validation(self) -> Result<Self, ArrowError> {
Review Comment:
Right now we have "full" and "deep" as ~synonyms? Should we pick just one?
```suggestion
pub fn with_full_validation(self) -> Result<Self, ArrowError> {
```
(IMO `is_deeply_validated` doesn't sound quite as nice as
`is_fully_validated`)
##########
parquet-variant/src/variant/object.rs:
##########
@@ -281,23 +286,23 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// Returns an iterator of (name, value) pairs over the fields of this
object.
pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> +
'_ {
- self.iter_try_impl()
+ self.try_iter_with_shallow_validation()
.map(|result| result.expect("Invalid variant object field value"))
}
/// Fallible iteration over the fields of this object.
- pub fn iter_try(
+ pub fn try_iter(
&self,
) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>>
+ '_ {
- self.iter_try_impl().map(|result| {
+ self.try_iter_with_shallow_validation().map(|result| {
let (name, value) = result?;
- Ok((name, value.validate()?))
+ Ok((name, value.with_deep_validation()?))
})
}
// Fallible iteration over the fields of this object that performs only
shallow (constant-cost)
// validation of field values.
- fn iter_try_impl(
+ fn try_iter_with_shallow_validation(
&self,
) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>>
+ '_ {
(0..self.num_elements).map(move |i| Ok((self.try_field_name(i)?,
self.try_field(i)?)))
Review Comment:
A missed fix:
```suggestion
(0..self.num_elements).map(move |i| {
let field = self.try_field_with_shallow_validation(i)?;
Ok((self.try_field_name(i)?, field))
})
```
##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// [invalid]: Self#Validation
pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
(index < self.num_elements).then(|| {
- self.try_get_impl(index)
- .and_then(Variant::validate)
+ self.try_get_with_shallow_validation(index)
+ .and_then(Variant::with_deep_validation)
.expect("Invalid variant array element")
})
}
/// Fallible version of `get`. Returns element by index, capturing
validation errors
pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError>
{
- self.try_get_impl(index)?.validate()
+ self.try_get_with_shallow_validation(index)?
+ .with_deep_validation()
}
- /// Fallible iteration over the elements of this list.
- pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- self.iter_try_impl().map(|result| result?.validate())
+ // Fallible version of `get`, performing only basic (constant-time)
validation.
+ fn try_get_with_shallow_validation(&self, index: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
+ // Fetch the value bytes between the two offsets for this index, from
the value array region
+ // of the byte buffer
+ let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+ let value_bytes =
+ slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
+
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
- // Fallible iteration that only performs basic (constant-time) validation.
- fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- (0..self.len()).map(move |i| self.try_get_impl(i))
+ /// Fallible iteration over the elements of this list.
+ pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
Review Comment:
I actually had `try_iter` at first, but I changed because the name suggests
something that returns a Result of Iterator, rather than an Iterator of Result.
No strong opinions tho -- if people like `try_iter` better, let's go for it.
##########
parquet-variant/src/variant.rs:
##########
@@ -323,18 +325,19 @@ impl<'m, 'v> Variant<'m, 'v> {
metadata: VariantMetadata<'m>,
value: &'v [u8],
) -> Result<Self, ArrowError> {
- Self::try_new_with_metadata_impl(metadata, value)?.validate()
+ Self::try_new_with_metadata_and_shallow_validation(metadata,
value)?.with_deep_validation()
}
/// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
///
/// [unvalidated]: Self#Validation
pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8])
-> Self {
- Self::try_new_with_metadata_impl(metadata, value).expect("Invalid
variant")
+ Self::try_new_with_metadata_and_shallow_validation(metadata, value)
+ .expect("Invalid variant")
}
// The actual constructor, which only performs shallow (constant-time)
validation.
- fn try_new_with_metadata_impl(
+ fn try_new_with_metadata_and_shallow_validation(
Review Comment:
Good thing this isn't part of the public API... quite a mouthful!
##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// [invalid]: Self#Validation
pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
(index < self.num_elements).then(|| {
- self.try_get_impl(index)
- .and_then(Variant::validate)
+ self.try_get_with_shallow_validation(index)
+ .and_then(Variant::with_deep_validation)
.expect("Invalid variant array element")
})
}
/// Fallible version of `get`. Returns element by index, capturing
validation errors
pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError>
{
- self.try_get_impl(index)?.validate()
+ self.try_get_with_shallow_validation(index)?
+ .with_deep_validation()
}
- /// Fallible iteration over the elements of this list.
- pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- self.iter_try_impl().map(|result| result?.validate())
+ // Fallible version of `get`, performing only basic (constant-time)
validation.
+ fn try_get_with_shallow_validation(&self, index: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
+ // Fetch the value bytes between the two offsets for this index, from
the value array region
+ // of the byte buffer
+ let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+ let value_bytes =
+ slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
+
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
- // Fallible iteration that only performs basic (constant-time) validation.
- fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- (0..self.len()).map(move |i| self.try_get_impl(i))
+ /// Fallible iteration over the elements of this list.
+ pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
+ self.try_iter_with_shallow_validation()
+ .map(|result| result?.with_deep_validation())
}
/// Iterates over the values of this list. When working with [unvalidated]
input, consider
- /// [`Self::iter_try`] to avoid panics due to invalid data.
+ /// [`Self::try_iter`] to avoid panics due to invalid data.
///
/// [unvalidated]: Self#Validation
pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
- self.iter_try_impl()
+ self.try_iter_with_shallow_validation()
.map(|result| result.expect("Invalid variant list entry"))
}
+ // Fallible iteration that only performs basic (constant-time) validation.
+ fn try_iter_with_shallow_validation(
+ &self,
+ ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+ (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
+ }
+
// Attempts to retrieve the ith offset from the offset array region of the
byte buffer.
fn get_offset(&self, index: usize) -> Result<usize, ArrowError> {
let byte_range =
self.header.first_offset_byte()..self.first_value_byte;
let offset_bytes = slice_from_slice(self.value, byte_range)?;
self.header.offset_size.unpack_usize(offset_bytes, index)
}
-
- // Fallible version of `get`, performing only basic (constant-time)
validation.
- fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>,
ArrowError> {
Review Comment:
note: moving and changing code in the same commit makes review a _lot_ more
difficult, and refactor + bug fix in the same commit suffers a similar issue. I
know this code inside out and it's still hard to be sure I'm catching
everything -- imagine the burden on a reviewer who isn't as familiar with the
code?
In the future, you might consider curating a stack of three commits for
changes like this:
* code movement only (big, but trivial to review because it doesn't change
anything)
* function renames only (big, but purely mechanical and thus easy to review)
* the actual fix (tiny, and therefore easy to review)
For big changes, some of those commits might even be their own PR... but a
commit stack is often good enough, if called out so reviewers know to look at
the commits in isolation)
##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// [invalid]: Self#Validation
pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
(index < self.num_elements).then(|| {
- self.try_get_impl(index)
- .and_then(Variant::validate)
+ self.try_get_with_shallow_validation(index)
+ .and_then(Variant::with_deep_validation)
.expect("Invalid variant array element")
})
}
/// Fallible version of `get`. Returns element by index, capturing
validation errors
pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError>
{
- self.try_get_impl(index)?.validate()
+ self.try_get_with_shallow_validation(index)?
+ .with_deep_validation()
}
- /// Fallible iteration over the elements of this list.
- pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- self.iter_try_impl().map(|result| result?.validate())
+ // Fallible version of `get`, performing only basic (constant-time)
validation.
+ fn try_get_with_shallow_validation(&self, index: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
+ // Fetch the value bytes between the two offsets for this index, from
the value array region
+ // of the byte buffer
+ let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+ let value_bytes =
+ slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
+
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
- // Fallible iteration that only performs basic (constant-time) validation.
- fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- (0..self.len()).map(move |i| self.try_get_impl(i))
+ /// Fallible iteration over the elements of this list.
+ pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
+ self.try_iter_with_shallow_validation()
+ .map(|result| result?.with_deep_validation())
}
/// Iterates over the values of this list. When working with [unvalidated]
input, consider
- /// [`Self::iter_try`] to avoid panics due to invalid data.
+ /// [`Self::try_iter`] to avoid panics due to invalid data.
///
/// [unvalidated]: Self#Validation
pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
- self.iter_try_impl()
+ self.try_iter_with_shallow_validation()
.map(|result| result.expect("Invalid variant list entry"))
}
+ // Fallible iteration that only performs basic (constant-time) validation.
+ fn try_iter_with_shallow_validation(
+ &self,
+ ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+ (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
Review Comment:
First fix -- good catch!
##########
parquet-variant/src/variant.rs:
##########
@@ -382,21 +385,23 @@ impl<'m, 'v> Variant<'m, 'v> {
VariantBasicType::ShortString => {
Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
}
- VariantBasicType::Object => {
- Variant::Object(VariantObject::try_new_impl(metadata, value)?)
- }
- VariantBasicType::Array =>
Variant::List(VariantList::try_new_impl(metadata, value)?),
+ VariantBasicType::Object => Variant::Object(
+ VariantObject::try_new_with_shallow_validation(metadata,
value)?,
+ ),
+ VariantBasicType::Array =>
Variant::List(VariantList::try_new_with_shallow_validation(
+ metadata, value,
Review Comment:
aside: I don't think fmt usually likes multiple args on a single line? (I
sure sometimes _wish_ it did)
##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// [invalid]: Self#Validation
pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
(index < self.num_elements).then(|| {
- self.try_get_impl(index)
- .and_then(Variant::validate)
+ self.try_get_with_shallow_validation(index)
+ .and_then(Variant::with_deep_validation)
.expect("Invalid variant array element")
})
}
/// Fallible version of `get`. Returns element by index, capturing
validation errors
pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError>
{
- self.try_get_impl(index)?.validate()
+ self.try_get_with_shallow_validation(index)?
+ .with_deep_validation()
}
- /// Fallible iteration over the elements of this list.
- pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- self.iter_try_impl().map(|result| result?.validate())
+ // Fallible version of `get`, performing only basic (constant-time)
validation.
+ fn try_get_with_shallow_validation(&self, index: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
+ // Fetch the value bytes between the two offsets for this index, from
the value array region
+ // of the byte buffer
+ let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+ let value_bytes =
+ slice_from_slice_at_offset(self.value, self.first_value_byte,
byte_range)?;
+
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
}
- // Fallible iteration that only performs basic (constant-time) validation.
- fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
- (0..self.len()).map(move |i| self.try_get_impl(i))
+ /// Fallible iteration over the elements of this list.
+ pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>,
ArrowError>> + '_ {
+ self.try_iter_with_shallow_validation()
+ .map(|result| result?.with_deep_validation())
}
/// Iterates over the values of this list. When working with [unvalidated]
input, consider
- /// [`Self::iter_try`] to avoid panics due to invalid data.
+ /// [`Self::try_iter`] to avoid panics due to invalid data.
///
/// [unvalidated]: Self#Validation
pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
- self.iter_try_impl()
+ self.try_iter_with_shallow_validation()
.map(|result| result.expect("Invalid variant list entry"))
}
+ // Fallible iteration that only performs basic (constant-time) validation.
+ fn try_iter_with_shallow_validation(
+ &self,
+ ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+ (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
+ }
+
// Attempts to retrieve the ith offset from the offset array region of the
byte buffer.
fn get_offset(&self, index: usize) -> Result<usize, ArrowError> {
let byte_range =
self.header.first_offset_byte()..self.first_value_byte;
let offset_bytes = slice_from_slice(self.value, byte_range)?;
self.header.offset_size.unpack_usize(offset_bytes, index)
}
-
- // Fallible version of `get`, performing only basic (constant-time)
validation.
- fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>,
ArrowError> {
Review Comment:
Plus, in my experience, it benefits the author as well. I can't count the
number of times I've discovered unnecessarily ugliness, incomplete
refactorings, test gaps, and even bugs (by inspection) when I did the work to
tease apart a big complicated change into a stack of commits. A bit more work
up front, but higher quality code with review stamps landing faster (sometimes
a _lot_ faster).
##########
parquet-variant/src/variant/list.rs:
##########
@@ -136,18 +136,18 @@ impl<'m, 'v> VariantList<'m, 'v> {
/// This constructor verifies that `value` points to a valid variant array
value. In particular,
/// that all offsets are in-bounds and point to valid (recursively
validated) objects.
pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) ->
Result<Self, ArrowError> {
- Self::try_new_impl(metadata, value)?.validate()
+ Self::try_new_with_shallow_validation(metadata,
value)?.with_deep_validation()
}
pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
- Self::try_new_impl(metadata, value).expect("Invalid variant list
value")
+ Self::try_new_with_shallow_validation(metadata, value).expect("Invalid
variant list value")
Review Comment:
Another long line fmt would split?
##########
parquet-variant/src/variant.rs:
##########
@@ -382,21 +385,23 @@ impl<'m, 'v> Variant<'m, 'v> {
VariantBasicType::ShortString => {
Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
}
- VariantBasicType::Object => {
- Variant::Object(VariantObject::try_new_impl(metadata, value)?)
- }
- VariantBasicType::Array =>
Variant::List(VariantList::try_new_impl(metadata, value)?),
+ VariantBasicType::Object => Variant::Object(
+ VariantObject::try_new_with_shallow_validation(metadata,
value)?,
+ ),
+ VariantBasicType::Array =>
Variant::List(VariantList::try_new_with_shallow_validation(
+ metadata, value,
+ )?),
};
Ok(new_self)
}
/// True if this variant instance has already been [validated].
///
/// [validated]: Self#Validation
- pub fn is_validated(&self) -> bool {
+ pub fn is_fully_validated(&self) -> bool {
match self {
Variant::List(list) => list.is_validated(),
Review Comment:
missed a rename?
```suggestion
Variant::List(list) => list.is_fully_validated(),
```
##########
parquet-variant/src/variant/object.rs:
##########
@@ -236,20 +236,25 @@ impl<'m, 'v> VariantObject<'m, 'v> {
/// field IDs). The latter can only happen when working with an
unvalidated object produced by
/// [`Self::new`].
pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
- (i < self.len()).then(|| self.try_field_impl(i).expect("Invalid object
field value"))
+ (i < self.len()).then(|| {
+ self.try_field_with_shallow_validation(i)
+ .expect("Invalid object field value")
+ })
}
/// Fallible version of `field`. Returns field value by index, capturing
validation errors
pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
- self.try_field_impl(i)?.validate()
+ self.try_field_with_shallow_validation(i)?
+ .with_deep_validation()
}
// Attempts to retrieve the ith field value from the value region of the
byte buffer; it
// performs only basic (constant-cost) validation.
- fn try_field_impl(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+ fn try_field_with_shallow_validation(&self, i: usize) ->
Result<Variant<'m, 'v>, ArrowError> {
let value_bytes = slice_from_slice(self.value,
self.first_value_byte..)?;
let value_bytes = slice_from_slice(value_bytes,
self.get_offset(i)?..)?;
- Variant::try_new_with_metadata(self.metadata, value_bytes)
+
+ Variant::try_new_with_metadata_and_shallow_validation(self.metadata,
value_bytes)
Review Comment:
Good catch. This and the one in ListBuilder were definitely an oversight.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]