scovich commented on code in PR #7871:
URL: https://github.com/apache/arrow-rs/pull/7871#discussion_r2186470644


##########
parquet-variant/src/variant.rs:
##########
@@ -407,16 +412,16 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// Variant leaf values are always valid by construction, but [objects] 
and [arrays] can be
     /// constructed in unvalidated (and potentially invalid) state.
     ///
-    /// If [`Self::is_validated`] is true, validation is a no-op. Otherwise, 
the cost is `O(m + v)`
+    /// If [`Self::is_fully_validated`] is true, validation is a no-op. 
Otherwise, the cost is `O(m + v)`
     /// where `m` and `v` are the sizes of metadata and value buffers, 
respectively.
     ///
     /// [objects]: VariantObject#Validation
     /// [arrays]: VariantList#Validation
-    pub fn validate(self) -> Result<Self, ArrowError> {
+    pub fn with_deep_validation(self) -> Result<Self, ArrowError> {

Review Comment:
   Right now we have "full" and "deep" as ~synonyms? Should we pick just one?
   ```suggestion
       pub fn with_full_validation(self) -> Result<Self, ArrowError> {
   ```
   (IMO `is_deeply_validated` doesn't sound quite as nice as 
`is_fully_validated`)



##########
parquet-variant/src/variant/object.rs:
##########
@@ -281,23 +286,23 @@ impl<'m, 'v> VariantObject<'m, 'v> {
 
     /// Returns an iterator of (name, value) pairs over the fields of this 
object.
     pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> + 
'_ {
-        self.iter_try_impl()
+        self.try_iter_with_shallow_validation()
             .map(|result| result.expect("Invalid variant object field value"))
     }
 
     /// Fallible iteration over the fields of this object.
-    pub fn iter_try(
+    pub fn try_iter(
         &self,
     ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> 
+ '_ {
-        self.iter_try_impl().map(|result| {
+        self.try_iter_with_shallow_validation().map(|result| {
             let (name, value) = result?;
-            Ok((name, value.validate()?))
+            Ok((name, value.with_deep_validation()?))
         })
     }
 
     // Fallible iteration over the fields of this object that performs only 
shallow (constant-cost)
     // validation of field values.
-    fn iter_try_impl(
+    fn try_iter_with_shallow_validation(
         &self,
     ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> 
+ '_ {
         (0..self.num_elements).map(move |i| Ok((self.try_field_name(i)?, 
self.try_field(i)?)))

Review Comment:
   A missed fix:
   ```suggestion
           (0..self.num_elements).map(move |i| {
               let field = self.try_field_with_shallow_validation(i)?;
               Ok((self.try_field_name(i)?, field))
           })
   ```



##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
     /// [invalid]: Self#Validation
     pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
         (index < self.num_elements).then(|| {
-            self.try_get_impl(index)
-                .and_then(Variant::validate)
+            self.try_get_with_shallow_validation(index)
+                .and_then(Variant::with_deep_validation)
                 .expect("Invalid variant array element")
         })
     }
 
     /// Fallible version of `get`. Returns element by index, capturing 
validation errors
     pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> 
{
-        self.try_get_impl(index)?.validate()
+        self.try_get_with_shallow_validation(index)?
+            .with_deep_validation()
     }
 
-    /// Fallible iteration over the elements of this list.
-    pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        self.iter_try_impl().map(|result| result?.validate())
+    // Fallible version of `get`, performing only basic (constant-time) 
validation.
+    fn try_get_with_shallow_validation(&self, index: usize) -> 
Result<Variant<'m, 'v>, ArrowError> {
+        // Fetch the value bytes between the two offsets for this index, from 
the value array region
+        // of the byte buffer
+        let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+        let value_bytes =
+            slice_from_slice_at_offset(self.value, self.first_value_byte, 
byte_range)?;
+
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata, 
value_bytes)
     }
 
-    // Fallible iteration that only performs basic (constant-time) validation.
-    fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.try_get_impl(i))
+    /// Fallible iteration over the elements of this list.
+    pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {

Review Comment:
   I actually had `try_iter` at first, but I changed because the name suggests 
something that returns a Result of Iterator, rather than an Iterator of Result.
   
   No strong opinions tho -- if people like `try_iter` better, let's go for it.



##########
parquet-variant/src/variant.rs:
##########
@@ -323,18 +325,19 @@ impl<'m, 'v> Variant<'m, 'v> {
         metadata: VariantMetadata<'m>,
         value: &'v [u8],
     ) -> Result<Self, ArrowError> {
-        Self::try_new_with_metadata_impl(metadata, value)?.validate()
+        Self::try_new_with_metadata_and_shallow_validation(metadata, 
value)?.with_deep_validation()
     }
 
     /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
     ///
     /// [unvalidated]: Self#Validation
     pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) 
-> Self {
-        Self::try_new_with_metadata_impl(metadata, value).expect("Invalid 
variant")
+        Self::try_new_with_metadata_and_shallow_validation(metadata, value)
+            .expect("Invalid variant")
     }
 
     // The actual constructor, which only performs shallow (constant-time) 
validation.
-    fn try_new_with_metadata_impl(
+    fn try_new_with_metadata_and_shallow_validation(

Review Comment:
   Good thing this isn't part of the public API... quite a mouthful!



##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
     /// [invalid]: Self#Validation
     pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
         (index < self.num_elements).then(|| {
-            self.try_get_impl(index)
-                .and_then(Variant::validate)
+            self.try_get_with_shallow_validation(index)
+                .and_then(Variant::with_deep_validation)
                 .expect("Invalid variant array element")
         })
     }
 
     /// Fallible version of `get`. Returns element by index, capturing 
validation errors
     pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> 
{
-        self.try_get_impl(index)?.validate()
+        self.try_get_with_shallow_validation(index)?
+            .with_deep_validation()
     }
 
-    /// Fallible iteration over the elements of this list.
-    pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        self.iter_try_impl().map(|result| result?.validate())
+    // Fallible version of `get`, performing only basic (constant-time) 
validation.
+    fn try_get_with_shallow_validation(&self, index: usize) -> 
Result<Variant<'m, 'v>, ArrowError> {
+        // Fetch the value bytes between the two offsets for this index, from 
the value array region
+        // of the byte buffer
+        let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+        let value_bytes =
+            slice_from_slice_at_offset(self.value, self.first_value_byte, 
byte_range)?;
+
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata, 
value_bytes)
     }
 
-    // Fallible iteration that only performs basic (constant-time) validation.
-    fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.try_get_impl(i))
+    /// Fallible iteration over the elements of this list.
+    pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
+        self.try_iter_with_shallow_validation()
+            .map(|result| result?.with_deep_validation())
     }
 
     /// Iterates over the values of this list. When working with [unvalidated] 
input, consider
-    /// [`Self::iter_try`] to avoid panics due to invalid data.
+    /// [`Self::try_iter`] to avoid panics due to invalid data.
     ///
     /// [unvalidated]: Self#Validation
     pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
-        self.iter_try_impl()
+        self.try_iter_with_shallow_validation()
             .map(|result| result.expect("Invalid variant list entry"))
     }
 
+    // Fallible iteration that only performs basic (constant-time) validation.
+    fn try_iter_with_shallow_validation(
+        &self,
+    ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+        (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
+    }
+
     // Attempts to retrieve the ith offset from the offset array region of the 
byte buffer.
     fn get_offset(&self, index: usize) -> Result<usize, ArrowError> {
         let byte_range = 
self.header.first_offset_byte()..self.first_value_byte;
         let offset_bytes = slice_from_slice(self.value, byte_range)?;
         self.header.offset_size.unpack_usize(offset_bytes, index)
     }
-
-    // Fallible version of `get`, performing only basic (constant-time) 
validation.
-    fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>, 
ArrowError> {

Review Comment:
   note: moving and changing code in the same commit makes review a _lot_ more 
difficult, and refactor + bug fix in the same commit suffers a similar issue. I 
know this code inside out and it's still hard to be sure I'm catching 
everything -- imagine the burden on a reviewer who isn't as familiar with the 
code?
   
   In the future, you might consider curating a stack of three commits for 
changes like this:
   * code movement only (big, but trivial to review because it doesn't change 
anything)
   * function renames only (big, but purely mechanical and thus easy to review)
   * the actual fix (tiny, and therefore easy to review)
   
   For big changes, some of those commits might even be their own PR... but a 
commit stack is often good enough, if called out so reviewers know to look at 
the commits in isolation)



##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
     /// [invalid]: Self#Validation
     pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
         (index < self.num_elements).then(|| {
-            self.try_get_impl(index)
-                .and_then(Variant::validate)
+            self.try_get_with_shallow_validation(index)
+                .and_then(Variant::with_deep_validation)
                 .expect("Invalid variant array element")
         })
     }
 
     /// Fallible version of `get`. Returns element by index, capturing 
validation errors
     pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> 
{
-        self.try_get_impl(index)?.validate()
+        self.try_get_with_shallow_validation(index)?
+            .with_deep_validation()
     }
 
-    /// Fallible iteration over the elements of this list.
-    pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        self.iter_try_impl().map(|result| result?.validate())
+    // Fallible version of `get`, performing only basic (constant-time) 
validation.
+    fn try_get_with_shallow_validation(&self, index: usize) -> 
Result<Variant<'m, 'v>, ArrowError> {
+        // Fetch the value bytes between the two offsets for this index, from 
the value array region
+        // of the byte buffer
+        let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+        let value_bytes =
+            slice_from_slice_at_offset(self.value, self.first_value_byte, 
byte_range)?;
+
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata, 
value_bytes)
     }
 
-    // Fallible iteration that only performs basic (constant-time) validation.
-    fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.try_get_impl(i))
+    /// Fallible iteration over the elements of this list.
+    pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
+        self.try_iter_with_shallow_validation()
+            .map(|result| result?.with_deep_validation())
     }
 
     /// Iterates over the values of this list. When working with [unvalidated] 
input, consider
-    /// [`Self::iter_try`] to avoid panics due to invalid data.
+    /// [`Self::try_iter`] to avoid panics due to invalid data.
     ///
     /// [unvalidated]: Self#Validation
     pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
-        self.iter_try_impl()
+        self.try_iter_with_shallow_validation()
             .map(|result| result.expect("Invalid variant list entry"))
     }
 
+    // Fallible iteration that only performs basic (constant-time) validation.
+    fn try_iter_with_shallow_validation(
+        &self,
+    ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+        (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))

Review Comment:
   First fix -- good catch!



##########
parquet-variant/src/variant.rs:
##########
@@ -382,21 +385,23 @@ impl<'m, 'v> Variant<'m, 'v> {
             VariantBasicType::ShortString => {
                 
Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
             }
-            VariantBasicType::Object => {
-                Variant::Object(VariantObject::try_new_impl(metadata, value)?)
-            }
-            VariantBasicType::Array => 
Variant::List(VariantList::try_new_impl(metadata, value)?),
+            VariantBasicType::Object => Variant::Object(
+                VariantObject::try_new_with_shallow_validation(metadata, 
value)?,
+            ),
+            VariantBasicType::Array => 
Variant::List(VariantList::try_new_with_shallow_validation(
+                metadata, value,

Review Comment:
   aside: I don't think fmt usually likes multiple args on a single line? (I 
sure sometimes _wish_ it did)



##########
parquet-variant/src/variant/list.rs:
##########
@@ -232,52 +232,57 @@ impl<'m, 'v> VariantList<'m, 'v> {
     /// [invalid]: Self#Validation
     pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
         (index < self.num_elements).then(|| {
-            self.try_get_impl(index)
-                .and_then(Variant::validate)
+            self.try_get_with_shallow_validation(index)
+                .and_then(Variant::with_deep_validation)
                 .expect("Invalid variant array element")
         })
     }
 
     /// Fallible version of `get`. Returns element by index, capturing 
validation errors
     pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> 
{
-        self.try_get_impl(index)?.validate()
+        self.try_get_with_shallow_validation(index)?
+            .with_deep_validation()
     }
 
-    /// Fallible iteration over the elements of this list.
-    pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        self.iter_try_impl().map(|result| result?.validate())
+    // Fallible version of `get`, performing only basic (constant-time) 
validation.
+    fn try_get_with_shallow_validation(&self, index: usize) -> 
Result<Variant<'m, 'v>, ArrowError> {
+        // Fetch the value bytes between the two offsets for this index, from 
the value array region
+        // of the byte buffer
+        let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?;
+        let value_bytes =
+            slice_from_slice_at_offset(self.value, self.first_value_byte, 
byte_range)?;
+
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata, 
value_bytes)
     }
 
-    // Fallible iteration that only performs basic (constant-time) validation.
-    fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.try_get_impl(i))
+    /// Fallible iteration over the elements of this list.
+    pub fn try_iter(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
+        self.try_iter_with_shallow_validation()
+            .map(|result| result?.with_deep_validation())
     }
 
     /// Iterates over the values of this list. When working with [unvalidated] 
input, consider
-    /// [`Self::iter_try`] to avoid panics due to invalid data.
+    /// [`Self::try_iter`] to avoid panics due to invalid data.
     ///
     /// [unvalidated]: Self#Validation
     pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
-        self.iter_try_impl()
+        self.try_iter_with_shallow_validation()
             .map(|result| result.expect("Invalid variant list entry"))
     }
 
+    // Fallible iteration that only performs basic (constant-time) validation.
+    fn try_iter_with_shallow_validation(
+        &self,
+    ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+        (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i))
+    }
+
     // Attempts to retrieve the ith offset from the offset array region of the 
byte buffer.
     fn get_offset(&self, index: usize) -> Result<usize, ArrowError> {
         let byte_range = 
self.header.first_offset_byte()..self.first_value_byte;
         let offset_bytes = slice_from_slice(self.value, byte_range)?;
         self.header.offset_size.unpack_usize(offset_bytes, index)
     }
-
-    // Fallible version of `get`, performing only basic (constant-time) 
validation.
-    fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>, 
ArrowError> {

Review Comment:
   Plus, in my experience, it benefits the author as well. I can't count the 
number of times I've discovered unnecessarily ugliness, incomplete 
refactorings, test gaps, and even bugs (by inspection) when I did the work to 
tease apart a big complicated change into a stack of commits. A bit more work 
up front, but higher quality code with review stamps landing faster (sometimes 
a _lot_ faster).



##########
parquet-variant/src/variant/list.rs:
##########
@@ -136,18 +136,18 @@ impl<'m, 'v> VariantList<'m, 'v> {
     /// This constructor verifies that `value` points to a valid variant array 
value. In particular,
     /// that all offsets are in-bounds and point to valid (recursively 
validated) objects.
     pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> 
Result<Self, ArrowError> {
-        Self::try_new_impl(metadata, value)?.validate()
+        Self::try_new_with_shallow_validation(metadata, 
value)?.with_deep_validation()
     }
 
     pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
-        Self::try_new_impl(metadata, value).expect("Invalid variant list 
value")
+        Self::try_new_with_shallow_validation(metadata, value).expect("Invalid 
variant list value")

Review Comment:
   Another long line fmt would split?



##########
parquet-variant/src/variant.rs:
##########
@@ -382,21 +385,23 @@ impl<'m, 'v> Variant<'m, 'v> {
             VariantBasicType::ShortString => {
                 
Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
             }
-            VariantBasicType::Object => {
-                Variant::Object(VariantObject::try_new_impl(metadata, value)?)
-            }
-            VariantBasicType::Array => 
Variant::List(VariantList::try_new_impl(metadata, value)?),
+            VariantBasicType::Object => Variant::Object(
+                VariantObject::try_new_with_shallow_validation(metadata, 
value)?,
+            ),
+            VariantBasicType::Array => 
Variant::List(VariantList::try_new_with_shallow_validation(
+                metadata, value,
+            )?),
         };
         Ok(new_self)
     }
 
     /// True if this variant instance has already been [validated].
     ///
     /// [validated]: Self#Validation
-    pub fn is_validated(&self) -> bool {
+    pub fn is_fully_validated(&self) -> bool {
         match self {
             Variant::List(list) => list.is_validated(),

Review Comment:
   missed a rename?
   ```suggestion
               Variant::List(list) => list.is_fully_validated(),
   ```



##########
parquet-variant/src/variant/object.rs:
##########
@@ -236,20 +236,25 @@ impl<'m, 'v> VariantObject<'m, 'v> {
     /// field IDs). The latter can only happen when working with an 
unvalidated object produced by
     /// [`Self::new`].
     pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
-        (i < self.len()).then(|| self.try_field_impl(i).expect("Invalid object 
field value"))
+        (i < self.len()).then(|| {
+            self.try_field_with_shallow_validation(i)
+                .expect("Invalid object field value")
+        })
     }
 
     /// Fallible version of `field`. Returns field value by index, capturing 
validation errors
     pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
-        self.try_field_impl(i)?.validate()
+        self.try_field_with_shallow_validation(i)?
+            .with_deep_validation()
     }
 
     // Attempts to retrieve the ith field value from the value region of the 
byte buffer; it
     // performs only basic (constant-cost) validation.
-    fn try_field_impl(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+    fn try_field_with_shallow_validation(&self, i: usize) -> 
Result<Variant<'m, 'v>, ArrowError> {
         let value_bytes = slice_from_slice(self.value, 
self.first_value_byte..)?;
         let value_bytes = slice_from_slice(value_bytes, 
self.get_offset(i)?..)?;
-        Variant::try_new_with_metadata(self.metadata, value_bytes)
+
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata, 
value_bytes)

Review Comment:
   Good catch. This and the one in ListBuilder were definitely an oversight.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to