This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 1fdb3184c2 [Variant] Improve getter API for `VariantList` and 
`VariantObject` (#7757)
1fdb3184c2 is described below

commit 1fdb3184c206c28417271763c2d8626f498ffd8a
Author: Matthew Kim <[email protected]>
AuthorDate: Fri Jun 27 06:35:00 2025 -0400

    [Variant] Improve getter API for `VariantList` and `VariantObject` (#7757)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/7756
    
    # Rationale for this change
    
    Updates `VariantList` and `VariantObject` getter methods to follow
    similar conventions to `std::Vec` and `std::HashMap`:
    
    | Existing method | Proposed method  |
    |----------------|----------------|
    | `VariantList::get(index) -> Result<Variant>` |
    `VariantList::get(index) -> Option<Variant>` |
    | `VariantObject::field_by_name(name) -> Result<Option<Variant>>` |
    `VariantObject::get(name) -> Option<Variant>` |
    | `VariantObject::field(i) -> Result<Variant>` |
    `VariantObject::field(i) -> Option<Variant>` |
    | `VariantObject::field_name(i) -> Result<Variant>` |
    `VariantObject::field_name(i) -> Option<Variant>` |
    
    
    One thing to note, however, the existing methods all returned
    `Result<T>` since these getters are not only exposed as public API, but
    also used for validation inside the constructor `try_new`.
    
    Since the latter usage requires an error message, I chose to rename the
    original fallible methods with an `_err` suffix and use them internally.
    The new public methods now act as wrappers over these `_err` variants.
    
    
    # Are there any user-facing changes?
    
    New API (and docs with tests)
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 parquet-variant/src/builder.rs        | 12 +++---
 parquet-variant/src/utils.rs          | 20 +++++-----
 parquet-variant/src/variant.rs        |  2 +-
 parquet-variant/src/variant/list.rs   | 25 ++++++++----
 parquet-variant/src/variant/object.rs | 74 +++++++++++++++++++++++++++++------
 5 files changed, 96 insertions(+), 37 deletions(-)

diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index d67ab9c001..74f8bf2a68 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -288,11 +288,11 @@ impl MetadataBuilder {
 /// let variant = Variant::try_new(&metadata, &value).unwrap();
 /// let variant_object = variant.as_object().unwrap();
 /// assert_eq!(
-///   variant_object.field_by_name("first_name").unwrap(),
+///   variant_object.get("first_name"),
 ///   Some(Variant::from("Jiaying"))
 /// );
 /// assert_eq!(
-///   variant_object.field_by_name("last_name").unwrap(),
+///   variant_object.get("last_name"),
 ///   Some(Variant::from("Li"))
 /// );
 /// ```
@@ -367,11 +367,11 @@ impl MetadataBuilder {
 /// let obj1_variant = variant_list.get(0).unwrap();
 /// let obj1 = obj1_variant.as_object().unwrap();
 /// assert_eq!(
-///     obj1.field_by_name("id").unwrap(),
+///     obj1.get("id"),
 ///     Some(Variant::from(1))
 /// );
 /// assert_eq!(
-///     obj1.field_by_name("type").unwrap(),
+///     obj1.get("type"),
 ///     Some(Variant::from("Cauliflower"))
 /// );
 ///
@@ -379,11 +379,11 @@ impl MetadataBuilder {
 /// let obj2 = obj2_variant.as_object().unwrap();
 ///
 /// assert_eq!(
-///     obj2.field_by_name("id").unwrap(),
+///     obj2.get("id"),
 ///     Some(Variant::from(2))
 /// );
 /// assert_eq!(
-///     obj2.field_by_name("type").unwrap(),
+///     obj2.get("type"),
 ///     Some(Variant::from("Beets"))
 /// );
 ///
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index e0f966cab8..765ea04ae6 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -94,33 +94,33 @@ pub(crate) fn string_from_slice(
 /// * `range` - The range to search in
 /// * `target` - The target value to search for
 /// * `key_extractor` - A function that extracts a comparable key from slice 
elements.
-///   This function can fail and return an error.
+///   This function can fail and return None.
 ///
 /// # Returns
-/// * `Ok(Ok(index))` - Element found at the given index
-/// * `Ok(Err(index))` - Element not found, but would be inserted at the given 
index
-/// * `Err(e)` - Key extraction failed with error `e`
-pub(crate) fn try_binary_search_range_by<K, E, F>(
+/// * `Some(Ok(index))` - Element found at the given index
+/// * `Some(Err(index))` - Element not found, but would be inserted at the 
given index
+/// * `None` - Key extraction failed
+pub(crate) fn try_binary_search_range_by<K, F>(
     range: Range<usize>,
     target: &K,
-    mut key_extractor: F,
-) -> Result<Result<usize, usize>, E>
+    key_extractor: F,
+) -> Option<Result<usize, usize>>
 where
     K: Ord,
-    F: FnMut(usize) -> Result<K, E>,
+    F: Fn(usize) -> Option<K>,
 {
     let Range { mut start, mut end } = range;
     while start < end {
         let mid = start + (end - start) / 2;
         let key = key_extractor(mid)?;
         match key.cmp(target) {
-            std::cmp::Ordering::Equal => return Ok(Ok(mid)),
+            std::cmp::Ordering::Equal => return Some(Ok(mid)),
             std::cmp::Ordering::Greater => end = mid,
             std::cmp::Ordering::Less => start = mid + 1,
         }
     }
 
-    Ok(Err(start))
+    Some(Err(start))
 }
 
 /// Attempts to prove a fallible iterator is actually infallible in practice, 
by consuming every
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 4ca23aee0f..28583f1658 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -830,7 +830,7 @@ impl<'m, 'v> Variant<'m, 'v> {
     ///  let variant = Variant::try_new(&metadata, &value).unwrap();
     /// // use the `as_object` method to access the object
     /// let obj = variant.as_object().expect("variant should be an object");
-    /// assert_eq!(obj.field_by_name("name").unwrap(), 
Some(Variant::from("John")));
+    /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
     /// ```
     pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
         if let Variant::Object(obj) = self {
diff --git a/parquet-variant/src/variant/list.rs 
b/parquet-variant/src/variant/list.rs
index 703761b420..42f97fa0d3 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -119,7 +119,20 @@ impl<'m, 'v> VariantList<'m, 'v> {
         self.len() == 0
     }
 
-    pub fn get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+    /// Returns element by index in `0..self.len()`, if any
+    pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
+        if index >= self.num_elements {
+            return None;
+        }
+
+        match self.try_get(index) {
+            Ok(variant) => Some(variant),
+            Err(err) => panic!("validation error: {}", err),
+        }
+    }
+
+    /// Fallible version of `get`. Returns element by index, capturing 
validation errors
+    fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
         if index >= self.num_elements {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "Index {} out of bounds for list of length {}",
@@ -153,7 +166,7 @@ impl<'m, 'v> VariantList<'m, 'v> {
     // Fallible iteration over the fields of this dictionary. The constructor 
traverses the iterator
     // to prove it has no errors, so that all other use sites can blindly 
`unwrap` the result.
     fn iter_checked(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, 
ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.get(i))
+        (0..self.len()).map(move |i| self.try_get(i))
     }
 }
 
@@ -208,11 +221,7 @@ mod tests {
 
         // Test out of bounds access
         let out_of_bounds = variant_list.get(3);
-        assert!(out_of_bounds.is_err());
-        assert!(matches!(
-            out_of_bounds.unwrap_err(),
-            ArrowError::InvalidArgumentError(ref msg) if msg.contains("out of 
bounds")
-        ));
+        assert!(out_of_bounds.is_none());
 
         // Test values iterator
         let values: Vec<_> = variant_list.iter().collect();
@@ -248,7 +257,7 @@ mod tests {
 
         // Test out of bounds access on empty list
         let out_of_bounds = variant_list.get(0);
-        assert!(out_of_bounds.is_err());
+        assert!(out_of_bounds.is_none());
 
         // Test values iterator on empty list
         let values: Vec<_> = variant_list.iter().collect();
diff --git a/parquet-variant/src/variant/object.rs 
b/parquet-variant/src/variant/object.rs
index b52701f8bb..9530f111f1 100644
--- a/parquet-variant/src/variant/object.rs
+++ b/parquet-variant/src/variant/object.rs
@@ -145,7 +145,19 @@ impl<'m, 'v> VariantObject<'m, 'v> {
     }
 
     /// Get a field's value by index in `0..self.len()`
-    pub fn field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+    ///
+    /// # Panics
+    /// If the variant object is corrupted (e.g., invalid offsets or field 
IDs).
+    /// This should never happen since the constructor validates all data 
upfront.
+    pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
+        Some(
+            self.try_field(i)
+                .expect("validation error after construction"),
+        )
+    }
+
+    /// Fallible version of `field`. Returns field value by index, capturing 
validation errors
+    fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
         let start_offset = self.header.field_offset_size.unpack_usize(
             self.value,
             self.field_offsets_start_byte,
@@ -160,7 +172,19 @@ impl<'m, 'v> VariantObject<'m, 'v> {
     }
 
     /// Get a field's name by index in `0..self.len()`
-    pub fn field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
+    ///
+    /// # Panics
+    /// If the variant object is corrupted (e.g., invalid offsets or field 
IDs).
+    /// This should never happen since the constructor validates all data 
upfront.
+    pub fn field_name(&self, i: usize) -> Option<&'m str> {
+        Some(
+            self.try_field_name(i)
+                .expect("validation error after construction"),
+        )
+    }
+
+    /// Fallible version of `field_name`. Returns field name by index, 
capturing validation errors
+    fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
         let field_id =
             self.header
                 .field_id_size
@@ -179,22 +203,22 @@ impl<'m, 'v> VariantObject<'m, 'v> {
     fn iter_checked(
         &self,
     ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> 
+ '_ {
-        (0..self.num_elements).map(move |i| Ok((self.field_name(i)?, 
self.field(i)?)))
+        (0..self.num_elements).map(move |i| Ok((self.try_field_name(i)?, 
self.try_field(i)?)))
     }
 
     /// Returns the value of the field with the specified name, if any.
     ///
     /// `Ok(None)` means the field does not exist; `Err` means the search 
encountered an error.
-    pub fn field_by_name(&self, name: &str) -> Result<Option<Variant<'m, 'v>>, 
ArrowError> {
+    pub fn get(&self, name: &str) -> Option<Variant<'m, 'v>> {
         // Binary search through the field IDs of this object to find the 
requested field name.
         //
         // NOTE: This does not require a sorted metadata dictionary, because 
the variant spec
         // requires object field ids to be lexically sorted by their 
corresponding string values,
         // and probing the dictionary for a field id is always O(1) work.
-        let search_result =
-            try_binary_search_range_by(0..self.num_elements, &name, |i| 
self.field_name(i))?;
+        let i = try_binary_search_range_by(0..self.num_elements, &name, |i| 
self.field_name(i))?
+            .ok()?;
 
-        search_result.ok().map(|i| self.field(i)).transpose()
+        self.field(i)
     }
 }
 
@@ -260,22 +284,37 @@ mod tests {
         assert!(!variant_obj.is_empty());
 
         // Test field access
-        let active_field = variant_obj.field_by_name("active").unwrap();
+        let active_field = variant_obj.get("active");
         assert!(active_field.is_some());
         assert_eq!(active_field.unwrap().as_boolean(), Some(true));
 
-        let age_field = variant_obj.field_by_name("age").unwrap();
+        let age_field = variant_obj.get("age");
         assert!(age_field.is_some());
         assert_eq!(age_field.unwrap().as_int8(), Some(42));
 
-        let name_field = variant_obj.field_by_name("name").unwrap();
+        let name_field = variant_obj.get("name");
         assert!(name_field.is_some());
         assert_eq!(name_field.unwrap().as_string(), Some("hello"));
 
         // Test non-existent field
-        let missing_field = variant_obj.field_by_name("missing").unwrap();
+        let missing_field = variant_obj.get("missing");
         assert!(missing_field.is_none());
 
+        // https://github.com/apache/arrow-rs/issues/7784
+        // Fixme: The following assertion will panic! That is not good
+        // let missing_field_name = variant_obj.field_name(3);
+        // assert!(missing_field_name.is_none());
+        //
+        // Fixme: The `.field_name()` will panic! This is not good
+        // let missing_field_name = variant_obj.field_name(300);
+        // assert!(missing_field_name.is_none());
+
+        // let missing_field_value = variant_obj.field(3);
+        // assert!(missing_field_value.is_none());
+
+        // let missing_field_value = variant_obj.field(300);
+        // assert!(missing_field_value.is_none());
+
         // Test fields iterator
         let fields: Vec<_> = variant_obj.iter().collect();
         assert_eq!(fields.len(), 3);
@@ -289,6 +328,17 @@ mod tests {
 
         assert_eq!(fields[2].0, "name");
         assert_eq!(fields[2].1.as_string(), Some("hello"));
+
+        // Test field access by index
+        // Fields should be in sorted order: active, age, name
+        assert_eq!(variant_obj.field_name(0), Some("active"));
+        assert_eq!(variant_obj.field(0).unwrap().as_boolean(), Some(true));
+
+        assert_eq!(variant_obj.field_name(1), Some("age"));
+        assert_eq!(variant_obj.field(1).unwrap().as_int8(), Some(42));
+
+        assert_eq!(variant_obj.field_name(2), Some("name"));
+        assert_eq!(variant_obj.field(2).unwrap().as_string(), Some("hello"));
     }
 
     #[test]
@@ -316,7 +366,7 @@ mod tests {
         assert!(variant_obj.is_empty());
 
         // Test field access on empty object
-        let missing_field = variant_obj.field_by_name("anything").unwrap();
+        let missing_field = variant_obj.get("anything");
         assert!(missing_field.is_none());
 
         // Test fields iterator on empty object

Reply via email to