This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d809f19bc0 [Variant] Add documentation, tests and cleaner api for 
Variant::get_path (#7942)
d809f19bc0 is described below

commit d809f19bc0fe2c3c1968f5111b6afa785d2e8bcd
Author: Andrew Lamb <and...@nerdnetworks.org>
AuthorDate: Thu Jul 17 07:38:12 2025 -0400

    [Variant] Add documentation, tests and cleaner api for Variant::get_path 
(#7942)
    
    # Which issue does this PR close?
    
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    
    - Follow on to https://github.com/apache/arrow-rs/pull/7919
    
    # Rationale for this change
    
    While reviewing https://github.com/apache/arrow-rs/pull/7919 from
    @Samyak2 I found I wanted to write some additional tests directly for
    `Variant::get_path`
    
    When I started doing that I found it was somewhat awkward to write
    examples, so I added some new conversion routines to make it easier.
    
    # What changes are included in this PR?
    
    1. Add doc examples (and thus tests) of `Variant::get_path` and `VariantPath`
    2. Add more documentation
    
    # Are these changes tested?
    Yes, by doc examples which run in CI
    # Are there any user-facing changes?
    
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
---
 parquet-variant-compute/src/variant_get.rs |  35 +++------
 parquet-variant/src/lib.rs                 |   7 +-
 parquet-variant/src/path.rs                | 117 +++++++++++++++++++++++++++--
 parquet-variant/src/variant.rs             |  33 ++++++++
 4 files changed, 160 insertions(+), 32 deletions(-)

diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 7d37a8b645..b3a3d9e41f 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -22,7 +22,7 @@ use arrow::{
     error::Result,
 };
 use arrow_schema::{ArrowError, Field};
-use parquet_variant::path::VariantPath;
+use parquet_variant::VariantPath;
 
 use crate::{VariantArray, VariantArrayBuilder};
 
@@ -41,8 +41,7 @@ pub fn variant_get(input: &ArrayRef, options: GetOptions) -> 
Result<ArrayRef> {
 
     if let Some(as_type) = options.as_type {
         return Err(ArrowError::NotYetImplemented(format!(
-            "getting a {} from a VariantArray is not implemented yet",
-            as_type
+            "getting a {as_type} from a VariantArray is not implemented yet",
         )));
     }
 
@@ -91,7 +90,7 @@ mod test {
     use std::sync::Arc;
 
     use arrow::array::{Array, ArrayRef, StringArray};
-    use parquet_variant::path::{VariantPath, VariantPathElement};
+    use parquet_variant::VariantPath;
 
     use crate::batch_json_string_to_variant;
     use crate::VariantArray;
@@ -133,29 +132,21 @@ mod test {
     fn get_primitive_variant_field() {
         single_variant_get_test(
             r#"{"some_field": 1234}"#,
-            vec![VariantPathElement::field("some_field".into())].into(),
+            VariantPath::from("some_field"),
             "1234",
         );
     }
 
     #[test]
     fn get_primitive_variant_list_index() {
-        single_variant_get_test(
-            "[1234, 5678]",
-            vec![VariantPathElement::index(0)].into(),
-            "1234",
-        );
+        single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
     }
 
     #[test]
     fn get_primitive_variant_inside_object_of_object() {
         single_variant_get_test(
             r#"{"top_level_field": {"inner_field": 1234}}"#,
-            vec![
-                VariantPathElement::field("top_level_field".into()),
-                VariantPathElement::field("inner_field".into()),
-            ]
-            .into(),
+            VariantPath::from("top_level_field").join("inner_field"),
             "1234",
         );
     }
@@ -164,11 +155,7 @@ mod test {
     fn get_primitive_variant_inside_list_of_object() {
         single_variant_get_test(
             r#"[{"some_field": 1234}]"#,
-            vec![
-                VariantPathElement::index(0),
-                VariantPathElement::field("some_field".into()),
-            ]
-            .into(),
+            VariantPath::from(0).join("some_field"),
             "1234",
         );
     }
@@ -177,11 +164,7 @@ mod test {
     fn get_primitive_variant_inside_object_of_list() {
         single_variant_get_test(
             r#"{"some_field": [1234]}"#,
-            vec![
-                VariantPathElement::field("some_field".into()),
-                VariantPathElement::index(0),
-            ]
-            .into(),
+            VariantPath::from("some_field").join(0),
             "1234",
         );
     }
@@ -190,7 +173,7 @@ mod test {
     fn get_complex_variant() {
         single_variant_get_test(
             r#"{"top_level_field": {"inner_field": 1234}}"#,
-            vec![VariantPathElement::field("top_level_field".into())].into(),
+            VariantPath::from("top_level_field"),
             r#"{"inner_field": 1234}"#,
         );
     }
diff --git a/parquet-variant/src/lib.rs b/parquet-variant/src/lib.rs
index d04c59605f..a57b470979 100644
--- a/parquet-variant/src/lib.rs
+++ b/parquet-variant/src/lib.rs
@@ -20,6 +20,10 @@
 //! [Variant Binary Encoding]: 
https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
 //! [Apache Parquet]: https://parquet.apache.org/
 //!
+//! ## Main APIs
+//! - [`Variant`]: Represents a variant value, which can be an object, list, 
or primitive.
+//! - [`VariantBuilder`]: For building `Variant` values.
+//!
 //! ## 🚧 Work In Progress
 //!
 //! This crate is under active development and is not yet ready for production 
use.
@@ -29,9 +33,10 @@
 
 mod builder;
 mod decoder;
-pub mod path;
+mod path;
 mod utils;
 mod variant;
 
 pub use builder::*;
+pub use path::{VariantPath, VariantPathElement};
 pub use variant::*;
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index 1643d9c87c..42dbdb3abc 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -16,18 +16,77 @@
 // under the License.
 use std::{borrow::Cow, ops::Deref};
 
-/// Represents a qualified path to a potential subfield or index of a variant 
value.
-#[derive(Debug, Clone)]
+/// Represents a qualified path to a potential subfield or index of a variant
+/// value.
+///
+/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
+/// a variant value.
+///
+/// [`Variant::get_path`]: crate::Variant::get_path
+///
+/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
+/// from a single field name or index.
+///
+/// # Example: Simple paths
+/// ```rust
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // access the field "foo" in a variant object value
+/// let path = VariantPath::from("foo");
+/// // access the first element in a variant list value
+/// let path = VariantPath::from(0);
+/// ```
+///
+/// # Example: Compound paths
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // You can also create a path by joining elements together:
+/// // access the field "foo" and then the first element in a variant list 
value
+/// let path = VariantPath::from("foo").join(0);
+/// // this is the same as the previous one
+/// let path2 = VariantPath::new(vec!["foo".into(), 0.into()]);
+/// assert_eq!(path, path2);
+/// // you can also create a path from a vector of `VariantPathElement` 
directly
+/// let path3 = VariantPath::new(vec![
+///   VariantPathElement::field("foo"),
+///   VariantPathElement::index(0)
+/// ]);
+/// assert_eq!(path, path3);
+/// ```
+///
+/// # Example: Accessing Compound paths
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // You can access the individual path elements using slice indexing
+/// // build the path "foo" -> "bar" -> "baz", then inspect its second element
+/// let path = VariantPath::from("foo")
+///   .join("bar")
+///   .join("baz");
+/// assert_eq!(path[1], VariantPathElement::field("bar"));
+/// ```
+#[derive(Debug, Clone, PartialEq)]
 pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
 
 impl<'a> VariantPath<'a> {
+    /// Create a new `VariantPath` from a vector of `VariantPathElement`.
     pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
         Self(path)
     }
 
+    /// Return the inner path elements.
     pub fn path(&self) -> &Vec<VariantPathElement> {
         &self.0
     }
+
+    /// Return a new `VariantPath` with element appended
+    pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
+        self.push(element);
+        self
+    }
+
+    /// Append a new element to the path
+    pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
+        self.0.push(element.into());
+    }
 }
 
 impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
@@ -36,6 +95,20 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for 
VariantPath<'a> {
     }
 }
 
+/// Create from &str
+impl<'a> From<&'a str> for VariantPath<'a> {
+    fn from(path: &'a str) -> Self {
+        VariantPath::new(vec![path.into()])
+    }
+}
+
+/// Create from usize
+impl<'a> From<usize> for VariantPath<'a> {
+    fn from(index: usize) -> Self {
+        VariantPath::new(vec![VariantPathElement::index(index)])
+    }
+}
+
 impl<'a> Deref for VariantPath<'a> {
     type Target = [VariantPathElement<'a>];
 
@@ -44,8 +117,10 @@ impl<'a> Deref for VariantPath<'a> {
     }
 }
 
-/// Element of a path
-#[derive(Debug, Clone)]
+/// Element of a [`VariantPath`] that can be a field name or an index.
+///
+/// See [`VariantPath`] for more details and examples.
+#[derive(Debug, Clone, PartialEq)]
 pub enum VariantPathElement<'a> {
     /// Access field with name `name`
     Field { name: Cow<'a, str> },
@@ -54,7 +129,8 @@ pub enum VariantPathElement<'a> {
 }
 
 impl<'a> VariantPathElement<'a> {
-    pub fn field(name: Cow<'a, str>) -> VariantPathElement<'a> {
+    pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
+        let name = name.into();
         VariantPathElement::Field { name }
     }
 
@@ -62,3 +138,34 @@ impl<'a> VariantPathElement<'a> {
         VariantPathElement::Index { index }
     }
 }
+
+// Conversion utilities for `VariantPathElement` from string types
+impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
+    fn from(name: Cow<'a, str>) -> Self {
+        VariantPathElement::field(name)
+    }
+}
+
+impl<'a> From<&'a str> for VariantPathElement<'a> {
+    fn from(name: &'a str) -> Self {
+        VariantPathElement::field(Cow::Borrowed(name))
+    }
+}
+
+impl<'a> From<String> for VariantPathElement<'a> {
+    fn from(name: String) -> Self {
+        VariantPathElement::field(Cow::Owned(name))
+    }
+}
+
+impl<'a> From<&'a String> for VariantPathElement<'a> {
+    fn from(name: &'a String) -> Self {
+        VariantPathElement::field(Cow::Borrowed(name.as_str()))
+    }
+}
+
+impl<'a> From<usize> for VariantPathElement<'a> {
+    fn from(index: usize) -> Self {
+        VariantPathElement::index(index)
+    }
+}
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 29b1919708..7792d9bdb5 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -942,6 +942,8 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// Returns `Some(&VariantObject)` for object variants,
     /// `None` for non-object variants.
     ///
+    /// See [`Self::get_path`] to dynamically traverse objects
+    ///
     /// # Examples
     /// ```
     /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
@@ -999,6 +1001,8 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// Returns `Some(&VariantList)` for list variants,
     /// `None` for non-list variants.
     ///
+    /// See [`Self::get_path`] to dynamically traverse lists
+    ///
     /// # Examples
     /// ```
     /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
@@ -1068,6 +1072,35 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// Return a new Variant with the path followed.
     ///
     /// If the path is not found, `None` is returned.
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, 
VariantPath};
+    /// # let mut builder = VariantBuilder::new();
+    /// # let mut obj = builder.new_object();
+    /// # let mut list = obj.new_list("foo");
+    /// # list.append_value("bar");
+    /// # list.append_value("baz");
+    /// # list.finish();
+    /// # obj.finish().unwrap();
+    /// # let (metadata, value) = builder.finish();
+    /// // given a variant like `{"foo": ["bar", "baz"]}`
+    /// let variant = Variant::new(&metadata, &value);
+    /// // Accessing a non existent path returns None
+    /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
+    /// // Access obj["foo"]
+    /// let path = VariantPath::from("foo");
+    /// let foo = variant.get_path(&path).expect("field `foo` should exist");
+    /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
+    /// // Access foo[0]
+    /// let path = VariantPath::from(0);
+    /// let bar = foo.get_path(&path).expect("element 0 should exist");
+    /// // bar is a string
+    /// assert_eq!(bar.as_string(), Some("bar"));
+    /// // You can also access nested paths
+    /// let path = VariantPath::from("foo").join(0);
+    /// assert_eq!(variant.get_path(&path).unwrap(), bar);
+    /// ```
     pub fn get_path(&self, path: &VariantPath) -> Option<Variant> {
         path.iter()
             .try_fold(self.clone(), |output, element| match element {

Reply via email to