This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new f7f8f95368 [Variant] Add `VariantBuilder` values check (#10016)
f7f8f95368 is described below

commit f7f8f953686add2c4979ac88eb3db884d92504ea
Author: Konstantin Tarasov <[email protected]>
AuthorDate: Mon Jun 22 14:15:37 2026 -0400

    [Variant] Add `VariantBuilder` values check (#10016)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    -->
    
    - Closes #7870.
    
    # Rationale for this change
    
    - (Single) `VariantBuilder` accepts 0/1+ values on finish.
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    # What changes are included in this PR?
    - Add only 1 value validation for `VariantBuilder`
    - Move `try_new_list/try_new_object` to `VariantBuilder`
    - Replaced `VariantBuilder` with `WritableMetadataBuilder` in tests that
    only build metadata.
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    # Are these changes tested?
    - Yes, existing unit tests pass.
    - Ran benchmarks locally and added some `[#inline]`
    
    
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    
    If this PR claims a performance improvement, please include evidence
    such as benchmark results.
    -->
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 parquet-variant/src/builder.rs           | 236 ++++++++++++++++++++++++++++---
 parquet-variant/src/builder/metadata.rs  |  44 +-----
 parquet-variant/src/builder/object.rs    |  15 +-
 parquet-variant/tests/variant_interop.rs |  11 +-
 4 files changed, 239 insertions(+), 67 deletions(-)

diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index e6122f062c..b304dd8102 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -52,6 +52,31 @@ pub(crate) fn int_size(v: usize) -> OffsetSizeBytes {
     }
 }
 
+const ONE_TOP_LEVEL_VALUE_MSG: &str =
+    "VariantBuilder already contains a top-level variant value; only one is 
allowed";
+const EMPTY_BUILDER_MSG: &str =
+    "VariantBuilder is empty; append a top-level value before calling 
finish()";
+
+// Error/panic construction is kept out-of-line and cold so the per-call 
guards on the
+// builder hot paths stay small enough to inline as a single predictable 
branch.
+#[cold]
+#[inline(never)]
+fn top_level_value_panic() -> ! {
+    panic!("{ONE_TOP_LEVEL_VALUE_MSG}");
+}
+
+#[cold]
+#[inline(never)]
+fn top_level_value_error() -> ArrowError {
+    ArrowError::InvalidArgumentError(ONE_TOP_LEVEL_VALUE_MSG.into())
+}
+
+#[cold]
+#[inline(never)]
+fn empty_builder_error() -> ArrowError {
+    ArrowError::InvalidArgumentError(EMPTY_BUILDER_MSG.into())
+}
+
 /// Wrapper around a `Vec<u8>` that provides methods for appending
 /// primitive values, variant types, and metadata.
 ///
@@ -792,30 +817,85 @@ impl VariantBuilder {
         self.metadata_builder.upsert_field_name(field_name);
     }
 
+    /// Returns true if a top-level variant value has already been written to 
this builder.
+    ///
+    /// A [`VariantBuilder`] holds exactly one top-level variant value. Any 
committed top-level
+    /// value leaves bytes in the value buffer; a child builder dropped 
without `finish()` has
+    /// its bytes rolled back by [`ParentState`], so the offset is a faithful 
indicator.
+    #[inline]
+    fn has_top_level_value(&self) -> bool {
+        self.value_builder.offset() != 0
+    }
+
+    #[inline]
+    fn ensure_no_top_level_value(&self) {
+        if self.has_top_level_value() {
+            top_level_value_panic();
+        }
+    }
+
+    #[inline]
+    fn check_no_top_level_value(&self) -> Result<(), ArrowError> {
+        if self.has_top_level_value() {
+            return Err(top_level_value_error());
+        }
+        Ok(())
+    }
+
     /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
     ///
     /// See the examples on [`VariantBuilder`] for usage.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a top-level variant value has already been written to this 
builder. For a
+    /// fallible version, use [`VariantBuilder::try_new_list`].
     pub fn new_list(&mut self) -> ListBuilder<'_, ()> {
+        self.try_new_list().unwrap()
+    }
+
+    /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
+    ///
+    /// Returns an error if a top-level variant value has already been written 
to this builder.
+    pub fn try_new_list(&mut self) -> Result<ListBuilder<'_, ()>, ArrowError> {
+        self.check_no_top_level_value()?;
         let parent_state =
             ParentState::variant(&mut self.value_builder, &mut 
self.metadata_builder);
-        ListBuilder::new(parent_state, self.validate_unique_fields)
+        Ok(ListBuilder::new(parent_state, self.validate_unique_fields))
     }
 
     /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
     ///
     /// See the examples on [`VariantBuilder`] for usage.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a top-level variant value has already been written to this 
builder. For a
+    /// fallible version, use [`VariantBuilder::try_new_object`].
     pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> {
+        self.try_new_object().unwrap()
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// Returns an error if a top-level variant value has already been written 
to this builder.
+    pub fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, ()>, 
ArrowError> {
+        self.check_no_top_level_value()?;
         let parent_state =
             ParentState::variant(&mut self.value_builder, &mut 
self.metadata_builder);
-        ObjectBuilder::new(parent_state, self.validate_unique_fields)
+        Ok(ObjectBuilder::new(
+            parent_state,
+            self.validate_unique_fields,
+        ))
     }
 
     /// Append a value to the builder.
     ///
     /// # Panics
     ///
-    /// This method will panic if the variant contains duplicate field names 
in objects
-    /// when validation is enabled. For a fallible version, use 
[`VariantBuilder::try_append_value`]
+    /// Panics if a top-level variant value has already been written to this 
builder, or if the
+    /// variant contains duplicate field names in objects when validation is 
enabled. For a
+    /// fallible version, use [`VariantBuilder::try_append_value`].
     ///
     /// # Example
     /// ```
@@ -825,15 +905,19 @@ impl VariantBuilder {
     /// builder.append_value(42i8);
     /// ```
     pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
+        self.ensure_no_top_level_value();
         let state = ParentState::variant(&mut self.value_builder, &mut 
self.metadata_builder);
         ValueBuilder::append_variant(state, value.into())
     }
 
     /// Append a value to the builder.
+    ///
+    /// Returns an error if a top-level variant value has already been written 
to this builder.
     pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
         &mut self,
         value: T,
     ) -> Result<(), ArrowError> {
+        self.check_no_top_level_value()?;
         let state = ParentState::variant(&mut self.value_builder, &mut 
self.metadata_builder);
         ValueBuilder::try_append_variant(state, value.into())
     }
@@ -846,18 +930,51 @@ impl VariantBuilder {
     ///
     /// The caller must ensure that the metadata dictionary entries are 
already built and correct for
     /// any objects or lists being appended.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a top-level variant value has already been written to this 
builder. For a
+    /// fallible version, use [`VariantBuilder::try_append_value_bytes`].
     pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 
'd>>) {
+        self.try_append_value_bytes(value).unwrap()
+    }
+
+    /// Tries to append a variant value to the builder by copying raw bytes 
when possible.
+    ///
+    /// This is the fallible version of 
[`VariantBuilder::append_value_bytes`]. Returns an error
+    /// if a top-level variant value has already been written to this builder.
+    pub fn try_append_value_bytes<'m, 'd>(
+        &mut self,
+        value: impl Into<Variant<'m, 'd>>,
+    ) -> Result<(), ArrowError> {
+        self.check_no_top_level_value()?;
         let state = ParentState::variant(&mut self.value_builder, &mut 
self.metadata_builder);
         ValueBuilder::append_variant_bytes(state, value.into());
+        Ok(())
+    }
+
+    /// Finish the builder and return the metadata and value buffers.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no top-level variant value has been appended. For a fallible 
version, use
+    /// [`VariantBuilder::try_finish`].
+    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
+        self.try_finish().expect(EMPTY_BUILDER_MSG)
     }
 
     /// Finish the builder and return the metadata and value buffers.
-    pub fn finish(mut self) -> (Vec<u8>, Vec<u8>) {
+    ///
+    /// Returns an error if no top-level variant value has been appended.
+    pub fn try_finish(mut self) -> Result<(Vec<u8>, Vec<u8>), ArrowError> {
+        if !self.has_top_level_value() {
+            return Err(empty_builder_error());
+        }
         self.metadata_builder.finish();
-        (
+        Ok((
             self.metadata_builder.into_inner(),
             self.value_builder.into_inner(),
-        )
+        ))
     }
 }
 
@@ -915,11 +1032,11 @@ impl VariantBuilderExt for VariantBuilder {
     }
 
     fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, 
ArrowError> {
-        Ok(self.new_list())
+        self.try_new_list()
     }
 
     fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, 
ArrowError> {
-        Ok(self.new_object())
+        self.try_new_object()
     }
 }
 
@@ -964,6 +1081,96 @@ mod tests {
         assert_eq!(variant, expected);
     }
 
+    #[test]
+    fn test_try_finish_empty_builder_errors() {
+        let builder = VariantBuilder::new();
+        let err = builder.try_finish().unwrap_err();
+        assert!(err.to_string().contains("empty"), "unexpected error: {err}");
+    }
+
+    #[test]
+    #[should_panic(expected = "empty")]
+    fn test_finish_empty_builder_panics() {
+        let builder = VariantBuilder::new();
+        let _ = builder.finish();
+    }
+
+    #[test]
+    fn test_try_append_value_after_value_errors() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let err = builder.try_append_value(2i32).unwrap_err();
+        assert!(
+            err.to_string().contains("only one is allowed"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    fn test_try_append_value_bytes_after_value_errors() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let err = builder.try_append_value_bytes(2i32).unwrap_err();
+        assert!(
+            err.to_string().contains("only one is allowed"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    fn test_try_new_list_after_value_errors() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let err = builder.try_new_list().expect_err("expected error");
+        assert!(
+            err.to_string().contains("only one is allowed"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    fn test_try_new_object_after_value_errors() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let err = builder.try_new_object().expect_err("expected error");
+        assert!(
+            err.to_string().contains("only one is allowed"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    #[should_panic(expected = "only one is allowed")]
+    fn test_append_value_after_value_panics() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        builder.append_value(2i32);
+    }
+
+    #[test]
+    #[should_panic(expected = "only one is allowed")]
+    fn test_append_value_bytes_after_value_panics() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        builder.append_value_bytes(2i32);
+    }
+
+    #[test]
+    #[should_panic(expected = "only one is allowed")]
+    fn test_new_list_after_value_panics() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let _ = builder.new_list();
+    }
+
+    #[test]
+    #[should_panic(expected = "only one is allowed")]
+    fn test_new_object_after_value_panics() {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(1i32);
+        let _ = builder.new_object();
+    }
+
     #[test]
     fn test_nested_object_with_lists() {
         /*
@@ -1044,14 +1251,9 @@ mod tests {
             variant2.add_field_name("a");
             assert!(!variant2.metadata_builder.is_sorted);
 
-            // per the spec, make sure the variant will fail to build if only 
metadata is provided
-            let (m, v) = variant2.finish();
-            let res = Variant::try_new(&m, &v);
-            assert!(res.is_err());
-
-            // since it is not sorted, make sure the metadata says so
-            let header = VariantMetadata::try_new(&m).unwrap();
-            assert!(!header.is_sorted());
+            // per the spec, a variant must have a top-level value; 
try_finish() rejects empty
+            let err = variant2.try_finish().unwrap_err();
+            assert!(err.to_string().contains("empty"), "unexpected error: 
{err}");
         }
 
         // write out variant1 and make sure the sorted flag is properly encoded
diff --git a/parquet-variant/src/builder/metadata.rs 
b/parquet-variant/src/builder/metadata.rs
index efccc2e4c6..10b34ced03 100644
--- a/parquet-variant/src/builder/metadata.rs
+++ b/parquet-variant/src/builder/metadata.rs
@@ -268,7 +268,7 @@ impl<S: AsRef<str>> Extend<S> for WritableMetadataBuilder {
 #[cfg(test)]
 mod test {
     use crate::{
-        ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
+        ParentState, ValueBuilder, VariantMetadata,
         builder::{
             metadata::{ReadOnlyMetadataBuilder, WritableMetadataBuilder},
             object::ObjectBuilder,
@@ -356,47 +356,13 @@ mod test {
         assert_eq!(metadata.num_field_names(), 3);
     }
 
-    #[test]
-    fn test_read_only_metadata_builder() {
-        // First create some metadata with a few field names
-        let mut default_builder = VariantBuilder::new();
-        default_builder.add_field_name("name");
-        default_builder.add_field_name("age");
-        default_builder.add_field_name("active");
-        let (metadata_bytes, _) = default_builder.finish();
-
-        // Use the metadata to build new variant values
-        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
-        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
-        let mut value_builder = ValueBuilder::new();
-
-        {
-            let state = ParentState::variant(&mut value_builder, &mut 
metadata_builder);
-            let mut obj = ObjectBuilder::new(state, false);
-
-            // These should succeed because the fields exist in the metadata
-            obj.insert("name", "Alice");
-            obj.insert("age", 30i8);
-            obj.insert("active", true);
-            obj.finish();
-        }
-
-        let value = value_builder.into_inner();
-
-        // Verify the variant was built correctly
-        let variant = Variant::try_new(&metadata_bytes, &value).unwrap();
-        let obj = variant.as_object().unwrap();
-        assert_eq!(obj.get("name"), Some(Variant::from("Alice")));
-        assert_eq!(obj.get("age"), Some(Variant::Int8(30)));
-        assert_eq!(obj.get("active"), Some(Variant::from(true)));
-    }
-
     #[test]
     fn test_read_only_metadata_builder_fails_on_unknown_field() {
         // Create metadata with only one field
-        let mut default_builder = VariantBuilder::new();
-        default_builder.add_field_name("known_field");
-        let (metadata_bytes, _) = default_builder.finish();
+        let mut default_builder = WritableMetadataBuilder::default();
+        default_builder.upsert_field_name("known_field");
+        default_builder.finish();
+        let metadata_bytes = default_builder.into_inner();
 
         // Use the metadata to build new variant values
         let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
diff --git a/parquet-variant/src/builder/object.rs 
b/parquet-variant/src/builder/object.rs
index 876c2e2d4c..670e3f7d76 100644
--- a/parquet-variant/src/builder/object.rs
+++ b/parquet-variant/src/builder/object.rs
@@ -431,6 +431,7 @@ impl<S: BuilderSpecificState> VariantBuilderExt for 
ObjectFieldBuilder<'_, '_, '
 mod tests {
     use crate::{
         ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
+        WritableMetadataBuilder,
         builder::{metadata::ReadOnlyMetadataBuilder, object::ObjectBuilder},
         decoder::VariantBasicType,
     };
@@ -501,11 +502,12 @@ mod tests {
     #[test]
     fn test_read_only_metadata_builder() {
         // First create some metadata with a few field names
-        let mut default_builder = VariantBuilder::new();
-        default_builder.add_field_name("name");
-        default_builder.add_field_name("age");
-        default_builder.add_field_name("active");
-        let (metadata_bytes, _) = default_builder.finish();
+        let mut default_builder = WritableMetadataBuilder::default();
+        default_builder.upsert_field_name("name");
+        default_builder.upsert_field_name("age");
+        default_builder.upsert_field_name("active");
+        default_builder.finish();
+        let metadata_bytes = default_builder.into_inner();
 
         // Use the metadata to build new variant values
         let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
@@ -899,7 +901,8 @@ mod tests {
         inner_list.finish();
         outer_list.finish();
 
-        // Valid object should succeed
+        // Valid object should succeed (fresh builder — one top-level value 
per VariantBuilder)
+        let mut builder = 
VariantBuilder::new().with_validate_unique_fields(true);
         let mut list = builder.new_list();
         let mut valid_obj = list.new_object();
         valid_obj.insert("m", 1);
diff --git a/parquet-variant/tests/variant_interop.rs 
b/parquet-variant/tests/variant_interop.rs
index 70b42e1f3c..4c7c10d51a 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -466,11 +466,12 @@ fn generate_random_value(rng: &mut StdRng, builder: &mut 
VariantBuilder, max_dep
             )
             .unwrap();
 
-            // timestamp w/o timezone
-            builder.append_value(data_time.naive_local());
-
-            // timestamp with timezone
-            builder.append_value(data_time.naive_utc().and_utc());
+            // randomly pick timestamp with or without timezone
+            if rng.random_bool(0.5) {
+                builder.append_value(data_time.naive_local());
+            } else {
+                builder.append_value(data_time.naive_utc().and_utc());
+            }
         }
         17 => {
             builder.append_value(Uuid::new_v4());

Reply via email to