This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new df8b38ef41 [Variant] Add constants for empty variant metadata (#8359)
df8b38ef41 is described below

commit df8b38ef41e742fb5f3d492954ee404364eac212
Author: Ryan Johnson <scov...@users.noreply.github.com>
AuthorDate: Tue Sep 16 12:02:45 2025 -0600

    [Variant] Add constants for empty variant metadata (#8359)
    
    # Which issue does this PR close?
    
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    
    - Closes #NNN.
    
    # Rationale for this change
    
    Variant metadata only "matters" for variant values that contain objects.
    Especially in unit tests, it is common for a given variant value to have
    an empty variant metadata -- often one created separately and replicated
    across many rows.
    
    # What changes are included in this PR?
    
    Define new constants, `EMPTY_VARIANT_METADATA_BYTES` and
    `EMPTY_VARIANT_METADATA`, which are exactly what they sound like.
    
    # Are these changes tested?
    
    New doc tests were added, and several unit tests now use the new constants.
    
    # Are there any user-facing changes?
    
    Yes: two new public constants, `EMPTY_VARIANT_METADATA_BYTES` and
    `EMPTY_VARIANT_METADATA`.
---
 parquet-variant-compute/src/variant_get.rs | 17 +++++++--------
 parquet-variant/src/variant.rs             |  2 +-
 parquet-variant/src/variant/metadata.rs    | 33 ++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 44c3ebbbc0..a5819fc459 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -305,7 +305,7 @@ mod test {
     use arrow::buffer::NullBuffer;
     use arrow::compute::CastOptions;
     use arrow_schema::{DataType, Field, FieldRef, Fields};
-    use parquet_variant::{Variant, VariantPath};
+    use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
 
     use crate::json_to_variant;
     use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
@@ -702,8 +702,10 @@ mod test {
             fn $func() -> ArrayRef {
                 // At the time of writing, the `VariantArrayBuilder` does not 
support shredding.
                 // so we must construct the array manually.  see 
https://github.com/apache/arrow-rs/issues/7895
-                let (metadata, _value) = { 
parquet_variant::VariantBuilder::new().finish() };
-                let metadata = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
+                let metadata = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(
+                    EMPTY_VARIANT_METADATA_BYTES,
+                    3,
+                ));
                 let typed_value = $array_type::from(vec![
                     Some(<$primitive_type>::try_from(1u8).unwrap()),
                     Some(<$primitive_type>::try_from(2u8).unwrap()),
@@ -1033,8 +1035,6 @@ mod test {
     /// }
     /// ```
     fn all_null_variant_array() -> ArrayRef {
-        let (metadata, _value) = { 
parquet_variant::VariantBuilder::new().finish() };
-
         let nulls = NullBuffer::from(vec![
             false, // row 0 is null
             false, // row 1 is null
@@ -1042,7 +1042,8 @@ mod test {
         ]);
 
         // metadata is the same for all rows (though they're all null)
-        let metadata = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
+        let metadata =
+            
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES,
 3));
 
         let struct_array = StructArrayBuilder::new()
             .with_field("metadata", Arc::new(metadata), false)
@@ -2503,8 +2504,8 @@ mod test {
             .build();
 
         // Build final VariantArray with top-level nulls
-        let (metadata, _) = parquet_variant::VariantBuilder::new().finish();
-        let metadata_array = 
BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
+        let metadata_array =
+            
BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES,
 4));
         let nulls = NullBuffer::from(vec![
             true,  // row 0: inner struct exists with typed_value=42
             true,  // row 1: inner field NULL
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 3dae4daa0f..cc4c3bcadd 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -17,7 +17,7 @@
 
 pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
 pub use self::list::VariantList;
-pub use self::metadata::VariantMetadata;
+pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, 
EMPTY_VARIANT_METADATA_BYTES};
 pub use self::object::VariantObject;
 use crate::decoder::{
     self, get_basic_type, get_primitive_type, VariantBasicType, 
VariantPrimitiveType,
diff --git a/parquet-variant/src/variant/metadata.rs 
b/parquet-variant/src/variant/metadata.rs
index 1c9da6bcc0..941247c9f2 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -141,6 +141,39 @@ pub struct VariantMetadata<'m> {
 // could increase the size of Variant. All those size increases could hurt 
performance.
 const _: () = crate::utils::expect_size_of::<VariantMetadata>(32);
 
+/// The canonical byte slice corresponding to an empty metadata dictionary.
+///
+/// ```
+/// # use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, VariantMetadata, 
WritableMetadataBuilder};
+/// let mut metadata_builder = WritableMetadataBuilder::default();
+/// metadata_builder.finish();
+/// let metadata_bytes = metadata_builder.into_inner();
+/// assert_eq!(&metadata_bytes, EMPTY_VARIANT_METADATA_BYTES);
+/// ```
+pub const EMPTY_VARIANT_METADATA_BYTES: &[u8] = &[1, 0, 0];
+
+/// The empty metadata dictionary.
+///
+/// ```
+/// # use parquet_variant::{EMPTY_VARIANT_METADATA, VariantMetadata, 
WritableMetadataBuilder};
+/// let mut metadata_builder = WritableMetadataBuilder::default();
+/// metadata_builder.finish();
+/// let metadata_bytes = metadata_builder.into_inner();
+/// let empty_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+/// assert_eq!(empty_metadata, EMPTY_VARIANT_METADATA);
+/// ```
+pub const EMPTY_VARIANT_METADATA: VariantMetadata = VariantMetadata {
+    bytes: EMPTY_VARIANT_METADATA_BYTES,
+    header: VariantMetadataHeader {
+        version: CORRECT_VERSION_VALUE,
+        is_sorted: false,
+        offset_size: OffsetSizeBytes::One,
+    },
+    dictionary_size: 0,
+    first_value_byte: 3,
+    validated: true,
+};
+
 impl<'m> VariantMetadata<'m> {
     /// Attempts to interpret `bytes` as a variant metadata instance, with 
full [validation] of all
     /// dictionary entries.

Reply via email to