This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push: new df8b38ef41 [Variant] Add constants for empty variant metadata (#8359) df8b38ef41 is described below commit df8b38ef41e742fb5f3d492954ee404364eac212 Author: Ryan Johnson <scov...@users.noreply.github.com> AuthorDate: Tue Sep 16 12:02:45 2025 -0600 [Variant] Add constants for empty variant metadata (#8359) # Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Closes #NNN. # Rationale for this change Variant metadata only "matters" for variant values that contain objects. Especially in unit tests, it is common for a given variant value to have empty variant metadata -- often one that is created separately and replicated across many rows. # What changes are included in this PR? Define new constants, `EMPTY_VARIANT_METADATA_BYTES` and `EMPTY_VARIANT_METADATA`, which are exactly what they sound like. # Are these changes tested? New doc tests were added, and several unit tests were updated to use the new constants as well. # Are there any user-facing changes? 
New constants --- parquet-variant-compute/src/variant_get.rs | 17 +++++++-------- parquet-variant/src/variant.rs | 2 +- parquet-variant/src/variant/metadata.rs | 33 ++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 44c3ebbbc0..a5819fc459 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -305,7 +305,7 @@ mod test { use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; use arrow_schema::{DataType, Field, FieldRef, Fields}; - use parquet_variant::{Variant, VariantPath}; + use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES}; use crate::json_to_variant; use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; @@ -702,8 +702,10 @@ mod test { fn $func() -> ArrayRef { // At the time of writing, the `VariantArrayBuilder` does not support shredding. // so we must construct the array manually. 
see https://github.com/apache/arrow-rs/issues/7895 - let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() }; - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( + EMPTY_VARIANT_METADATA_BYTES, + 3, + )); let typed_value = $array_type::from(vec![ Some(<$primitive_type>::try_from(1u8).unwrap()), Some(<$primitive_type>::try_from(2u8).unwrap()), @@ -1033,8 +1035,6 @@ mod test { /// } /// ``` fn all_null_variant_array() -> ArrayRef { - let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() }; - let nulls = NullBuffer::from(vec![ false, // row 0 is null false, // row 1 is null @@ -1042,7 +1042,8 @@ mod test { ]); // metadata is the same for all rows (though they're all null) - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); + let metadata = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3)); let struct_array = StructArrayBuilder::new() .with_field("metadata", Arc::new(metadata), false) @@ -2503,8 +2504,8 @@ mod test { .build(); // Build final VariantArray with top-level nulls - let (metadata, _) = parquet_variant::VariantBuilder::new().finish(); - let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); + let metadata_array = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4)); let nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 true, // row 1: inner field NULL diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 3dae4daa0f..cc4c3bcadd 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -17,7 +17,7 @@ pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; pub use self::list::VariantList; -pub use self::metadata::VariantMetadata; +pub use 
self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES}; pub use self::object::VariantObject; use crate::decoder::{ self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index 1c9da6bcc0..941247c9f2 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -141,6 +141,39 @@ pub struct VariantMetadata<'m> { // could increase the size of Variant. All those size increases could hurt performance. const _: () = crate::utils::expect_size_of::<VariantMetadata>(32); +/// The canonical byte slice corresponding to an empty metadata dictionary. +/// +/// ``` +/// # use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, VariantMetadata, WritableMetadataBuilder}; +/// let mut metadata_builder = WritableMetadataBuilder::default(); +/// metadata_builder.finish(); +/// let metadata_bytes = metadata_builder.into_inner(); +/// assert_eq!(&metadata_bytes, EMPTY_VARIANT_METADATA_BYTES); +/// ``` +pub const EMPTY_VARIANT_METADATA_BYTES: &[u8] = &[1, 0, 0]; + +/// The empty metadata dictionary. 
+/// +/// ``` +/// # use parquet_variant::{EMPTY_VARIANT_METADATA, VariantMetadata, WritableMetadataBuilder}; +/// let mut metadata_builder = WritableMetadataBuilder::default(); +/// metadata_builder.finish(); +/// let metadata_bytes = metadata_builder.into_inner(); +/// let empty_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); +/// assert_eq!(empty_metadata, EMPTY_VARIANT_METADATA); +/// ``` +pub const EMPTY_VARIANT_METADATA: VariantMetadata = VariantMetadata { + bytes: EMPTY_VARIANT_METADATA_BYTES, + header: VariantMetadataHeader { + version: CORRECT_VERSION_VALUE, + is_sorted: false, + offset_size: OffsetSizeBytes::One, + }, + dictionary_size: 0, + first_value_byte: 3, + validated: true, +}; + impl<'m> VariantMetadata<'m> { /// Attempts to interpret `bytes` as a variant metadata instance, with full [validation] of all /// dictionary entries.