This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new b06996bb77 [Variant] [Shredding] Support typed_access for Utf8 and BinaryView (#8364)
b06996bb77 is described below

commit b06996bb77ff51ddbb1a07c5d6d64fe9a0f5505c
Author: Peter Nguyen <[email protected]>
AuthorDate: Wed Sep 17 08:18:35 2025 -0700

    [Variant] [Shredding] Support typed_access for Utf8 and BinaryView (#8364)
    
    # Which issue does this PR close?
    
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    
    - Closes #8333
    
    # Rationale for this change
    
    See issue #8333.
    
    # What changes are included in this PR?
    
    Support typed_access for Utf8 and BinaryView
    
    # Are these changes tested?
    
    Yes
    
    # Are there any user-facing changes?
    
    N/A. Variant support still in development
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 .github/workflows/parquet.yml                |   3 +
 parquet-variant-compute/src/variant_array.rs |  10 +++
 parquet-variant-compute/src/variant_get.rs   | 128 +++++++++++++++++++++++++++
 parquet/tests/variant_integration.rs         |   5 +-
 4 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index 0d1a01ca5e..09fc18e351 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -42,6 +42,9 @@ on:
       - arrow-json/**
       - arrow-avro/**
       - parquet/**
+      - parquet-variant/**
+      - parquet-variant-compute/**
+      - parquet-variant-json/**
       - .github/**
 
 jobs:
diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
index e87d03f88c..4abffa65c2 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -568,6 +568,16 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
             let value = array.value(index);
             Variant::from(value)
         }
+        DataType::BinaryView => {
+            let array = typed_value.as_binary_view();
+            let value = array.value(index);
+            Variant::from(value)
+        }
+        DataType::Utf8 => {
+            let array = typed_value.as_string::<i32>();
+            let value = array.value(index);
+            Variant::from(value)
+        }
         DataType::Int8 => {
             primitive_conversion_single_value!(Int8Type, typed_value, index)
         }
diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs
index a5819fc459..5cd3c094e2 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -508,6 +508,40 @@ mod test {
         assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
     }
 
+    #[test]
+    fn get_variant_partially_shredded_utf8_as_variant() {
+        let array = partially_shredded_utf8_variant_array();
+        let options = GetOptions::new();
+        let result = variant_get(&array, options).unwrap();
+
+        // expect the result is a VariantArray
+        let result: &VariantArray = result.as_any().downcast_ref().unwrap();
+        assert_eq!(result.len(), 4);
+
+        // Expect the values are the same as the original values
+        assert_eq!(result.value(0), Variant::from("hello"));
+        assert!(!result.is_valid(1));
+        assert_eq!(result.value(2), Variant::from("n/a"));
+        assert_eq!(result.value(3), Variant::from("world"));
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_binary_view_as_variant() {
+        let array = partially_shredded_binary_view_variant_array();
+        let options = GetOptions::new();
+        let result = variant_get(&array, options).unwrap();
+
+        // expect the result is a VariantArray
+        let result: &VariantArray = result.as_any().downcast_ref().unwrap();
+        assert_eq!(result.len(), 4);
+
+        // Expect the values are the same as the original values
+        assert_eq!(result.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
+        assert!(!result.is_valid(1));
+        assert_eq!(result.value(2), Variant::from("n/a"));
+        assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
+    }
+
     /// Shredding: extract a value as an Int32Array
     #[test]
     fn get_variant_shredded_int32_as_int32_safe_cast() {
@@ -1018,6 +1052,100 @@ mod test {
         )
     }
 
+    /// Return a VariantArray that represents a partially "shredded" variant for UTF8
+    fn partially_shredded_utf8_variant_array() -> ArrayRef {
+        let (metadata, string_value) = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            builder.append_value("n/a");
+            builder.finish()
+        };
+
+        // Create the null buffer for the overall array
+        let nulls = NullBuffer::from(vec![
+            true,  // row 0 non null
+            false, // row 1 is null
+            true,  // row 2 non null
+            true,  // row 3 non null
+        ]);
+
+        // metadata is the same for all rows
+        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
+
+        // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
+        // about why row 1 is an empty, but non-null, value.
+        let values = BinaryViewArray::from(vec![
+            None,                // row 0 is shredded, so no value
+            Some(b"" as &[u8]),  // row 1 is null, so empty value
+            Some(&string_value), // copy the string value "n/a"
+            None,                // row 3 is shredded, so no value
+        ]);
+
+        let typed_value = StringArray::from(vec![
+            Some("hello"), // row 0 is shredded
+            None,          // row 1 is null
+            None,          // row 2 is a string
+            Some("world"), // row 3 is shredded
+        ]);
+
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata), true)
+            .with_field("typed_value", Arc::new(typed_value), true)
+            .with_field("value", Arc::new(values), true)
+            .with_nulls(nulls)
+            .build();
+
+        Arc::new(
+            VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"),
+        )
+    }
+
+    /// Return a VariantArray that represents a partially "shredded" variant for BinaryView
+    fn partially_shredded_binary_view_variant_array() -> ArrayRef {
+        let (metadata, string_value) = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            builder.append_value("n/a");
+            builder.finish()
+        };
+
+        // Create the null buffer for the overall array
+        let nulls = NullBuffer::from(vec![
+            true,  // row 0 non null
+            false, // row 1 is null
+            true,  // row 2 non null
+            true,  // row 3 non null
+        ]);
+
+        // metadata is the same for all rows
+        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
+
+        // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
+        // about why row 1 is an empty, but non-null, value.
+        let values = BinaryViewArray::from(vec![
+            None,                // row 0 is shredded, so no value
+            Some(b"" as &[u8]),  // row 1 is null, so empty value
+            Some(&string_value), // copy the string value "n/a"
+            None,                // row 3 is shredded, so no value
+        ]);
+
+        let typed_value = BinaryViewArray::from(vec![
+            Some(&[1u8, 2u8, 3u8][..]), // row 0 is shredded
+            None,                       // row 1 is null
+            None,                       // row 2 is a string
+            Some(&[4u8, 5u8, 6u8][..]), // row 3 is shredded
+        ]);
+
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata), true)
+            .with_field("typed_value", Arc::new(typed_value), true)
+            .with_field("value", Arc::new(values), true)
+            .with_nulls(nulls)
+            .build();
+
+        Arc::new(
+            VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"),
+        )
+    }
+
     /// Return a VariantArray that represents an "all null" variant
     /// for the following example (3 null values):
     ///
diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
index 6a586e013e..97fb6b8801 100644
--- a/parquet/tests/variant_integration.rs
+++ b/parquet/tests/variant_integration.rs
@@ -119,9 +119,8 @@ variant_test_case!(26, "Unsupported typed_value type: Decimal128(18, 9)");
 variant_test_case!(27, "Unsupported typed_value type: Decimal128(18, 9)");
 variant_test_case!(28, "Unsupported typed_value type: Decimal128(38, 9)");
 variant_test_case!(29, "Unsupported typed_value type: Decimal128(38, 9)");
-// https://github.com/apache/arrow-rs/issues/8333
-variant_test_case!(30, "Unsupported typed_value type: BinaryView");
-variant_test_case!(31, "Unsupported typed_value type: Utf8");
+variant_test_case!(30);
+variant_test_case!(31);
 // https://github.com/apache/arrow-rs/issues/8334
 variant_test_case!(32, "Unsupported typed_value type: Time64(Microsecond)");
 // https://github.com/apache/arrow-rs/issues/8331

Reply via email to