This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 4676c06686 [Variant] Take top-level nulls into consideration when 
extracting perfectly shredded children (#9702)
4676c06686 is described below

commit 4676c06686a5be476b5cd632493a73c803d64a04
Author: Adam Gutglick <[email protected]>
AuthorDate: Thu Apr 16 14:03:03 2026 +0100

    [Variant] Take top-level nulls into consideration when extracting perfectly 
shredded children (#9702)
    
    # Which issue does this PR close?
    
    - Closes #9701
    
    # Rationale for this change
    
    Fixes a correctness issue, where top-level nullability will be dropped
    in these cases.
    Its important to note that due to the current canonicalization behavior,
    some types (like `Binary`) actually do behave correctly, this will be
    fully addressed in #9610 where we can support more underlying types,
    which simplifies it significantly.
    
    # What changes are included in this PR?
    
    Union the nullability buffers of perfectly shredded variant children
    with the array's top-level nullability.
    
    # Are these changes tested?
    
    In addition to existing tests, add tests that verify that the nulls are
    applied, both when the child is has no-nulls and when it does.
    
    # Are there any user-facing changes?
    
    Fixes incorrect behavior
---
 parquet-variant-compute/src/variant_get.rs | 155 ++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 4 deletions(-)

diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index 1b4b3354e2..e95c774f29 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{self, Array, ArrayRef, StructArray},
+    array::{self, Array, ArrayRef, StructArray, make_array},
     buffer::NullBuffer,
     compute::CastOptions,
     datatypes::Field,
@@ -261,8 +261,24 @@ fn try_perfect_shredding(variant_array: &VariantArray, 
as_field: &Field) -> Opti
         // 2. If every row in the `value` column is null
 
         // This is a perfect shredding, where the value is entirely shredded 
out,
-        // so we can just return the typed value.
-        return Some(typed_value.clone());
+        // so we can just return the typed value after merging the accumulated 
nulls.
+        let parent_nulls = variant_array.nulls();
+
+        // If we have no nulls OR the shredded array is `Null`, which doesn't 
support external nulls.
+        let target_array = if parent_nulls.is_none() || 
typed_value.data_type().is_null() {
+            typed_value.clone()
+        } else {
+            let merged_nulls = NullBuffer::union(parent_nulls, 
typed_value.nulls());
+            let data = typed_value
+                .to_data()
+                .into_builder()
+                .nulls(merged_nulls)
+                .build()
+                .ok()?;
+            make_array(data)
+        };
+
+        return Some(target_array);
     }
 
     None
@@ -347,7 +363,7 @@ mod test {
         Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, 
Decimal256Array,
         Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, 
Int64Array,
         LargeBinaryArray, LargeListArray, LargeListViewArray, 
LargeStringArray, ListArray,
-        ListViewArray, NullBuilder, StringArray, StringViewArray, StructArray,
+        ListViewArray, NullArray, NullBuilder, StringArray, StringViewArray, 
StructArray,
         Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, 
Time64NanosecondArray,
     };
     use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
@@ -4331,4 +4347,135 @@ mod test {
             );
         }
     }
+
+    macro_rules! perfectly_shredded_preserves_top_level_nulls_test {
+        ($name:ident, $result_type:expr, $typed_value:expr, 
$expected_array:expr) => {
+            perfectly_shredded_preserves_top_level_nulls_test!(
+                $name,
+                $result_type,
+                $typed_value,
+                Some(NullBuffer::from(vec![true, false, true])),
+                $expected_array
+            );
+        };
+        ($name:ident, $result_type:expr, $typed_value:expr, 
$parent_nulls:expr, $expected_array:expr) => {
+            #[test]
+            fn $name() {
+                let metadata = 
Arc::new(BinaryViewArray::from_iter_values(std::iter::repeat_n(
+                    EMPTY_VARIANT_METADATA_BYTES,
+                    3,
+                )));
+                let typed_value: ArrayRef = Arc::new($typed_value);
+                let variant_array: ArrayRef =
+                    VariantArray::from_parts(metadata, None, 
Some(typed_value), $parent_nulls)
+                        .into();
+
+                let result = variant_get(
+                    &variant_array,
+                    
GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
+                        "result",
+                        $result_type,
+                        true,
+                    )))),
+                )
+                .unwrap();
+
+                let expected_array: ArrayRef = Arc::new($expected_array);
+                assert_eq!(&result, &expected_array);
+            }
+        };
+    }
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        test_variant_get_perfectly_shredded_integer_preserves_top_level_nulls,
+        DataType::Int32,
+        Int32Array::from(vec![Some(0_i32), Some(1_i32), Some(2_i32)]),
+        Int32Array::from(vec![Some(0_i32), None, Some(2_i32)])
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        
test_variant_get_perfectly_shredded_integer_unions_child_and_top_level_nulls,
+        DataType::Int32,
+        Int32Array::from(vec![None, Some(1_i32), Some(2_i32)]),
+        Some(NullBuffer::from(vec![true, false, true])),
+        Int32Array::from(vec![None, None, Some(2_i32)])
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        test_variant_get_perfectly_shredded_null_preserves_top_level_nulls,
+        DataType::Null,
+        NullArray::new(3),
+        NullArray::new(3)
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        
test_variant_get_perfectly_shredded_binary_view_preserves_top_level_nulls,
+        DataType::BinaryView,
+        BinaryViewArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"masked-null" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ]),
+        BinaryViewArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            None,
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        test_variant_get_perfectly_shredded_binary_preserves_top_level_nulls,
+        DataType::Binary,
+        BinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"masked-null" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ]),
+        BinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            None,
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        test_variant_get_perfectly_shredded_decimal4_preserves_top_level_nulls,
+        DataType::Decimal32(5, 2),
+        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
+            .with_precision_and_scale(5, 2)
+            .unwrap(),
+        Decimal32Array::from(vec![Some(12345), None, Some(-12342)])
+            .with_precision_and_scale(5, 2)
+            .unwrap()
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        test_variant_get_perfectly_shredded_decimal8_preserves_top_level_nulls,
+        DataType::Decimal64(10, 1),
+        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), 
Some(-1234561203)])
+            .with_precision_and_scale(10, 1)
+            .unwrap(),
+        Decimal64Array::from(vec![Some(1234567809), None, Some(-1234561203)])
+            .with_precision_and_scale(10, 1)
+            .unwrap()
+    );
+
+    perfectly_shredded_preserves_top_level_nulls_test!(
+        
test_variant_get_perfectly_shredded_decimal16_preserves_top_level_nulls,
+        DataType::Decimal128(20, 3),
+        Decimal128Array::from(vec![
+            Some(i128::from_str("12345678901234567899").unwrap()),
+            Some(i128::from_str("23445677483748324300").unwrap()),
+            Some(i128::from_str("-12345678901234567899").unwrap()),
+        ])
+        .with_precision_and_scale(20, 3)
+        .unwrap(),
+        Decimal128Array::from(vec![
+            Some(i128::from_str("12345678901234567899").unwrap()),
+            None,
+            Some(i128::from_str("-12345678901234567899").unwrap()),
+        ])
+        .with_precision_and_scale(20, 3)
+        .unwrap()
+    );
 }

Reply via email to