This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push: new 7ac9db7f62 [Variant] [Shredding] feat: Support typed_access for Date32 (#8379) 7ac9db7f62 is described below commit 7ac9db7f62946bda26344dc2cdb0292a31996abd Author: Li Jiaying <76034984+pinkcrow...@users.noreply.github.com> AuthorDate: Fri Sep 19 15:04:12 2025 -0400 [Variant] [Shredding] feat: Support typed_access for Date32 (#8379) # Which issue does this PR close? - Closes #8330. # Rationale for this change # What changes are included in this PR? # Are these changes tested? Yes # Are there any user-facing changes? N/A --- parquet-variant-compute/src/variant_array.rs | 10 +++- parquet-variant-compute/src/variant_get.rs | 73 ++++++++++++++++++++++++++-- parquet/tests/variant_integration.rs | 5 +- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 4abffa65c2..faaa1611ef 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -21,8 +21,8 @@ use crate::type_conversion::primitive_conversion_single_value; use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray}; use arrow::buffer::NullBuffer; use arrow::datatypes::{ - Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, - UInt32Type, UInt64Type, UInt8Type, + Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use parquet_variant::Uuid; @@ -556,6 +556,12 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, ' let value = boolean_array.value(index); Variant::from(value) } + DataType::Date32 => { + let array = typed_value.as_primitive::<Date32Type>(); + let value = array.value(index); + let date = Date32Type::to_naive_date(value); + 
Variant::from(date) + } DataType::FixedSizeBinary(binary_len) => { let array = typed_value.as_fixed_size_binary(); // Try to treat 16 byte FixedSizeBinary as UUID diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 0e11168516..8bb34166ae 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -303,9 +303,9 @@ mod test { use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryViewArray, Float16Array, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + Array, ArrayRef, BinaryViewArray, Date32Array, Float16Array, Float32Array, Float64Array, + Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; @@ -531,6 +531,26 @@ mod test { assert_eq!(result.value(3), Variant::from("world")); } + #[test] + fn get_variant_partially_shredded_date32_as_variant() { + let array = partially_shredded_date32_variant_array(); + let options = GetOptions::new(); + let result = variant_get(&array, options).unwrap(); + + // expect the result is a VariantArray + let result: &VariantArray = result.as_any().downcast_ref().unwrap(); + assert_eq!(result.len(), 4); + + // Expect the values are the same as the original values + use chrono::NaiveDate; + let date1 = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap(); + let date2 = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap(); + assert_eq!(result.value(0), Variant::from(date1)); + assert!(!result.is_valid(1)); + assert_eq!(result.value(2), Variant::from("n/a")); + assert_eq!(result.value(3), Variant::from(date2)); + } + #[test] fn get_variant_partially_shredded_binary_view_as_variant() { let array = partially_shredded_binary_view_variant_array(); @@ -1143,6 +1163,53 @@ mod test { ) } + /// Return a 
VariantArray that represents a partially "shredded" variant for Date32 + fn partially_shredded_date32_variant_array() -> ArrayRef { + let (metadata, string_value) = { + let mut builder = parquet_variant::VariantBuilder::new(); + builder.append_value("n/a"); + builder.finish() + }; + + // Create the null buffer for the overall array + let nulls = NullBuffer::from(vec![ + true, // row 0 non null + false, // row 1 is null + true, // row 2 non null + true, // row 3 non null + ]); + + // metadata is the same for all rows + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); + + // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY + // about why row 1 is an empty but non-null value. + let values = BinaryViewArray::from(vec![ + None, // row 0 is shredded, so no value + Some(b"" as &[u8]), // row 1 is null, so empty value + Some(&string_value), // copy the string value "n/a" + None, // row 3 is shredded, so no value + ]); + + let typed_value = Date32Array::from(vec![ + Some(20348), // row 0 is shredded, 2025-09-17 + None, // row 1 is null + None, // row 2 is a string, not a date + Some(20340), // row 3 is shredded, 2025-09-09 + ]); + + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), true) + .with_field("typed_value", Arc::new(typed_value), true) + .with_field("value", Arc::new(values), true) + .with_nulls(nulls) + .build(); + + Arc::new( + VariantArray::try_new(Arc::new(struct_array)).expect("should create variant array"), + ) + } + /// Return a VariantArray that represents a partially "shredded" variant for BinaryView fn partially_shredded_binary_view_variant_array() -> ArrayRef { let (metadata, string_value) = { diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs index 97fb6b8801..ebce056cc4 100644 --- a/parquet/tests/variant_integration.rs +++ b/parquet/tests/variant_integration.rs @@ -92,9 +92,8 @@
variant_test_case!(14); variant_test_case!(15); variant_test_case!(16); variant_test_case!(17); -// https://github.com/apache/arrow-rs/issues/8330 -variant_test_case!(18, "Unsupported typed_value type: Date32"); -variant_test_case!(19, "Unsupported typed_value type: Date32"); +variant_test_case!(18); +variant_test_case!(19); // https://github.com/apache/arrow-rs/issues/8331 variant_test_case!( 20,