This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new ebb6ede98b [Variant]: Implement `DataType::RunEndEncoded` support for 
`cast_to_variant` kernel (#8174)
ebb6ede98b is described below

commit ebb6ede98b2b4d96a1a4f501a28ab42a3b937f73
Author: Liam Bao <[email protected]>
AuthorDate: Wed Aug 20 15:09:13 2025 -0400

    [Variant]: Implement `DataType::RunEndEncoded` support for 
`cast_to_variant` kernel (#8174)
    
    # Which issue does this PR close?
    
    - Closes #8064.
    
    # Rationale for this change
    
    # What changes are included in this PR?
    
    Implement `DataType::RunEndEncoded` for `cast_to_variant`
    
    # Are these changes tested?
    
    Yes
    
    # Are there any user-facing changes?
    
    New cast type supported
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 parquet-variant-compute/src/cast_to_variant.rs | 113 +++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 7 deletions(-)

diff --git a/parquet-variant-compute/src/cast_to_variant.rs 
b/parquet-variant-compute/src/cast_to_variant.rs
index cdafb64b32..43ee8ccb39 100644
--- a/parquet-variant-compute/src/cast_to_variant.rs
+++ b/parquet-variant-compute/src/cast_to_variant.rs
@@ -23,10 +23,11 @@ use arrow::array::{
     TimestampSecondArray,
 };
 use arrow::datatypes::{
-    i256, BinaryType, BinaryViewType, Date32Type, Date64Type, Decimal128Type, 
Decimal256Type,
-    Decimal32Type, Decimal64Type, Float16Type, Float32Type, Float64Type, 
Int16Type, Int32Type,
-    Int64Type, Int8Type, LargeBinaryType, Time32MillisecondType, 
Time32SecondType,
-    Time64MicrosecondType, Time64NanosecondType, UInt16Type, UInt32Type, 
UInt64Type, UInt8Type,
+    i256, ArrowNativeType, BinaryType, BinaryViewType, Date32Type, Date64Type, 
Decimal128Type,
+    Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, Float32Type, 
Float64Type, Int16Type,
+    Int32Type, Int64Type, Int8Type, LargeBinaryType, RunEndIndexType, 
Time32MillisecondType,
+    Time32SecondType, Time64MicrosecondType, Time64NanosecondType, UInt16Type, 
UInt32Type,
+    UInt64Type, UInt8Type,
 };
 use arrow::temporal_conversions::{
     timestamp_ms_to_datetime, timestamp_ns_to_datetime, 
timestamp_s_to_datetime,
@@ -502,6 +503,17 @@ pub fn cast_to_variant(input: &dyn Array) -> 
Result<VariantArray, ArrowError> {
                 builder
             );
         }
+        DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
+            DataType::Int16 => process_run_end_encoded::<Int16Type>(input, 
&mut builder)?,
+            DataType::Int32 => process_run_end_encoded::<Int32Type>(input, 
&mut builder)?,
+            DataType::Int64 => process_run_end_encoded::<Int64Type>(input, 
&mut builder)?,
+            _ => {
+                return Err(ArrowError::CastError(format!(
+                    "Unsupported run ends type: {:?}",
+                    run_ends.data_type()
+                )));
+            }
+        },
         DataType::Dictionary(_, _) => {
             let dict_array = input.as_any_dictionary();
             let values_variant_array = 
cast_to_variant(dict_array.values().as_ref())?;
@@ -532,6 +544,41 @@ pub fn cast_to_variant(input: &dyn Array) -> 
Result<VariantArray, ArrowError> {
     Ok(builder.build())
 }
 
+/// Expands a run-end encoded array (run ends of type `R`) into `builder`, one variant per row
+fn process_run_end_encoded<R: RunEndIndexType>(
+    input: &dyn Array,
+    builder: &mut VariantArrayBuilder,
+) -> Result<(), ArrowError> {
+    let run_array = input.as_run::<R>();
+    let values_variant_array = cast_to_variant(run_array.values().as_ref())?;
+
+    // Values were cast once above; now expand each run to one entry per logical row
+    let run_ends = run_array.run_ends().values();
+    let mut logical_start = 0;
+
+    for (physical_idx, &run_end) in run_ends.iter().enumerate() {
+        let logical_end = run_end.as_usize();
+        let run_length = logical_end - logical_start;
+
+        if values_variant_array.is_null(physical_idx) {
+            // Append nulls for the entire run
+            for _ in 0..run_length {
+                builder.append_null();
+            }
+        } else {
+            // Get the value once and append it for the entire run
+            let value = values_variant_array.value(physical_idx);
+            for _ in 0..run_length {
+                builder.append_variant(value.clone());
+            }
+        }
+
+        logical_start = logical_end;
+    }
+
+    Ok(())
+}
+
 // TODO do we need a cast_with_options to allow specifying conversion behavior,
 // e.g. how to handle overflows, whether to convert to Variant::Null or return
 // an error, etc. ?
@@ -544,9 +591,9 @@ mod tests {
         Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, 
FixedSizeBinaryBuilder,
         Float16Array, Float32Array, Float64Array, GenericByteBuilder, 
GenericByteViewBuilder,
         Int16Array, Int32Array, Int64Array, Int8Array, IntervalYearMonthArray, 
LargeStringArray,
-        NullArray, StringArray, StringViewArray, StructArray, 
Time32MillisecondArray,
-        Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, 
UInt16Array, UInt32Array,
-        UInt64Array, UInt8Array,
+        NullArray, StringArray, StringRunBuilder, StringViewArray, StructArray,
+        Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, 
Time64NanosecondArray,
+        UInt16Array, UInt32Array, UInt64Array, UInt8Array,
     };
     use arrow::buffer::NullBuffer;
     use arrow_schema::{Field, Fields};
@@ -1847,6 +1894,58 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_to_variant_run_end_encoded() {
+        let mut builder = StringRunBuilder::<Int32Type>::new();
+        builder.append_value("apple");
+        builder.append_value("apple");
+        builder.append_value("banana");
+        builder.append_value("banana");
+        builder.append_value("banana");
+        builder.append_value("cherry");
+        let run_array = builder.finish();
+
+        run_test(
+            Arc::new(run_array),
+            vec![
+                Some(Variant::from("apple")),
+                Some(Variant::from("apple")),
+                Some(Variant::from("banana")),
+                Some(Variant::from("banana")),
+                Some(Variant::from("banana")),
+                Some(Variant::from("cherry")),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_run_end_encoded_with_nulls() {
+        use arrow::array::StringRunBuilder;
+        use arrow::datatypes::Int32Type;
+
+        // Test run-end encoded array with nulls
+        let mut builder = StringRunBuilder::<Int32Type>::new();
+        builder.append_value("apple");
+        builder.append_null();
+        builder.append_value("banana");
+        builder.append_value("banana");
+        builder.append_null();
+        builder.append_null();
+        let run_array = builder.finish();
+
+        run_test(
+            Arc::new(run_array),
+            vec![
+                Some(Variant::from("apple")),
+                None,
+                Some(Variant::from("banana")),
+                Some(Variant::from("banana")),
+                None,
+                None,
+            ],
+        );
+    }
+
     #[test]
     fn test_cast_to_variant_dictionary() {
         let values = StringArray::from(vec!["apple", "banana", "cherry", 
"date"]);

Reply via email to