alexanderbianchi commented on code in PR #7713:
URL: https://github.com/apache/arrow-rs/pull/7713#discussion_r2159223364


##########
arrow-cast/src/cast/run_array.rs:
##########
@@ -0,0 +1,357 @@
+use crate::cast::*;
+
+pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
+    array: &dyn Array,
+    to_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    match array.data_type() {
+        DataType::RunEndEncoded(_run_end_field, _values_field) => {
+            let run_array = 
array.as_any().downcast_ref::<RunArray<K>>().unwrap();
+
+            let values = run_array.values();
+
+            // Cast the values to the target type
+            let cast_values = cast_with_options(values, to_type, 
cast_options)?;
+
+            // Create a PrimitiveArray from the run_ends buffer
+            let run_ends_buffer = run_array.run_ends();
+            let run_ends_array =
+                
PrimitiveArray::<K>::from_iter_values(run_ends_buffer.values().iter().copied());
+
+            // Create new RunArray with the same run_ends but cast values
+            let new_run_array = RunArray::<K>::try_new(&run_ends_array, 
cast_values.as_ref())?;
+
+            Ok(Arc::new(new_run_array))
+        }
+        _ => Err(ArrowError::CastError(format!(
+            "Cannot cast array of type {:?} to RunEndEncodedArray",
+            array.data_type()
+        ))),
+    }
+}
+
+/// Attempts to cast an array to a RunEndEncoded array with the specified 
index type K
+/// and value type. This function performs run-length encoding on the input 
array.
+///
+/// # Arguments
+/// * `array` - The input array to be run-length encoded
+/// * `value_type` - The target data type for the values in the RunEndEncoded 
array
+/// * `cast_options` - Options controlling the casting behavior
+///
+/// # Returns
+/// A `Result` containing the new `RunArray` or an `ArrowError` if casting 
fails
+///
+/// # Process
+/// 1. Cast the input array to the target value type if needed
+/// 2. Iterate through the array to identify runs of consecutive equal values
+/// 3. Build run_ends array indicating where each run terminates
+/// 4. Build values array containing the unique values for each run
+/// 5. Construct and return the RunArray
+pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
+    array: &dyn Array,
+    value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    // Step 1: Cast the input array to the target value type if necessary
+    let cast_array = if array.data_type() == value_type {
+        // No casting needed, use the array as-is
+        make_array(array.to_data())
+    } else {
+        // Cast to the target value type
+        cast_with_options(array, value_type, cast_options)?
+    };
+
+    // Step 2: Run-length encode the cast array
+    // We'll use a builder to construct the RunArray efficiently
+    let mut run_ends_builder = PrimitiveBuilder::<K>::new();
+
+    if cast_array.len() == 0 {
+        // Handle empty array case
+        let empty_run_ends = run_ends_builder.finish();
+        let empty_values = make_array(ArrayData::new_empty(value_type));
+        return Ok(Arc::new(RunArray::<K>::try_new(
+            &empty_run_ends,
+            empty_values.as_ref(),
+        )?));
+    }
+
+    // Step 3: Use a simpler approach - use existing Arrow builders for 
run-length encoding

Review Comment:
   nit: this should say "run-end encoding" rather than "run-length encoding"



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to