vegarsti commented on code in PR #8589:
URL: https://github.com/apache/arrow-rs/pull/8589#discussion_r2462633576
##########
arrow-cast/src/cast/run_array.rs:
##########
@@ -0,0 +1,262 @@
+use crate::cast::*;
+use arrow_ord::partition::partition;
+
+/// Attempts to cast a Run-End Encoded array to another type, handling both REE-to-REE
+/// and REE-to-other type conversions with proper validation and error handling.
+///
+/// # Arguments
+/// * `array` - The input Run-End Encoded array to be cast
+/// * `to_type` - The target data type for the casting operation
+/// * `cast_options` - Options controlling the casting behavior (e.g., safe vs unsafe)
+///
+/// # Returns
+/// A `Result` containing the new `ArrayRef` or an `ArrowError` if casting fails
+///
+/// # Behavior
+/// This function handles two main casting scenarios:
+///
+/// ## Case 1: REE-to-REE Casting
+/// When casting to another Run-End Encoded type:
+/// - Casts both the `values` and `run_ends` to their target types
+/// - Validates that run-end casting only allows upcasts (Int16→Int32, Int16→Int64, Int32→Int64)
+/// - Preserves the REE structure while updating both fields
+/// - Returns a new `RunArray` with the appropriate run-end type (Int16, Int32, or Int64)
+///
+/// ## Case 2: REE-to-Other Casting
+/// When casting to a non-REE type:
+/// - Expands the REE array to its logical form by unpacking all values
+/// - Applies the target type casting to the expanded array
+/// - Returns a regular array of the target type (e.g., StringArray, Int64Array)
+///
+/// # Errors
+/// Returns an error if:
+/// - the input array is not a Run-End Encoded array
+/// - run-end downcasting would cause overflow
+/// - the target run-end type is unsupported
+/// - an underlying casting operation fails (the error is propagated)
+///
+/// # Safety Considerations
+/// - Run-end casting uses `safe: false` to prevent silent overflow
+/// - Only upcasts are allowed for run-ends to maintain valid REE structure
+/// - Unpacking preserves null values and array length
+/// - Type validation ensures only supported run-end types (Int16, Int32, Int64)
+///
+/// # Performance Notes
+/// - REE-to-REE casting is efficient as it operates on the compressed structure
+/// - REE-to-other casting requires full unpacking, which may be expensive for large arrays
+/// - Run-end validation adds minimal overhead for safety
+pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
+ array: &dyn Array,
+ to_type: &DataType,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ match array.data_type() {
+ DataType::RunEndEncoded(_, _) => {
+ let run_array = array
+ .as_any()
+ .downcast_ref::<RunArray<K>>()
+ .ok_or_else(|| ArrowError::CastError("Expected
RunArray".to_string()))?;
+
+ let values = run_array.values();
+
+ match to_type {
+ // CASE 1: Stay as RunEndEncoded, cast only the values
+ DataType::RunEndEncoded(target_index_field,
target_value_field) => {
+ let cast_values =
+ cast_with_options(values,
target_value_field.data_type(), cast_options)?;
+
+ let run_ends_array = PrimitiveArray::<K>::from_iter_values(
+ run_array.run_ends().values().iter().copied(),
+ );
+ let cast_run_ends = cast_with_options(
+ &run_ends_array,
+ target_index_field.data_type(),
+ cast_options,
Review Comment:
17f4f6f
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]