This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 3a019d0104 fix: take_run return empty array instead of panic. (#9182)
3a019d0104 is described below

commit 3a019d0104668146c44c9da4dca0c5aed0a5324c
Author: Thor <[email protected]>
AuthorDate: Fri Jan 16 22:31:39 2026 -0600

    fix: take_run return empty array instead of panic. (#9182)
    
    If empty indices were passed into take_run it would panic with an index
    out of bounds error. Instead of panicing return an empty array.
    
    This was happening when a list array with empty entries were taken.
    
    # Which issue does this PR close?
    
    - Closes #9179.
    
    # Rationale for this change
    
    Causing unexpected panics to occur and is different behavior than
    `take_impl` for other types.
    
    # What changes are included in this PR?
    
    Check to return an empty run array if no indices are given for take, and
    a unit test that validates this behavior.
    
    # Are these changes tested?
    
    Yes
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    
    # Are there any user-facing changes?
    
    No
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 arrow-data/src/data.rs   | 63 +++++++++++++++++++++++++++---------------------
 arrow-select/src/take.rs | 27 +++++++++++++++++++++
 2 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 4917691e23..21cf4e5b5e 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -699,34 +699,41 @@ impl ArrayData {
                     (buffers, children, false)
                 }
                 DataType::RunEndEncoded(r, v) => {
-                    let runs = match r.data_type() {
-                        DataType::Int16 => {
-                            let i = i16::from_usize(len).expect("run 
overflow");
-                            Buffer::from_slice_ref([i])
-                        }
-                        DataType::Int32 => {
-                            let i = i32::from_usize(len).expect("run 
overflow");
-                            Buffer::from_slice_ref([i])
-                        }
-                        DataType::Int64 => {
-                            let i = i64::from_usize(len).expect("run 
overflow");
-                            Buffer::from_slice_ref([i])
-                        }
-                        dt => unreachable!("Invalid run ends data type {dt}"),
-                    };
-
-                    let builder = ArrayData::builder(r.data_type().clone())
-                        .len(1)
-                        .buffers(vec![runs]);
-
-                    // SAFETY:
-                    // Valid by construction
-                    let runs = unsafe { builder.build_unchecked() };
-                    (
-                        vec![],
-                        vec![runs, ArrayData::new_null(v.data_type(), 1)],
-                        false,
-                    )
+                    if len == 0 {
+                        // For empty arrays, create zero-length child arrays.
+                        let runs = ArrayData::new_empty(r.data_type());
+                        let values = ArrayData::new_empty(v.data_type());
+                        (vec![], vec![runs, values], false)
+                    } else {
+                        let runs = match r.data_type() {
+                            DataType::Int16 => {
+                                let i = i16::from_usize(len).expect("run 
overflow");
+                                Buffer::from_slice_ref([i])
+                            }
+                            DataType::Int32 => {
+                                let i = i32::from_usize(len).expect("run 
overflow");
+                                Buffer::from_slice_ref([i])
+                            }
+                            DataType::Int64 => {
+                                let i = i64::from_usize(len).expect("run 
overflow");
+                                Buffer::from_slice_ref([i])
+                            }
+                            dt => unreachable!("Invalid run ends data type 
{dt}"),
+                        };
+
+                        let builder = ArrayData::builder(r.data_type().clone())
+                            .len(1)
+                            .buffers(vec![runs]);
+
+                        // SAFETY:
+                        // Valid by construction
+                        let runs = unsafe { builder.build_unchecked() };
+                        (
+                            vec![],
+                            vec![runs, ArrayData::new_null(v.data_type(), 1)],
+                            false,
+                        )
+                    }
                 }
                 // Handled by Some(width) branch above
                 DataType::Int8
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 1961a604d9..3e34e794f1 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -214,6 +214,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
     values: &dyn Array,
     indices: &PrimitiveArray<IndexType>,
 ) -> Result<ArrayRef, ArrowError> {
+    if indices.is_empty() {
+        return Ok(new_empty_array(values.data_type()));
+    }
     downcast_primitive_array! {
         values => Ok(Arc::new(take_primitive(values, indices)?)),
         DataType::Boolean => {
@@ -2682,4 +2685,28 @@ mod tests {
             Err(ArrowError::OffsetOverflowError(_))
         ));
     }
+
+    #[test]
+    fn test_take_run_empty_indices() {
+        let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
+        builder.extend([Some(1), Some(1), Some(2), Some(2)]);
+        let run_array = builder.finish();
+
+        let logical_indices: PrimitiveArray<Int32Type> = 
PrimitiveArray::from(Vec::<i32>::new());
+
+        let result = take_impl(&run_array, &logical_indices).expect("take_run 
with empty indices");
+
+        // Verify the result is a valid empty RunArray
+        assert_eq!(result.len(), 0);
+        assert_eq!(result.null_count(), 0);
+
+        // Verify that the result can be downcast and used without validation 
errors
+        // This specifically tests that "The values in run_ends array should 
be strictly positive" is not triggered
+        let run_result = result
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("result should be a RunArray");
+        assert_eq!(run_result.run_ends().len(), 0);
+        assert_eq!(run_result.values().len(), 0);
+    }
 }

Reply via email to