This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 376c9ad  ARROW-4854: [Rust] Use zero-copy slice for limit kernel
376c9ad is described below

commit 376c9ad63b21d872ecf9cbac221ff8b37a08b3af
Author: Neville Dipale <[email protected]>
AuthorDate: Thu Mar 21 14:29:49 2019 +0100

    ARROW-4854: [Rust] Use zero-copy slice for limit kernel
    
    Has the benefit of being faster and supporting all array types
    
    Author: Neville Dipale <[email protected]>
    
    Closes #4000 from nevi-me/ARROW-4854 and squashes the following commits:
    
    304647bc <Neville Dipale> ARROW-4854:  Use zero-copy slice for limit kernel
---
 rust/arrow/src/compute/array_ops.rs | 173 ++++++++++++++++++++++++++----------
 1 file changed, 126 insertions(+), 47 deletions(-)

diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs
index 088661d..d9de6d7 100644
--- a/rust/arrow/src/compute/array_ops.rs
+++ b/rust/arrow/src/compute/array_ops.rs
@@ -153,6 +153,7 @@ macro_rules! filter_array {
     }};
 }
 
+/// Returns the array, taking only the elements matching the filter
 pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
     match array.data_type() {
         DataType::UInt8 => filter_array!(array, filter, UInt8Array),
@@ -183,61 +184,25 @@ pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
     }
 }
 
-macro_rules! limit_array {
-    ($array:expr, $num_elements:expr, $array_type:ident) => {{
-        let b = $array.as_any().downcast_ref::<$array_type>().unwrap();
-        let mut builder = $array_type::builder($num_elements);
-        for i in 0..$num_elements {
-            if b.is_null(i) {
-                builder.append_null()?;
-            } else {
-                builder.append_value(b.value(i))?;
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
 /// Returns the array, taking only the number of elements specified
 ///
-/// Returns the whole array if the number of elements specified is larger than the length
-/// of the array
+/// Limit performs a zero-copy slice of the array, and is a convenience method on slice
+/// where:
+/// * it performs a bounds-check on the array
+/// * it slices from offset 0
 pub fn limit(array: &ArrayRef, num_elements: usize) -> Result<ArrayRef> {
-    if num_elements >= array.len() {
-        return Ok(array.clone());
-    }
-
-    match array.data_type() {
-        DataType::UInt8 => limit_array!(array, num_elements, UInt8Array),
-        DataType::UInt16 => limit_array!(array, num_elements, UInt16Array),
-        DataType::UInt32 => limit_array!(array, num_elements, UInt32Array),
-        DataType::UInt64 => limit_array!(array, num_elements, UInt64Array),
-        DataType::Int8 => limit_array!(array, num_elements, Int8Array),
-        DataType::Int16 => limit_array!(array, num_elements, Int16Array),
-        DataType::Int32 => limit_array!(array, num_elements, Int32Array),
-        DataType::Int64 => limit_array!(array, num_elements, Int64Array),
-        DataType::Float32 => limit_array!(array, num_elements, Float32Array),
-        DataType::Float64 => limit_array!(array, num_elements, Float64Array),
-        DataType::Boolean => limit_array!(array, num_elements, BooleanArray),
-        DataType::Utf8 => {
-            let b = array.as_any().downcast_ref::<BinaryArray>().unwrap();
-            let mut values: Vec<&[u8]> = Vec::with_capacity(num_elements);
-            for i in 0..num_elements {
-                values.push(b.value(i));
-            }
-            Ok(Arc::new(BinaryArray::from(values)))
-        }
-        other => Err(ArrowError::ComputeError(format!(
-            "limit not supported for {:?}",
-            other
-        ))),
-    }
+    let lim = num_elements.min(array.len());
+    Ok(array.slice(0, lim))
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::array::{ArrayRef, Float64Array, Int32Array};
+    use crate::array::*;
+    use crate::array_data::ArrayData;
+    use crate::buffer::Buffer;
+    use crate::datatypes::{Field, ToByteSlice};
+    use crate::util::bit_util;
 
     use std::sync::Arc;
 
@@ -355,4 +320,118 @@ mod tests {
         assert_eq!(8, c.value(3));
         assert_eq!(9, c.value(4));
     }
+
+    #[test]
+    fn test_list_array_limit() {
+        // adapted from crate::array::test::test_list_array_slice
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int32)
+            .len(10)
+            .add_buffer(Buffer::from(
+                &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9].to_byte_slice(),
+            ))
+            .build();
+
+        // Construct a buffer for value offsets, for the nested array:
+        //  [[0, 1], null, [2, 3], null, [4, 5], null, [6, 7, 8], null, [9]]
+        let value_offsets =
+            Buffer::from(&[0, 2, 2, 4, 4, 6, 6, 9, 9, 10].to_byte_slice());
+        // 01010101 00000001
+        let mut null_bits: [u8; 2] = [0; 2];
+        bit_util::set_bit(&mut null_bits, 0);
+        bit_util::set_bit(&mut null_bits, 2);
+        bit_util::set_bit(&mut null_bits, 4);
+        bit_util::set_bit(&mut null_bits, 6);
+        bit_util::set_bit(&mut null_bits, 8);
+
+        // Construct a list array from the above two
+        let list_data_type = DataType::List(Box::new(DataType::Int32));
+        let list_data = ArrayData::builder(list_data_type.clone())
+            .len(9)
+            .add_buffer(value_offsets.clone())
+            .add_child_data(value_data.clone())
+            .null_bit_buffer(Buffer::from(null_bits))
+            .build();
+        let list_array: ArrayRef = Arc::new(ListArray::from(list_data));
+
+        let limit_array = limit(&list_array, 6).unwrap();
+        assert_eq!(6, limit_array.len());
+        assert_eq!(0, limit_array.offset());
+        assert_eq!(3, limit_array.null_count());
+
+        // Check offset and length for each non-null value.
+        let limit_array: &ListArray =
+            limit_array.as_any().downcast_ref::<ListArray>().unwrap();
+        for i in 0..limit_array.len() {
+            let offset = limit_array.value_offset(i);
+            let length = limit_array.value_length(i);
+            if i % 2 == 0 {
+                assert_eq!(2, length);
+                assert_eq!(i as i32, offset);
+            } else {
+                assert_eq!(0, length);
+            }
+        }
+    }
+
+    #[test]
+    fn test_struct_array_limit() {
+        // adapted from crate::array::test::test_struct_array_slice
+        let boolean_data = ArrayData::builder(DataType::Boolean)
+            .len(5)
+            .add_buffer(Buffer::from([0b00010000]))
+            .null_bit_buffer(Buffer::from([0b00010001]))
+            .build();
+        let int_data = ArrayData::builder(DataType::Int32)
+            .len(5)
+            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
+            .null_bit_buffer(Buffer::from([0b00000110]))
+            .build();
+
+        let mut field_types = vec![];
+        field_types.push(Field::new("a", DataType::Boolean, false));
+        field_types.push(Field::new("b", DataType::Int32, false));
+        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
+            .len(5)
+            .add_child_data(boolean_data.clone())
+            .add_child_data(int_data.clone())
+            .null_bit_buffer(Buffer::from([0b00010111]))
+            .build();
+        let struct_array = StructArray::from(struct_array_data);
+
+        assert_eq!(5, struct_array.len());
+        assert_eq!(1, struct_array.null_count());
+        assert_eq!(boolean_data, struct_array.column(0).data());
+        assert_eq!(int_data, struct_array.column(1).data());
+
+        let array: ArrayRef = Arc::new(struct_array);
+
+        let sliced_array = limit(&array, 3).unwrap();
+        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(3, sliced_array.len());
+        assert_eq!(0, sliced_array.offset());
+        assert_eq!(0, sliced_array.null_count());
+        assert!(sliced_array.is_valid(0));
+        assert!(sliced_array.is_valid(1));
+        assert!(sliced_array.is_valid(2));
+
+        let sliced_c0 = sliced_array.column(0);
+        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
+        assert_eq!(3, sliced_c0.len());
+        assert_eq!(0, sliced_c0.offset());
+        assert_eq!(2, sliced_c0.null_count());
+        assert!(sliced_c0.is_valid(0));
+        assert!(sliced_c0.is_null(1));
+        assert!(sliced_c0.is_null(2));
+        assert_eq!(false, sliced_c0.value(0));
+
+        let sliced_c1 = sliced_array.column(1);
+        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(3, sliced_c1.len());
+        assert_eq!(0, sliced_c1.offset());
+        assert_eq!(1, sliced_c1.null_count());
+        assert!(sliced_c1.is_null(0));
+        assert_eq!(28, sliced_c1.value(1));
+        assert_eq!(42, sliced_c1.value(2));
+    }
 }

Reply via email to