This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 20d81f578 Add FixedSizeBinaryArray::try_from_sparse_iter_with_size 
(#3054)
20d81f578 is described below

commit 20d81f5784e5609666fa3d0599f2f95e5e5a40c6
Author: Max Burke <[email protected]>
AuthorDate: Sun Nov 13 10:23:29 2022 -0800

    Add FixedSizeBinaryArray::try_from_sparse_iter_with_size (#3054)
    
    * implement ord for FixedSizeBinary types
    
    * add ord test
    
    * add compare for fixed size binary arrays
    
    * FixedSizeBinaryArray::try_from_sparse_iter has fails to generate a valid 
array when the iterator only produces None values
    
    * Tweak try_from_sparse_iter_with_size to take an Option<i32>; pass tests
    
    * simplify try_from_sparse_iter_with_size, make size parameter non-optional
    
    * add test for fixed size binary comparisons
    
    * move tests to use FixedSizeBinaryArray::from_sparse_iter_with_size, add 
docstring
    
    * format + fix failing tests
    
    * fix build
---
 arrow-array/src/array/fixed_size_binary_array.rs | 121 +++++++++++++++++++++--
 arrow-select/src/take.rs                         |   7 +-
 arrow/src/array/ffi.rs                           |   3 +-
 arrow/src/compute/kernels/comparison.rs          |  41 ++++++++
 arrow/src/compute/kernels/sort.rs                |  15 ++-
 arrow/src/compute/kernels/substring.rs           |   3 +-
 arrow/src/ffi.rs                                 |   8 +-
 arrow/src/util/bench_util.rs                     |  25 ++---
 arrow/tests/array_transform.rs                   |   9 +-
 9 files changed, 198 insertions(+), 34 deletions(-)

diff --git a/arrow-array/src/array/fixed_size_binary_array.rs 
b/arrow-array/src/array/fixed_size_binary_array.rs
index f37d1e3e5..9bac49810 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -129,6 +129,9 @@ impl FixedSizeBinaryArray {
     /// # Errors
     ///
     /// Returns error if argument has length zero, or sizes of nested slices 
don't match.
+    #[deprecated(
+        note = "This function will fail if the iterator produces only None 
values; prefer `try_from_sparse_iter_with_size`"
+    )]
     pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
     where
         T: Iterator<Item = Option<U>>,
@@ -196,6 +199,86 @@ impl FixedSizeBinaryArray {
         Ok(FixedSizeBinaryArray::from(array_data))
     }
 
+    /// Create an array from an iterable argument of sparse byte slices.
+    /// Sparsity means that items returned by the iterator are optional, i.e 
input argument can
+    /// contain `None` items. In cases where the iterator returns only `None` 
values, this
+    /// also takes a size parameter to ensure that the a valid 
FixedSizeBinaryArray is still
+    /// created.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use arrow_array::FixedSizeBinaryArray;
+    /// let input_arg = vec![
+    ///     None,
+    ///     Some(vec![7, 8]),
+    ///     Some(vec![9, 10]),
+    ///     None,
+    ///     Some(vec![13, 14]),
+    ///     None,
+    /// ];
+    /// let array = 
FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 
2).unwrap();
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns error if argument has length zero, or sizes of nested slices 
don't match.
+    pub fn try_from_sparse_iter_with_size<T, U>(
+        mut iter: T,
+        size: i32,
+    ) -> Result<Self, ArrowError>
+    where
+        T: Iterator<Item = Option<U>>,
+        U: AsRef<[u8]>,
+    {
+        let mut len = 0;
+        let mut byte = 0;
+        let mut null_buf = MutableBuffer::from_len_zeroed(0);
+        let mut buffer = MutableBuffer::from_len_zeroed(0);
+
+        iter.try_for_each(|item| -> Result<(), ArrowError> {
+            // extend null bitmask by one byte per each 8 items
+            if byte == 0 {
+                null_buf.push(0u8);
+                byte = 8;
+            }
+            byte -= 1;
+
+            if let Some(slice) = item {
+                let slice = slice.as_ref();
+                if size as usize != slice.len() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Nested array size mismatch: one is {}, and the other 
is {}",
+                        size,
+                        slice.len()
+                    )));
+                }
+
+                bit_util::set_bit(null_buf.as_slice_mut(), len);
+                buffer.extend_from_slice(slice);
+            } else {
+                buffer.extend_zeros(size as usize);
+            }
+
+            len += 1;
+
+            Ok(())
+        })?;
+
+        let array_data = unsafe {
+            ArrayData::new_unchecked(
+                DataType::FixedSizeBinary(size),
+                len,
+                None,
+                Some(null_buf.into()),
+                0,
+                vec![buffer.into()],
+                vec![],
+            )
+        };
+        Ok(FixedSizeBinaryArray::from(array_data))
+    }
+
     /// Create an array from an iterable argument of byte slices.
     ///
     /// # Examples
@@ -333,6 +416,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
 
 impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray {
     fn from(v: Vec<Option<&[u8]>>) -> Self {
+        #[allow(deprecated)]
         Self::try_from_sparse_iter(v.into_iter()).unwrap()
     }
 }
@@ -561,6 +645,7 @@ mod tests {
     fn test_all_none_fixed_size_binary_array_from_sparse_iter() {
         let none_option: Option<[u8; 32]> = None;
         let input_arg = vec![none_option, none_option, none_option];
+        #[allow(deprecated)]
         let arr =
             
FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
         assert_eq!(0, arr.value_length());
@@ -576,9 +661,31 @@ mod tests {
             None,
             Some(vec![13, 14]),
         ];
-        let arr =
-            
FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
+        #[allow(deprecated)]
+        let arr = 
FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned())
+            .unwrap();
         assert_eq!(2, arr.value_length());
+        assert_eq!(5, arr.len());
+
+        let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+            input_arg.into_iter(),
+            2,
+        )
+        .unwrap();
+        assert_eq!(2, arr.value_length());
+        assert_eq!(5, arr.len());
+    }
+
+    #[test]
+    fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
+        let input_arg = vec![None, None, None, None, None] as 
Vec<Option<Vec<u8>>>;
+
+        let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+            input_arg.into_iter(),
+            16,
+        )
+        .unwrap();
+        assert_eq!(16, arr.value_length());
         assert_eq!(5, arr.len())
     }
 
@@ -643,7 +750,9 @@ mod tests {
     #[test]
     fn fixed_size_binary_array_all_null() {
         let data = vec![None] as Vec<Option<String>>;
-        let array = 
FixedSizeBinaryArray::try_from_sparse_iter(data.into_iter()).unwrap();
+        let array =
+            
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0)
+                .unwrap();
         array
             .data()
             .validate_full()
@@ -652,16 +761,14 @@ mod tests {
 
     #[test]
     // Test for https://github.com/apache/arrow-rs/issues/1390
-    #[should_panic(
-        expected = "column types must match schema types, expected 
FixedSizeBinary(2) but found FixedSizeBinary(0) at column index 0"
-    )]
     fn fixed_size_binary_array_all_null_in_batch_with_schema() {
         let schema =
             Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), 
true)]);
 
         let none_option: Option<[u8; 2]> = None;
-        let item = FixedSizeBinaryArray::try_from_sparse_iter(
+        let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
             vec![none_option, none_option, none_option].into_iter(),
+            2,
         )
         .unwrap();
 
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index d34a88ba5..4af876a79 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -207,12 +207,12 @@ where
         DataType::LargeBinary => {
             Ok(Arc::new(take_bytes(as_generic_binary_array::<i64>(values), 
indices)?))
         }
-        DataType::FixedSizeBinary(_) => {
+        DataType::FixedSizeBinary(size) => {
             let values = values
                 .as_any()
                 .downcast_ref::<FixedSizeBinaryArray>()
                 .unwrap();
-            Ok(Arc::new(take_fixed_size_binary(values, indices)?))
+            Ok(Arc::new(take_fixed_size_binary(values, indices, *size)?))
         }
         DataType::Null => {
             // Take applied to a null array produces a null array.
@@ -769,6 +769,7 @@ where
 fn take_fixed_size_binary<IndexType>(
     values: &FixedSizeBinaryArray,
     indices: &PrimitiveArray<IndexType>,
+    size: i32,
 ) -> Result<FixedSizeBinaryArray, ArrowError>
 where
     IndexType: ArrowPrimitiveType,
@@ -789,7 +790,7 @@ where
         .collect::<Result<Vec<_>, ArrowError>>()?
         .into_iter();
 
-    FixedSizeBinaryArray::try_from_sparse_iter(array_iter)
+    FixedSizeBinaryArray::try_from_sparse_iter_with_size(array_iter, size)
 }
 
 /// `take` implementation for dictionary arrays
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index 72030f900..a18f408a4 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -212,7 +212,8 @@ mod tests {
             Some(vec![30, 30, 30]),
             None,
         ];
-        let array = 
FixedSizeBinaryArray::try_from_sparse_iter(values.into_iter())?;
+        let array =
+            
FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
 
         let data = array.data();
         test_round_trip(data)
diff --git a/arrow/src/compute/kernels/comparison.rs 
b/arrow/src/compute/kernels/comparison.rs
index a286eedd1..4566b4969 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -2270,6 +2270,18 @@ macro_rules! typed_compares {
                 as_largestring_array($RIGHT),
                 $OP,
             ),
+            (DataType::FixedSizeBinary(_), DataType::FixedSizeBinary(_)) => {
+                let lhs = $LEFT
+                    .as_any()
+                    .downcast_ref::<FixedSizeBinaryArray>()
+                    .unwrap();
+                let rhs = $RIGHT
+                    .as_any()
+                    .downcast_ref::<FixedSizeBinaryArray>()
+                    .unwrap();
+
+                compare_op(lhs, rhs, $OP)
+            }
             (DataType::Binary, DataType::Binary) => compare_op(
                 as_generic_binary_array::<i32>($LEFT),
                 as_generic_binary_array::<i32>($RIGHT),
@@ -5449,6 +5461,35 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_eq_dyn_neq_dyn_fixed_size_binary() {
+        use crate::array::FixedSizeBinaryArray;
+
+        let values1: Vec<Option<&[u8]>> =
+            vec![Some(&[0xfc, 0xa9]), None, Some(&[0x36, 0x01])];
+        let values2: Vec<Option<&[u8]>> =
+            vec![Some(&[0xfc, 0xa9]), None, Some(&[0x36, 0x00])];
+
+        let array1 =
+            
FixedSizeBinaryArray::try_from_sparse_iter_with_size(values1.into_iter(), 2)
+                .unwrap();
+        let array2 =
+            
FixedSizeBinaryArray::try_from_sparse_iter_with_size(values2.into_iter(), 2)
+                .unwrap();
+
+        let result = eq_dyn(&array1, &array2).unwrap();
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(false)]),
+            result
+        );
+
+        let result = neq_dyn(&array1, &array2).unwrap();
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(true)]),
+            result
+        );
+    }
+
     #[test]
     #[cfg(feature = "dyn_cmp_dict")]
     fn test_eq_dyn_neq_dyn_dictionary_i8_array() {
diff --git a/arrow/src/compute/kernels/sort.rs 
b/arrow/src/compute/kernels/sort.rs
index 97b0758e5..81895760e 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -1437,17 +1437,24 @@ mod tests {
         fixed_length: Option<i32>,
     ) {
         // Fixed size binary array
-        if fixed_length.is_some() {
+        if let Some(length) = fixed_length {
             let input = Arc::new(
-                
FixedSizeBinaryArray::try_from_sparse_iter(data.iter().cloned()).unwrap(),
+                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                    data.iter().cloned(),
+                    length,
+                )
+                .unwrap(),
             );
             let sorted = match limit {
                 Some(_) => sort_limit(&(input as ArrayRef), options, 
limit).unwrap(),
                 None => sort(&(input as ArrayRef), options).unwrap(),
             };
             let expected = Arc::new(
-                
FixedSizeBinaryArray::try_from_sparse_iter(expected_data.iter().cloned())
-                    .unwrap(),
+                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                    expected_data.iter().cloned(),
+                    length,
+                )
+                .unwrap(),
             ) as ArrayRef;
 
             assert_eq!(&sorted, &expected);
diff --git a/arrow/src/compute/kernels/substring.rs 
b/arrow/src/compute/kernels/substring.rs
index f52ddb3bc..76568ae0d 100644
--- a/arrow/src/compute/kernels/substring.rs
+++ b/arrow/src/compute/kernels/substring.rs
@@ -713,8 +713,9 @@ mod tests {
             .as_any()
             .downcast_ref::<FixedSizeBinaryArray>()
             .unwrap();
-        let expected = FixedSizeBinaryArray::try_from_sparse_iter(
+        let expected = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
             vec![None, Some(b"rrow")].into_iter(),
+            4,
         )
         .unwrap();
         assert_eq!(result, &expected);
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 95e6dce3c..03c265318 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -1231,7 +1231,8 @@ mod tests {
             Some(vec![30, 30, 30]),
             None,
         ];
-        let array = 
FixedSizeBinaryArray::try_from_sparse_iter(values.into_iter())?;
+        let array =
+            
FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
 
         // export it
         let array = ArrowArray::try_from(array.into_data())?;
@@ -1250,7 +1251,7 @@ mod tests {
         // verify
         assert_eq!(
             array,
-            &FixedSizeBinaryArray::try_from_sparse_iter(
+            &FixedSizeBinaryArray::try_from_sparse_iter_with_size(
                 vec![
                     None,
                     Some(vec![10, 10, 10]),
@@ -1265,7 +1266,8 @@ mod tests {
                     Some(vec![30, 30, 30]),
                     None,
                 ]
-                .into_iter()
+                .into_iter(),
+                3
             )?
         );
 
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index d07443301..6420b6346 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -176,17 +176,20 @@ pub fn create_fsb_array(
 ) -> FixedSizeBinaryArray {
     let rng = &mut seedable_rng();
 
-    FixedSizeBinaryArray::try_from_sparse_iter((0..size).map(|_| {
-        if rng.gen::<f32>() < null_density {
-            None
-        } else {
-            let value = rng
-                .sample_iter::<u8, _>(Standard)
-                .take(value_len)
-                .collect::<Vec<u8>>();
-            Some(value)
-        }
-    }))
+    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+        (0..size).map(|_| {
+            if rng.gen::<f32>() < null_density {
+                None
+            } else {
+                let value = rng
+                    .sample_iter::<u8, _>(Standard)
+                    .take(value_len)
+                    .collect::<Vec<u8>>();
+                Some(value)
+            }
+        }),
+        value_len as i32,
+    )
     .unwrap()
 }
 
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index 3619abacd..03942be10 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -868,7 +868,7 @@ fn test_list_of_strings_append() {
 #[test]
 fn test_fixed_size_binary_append() {
     let a = vec![Some(vec![1, 2]), Some(vec![3, 4]), Some(vec![5, 6])];
-    let a = FixedSizeBinaryArray::try_from_sparse_iter(a.into_iter())
+    let a = 
FixedSizeBinaryArray::try_from_sparse_iter_with_size(a.into_iter(), 2)
         .expect("Failed to create FixedSizeBinaryArray from iterable");
 
     let b = vec![
@@ -879,7 +879,7 @@ fn test_fixed_size_binary_append() {
         Some(vec![13, 14]),
         None,
     ];
-    let b = FixedSizeBinaryArray::try_from_sparse_iter(b.into_iter())
+    let b = 
FixedSizeBinaryArray::try_from_sparse_iter_with_size(b.into_iter(), 2)
         .expect("Failed to create FixedSizeBinaryArray from iterable");
 
     let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 
10);
@@ -911,8 +911,9 @@ fn test_fixed_size_binary_append() {
         Some(vec![9, 10]),
         // b[4..4]
     ];
-    let expected = 
FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter())
-        .expect("Failed to create FixedSizeBinaryArray from iterable");
+    let expected =
+        
FixedSizeBinaryArray::try_from_sparse_iter_with_size(expected.into_iter(), 2)
+            .expect("Failed to create FixedSizeBinaryArray from iterable");
     assert_eq!(&result, expected.data());
 }
 

Reply via email to