rluvaton commented on code in PR #6913:
URL: https://github.com/apache/arrow-rs/pull/6913#discussion_r1901662382


##########
arrow-ord/src/sort.rs:
##########
@@ -801,19 +801,186 @@ mod tests {
     use half::f16;
     use rand::rngs::StdRng;
     use rand::{Rng, RngCore, SeedableRng};
+    use std::ops::Deref;
+
+    mod build_arrays_helper {
+        use crate::sort::tests::get_same_lists_length;
+        use arrow_array::{
+            ArrayRef, ArrowPrimitiveType, Decimal128Array, Decimal256Array, 
FixedSizeBinaryArray,
+            FixedSizeListArray, GenericBinaryArray, GenericListArray, 
LargeStringArray,
+            OffsetSizeTrait, PrimitiveArray, StringArray, StringViewArray,
+        };
+        use arrow_buffer::i256;
+        use std::sync::Arc;
+
+        pub fn decimal128_array(data: Vec<Option<i128>>) -> Vec<ArrayRef> {
+            vec![Arc::new(
+                data.into_iter()
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(23, 6)
+                    .unwrap(),
+            )]
+        }
 
-    fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
-        data.into_iter()
-            .collect::<Decimal128Array>()
-            .with_precision_and_scale(23, 6)
-            .unwrap()
+        pub fn decimal256_array(data: Vec<Option<i256>>) -> Vec<ArrayRef> {
+            vec![Arc::new(
+                data.into_iter()
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(53, 6)
+                    .unwrap(),
+            )]
+        }
+
+        pub fn primitive_arrays<T>(data: Vec<Option<T::Native>>) -> 
Vec<ArrayRef>
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            vec![Arc::new(PrimitiveArray::<T>::from(data)) as ArrayRef]
+        }
+
+        pub fn string_arrays(data: Vec<Option<&str>>) -> Vec<ArrayRef> {
+            vec![
+                Arc::new(StringArray::from(data.clone())) as ArrayRef,
+                Arc::new(LargeStringArray::from(data.clone())) as ArrayRef,
+                Arc::new(StringViewArray::from(data)) as ArrayRef,
+            ]
+        }
+
+        pub fn binary_arrays(data: Vec<Option<Vec<u8>>>) -> Vec<ArrayRef> {
+            // Generic size binary array
+            fn generic_binary_array<S: OffsetSizeTrait>(
+                data: &[Option<Vec<u8>>],
+            ) -> Arc<GenericBinaryArray<S>> {
+                Arc::new(GenericBinaryArray::<S>::from_opt_vec(
+                    data.iter()
+                        .map(|binary| binary.as_ref().map(Vec::as_slice))
+                        .collect(),
+                ))
+            }
+
+            let mut arrays = vec![
+                generic_binary_array::<i32>(&data) as ArrayRef,
+                generic_binary_array::<i64>(&data) as ArrayRef,
+            ];
+
+            if let Some(first_length) = get_same_lists_length(&data) {
+                arrays.push(Arc::new(
+                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                        data.iter().cloned(),
+                        first_length as i32,
+                    )
+                    .unwrap(),
+                ));
+            }
+
+            arrays
+        }
+
+        pub fn primitive_generic_list_array<OffsetSize, T>(
+            data: &[Option<Vec<Option<T::Native>>>],
+        ) -> ArrayRef
+        where
+            OffsetSize: OffsetSizeTrait,
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            Arc::new(GenericListArray::<OffsetSize>::from_iter_primitive::<T, 
_, _>(data.to_vec()))
+        }
+
+        pub fn primitive_fixed_list_array<T>(
+            data: &[Option<Vec<Option<T::Native>>>],
+            length: i32,
+        ) -> ArrayRef
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
+                data.to_vec(),
+                length,
+            ))
+        }
+
+        pub fn primitive_list_arrays<T>(data: 
Vec<Option<Vec<Option<T::Native>>>>) -> Vec<ArrayRef>
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            let mut arrays = vec![
+                primitive_generic_list_array::<i32, T>(&data),
+                primitive_generic_list_array::<i64, T>(&data),
+            ];
+
+            if let Some(first_length) = get_same_lists_length(&data) {
+                arrays.push(primitive_fixed_list_array::<T>(&data, 
first_length as i32));
+            }
+
+            arrays
+        }
     }
 
-    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
-        data.into_iter()
-            .collect::<Decimal256Array>()
-            .with_precision_and_scale(53, 6)
-            .unwrap()
+    /// Return Some(length) if all lists have the same length, None otherwise
+    fn get_same_lists_length<T>(data: &[Option<Vec<T>>]) -> Option<usize> {
+        let first_length = data
+            .iter()
+            .find_map(|x| x.as_ref().map(|x| x.len()))
+            .unwrap_or(0);
+        let first_non_match_length = data
+            .iter()
+            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
+            .any(|x| x != first_length);
+
+        if first_non_match_length {
+            None
+        } else {
+            Some(first_length)
+        }
+    }
+
+    /// Test sorting arrays
+    ///
+    /// the `into_lists_fn` function is used to convert the data into the list 
variants wanted to be tested, (e.g. ListArray, LargeListArray, 
FixedSizeListArray or any other list variant)
+    /// The ordering of the returned vector must be consistent as it will be 
called with the data and the expected data
+    fn test_sort_arrays<
+        ListItemType: Clone,
+        IntoListFn: Fn(Vec<Option<ListItemType>>) -> Vec<ArrayRef>,
+    >(
+        into_lists_fn: IntoListFn,
+        data: Vec<Option<ListItemType>>,
+        options: Option<SortOptions>,
+        limit: Option<usize>,
+        expected_data: Vec<Option<ListItemType>>,
+    ) {
+        let input = into_lists_fn(data);
+        let mut expected = into_lists_fn(expected_data);
+
+        // Filter out mismatched data types
+        // This can happen for getting some type of Fixed array in the 
expected (due to limit or something), but not in the input
+        if input.len() != expected.len() {
+            let all_input_specific_class = input
+                .iter()
+                .map(|array| array.data_type())
+                .collect::<Vec<_>>();
+            expected = expected
+                .into_iter()
+                .filter(|array| 
all_input_specific_class.contains(&array.data_type()))
+                .collect::<Vec<_>>()
+        }
+
+        assert_eq!(
+            input.len(),
+            expected.len(),
+            "The input and expected data must have the same number of array 
variants"
+        );
+
+        for (input, expected) in input.iter().zip(expected.iter()) {
+            let sorted = match limit {
+                Some(_) => sort_limit(input, options, limit).unwrap(),
+                _ => sort(input, options).unwrap(),
+            };

Review Comment:
   I prefer to keep it as is as the `sort_limit` does not call `sort` when 
`limit` is `None`. and this is how it was in the original tests



##########
arrow-ord/src/sort.rs:
##########
@@ -801,19 +801,186 @@ mod tests {
     use half::f16;
     use rand::rngs::StdRng;
     use rand::{Rng, RngCore, SeedableRng};
+    use std::ops::Deref;
+
+    mod build_arrays_helper {
+        use crate::sort::tests::get_same_lists_length;
+        use arrow_array::{
+            ArrayRef, ArrowPrimitiveType, Decimal128Array, Decimal256Array, 
FixedSizeBinaryArray,
+            FixedSizeListArray, GenericBinaryArray, GenericListArray, 
LargeStringArray,
+            OffsetSizeTrait, PrimitiveArray, StringArray, StringViewArray,
+        };
+        use arrow_buffer::i256;
+        use std::sync::Arc;
+
+        pub fn decimal128_array(data: Vec<Option<i128>>) -> Vec<ArrayRef> {
+            vec![Arc::new(
+                data.into_iter()
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(23, 6)
+                    .unwrap(),
+            )]
+        }
 
-    fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
-        data.into_iter()
-            .collect::<Decimal128Array>()
-            .with_precision_and_scale(23, 6)
-            .unwrap()
+        pub fn decimal256_array(data: Vec<Option<i256>>) -> Vec<ArrayRef> {
+            vec![Arc::new(
+                data.into_iter()
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(53, 6)
+                    .unwrap(),
+            )]
+        }
+
+        pub fn primitive_arrays<T>(data: Vec<Option<T::Native>>) -> 
Vec<ArrayRef>
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            vec![Arc::new(PrimitiveArray::<T>::from(data)) as ArrayRef]
+        }
+
+        pub fn string_arrays(data: Vec<Option<&str>>) -> Vec<ArrayRef> {
+            vec![
+                Arc::new(StringArray::from(data.clone())) as ArrayRef,
+                Arc::new(LargeStringArray::from(data.clone())) as ArrayRef,
+                Arc::new(StringViewArray::from(data)) as ArrayRef,
+            ]
+        }
+
+        pub fn binary_arrays(data: Vec<Option<Vec<u8>>>) -> Vec<ArrayRef> {
+            // Generic size binary array
+            fn generic_binary_array<S: OffsetSizeTrait>(
+                data: &[Option<Vec<u8>>],
+            ) -> Arc<GenericBinaryArray<S>> {
+                Arc::new(GenericBinaryArray::<S>::from_opt_vec(
+                    data.iter()
+                        .map(|binary| binary.as_ref().map(Vec::as_slice))
+                        .collect(),
+                ))
+            }
+
+            let mut arrays = vec![
+                generic_binary_array::<i32>(&data) as ArrayRef,
+                generic_binary_array::<i64>(&data) as ArrayRef,
+            ];
+
+            if let Some(first_length) = get_same_lists_length(&data) {
+                arrays.push(Arc::new(
+                    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                        data.iter().cloned(),
+                        first_length as i32,
+                    )
+                    .unwrap(),
+                ));
+            }
+
+            arrays
+        }
+
+        pub fn primitive_generic_list_array<OffsetSize, T>(
+            data: &[Option<Vec<Option<T::Native>>>],
+        ) -> ArrayRef
+        where
+            OffsetSize: OffsetSizeTrait,
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            Arc::new(GenericListArray::<OffsetSize>::from_iter_primitive::<T, 
_, _>(data.to_vec()))
+        }
+
+        pub fn primitive_fixed_list_array<T>(
+            data: &[Option<Vec<Option<T::Native>>>],
+            length: i32,
+        ) -> ArrayRef
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
+                data.to_vec(),
+                length,
+            ))
+        }
+
+        pub fn primitive_list_arrays<T>(data: 
Vec<Option<Vec<Option<T::Native>>>>) -> Vec<ArrayRef>
+        where
+            T: ArrowPrimitiveType,
+            PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
+        {
+            let mut arrays = vec![
+                primitive_generic_list_array::<i32, T>(&data),
+                primitive_generic_list_array::<i64, T>(&data),
+            ];
+
+            if let Some(first_length) = get_same_lists_length(&data) {
+                arrays.push(primitive_fixed_list_array::<T>(&data, 
first_length as i32));
+            }
+
+            arrays
+        }
     }
 
-    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
-        data.into_iter()
-            .collect::<Decimal256Array>()
-            .with_precision_and_scale(53, 6)
-            .unwrap()
+    /// Return Some(length) if all lists have the same length, None otherwise
+    fn get_same_lists_length<T>(data: &[Option<Vec<T>>]) -> Option<usize> {
+        let first_length = data
+            .iter()
+            .find_map(|x| x.as_ref().map(|x| x.len()))
+            .unwrap_or(0);
+        let first_non_match_length = data
+            .iter()
+            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
+            .any(|x| x != first_length);
+
+        if first_non_match_length {
+            None
+        } else {
+            Some(first_length)
+        }
+    }
+
+    /// Test sorting arrays
+    ///
+    /// the `into_lists_fn` function is used to convert the data into the list 
variants wanted to be tested, (e.g. ListArray, LargeListArray, 
FixedSizeListArray or any other list variant)
+    /// The ordering of the returned vector must be consistent as it will be 
called with the data and the expected data
+    fn test_sort_arrays<
+        ListItemType: Clone,
+        IntoListFn: Fn(Vec<Option<ListItemType>>) -> Vec<ArrayRef>,
+    >(
+        into_lists_fn: IntoListFn,
+        data: Vec<Option<ListItemType>>,
+        options: Option<SortOptions>,
+        limit: Option<usize>,
+        expected_data: Vec<Option<ListItemType>>,
+    ) {

Review Comment:
   thanks, applied



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to