This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 5e8b0e0922 Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList 
(#8253)
5e8b0e0922 is described below

commit 5e8b0e09228925b01c8bcc7afe448a7487347872
Author: Jay Zhan <[email protected]>
AuthorDate: Thu Dec 7 23:23:00 2023 +0800

    Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList (#8253)
    
    * list cmp
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * remove cfg
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/common/src/scalar.rs | 110 +++++++++++++---------------------------
 1 file changed, 35 insertions(+), 75 deletions(-)

diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 1f302c7509..7e18c313e0 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -358,69 +358,47 @@ impl PartialOrd for ScalarValue {
             (FixedSizeBinary(_, _), _) => None,
             (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
             (LargeBinary(_), _) => None,
-            (List(arr1), List(arr2)) | (FixedSizeList(arr1), 
FixedSizeList(arr2)) => {
-                if arr1.data_type() == arr2.data_type() {
-                    let list_arr1 = as_list_array(arr1);
-                    let list_arr2 = as_list_array(arr2);
-                    if list_arr1.len() != list_arr2.len() {
-                        return None;
-                    }
-                    for i in 0..list_arr1.len() {
-                        let arr1 = list_arr1.value(i);
-                        let arr2 = list_arr2.value(i);
-
-                        let lt_res =
-                            arrow::compute::kernels::cmp::lt(&arr1, 
&arr2).ok()?;
-                        let eq_res =
-                            arrow::compute::kernels::cmp::eq(&arr1, 
&arr2).ok()?;
-
-                        for j in 0..lt_res.len() {
-                            if lt_res.is_valid(j) && lt_res.value(j) {
-                                return Some(Ordering::Less);
-                            }
-                            if eq_res.is_valid(j) && !eq_res.value(j) {
-                                return Some(Ordering::Greater);
-                            }
-                        }
+            (List(arr1), List(arr2))
+            | (FixedSizeList(arr1), FixedSizeList(arr2))
+            | (LargeList(arr1), LargeList(arr2)) => {
+                // ScalarValue::List / ScalarValue::FixedSizeList / 
ScalarValue::LargeList are ensure to have length 1
+                assert_eq!(arr1.len(), 1);
+                assert_eq!(arr2.len(), 1);
+
+                if arr1.data_type() != arr2.data_type() {
+                    return None;
+                }
+
+                fn first_array_for_list(arr: &ArrayRef) -> ArrayRef {
+                    if let Some(arr) = arr.as_list_opt::<i32>() {
+                        arr.value(0)
+                    } else if let Some(arr) = arr.as_list_opt::<i64>() {
+                        arr.value(0)
+                    } else if let Some(arr) = arr.as_fixed_size_list_opt() {
+                        arr.value(0)
+                    } else {
+                        unreachable!("Since only List / LargeList / 
FixedSizeList are supported, this should never happen")
                     }
-                    Some(Ordering::Equal)
-                } else {
-                    None
                 }
-            }
-            (LargeList(arr1), LargeList(arr2)) => {
-                if arr1.data_type() == arr2.data_type() {
-                    let list_arr1 = as_large_list_array(arr1);
-                    let list_arr2 = as_large_list_array(arr2);
-                    if list_arr1.len() != list_arr2.len() {
-                        return None;
+
+                let arr1 = first_array_for_list(arr1);
+                let arr2 = first_array_for_list(arr2);
+
+                let lt_res = arrow::compute::kernels::cmp::lt(&arr1, 
&arr2).ok()?;
+                let eq_res = arrow::compute::kernels::cmp::eq(&arr1, 
&arr2).ok()?;
+
+                for j in 0..lt_res.len() {
+                    if lt_res.is_valid(j) && lt_res.value(j) {
+                        return Some(Ordering::Less);
                     }
-                    for i in 0..list_arr1.len() {
-                        let arr1 = list_arr1.value(i);
-                        let arr2 = list_arr2.value(i);
-
-                        let lt_res =
-                            arrow::compute::kernels::cmp::lt(&arr1, 
&arr2).ok()?;
-                        let eq_res =
-                            arrow::compute::kernels::cmp::eq(&arr1, 
&arr2).ok()?;
-
-                        for j in 0..lt_res.len() {
-                            if lt_res.is_valid(j) && lt_res.value(j) {
-                                return Some(Ordering::Less);
-                            }
-                            if eq_res.is_valid(j) && !eq_res.value(j) {
-                                return Some(Ordering::Greater);
-                            }
-                        }
+                    if eq_res.is_valid(j) && !eq_res.value(j) {
+                        return Some(Ordering::Greater);
                     }
-                    Some(Ordering::Equal)
-                } else {
-                    None
                 }
+
+                Some(Ordering::Equal)
             }
-            (List(_), _) => None,
-            (LargeList(_), _) => None,
-            (FixedSizeList(_), _) => None,
+            (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
             (Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
             (Date32(_), _) => None,
             (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
@@ -3644,24 +3622,6 @@ mod tests {
                 ])]),
             ));
         assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
-
-        let a =
-            ScalarValue::List(Arc::new(
-                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
-                    Some(vec![Some(10), Some(2), Some(3)]),
-                    None,
-                    Some(vec![Some(10), Some(2), Some(3)]),
-                ]),
-            ));
-        let b =
-            ScalarValue::List(Arc::new(
-                ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
-                    Some(vec![Some(10), Some(2), Some(3)]),
-                    None,
-                    Some(vec![Some(10), Some(2), Some(3)]),
-                ]),
-            ));
-        assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
     }
 
     #[test]

Reply via email to