This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5e8b0e0922 Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList
(#8253)
5e8b0e0922 is described below
commit 5e8b0e09228925b01c8bcc7afe448a7487347872
Author: Jay Zhan <[email protected]>
AuthorDate: Thu Dec 7 23:23:00 2023 +0800
Fix PartialOrd for ScalarValue::List/FixSizeList/LargeList (#8253)
* list cmp
Signed-off-by: jayzhan211 <[email protected]>
* remove cfg
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/common/src/scalar.rs | 110 +++++++++++++---------------------------
1 file changed, 35 insertions(+), 75 deletions(-)
diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 1f302c7509..7e18c313e0 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -358,69 +358,47 @@ impl PartialOrd for ScalarValue {
(FixedSizeBinary(_, _), _) => None,
(LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
(LargeBinary(_), _) => None,
- (List(arr1), List(arr2)) | (FixedSizeList(arr1),
FixedSizeList(arr2)) => {
- if arr1.data_type() == arr2.data_type() {
- let list_arr1 = as_list_array(arr1);
- let list_arr2 = as_list_array(arr2);
- if list_arr1.len() != list_arr2.len() {
- return None;
- }
- for i in 0..list_arr1.len() {
- let arr1 = list_arr1.value(i);
- let arr2 = list_arr2.value(i);
-
- let lt_res =
- arrow::compute::kernels::cmp::lt(&arr1,
&arr2).ok()?;
- let eq_res =
- arrow::compute::kernels::cmp::eq(&arr1,
&arr2).ok()?;
-
- for j in 0..lt_res.len() {
- if lt_res.is_valid(j) && lt_res.value(j) {
- return Some(Ordering::Less);
- }
- if eq_res.is_valid(j) && !eq_res.value(j) {
- return Some(Ordering::Greater);
- }
- }
+ (List(arr1), List(arr2))
+ | (FixedSizeList(arr1), FixedSizeList(arr2))
+ | (LargeList(arr1), LargeList(arr2)) => {
+ // ScalarValue::List / ScalarValue::FixedSizeList /
ScalarValue::LargeList are ensure to have length 1
+ assert_eq!(arr1.len(), 1);
+ assert_eq!(arr2.len(), 1);
+
+ if arr1.data_type() != arr2.data_type() {
+ return None;
+ }
+
+ fn first_array_for_list(arr: &ArrayRef) -> ArrayRef {
+ if let Some(arr) = arr.as_list_opt::<i32>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_list_opt::<i64>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_fixed_size_list_opt() {
+ arr.value(0)
+ } else {
+ unreachable!("Since only List / LargeList /
FixedSizeList are supported, this should never happen")
}
- Some(Ordering::Equal)
- } else {
- None
}
- }
- (LargeList(arr1), LargeList(arr2)) => {
- if arr1.data_type() == arr2.data_type() {
- let list_arr1 = as_large_list_array(arr1);
- let list_arr2 = as_large_list_array(arr2);
- if list_arr1.len() != list_arr2.len() {
- return None;
+
+ let arr1 = first_array_for_list(arr1);
+ let arr2 = first_array_for_list(arr2);
+
+ let lt_res = arrow::compute::kernels::cmp::lt(&arr1,
&arr2).ok()?;
+ let eq_res = arrow::compute::kernels::cmp::eq(&arr1,
&arr2).ok()?;
+
+ for j in 0..lt_res.len() {
+ if lt_res.is_valid(j) && lt_res.value(j) {
+ return Some(Ordering::Less);
}
- for i in 0..list_arr1.len() {
- let arr1 = list_arr1.value(i);
- let arr2 = list_arr2.value(i);
-
- let lt_res =
- arrow::compute::kernels::cmp::lt(&arr1,
&arr2).ok()?;
- let eq_res =
- arrow::compute::kernels::cmp::eq(&arr1,
&arr2).ok()?;
-
- for j in 0..lt_res.len() {
- if lt_res.is_valid(j) && lt_res.value(j) {
- return Some(Ordering::Less);
- }
- if eq_res.is_valid(j) && !eq_res.value(j) {
- return Some(Ordering::Greater);
- }
- }
+ if eq_res.is_valid(j) && !eq_res.value(j) {
+ return Some(Ordering::Greater);
}
- Some(Ordering::Equal)
- } else {
- None
}
+
+ Some(Ordering::Equal)
}
- (List(_), _) => None,
- (LargeList(_), _) => None,
- (FixedSizeList(_), _) => None,
+ (List(_), _) | (LargeList(_), _) | (FixedSizeList(_), _) => None,
(Date32(v1), Date32(v2)) => v1.partial_cmp(v2),
(Date32(_), _) => None,
(Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
@@ -3644,24 +3622,6 @@ mod tests {
])]),
));
assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
-
- let a =
- ScalarValue::List(Arc::new(
- ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
- Some(vec![Some(10), Some(2), Some(3)]),
- None,
- Some(vec![Some(10), Some(2), Some(3)]),
- ]),
- ));
- let b =
- ScalarValue::List(Arc::new(
- ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
- Some(vec![Some(10), Some(2), Some(3)]),
- None,
- Some(vec![Some(10), Some(2), Some(3)]),
- ]),
- ));
- assert_eq!(a.partial_cmp(&b), Some(Ordering::Equal));
}
#[test]