Weijun-H commented on code in PR #8253:
URL: https://github.com/apache/arrow-datafusion/pull/8253#discussion_r1406227109
##########
datafusion/common/src/scalar.rs:
##########
@@ -317,69 +317,47 @@ impl PartialOrd for ScalarValue {
(FixedSizeBinary(_, _), _) => None,
(LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
(LargeBinary(_), _) => None,
- (List(arr1), List(arr2)) | (FixedSizeList(arr1),
FixedSizeList(arr2)) => {
- if arr1.data_type() == arr2.data_type() {
- let list_arr1 = as_list_array(arr1);
- let list_arr2 = as_list_array(arr2);
- if list_arr1.len() != list_arr2.len() {
- return None;
- }
- for i in 0..list_arr1.len() {
- let arr1 = list_arr1.value(i);
- let arr2 = list_arr2.value(i);
-
- let lt_res =
- arrow::compute::kernels::cmp::lt(&arr1,
&arr2).ok()?;
- let eq_res =
- arrow::compute::kernels::cmp::eq(&arr1,
&arr2).ok()?;
-
- for j in 0..lt_res.len() {
- if lt_res.is_valid(j) && lt_res.value(j) {
- return Some(Ordering::Less);
- }
- if eq_res.is_valid(j) && !eq_res.value(j) {
- return Some(Ordering::Greater);
- }
- }
+ (List(arr1), List(arr2))
+ | (FixedSizeList(arr1), FixedSizeList(arr2))
+ | (LargeList(arr1), LargeList(arr2)) => {
+ // ScalarValue::List / ScalarValue::FixedSizeList /
ScalarValue::LargeList are ensure to have length 1
+ assert_eq!(arr1.len(), 1);
+ assert_eq!(arr2.len(), 1);
+
+ if arr1.data_type() != arr2.data_type() {
+ return None;
+ }
+
+ fn first_array_for_list(arr: &ArrayRef) -> ArrayRef {
+ if let Some(arr) = arr.as_list_opt::<i32>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_list_opt::<i64>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_fixed_size_list_opt() {
+ arr.value(0)
+ } else {
+ unreachable!("Since only List / LargeList /
FixedSizeList are supported, this should never happen")
Review Comment:
> 'This was likely caused by a bug in DataFusion's code and we would
welcome that you file a bug report in our issue tracker'.
I rechecked the definition of the Internal Error: it is meant for reporting a
bug that has not been observed before. Since this is the fallback branch of an
if-else chain, returning an internal error would be more appropriate here.
##########
datafusion/common/src/scalar.rs:
##########
@@ -317,69 +317,47 @@ impl PartialOrd for ScalarValue {
(FixedSizeBinary(_, _), _) => None,
(LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
(LargeBinary(_), _) => None,
- (List(arr1), List(arr2)) | (FixedSizeList(arr1),
FixedSizeList(arr2)) => {
- if arr1.data_type() == arr2.data_type() {
- let list_arr1 = as_list_array(arr1);
- let list_arr2 = as_list_array(arr2);
- if list_arr1.len() != list_arr2.len() {
- return None;
- }
- for i in 0..list_arr1.len() {
- let arr1 = list_arr1.value(i);
- let arr2 = list_arr2.value(i);
-
- let lt_res =
- arrow::compute::kernels::cmp::lt(&arr1,
&arr2).ok()?;
- let eq_res =
- arrow::compute::kernels::cmp::eq(&arr1,
&arr2).ok()?;
-
- for j in 0..lt_res.len() {
- if lt_res.is_valid(j) && lt_res.value(j) {
- return Some(Ordering::Less);
- }
- if eq_res.is_valid(j) && !eq_res.value(j) {
- return Some(Ordering::Greater);
- }
- }
+ (List(arr1), List(arr2))
+ | (FixedSizeList(arr1), FixedSizeList(arr2))
+ | (LargeList(arr1), LargeList(arr2)) => {
+ // ScalarValue::List / ScalarValue::FixedSizeList /
ScalarValue::LargeList are ensure to have length 1
+ assert_eq!(arr1.len(), 1);
+ assert_eq!(arr2.len(), 1);
+
+ if arr1.data_type() != arr2.data_type() {
+ return None;
+ }
+
+ fn first_array_for_list(arr: &ArrayRef) -> ArrayRef {
+ if let Some(arr) = arr.as_list_opt::<i32>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_list_opt::<i64>() {
+ arr.value(0)
+ } else if let Some(arr) = arr.as_fixed_size_list_opt() {
+ arr.value(0)
+ } else {
+ unreachable!("Since only List / LargeList /
FixedSizeList are supported, this should never happen")
Review Comment:
> 'This was likely caused by a bug in DataFusion's code and we would
welcome that you file a bug report in our issue tracker'.
I rechecked the definition of the Internal Error: it is meant for reporting a
bug that has not been observed before. Since this is the fallback branch of an
if-else chain, returning an internal error would be more appropriate here 🤔.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]