alamb commented on code in PR #6990:
URL: https://github.com/apache/arrow-datafusion/pull/6990#discussion_r1269850366
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1363,69 +1363,278 @@ pub fn array_ndims(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
-macro_rules! contains {
- ($FIRST_ARRAY:expr, $SECOND_ARRAY:expr, $ARRAY_TYPE:ident) => {{
- let first_array = downcast_arg!($FIRST_ARRAY, $ARRAY_TYPE);
- let second_array = downcast_arg!($SECOND_ARRAY, $ARRAY_TYPE);
- let mut res = true;
- for x in second_array.values().iter().dedup() {
- if !first_array.values().contains(x) {
- res = false;
- break;
+macro_rules! non_list_contains {
+ ($ARRAY:expr, $SUB_ARRAY:expr, $ARRAY_TYPE:ident) => {{
+ let sub_array = downcast_arg!($SUB_ARRAY, $ARRAY_TYPE);
+ let mut boolean_builder = BooleanArray::builder($ARRAY.len());
+
+ for (arr, elem) in $ARRAY.iter().zip(sub_array.iter()) {
+ if let (Some(arr), Some(elem)) = (arr, elem) {
+ let arr = downcast_arg!(arr, $ARRAY_TYPE);
+ let res = arr.iter().dedup().flatten().any(|x| x == elem);
+ boolean_builder.append_value(res);
}
}
+ Ok(Arc::new(boolean_builder.finish()))
+ }};
+}
+
+/// Array_has SQL function
+pub fn array_has(args: &[ArrayRef]) -> Result<ArrayRef> {
+ assert_eq!(args.len(), 2);
+ let array = args[0].as_list::<i32>();
+
+ match args[1].data_type() {
+ DataType::List(_) => {
+ let sub_array = args[1].as_list::<i32>();
+ let mut boolean_builder = BooleanArray::builder(array.len());
+
+ for (arr, elem) in array.iter().zip(sub_array.iter()) {
+ if let (Some(arr), Some(elem)) = (arr, elem) {
+ let list_arr = arr.as_list::<i32>();
+ let res = list_arr.iter().dedup().flatten().any(|x| *x ==
*elem);
+ boolean_builder.append_value(res);
+ }
+ }
+ Ok(Arc::new(boolean_builder.finish()))
+ }
+
+ // Int64, Int32, Int16, Int8
+ // UInt64, UInt32, UInt16, UInt8
+ DataType::Int64 => {
+ non_list_contains!(array, args[1], Int64Array)
+ }
+ DataType::Int32 => {
+ non_list_contains!(array, args[1], Int32Array)
+ }
+ DataType::Int16 => {
+ non_list_contains!(array, args[1], Int16Array)
+ }
+ DataType::Int8 => {
+ non_list_contains!(array, args[1], Int8Array)
+ }
+ DataType::UInt64 => {
+ non_list_contains!(array, args[1], UInt64Array)
+ }
+ DataType::UInt32 => {
+ non_list_contains!(array, args[1], UInt32Array)
+ }
+ DataType::UInt16 => {
+ non_list_contains!(array, args[1], UInt16Array)
+ }
+ DataType::UInt8 => {
+ non_list_contains!(array, args[1], UInt8Array)
+ }
+
+ DataType::Float64 => {
+ non_list_contains!(array, args[1], Float64Array)
+ }
+ DataType::Float32 => {
+ non_list_contains!(array, args[1], Float32Array)
+ }
+ DataType::Utf8 => {
+ non_list_contains!(array, args[1], StringArray)
+ }
+ DataType::LargeUtf8 => {
+ non_list_contains!(array, args[1], LargeStringArray)
+ }
+ DataType::Boolean => {
+ non_list_contains!(array, args[1], BooleanArray)
+ }
+ _ => {
+ todo!(
Review Comment:
Can we please return `DataFusionError::NotImplemented` here rather than
`todo!()`, which will panic?
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1363,69 +1363,278 @@ pub fn array_ndims(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}
-macro_rules! contains {
- ($FIRST_ARRAY:expr, $SECOND_ARRAY:expr, $ARRAY_TYPE:ident) => {{
- let first_array = downcast_arg!($FIRST_ARRAY, $ARRAY_TYPE);
- let second_array = downcast_arg!($SECOND_ARRAY, $ARRAY_TYPE);
- let mut res = true;
- for x in second_array.values().iter().dedup() {
- if !first_array.values().contains(x) {
- res = false;
- break;
+macro_rules! non_list_contains {
+ ($ARRAY:expr, $SUB_ARRAY:expr, $ARRAY_TYPE:ident) => {{
+ let sub_array = downcast_arg!($SUB_ARRAY, $ARRAY_TYPE);
+ let mut boolean_builder = BooleanArray::builder($ARRAY.len());
+
+ for (arr, elem) in $ARRAY.iter().zip(sub_array.iter()) {
+ if let (Some(arr), Some(elem)) = (arr, elem) {
+ let arr = downcast_arg!(arr, $ARRAY_TYPE);
+ let res = arr.iter().dedup().flatten().any(|x| x == elem);
+ boolean_builder.append_value(res);
}
}
+ Ok(Arc::new(boolean_builder.finish()))
+ }};
+}
+
+/// Array_has SQL function
+pub fn array_has(args: &[ArrayRef]) -> Result<ArrayRef> {
+ assert_eq!(args.len(), 2);
+ let array = args[0].as_list::<i32>();
+
+ match args[1].data_type() {
+ DataType::List(_) => {
+ let sub_array = args[1].as_list::<i32>();
+ let mut boolean_builder = BooleanArray::builder(array.len());
+
+ for (arr, elem) in array.iter().zip(sub_array.iter()) {
+ if let (Some(arr), Some(elem)) = (arr, elem) {
+ let list_arr = arr.as_list::<i32>();
+ let res = list_arr.iter().dedup().flatten().any(|x| *x ==
*elem);
+ boolean_builder.append_value(res);
+ }
+ }
+ Ok(Arc::new(boolean_builder.finish()))
+ }
+
+ // Int64, Int32, Int16, Int8
+ // UInt64, UInt32, UInt16, UInt8
+ DataType::Int64 => {
+ non_list_contains!(array, args[1], Int64Array)
+ }
+ DataType::Int32 => {
+ non_list_contains!(array, args[1], Int32Array)
+ }
+ DataType::Int16 => {
+ non_list_contains!(array, args[1], Int16Array)
+ }
+ DataType::Int8 => {
+ non_list_contains!(array, args[1], Int8Array)
+ }
+ DataType::UInt64 => {
+ non_list_contains!(array, args[1], UInt64Array)
+ }
+ DataType::UInt32 => {
+ non_list_contains!(array, args[1], UInt32Array)
+ }
+ DataType::UInt16 => {
+ non_list_contains!(array, args[1], UInt16Array)
+ }
+ DataType::UInt8 => {
+ non_list_contains!(array, args[1], UInt8Array)
+ }
+
+ DataType::Float64 => {
+ non_list_contains!(array, args[1], Float64Array)
+ }
+ DataType::Float32 => {
+ non_list_contains!(array, args[1], Float32Array)
+ }
+ DataType::Utf8 => {
+ non_list_contains!(array, args[1], StringArray)
+ }
+ DataType::LargeUtf8 => {
+ non_list_contains!(array, args[1], LargeStringArray)
+ }
+ DataType::Boolean => {
+ non_list_contains!(array, args[1], BooleanArray)
+ }
+ _ => {
+ todo!(
+ "array_has not implemented for type: {:?}",
+ args[1].data_type()
+ )
+ }
+ }
+}
+
+macro_rules! array_has_any_non_list_check {
+ ($ARRAY:expr, $SUB_ARRAY:expr, $ARRAY_TYPE:ident) => {{
+ let arr = downcast_arg!($ARRAY, $ARRAY_TYPE);
+ let sub_arr = downcast_arg!($SUB_ARRAY, $ARRAY_TYPE);
+
+ let mut res = false;
+ for elem in sub_arr.iter().dedup() {
+ res |= arr
+ .iter()
+ .dedup()
+ .flatten()
+ .any(|x| x == elem.expect("null type not supported"));
Review Comment:
Again, can we please avoid panics for unsupported cases and return an error
instead? That gives users a much easier-to-understand error message.
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -2267,63 +2267,6 @@ mod tests {
assert_eq!(result, &UInt64Array::from_value(2, 1));
}
- #[test]
Review Comment:
> Sqllogictests are more important than unit tests: unit tests are mainly
> for convenience.

I agree with this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]