alamb commented on a change in pull request #813:
URL: https://github.com/apache/arrow-datafusion/pull/813#discussion_r682053016



##########
File path: datafusion/src/physical_plan/expressions/in_list.rs
##########
@@ -99,6 +124,104 @@ macro_rules! make_contains {
     }};
 }
 
+macro_rules! make_contains_primitive {
+    ($ARRAY:expr, $LIST_VALUES:expr, $NEGATED:expr, $SCALAR_VALUE:ident, 
$ARRAY_TYPE:ident) => {{
+        let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
+
+        let mut contains_null = false;
+        let values = $LIST_VALUES
+            .iter()
+            .flat_map(|expr| match expr {
+                ColumnarValue::Scalar(s) => match s {

Review comment:
       Given this code just seems to pass along the values,  wonder if it would 
be clearer / faster here to look for nulls using the `ScalarValue::is_null()`, 
perhaps like
   
   ```rust
   let contains_nulls = $LIST_VALUES.iter().any(|expr| expr.is_null());
   ```
   
   And that way you could stop when you hit the first one as well as 
potentially avoid a collect into a new `Vec`

##########
File path: datafusion/src/physical_plan/expressions/in_list.rs
##########
@@ -234,16 +363,40 @@ impl PhysicalExpr for InListExpr {
 
         match value_data_type {
             DataType::Float32 => {
-                make_contains!(array, list_values, self.negated, Float32, 
Float32Array)
+                make_contains_primitive!(
+                    array,
+                    list_values,
+                    self.negated,
+                    Float32,
+                    Float32Array
+                )
             }
             DataType::Float64 => {
-                make_contains!(array, list_values, self.negated, Float64, 
Float64Array)
+                make_contains_primitive!(
+                    array,
+                    list_values,
+                    self.negated,
+                    Float64,
+                    Float64Array
+                )
             }
             DataType::Int16 => {
-                make_contains!(array, list_values, self.negated, Int16, 
Int16Array)
+                make_contains_primitive!(
+                    array,
+                    list_values,
+                    self.negated,
+                    Int16,
+                    Int16Array
+                )
             }
             DataType::Int32 => {
-                make_contains!(array, list_values, self.negated, Int32, 
Int32Array)
+                make_contains_primitive!(
+                    array,
+                    list_values,
+                    self.negated,
+                    Int32,
+                    Int32Array
+                )
             }
             DataType::Int64 => {
                 make_contains!(array, list_values, self.negated, Int64, 
Int64Array)

Review comment:
       Is there a reason not to apply the same transformation to `Int64` and 
`Int8`?

##########
File path: datafusion/src/physical_plan/expressions/in_list.rs
##########
@@ -99,6 +124,104 @@ macro_rules! make_contains {
     }};
 }
 
+macro_rules! make_contains_primitive {
+    ($ARRAY:expr, $LIST_VALUES:expr, $NEGATED:expr, $SCALAR_VALUE:ident, 
$ARRAY_TYPE:ident) => {{
+        let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
+
+        let mut contains_null = false;
+        let values = $LIST_VALUES
+            .iter()
+            .flat_map(|expr| match expr {
+                ColumnarValue::Scalar(s) => match s {
+                    ScalarValue::$SCALAR_VALUE(Some(v)) => Some(*v),
+                    ScalarValue::$SCALAR_VALUE(None) => {
+                        contains_null = true;
+                        None
+                    }
+                    ScalarValue::Utf8(None) => {
+                        contains_null = true;
+                        None
+                    }
+                    datatype => unimplemented!("Unexpected type {} for 
InList", datatype),
+                },
+                ColumnarValue::Array(_) => {
+                    unimplemented!("InList does not yet support nested 
columns.")
+                }
+            })
+            .collect::<Vec<_>>();
+
+        if $NEGATED {
+            if contains_null {
+                Ok(ColumnarValue::Array(Arc::new(
+                    array
+                        .iter()
+                        .map(|x| match x.map(|v| !values.contains(&v)) {
+                            Some(true) => None,
+                            x => x,
+                        })
+                        .collect::<BooleanArray>(),
+                )))
+            } else {
+                Ok(ColumnarValue::Array(Arc::new(
+                    values_not_in_list_primitive(array, &values)?,
+                )))
+            }
+        } else {
+            if contains_null {
+                Ok(ColumnarValue::Array(Arc::new(
+                    array
+                        .iter()
+                        .map(|x| match x.map(|v| values.contains(&v)) {
+                            Some(false) => None,
+                            x => x,
+                        })
+                        .collect::<BooleanArray>(),
+                )))
+            } else {
+                Ok(ColumnarValue::Array(Arc::new(values_in_list_primitive(
+                    array, &values,
+                )?)))
+            }
+        }
+    }};
+}
+
+fn values_in_list_primitive<T: ArrowPrimitiveType>(

Review comment:
       I suggest adding some comments here noting these functions assume that 
there are no null values in the column

##########
File path: datafusion/src/physical_plan/expressions/in_list.rs
##########
@@ -270,9 +447,10 @@ impl PhysicalExpr for InListExpr {
             DataType::LargeUtf8 => {
                 self.compare_utf8::<i64>(array, list_values, self.negated)
             }
-            datatype => {
-                unimplemented!("InList does not support datatype {:?}.", 
datatype)
-            }
+            datatype => Result::Err(DataFusionError::NotImplemented(format!(
+                "InList does not support datatype {:?}.",

Review comment:
       👨‍🍳 👌 

##########
File path: datafusion/src/physical_plan/expressions/in_list.rs
##########
@@ -164,33 +287,39 @@ impl InListExpr {
             })
             .collect::<Vec<&str>>();
 
-        Ok(ColumnarValue::Array(Arc::new(
-            array
-                .iter()
-                .map(|x| {
-                    let contains = x.map(|x| values.contains(&x));
-                    match contains {
-                        Some(true) => {
-                            if negated {
-                                Some(false)
-                            } else {
-                                Some(true)
-                            }
-                        }
-                        Some(false) => {
-                            if contains_null {
-                                None
-                            } else if negated {
-                                Some(true)
-                            } else {
-                                Some(false)
-                            }
-                        }
-                        None => None,
-                    }
-                })
-                .collect::<BooleanArray>(),
-        )))
+        if negated {

Review comment:
       The same comment about finding NULL by checking for the first match 
applies to the code right above this as well




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to