NGA-TRAN commented on code in PR #3975:
URL: https://github.com/apache/arrow-datafusion/pull/3975#discussion_r1006421585


##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -714,188 +911,31 @@ impl PhysicalExpr for InListExpr {
             };
 
             match value_data_type {
-                DataType::Float32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Float32,
-                        Float32Array
-                    )
-                }
-                DataType::Float64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Float64,
-                        Float64Array
-                    )
-                }
-                DataType::Int16 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int16,
-                        Int16Array
-                    )
-                }
-                DataType::Int32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int32,
-                        Int32Array
-                    )
-                }
-                DataType::Int64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int64,
-                        Int64Array
-                    )
-                }
-                DataType::Int8 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int8,
-                        Int8Array
-                    )
-                }
-                DataType::UInt16 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt16,
-                        UInt16Array
-                    )
-                }
-                DataType::UInt32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt32,
-                        UInt32Array
-                    )
-                }
-                DataType::UInt64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt64,
-                        UInt64Array
-                    )
-                }
-                DataType::UInt8 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt8,
-                        UInt8Array
-                    )
-                }
-                DataType::Date32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Date32,
-                        Date32Array
-                    )
-                }
-                DataType::Date64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Date64,
-                        Date64Array
-                    )
-                }
-                DataType::Boolean => Ok(make_contains!(
-                    array,
-                    list_values,
-                    self.negated,
-                    Boolean,
-                    BooleanArray
-                )),
-                DataType::Utf8 => {
-                    self.compare_utf8::<i32>(array, list_values, self.negated)
-                }
-                DataType::LargeUtf8 => {
-                    self.compare_utf8::<i64>(array, list_values, self.negated)
-                }
-                DataType::Binary => {
-                    self.compare_binary::<i32>(array, list_values, 
self.negated)
-                }
-                DataType::LargeBinary => {
-                    self.compare_binary::<i64>(array, list_values, 
self.negated)
-                }
-                DataType::Null => {
-                    let null_array = new_null_array(&DataType::Boolean, 
array.len());
-                    Ok(ColumnarValue::Array(Arc::new(null_array)))
-                }
-                DataType::Decimal128(_, _) => {
-                    let decimal_array =
-                        
array.as_any().downcast_ref::<Decimal128Array>().unwrap();
-                    Ok(make_list_contains_decimal(
-                        decimal_array,
-                        list_values,
-                        self.negated,
-                    ))
-                }
-                DataType::Timestamp(unit, _) => match unit {
-                    TimeUnit::Second => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampSecond,
-                            TimestampSecondArray
-                        )
-                    }
-                    TimeUnit::Millisecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampMillisecond,
-                            TimestampMillisecondArray
-                        )
-                    }
-                    TimeUnit::Microsecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampMicrosecond,
-                            TimestampMicrosecondArray
-                        )
-                    }
-                    TimeUnit::Nanosecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampNanosecond,
-                            TimestampNanosecondArray
-                        )
+                DataType::Dictionary(_key_type, value_type) => {
+                    // Get values from the dictionary that include nulls for 
none values
+                    let dict_array = array
+                        .as_any()
+                        .downcast_ref::<DictionaryArray<Int32Type>>()

Review Comment:
   I will make this general after the null data work. I tried to move this 
inside a function with trait to make if `<DictionaryArray<K>>`.  It works for 
the `downcast_ref` but it does not work with `take`. I need to write more code 
for all cases if we use `take`



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -714,188 +911,31 @@ impl PhysicalExpr for InListExpr {
             };
 
             match value_data_type {
-                DataType::Float32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Float32,
-                        Float32Array
-                    )
-                }
-                DataType::Float64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Float64,
-                        Float64Array
-                    )
-                }
-                DataType::Int16 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int16,
-                        Int16Array
-                    )
-                }
-                DataType::Int32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int32,
-                        Int32Array
-                    )
-                }
-                DataType::Int64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int64,
-                        Int64Array
-                    )
-                }
-                DataType::Int8 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Int8,
-                        Int8Array
-                    )
-                }
-                DataType::UInt16 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt16,
-                        UInt16Array
-                    )
-                }
-                DataType::UInt32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt32,
-                        UInt32Array
-                    )
-                }
-                DataType::UInt64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt64,
-                        UInt64Array
-                    )
-                }
-                DataType::UInt8 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        UInt8,
-                        UInt8Array
-                    )
-                }
-                DataType::Date32 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Date32,
-                        Date32Array
-                    )
-                }
-                DataType::Date64 => {
-                    make_contains_primitive!(
-                        array,
-                        list_values,
-                        self.negated,
-                        Date64,
-                        Date64Array
-                    )
-                }
-                DataType::Boolean => Ok(make_contains!(
-                    array,
-                    list_values,
-                    self.negated,
-                    Boolean,
-                    BooleanArray
-                )),
-                DataType::Utf8 => {
-                    self.compare_utf8::<i32>(array, list_values, self.negated)
-                }
-                DataType::LargeUtf8 => {
-                    self.compare_utf8::<i64>(array, list_values, self.negated)
-                }
-                DataType::Binary => {
-                    self.compare_binary::<i32>(array, list_values, 
self.negated)
-                }
-                DataType::LargeBinary => {
-                    self.compare_binary::<i64>(array, list_values, 
self.negated)
-                }
-                DataType::Null => {
-                    let null_array = new_null_array(&DataType::Boolean, 
array.len());
-                    Ok(ColumnarValue::Array(Arc::new(null_array)))
-                }
-                DataType::Decimal128(_, _) => {
-                    let decimal_array =
-                        
array.as_any().downcast_ref::<Decimal128Array>().unwrap();
-                    Ok(make_list_contains_decimal(
-                        decimal_array,
-                        list_values,
-                        self.negated,
-                    ))
-                }
-                DataType::Timestamp(unit, _) => match unit {
-                    TimeUnit::Second => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampSecond,
-                            TimestampSecondArray
-                        )
-                    }
-                    TimeUnit::Millisecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampMillisecond,
-                            TimestampMillisecondArray
-                        )
-                    }
-                    TimeUnit::Microsecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampMicrosecond,
-                            TimestampMicrosecondArray
-                        )
-                    }
-                    TimeUnit::Nanosecond => {
-                        make_contains_primitive!(
-                            array,
-                            list_values,
-                            self.negated,
-                            TimestampNanosecond,
-                            TimestampNanosecondArray
-                        )
+                DataType::Dictionary(_key_type, value_type) => {
+                    // Get values from the dictionary that include nulls for 
none values
+                    let dict_array = array

Review Comment:
   @alamb and @tustvold 
   I did what as above but it only works well if there are no nulls in the 
data. I may miss something or there will be a lot more work to make it work 
this way.
   
   FYI `// 1` is the new implementation per your suggestion. `// 2` is the 
expensive one I temporarily keep to run some tests



##########
datafusion/core/tests/sql/predicates.rs:
##########
@@ -450,7 +471,53 @@ async fn in_set_string_dictionaries() -> Result<()> {
         "| fazzz |",
         "+-------+",
     ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn in_list_string_dictionaries_with_null() -> Result<()> {

Review Comment:
   This test do not pass with method `// 1`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to