NGA-TRAN commented on code in PR #3975:
URL: https://github.com/apache/arrow-datafusion/pull/3975#discussion_r1006421585
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -714,188 +911,31 @@ impl PhysicalExpr for InListExpr {
};
match value_data_type {
- DataType::Float32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Float32,
- Float32Array
- )
- }
- DataType::Float64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Float64,
- Float64Array
- )
- }
- DataType::Int16 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int16,
- Int16Array
- )
- }
- DataType::Int32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int32,
- Int32Array
- )
- }
- DataType::Int64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int64,
- Int64Array
- )
- }
- DataType::Int8 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int8,
- Int8Array
- )
- }
- DataType::UInt16 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt16,
- UInt16Array
- )
- }
- DataType::UInt32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt32,
- UInt32Array
- )
- }
- DataType::UInt64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt64,
- UInt64Array
- )
- }
- DataType::UInt8 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt8,
- UInt8Array
- )
- }
- DataType::Date32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Date32,
- Date32Array
- )
- }
- DataType::Date64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Date64,
- Date64Array
- )
- }
- DataType::Boolean => Ok(make_contains!(
- array,
- list_values,
- self.negated,
- Boolean,
- BooleanArray
- )),
- DataType::Utf8 => {
- self.compare_utf8::<i32>(array, list_values, self.negated)
- }
- DataType::LargeUtf8 => {
- self.compare_utf8::<i64>(array, list_values, self.negated)
- }
- DataType::Binary => {
- self.compare_binary::<i32>(array, list_values,
self.negated)
- }
- DataType::LargeBinary => {
- self.compare_binary::<i64>(array, list_values,
self.negated)
- }
- DataType::Null => {
- let null_array = new_null_array(&DataType::Boolean,
array.len());
- Ok(ColumnarValue::Array(Arc::new(null_array)))
- }
- DataType::Decimal128(_, _) => {
- let decimal_array =
-
array.as_any().downcast_ref::<Decimal128Array>().unwrap();
- Ok(make_list_contains_decimal(
- decimal_array,
- list_values,
- self.negated,
- ))
- }
- DataType::Timestamp(unit, _) => match unit {
- TimeUnit::Second => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampSecond,
- TimestampSecondArray
- )
- }
- TimeUnit::Millisecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampMillisecond,
- TimestampMillisecondArray
- )
- }
- TimeUnit::Microsecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampMicrosecond,
- TimestampMicrosecondArray
- )
- }
- TimeUnit::Nanosecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampNanosecond,
- TimestampNanosecondArray
- )
+ DataType::Dictionary(_key_type, value_type) => {
+ // Get values from the dictionary that include nulls for
none values
+ let dict_array = array
+ .as_any()
+ .downcast_ref::<DictionaryArray<Int32Type>>()
Review Comment:
I will make this general after the null data work. I tried to move this
inside a function with trait to make if `<DictionaryArray<K>>`. It works for
the `downcast_ref` but it does not work with `take`. I need to write more code
for all cases if we use `take`
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -714,188 +911,31 @@ impl PhysicalExpr for InListExpr {
};
match value_data_type {
- DataType::Float32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Float32,
- Float32Array
- )
- }
- DataType::Float64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Float64,
- Float64Array
- )
- }
- DataType::Int16 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int16,
- Int16Array
- )
- }
- DataType::Int32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int32,
- Int32Array
- )
- }
- DataType::Int64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int64,
- Int64Array
- )
- }
- DataType::Int8 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Int8,
- Int8Array
- )
- }
- DataType::UInt16 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt16,
- UInt16Array
- )
- }
- DataType::UInt32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt32,
- UInt32Array
- )
- }
- DataType::UInt64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt64,
- UInt64Array
- )
- }
- DataType::UInt8 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- UInt8,
- UInt8Array
- )
- }
- DataType::Date32 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Date32,
- Date32Array
- )
- }
- DataType::Date64 => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- Date64,
- Date64Array
- )
- }
- DataType::Boolean => Ok(make_contains!(
- array,
- list_values,
- self.negated,
- Boolean,
- BooleanArray
- )),
- DataType::Utf8 => {
- self.compare_utf8::<i32>(array, list_values, self.negated)
- }
- DataType::LargeUtf8 => {
- self.compare_utf8::<i64>(array, list_values, self.negated)
- }
- DataType::Binary => {
- self.compare_binary::<i32>(array, list_values,
self.negated)
- }
- DataType::LargeBinary => {
- self.compare_binary::<i64>(array, list_values,
self.negated)
- }
- DataType::Null => {
- let null_array = new_null_array(&DataType::Boolean,
array.len());
- Ok(ColumnarValue::Array(Arc::new(null_array)))
- }
- DataType::Decimal128(_, _) => {
- let decimal_array =
-
array.as_any().downcast_ref::<Decimal128Array>().unwrap();
- Ok(make_list_contains_decimal(
- decimal_array,
- list_values,
- self.negated,
- ))
- }
- DataType::Timestamp(unit, _) => match unit {
- TimeUnit::Second => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampSecond,
- TimestampSecondArray
- )
- }
- TimeUnit::Millisecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampMillisecond,
- TimestampMillisecondArray
- )
- }
- TimeUnit::Microsecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampMicrosecond,
- TimestampMicrosecondArray
- )
- }
- TimeUnit::Nanosecond => {
- make_contains_primitive!(
- array,
- list_values,
- self.negated,
- TimestampNanosecond,
- TimestampNanosecondArray
- )
+ DataType::Dictionary(_key_type, value_type) => {
+ // Get values from the dictionary that include nulls for
none values
+ let dict_array = array
Review Comment:
@alamb and @tustvold
I did what as above but it only works well if there are no nulls in the
data. I may miss something or there will be a lot more work to make it work
this way.
FYI `// 1` is the new implementation per your suggestion. `// 2` is the
expensive one I temporarily keep to run some tests
##########
datafusion/core/tests/sql/predicates.rs:
##########
@@ -450,7 +471,53 @@ async fn in_set_string_dictionaries() -> Result<()> {
"| fazzz |",
"+-------+",
];
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}
+
+#[tokio::test]
+async fn in_list_string_dictionaries_with_null() -> Result<()> {
Review Comment:
This test do not pass with method `// 1`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]