alamb commented on code in PR #18832:
URL: https://github.com/apache/datafusion/pull/18832#discussion_r2552997998
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -1028,6 +1104,612 @@ mod tests {
Ok(())
}
+ #[test]
+ fn in_list_int8() -> Result<()> {
Review Comment:
Can we please reduce the duplication in tests here? It seems like there
are about 16 copies of the same test.
Reducing the duplication will make it easier to understand what is being
covered.
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -198,68 +211,127 @@ impl ArrayStaticFilter {
}
}
-struct Int32StaticFilter {
- null_count: usize,
- values: HashSet<i32>,
-}
+// Macro to generate specialized StaticFilter implementations for primitive
types
+macro_rules! primitive_static_filter {
+ ($Name:ident, $ArrowType:ty) => {
+ struct $Name {
+ null_count: usize,
+ values: HashSet<<$ArrowType as ArrowPrimitiveType>::Native>,
+ }
-impl Int32StaticFilter {
- fn try_new(in_array: &ArrayRef) -> Result<Self> {
- let in_array = in_array
- .as_primitive_opt::<Int32Type>()
- .ok_or_else(|| exec_datafusion_err!("Failed to downcast array"))?;
+ impl $Name {
+ fn try_new(in_array: &ArrayRef) -> Result<Self> {
+ let in_array = in_array
+ .as_primitive_opt::<$ArrowType>()
+ .ok_or_else(|| exec_datafusion_err!("Failed to downcast an
array to a '{}' array", stringify!($ArrowType)))?;
- let mut values = HashSet::with_capacity(in_array.len());
- let null_count = in_array.null_count();
+ let mut values = HashSet::with_capacity(in_array.len());
+ let null_count = in_array.null_count();
+
+ for v in in_array.iter().flatten() {
+ values.insert(v);
+ }
- for v in in_array.iter().flatten() {
- values.insert(v);
+ Ok(Self { null_count, values })
+ }
}
- Ok(Self { null_count, values })
- }
-}
+ impl StaticFilter for $Name {
+ fn null_count(&self) -> usize {
+ self.null_count
+ }
-impl StaticFilter for Int32StaticFilter {
- fn null_count(&self) -> usize {
- self.null_count
- }
+ fn contains(&self, v: &dyn Array, negated: bool) ->
Result<BooleanArray> {
+ // Handle dictionary arrays by recursing on the values
+ downcast_dictionary_array! {
Review Comment:
I didn't see any tests for dictionaries 🤔
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -198,68 +211,127 @@ impl ArrayStaticFilter {
}
}
-struct Int32StaticFilter {
- null_count: usize,
- values: HashSet<i32>,
-}
+// Macro to generate specialized StaticFilter implementations for primitive
types
+macro_rules! primitive_static_filter {
+ ($Name:ident, $ArrowType:ty) => {
+ struct $Name {
+ null_count: usize,
+ values: HashSet<<$ArrowType as ArrowPrimitiveType>::Native>,
+ }
-impl Int32StaticFilter {
- fn try_new(in_array: &ArrayRef) -> Result<Self> {
- let in_array = in_array
- .as_primitive_opt::<Int32Type>()
- .ok_or_else(|| exec_datafusion_err!("Failed to downcast array"))?;
+ impl $Name {
+ fn try_new(in_array: &ArrayRef) -> Result<Self> {
+ let in_array = in_array
+ .as_primitive_opt::<$ArrowType>()
+ .ok_or_else(|| exec_datafusion_err!("Failed to downcast an
array to a '{}' array", stringify!($ArrowType)))?;
- let mut values = HashSet::with_capacity(in_array.len());
- let null_count = in_array.null_count();
+ let mut values = HashSet::with_capacity(in_array.len());
+ let null_count = in_array.null_count();
+
+ for v in in_array.iter().flatten() {
+ values.insert(v);
+ }
- for v in in_array.iter().flatten() {
- values.insert(v);
+ Ok(Self { null_count, values })
+ }
}
- Ok(Self { null_count, values })
- }
-}
+ impl StaticFilter for $Name {
+ fn null_count(&self) -> usize {
+ self.null_count
+ }
-impl StaticFilter for Int32StaticFilter {
- fn null_count(&self) -> usize {
- self.null_count
- }
+ fn contains(&self, v: &dyn Array, negated: bool) ->
Result<BooleanArray> {
+ // Handle dictionary arrays by recursing on the values
+ downcast_dictionary_array! {
+ v => {
+ let values_contains =
self.contains(v.values().as_ref(), negated)?;
+ let result = take(&values_contains, v.keys(), None)?;
+ return Ok(downcast_array(result.as_ref()))
+ }
+ _ => {}
+ }
- fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
- let v = v
- .as_primitive_opt::<Int32Type>()
- .ok_or_else(|| exec_datafusion_err!("Failed to downcast array"))?;
-
- let result = match (v.null_count() > 0, negated) {
- (true, false) => {
- // has nulls, not negated"
- BooleanArray::from_iter(
- v.iter().map(|value| Some(self.values.contains(&value?))),
- )
- }
- (true, true) => {
- // has nulls, negated
- BooleanArray::from_iter(
- v.iter().map(|value| Some(!self.values.contains(&value?))),
- )
- }
- (false, false) => {
- //no null, not negated
- BooleanArray::from_iter(
- v.values().iter().map(|value| self.values.contains(value)),
- )
- }
- (false, true) => {
- // no null, negated
- BooleanArray::from_iter(
- v.values().iter().map(|value|
!self.values.contains(value)),
- )
+ let v = v
+ .as_primitive_opt::<$ArrowType>()
+ .ok_or_else(|| exec_datafusion_err!("Failed to downcast an
array to a '{}' array", stringify!($ArrowType)))?;
+
+ let haystack_has_nulls = self.null_count > 0;
+
+ let result = match (v.null_count() > 0, haystack_has_nulls,
negated) {
+ (true, _, false) | (false, true, false) => {
+ // Either needle or haystack has nulls, not negated
+ BooleanArray::from_iter(v.iter().map(|value| {
+ match value {
+ // SQL three-valued logic: null IN (...) is
always null
+ None => None,
+ Some(v) => {
+ if self.values.contains(&v) {
+ Some(true)
+ } else if haystack_has_nulls {
+ // value not in set, but set has nulls
-> null
+ None
+ } else {
+ Some(false)
+ }
+ }
+ }
+ }))
+ }
+ (true, _, true) | (false, true, true) => {
+ // Either needle or haystack has nulls, negated
+ BooleanArray::from_iter(v.iter().map(|value| {
+ match value {
+ // SQL three-valued logic: null NOT IN (...)
is always null
+ None => None,
+ Some(v) => {
+ if self.values.contains(&v) {
+ Some(false)
+ } else if haystack_has_nulls {
+ // value not in set, but set has nulls
-> null
+ None
+ } else {
+ Some(true)
+ }
+ }
+ }
+ }))
+ }
+ (false, false, false) => {
+ // no nulls anywhere, not negated
+ let values = v.values();
+ let mut builder =
BooleanBufferBuilder::new(values.len());
+ for value in values.iter() {
+ builder.append(self.values.contains(value));
+ }
+ BooleanArray::new(builder.finish(), None)
+ }
+ (false, false, true) => {
+ let values = v.values();
Review Comment:
This code appears to be uncovered by tests. I tested using:
```shell
cargo llvm-cov test --html -p datafusion-physical-expr --lib -- in_lis
```
<img width="965" height="335" alt="Image"
src="https://github.com/user-attachments/assets/3e8694cb-e727-47d7-929b-8f7703dcfe06"
/>
Here is the whole report in case that is useful
[llvm-cov.zip](https://github.com/user-attachments/files/23688528/llvm-cov.zip)
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -1028,6 +1104,612 @@ mod tests {
Ok(())
}
+ #[test]
+ fn in_list_int8() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::Int8, true)]);
+ let a = Int8Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0i8), lit(1i8)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0i8), lit(1i8)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0i8), lit(1i8), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0i8), lit(1i8), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_int16() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::Int16, true)]);
+ let a = Int16Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0i16), lit(1i16)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0i16), lit(1i16)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0i16), lit(1i16), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0i16), lit(1i16), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_int32() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
+ let a = Int32Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0i32), lit(1i32)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0i32), lit(1i32)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0i32), lit(1i32), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0i32), lit(1i32), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_uint8() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::UInt8, true)]);
+ let a = UInt8Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0u8), lit(1u8)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0u8), lit(1u8)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0u8), lit(1u8), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0u8), lit(1u8), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_uint16() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::UInt16,
true)]);
+ let a = UInt16Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0u16), lit(1u16)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0u16), lit(1u16)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0u16), lit(1u16), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0u16), lit(1u16), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_uint32() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::UInt32,
true)]);
+ let a = UInt32Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0u32), lit(1u32)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0u32), lit(1u32)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0u32), lit(1u32), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0u32), lit(1u32), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_uint64() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::UInt64,
true)]);
+ let a = UInt64Array::from(vec![Some(0), Some(2), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in (0, 1)"
+ let list = vec![lit(0u64), lit(1u64)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1)"
+ let list = vec![lit(0u64), lit(1u64)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in (0, 1, NULL)"
+ let list = vec![lit(0u64), lit(1u64), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in (0, 1, NULL)"
+ let list = vec![lit(0u64), lit(1u64), lit(ScalarValue::Null)];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_large_utf8() -> Result<()> {
+ let schema = Schema::new(vec![Field::new("a", DataType::LargeUtf8,
true)]);
+ let a = LargeStringArray::from(vec![Some("a"), Some("d"), None]);
+ let col_a = col("a", &schema)?;
+ let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
+
+ // expression: "a in ("a", "b")"
+ let list = vec![lit("a"), lit("b")];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), Some(false), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in ("a", "b")"
+ let list = vec![lit("a"), lit("b")];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), Some(true), None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a in ("a", "b", null)"
+ let list = vec![lit("a"), lit("b"), lit(ScalarValue::LargeUtf8(None))];
+ in_list!(
+ batch,
+ list,
+ &false,
+ vec![Some(true), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ // expression: "a not in ("a", "b", null)"
+ let list = vec![lit("a"), lit("b"), lit(ScalarValue::LargeUtf8(None))];
+ in_list!(
+ batch,
+ list,
+ &true,
+ vec![Some(false), None, None],
+ Arc::clone(&col_a),
+ &schema
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn in_list_utf8_view() -> Result<()> {
Review Comment:
This PR adds tests for the utf8 types but makes no changes for those types.
Is that your intention?
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -198,68 +206,122 @@ impl ArrayStaticFilter {
}
}
-struct Int32StaticFilter {
- null_count: usize,
- values: HashSet<i32>,
-}
+// Macro to generate specialized StaticFilter implementations for primitive
types
+macro_rules! primitive_static_filter {
+ ($Name:ident, $ArrowType:ty) => {
+ struct $Name {
+ null_count: usize,
+ values: HashSet<<$ArrowType as ArrowPrimitiveType>::Native>,
+ }
-impl Int32StaticFilter {
- fn try_new(in_array: &ArrayRef) -> Result<Self> {
- let in_array = in_array
- .as_primitive_opt::<Int32Type>()
- .ok_or_else(|| exec_datafusion_err!("Failed to downcast array"))?;
+ impl $Name {
+ fn try_new(in_array: &ArrayRef) -> Result<Self> {
+ let in_array = in_array
+ .as_primitive_opt::<$ArrowType>()
+ .ok_or_else(|| exec_datafusion_err!(format!("Failed to
downcast an array to a '{}' array", stringify!($ArrowType))))?;
- let mut values = HashSet::with_capacity(in_array.len());
- let null_count = in_array.null_count();
+ let mut values = HashSet::with_capacity(in_array.len());
+ let null_count = in_array.null_count();
+
+ for v in in_array.iter().flatten() {
+ values.insert(v);
+ }
- for v in in_array.iter().flatten() {
- values.insert(v);
+ Ok(Self { null_count, values })
+ }
}
- Ok(Self { null_count, values })
- }
-}
+ impl StaticFilter for $Name {
+ fn null_count(&self) -> usize {
+ self.null_count
+ }
-impl StaticFilter for Int32StaticFilter {
- fn null_count(&self) -> usize {
- self.null_count
- }
+ fn contains(&self, v: &dyn Array, negated: bool) ->
Result<BooleanArray> {
+ // Handle dictionary arrays by recursing on the values
+ downcast_dictionary_array! {
+ v => {
+ let values_contains =
self.contains(v.values().as_ref(), negated)?;
+ let result = take(&values_contains, v.keys(), None)?;
+ return Ok(downcast_array(result.as_ref()))
+ }
+ _ => {}
+ }
- fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
- let v = v
- .as_primitive_opt::<Int32Type>()
- .ok_or_else(|| exec_datafusion_err!("Failed to downcast array"))?;
-
- let result = match (v.null_count() > 0, negated) {
- (true, false) => {
- // has nulls, not negated"
- BooleanArray::from_iter(
- v.iter().map(|value| Some(self.values.contains(&value?))),
- )
- }
- (true, true) => {
- // has nulls, negated
- BooleanArray::from_iter(
- v.iter().map(|value| Some(!self.values.contains(&value?))),
- )
- }
- (false, false) => {
- //no null, not negated
- BooleanArray::from_iter(
- v.values().iter().map(|value| self.values.contains(value)),
- )
- }
- (false, true) => {
- // no null, negated
- BooleanArray::from_iter(
- v.values().iter().map(|value|
!self.values.contains(value)),
- )
+ let v = v
+ .as_primitive_opt::<$ArrowType>()
+ .ok_or_else(|| exec_datafusion_err!(format!("Failed to
downcast an array to a '{}' array", stringify!($ArrowType))))?;
+
+ let haystack_has_nulls = self.null_count > 0;
+
+ let result = match (v.null_count() > 0, haystack_has_nulls,
negated) {
+ (true, _, false) | (false, true, false) => {
+ // Either needle or haystack has nulls, not negated
+ BooleanArray::from_iter(v.iter().map(|value| {
+ match value {
+ // SQL three-valued logic: null IN (...) is
always null
+ None => None,
+ Some(v) => {
+ if self.values.contains(&v) {
+ Some(true)
+ } else if haystack_has_nulls {
+ // value not in set, but set has nulls
-> null
+ None
+ } else {
+ Some(false)
+ }
+ }
+ }
+ }))
+ }
+ (true, _, true) | (false, true, true) => {
+ // Either needle or haystack has nulls, negated
+ BooleanArray::from_iter(v.iter().map(|value| {
+ match value {
+ // SQL three-valued logic: null NOT IN (...)
is always null
+ None => None,
+ Some(v) => {
+ if self.values.contains(&v) {
+ Some(false)
+ } else if haystack_has_nulls {
+ // value not in set, but set has nulls
-> null
+ None
+ } else {
+ Some(true)
+ }
+ }
+ }
+ }))
+ }
+ (false, false, false) => {
+ // no nulls anywhere, not negated
+ BooleanArray::from_iter(
Review Comment:
We have been discussing various improvements:
- https://github.com/apache/arrow-rs/issues/8561
- https://github.com/apache/arrow-rs/issues/8806
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]