abey79 opened a new issue, #17416:
URL: https://github.com/apache/datafusion/issues/17416
### Describe the bug
The following filter expression generates an `InternalError("type mismatch")`
for a column of type `List[non-nullable Bool]` (i.e. the inner field's nullability is `false`):
```
.filter(array_distinct(col(column_name)).eq(make_array(vec![lit(false)])))
```
### To Reproduce
This test fails as per the comments:
```rust
use std::sync::Arc;
use arrow::array::{ArrayRef, BooleanArray, ListArray, RecordBatch};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::catalog::MemTable;
use datafusion::error::DataFusionError;
use datafusion::logical_expr::{col, lit};
use datafusion::prelude::{SessionContext, array_distinct, make_array};
/// Registers a one-column in-memory table holding a `List<Boolean>` array and
/// collects the rows selected by `array_distinct(col) = make_array(false)`.
///
/// * `inner_nullable` - nullability of the list's `item` field.
/// * `outer_nullable` - nullability of the list column itself.
///
/// Setup failures (array, batch, table, registration, planning) panic; only the
/// outcome of `collect()` is handed back to the caller.
async fn run_test(
    inner_nullable: bool,
    outer_nullable: bool,
) -> Result<Vec<RecordBatch>, DataFusionError> {
    let column_name = "bool_column";

    // Eight flat booleans sliced by the offsets into five lists:
    // [t], [f], [t, t], [f, f], [t, f].
    let flat_values =
        BooleanArray::from(vec![true, false, true, true, false, false, true, false]);
    let list_offsets = OffsetBuffer::new(vec![0i32, 1, 2, 4, 6, 8].into());
    let item_field = Arc::new(Field::new("item", DataType::Boolean, inner_nullable));
    let list_column: ArrayRef = Arc::new(
        ListArray::try_new(item_field, list_offsets, Arc::new(flat_values), None)
            .expect("failed to create a bool list array"),
    );

    // The schema's column type is taken from the array itself, so the inner
    // nullability flag is carried into the table schema.
    let schema = Arc::new(Schema::new(vec![Field::new(
        column_name,
        list_column.data_type().clone(),
        outer_nullable,
    )]));

    let batch = RecordBatch::try_new_with_options(
        Arc::clone(&schema),
        vec![list_column],
        &Default::default(),
    )
    .expect("failed to create the record batch");
    let table =
        MemTable::try_new(schema, vec![vec![batch]]).expect("failed to create mem table");

    let ctx = SessionContext::new();
    ctx.register_table("test_table", Arc::new(table))
        .expect("failed to register table");

    // Predicate under test: the de-duplicated list equals the literal list `[false]`.
    let predicate = array_distinct(col(column_name)).eq(make_array(vec![lit(false)]));

    let data_frame = ctx.table("test_table").await.unwrap();
    data_frame.filter(predicate).unwrap().collect().await
}
/// Runs the filter against every combination of inner/outer nullability.
///
/// The two cases with nullable list items pass; the two cases with non-nullable
/// list items are the ones reported as failing.
#[tokio::test]
async fn internal_error_repro() {
    // Nullable list items: the filter evaluates successfully.
    assert!(run_test(true, true).await.is_ok());
    assert!(run_test(true, false).await.is_ok());

    // Non-nullable list items: both assertions below FAIL with
    // InternalError("type mismatch"). `dbg!` prints the returned error.
    assert!(dbg!(run_test(false, true).await).is_ok());
    assert!(dbg!(run_test(false, false).await).is_ok());
}
```
### Expected behavior
My expectation would be that this filter expression should work regardless
of the nullability.
### Additional context
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]