ddupg opened a new issue, #17681:
URL: https://github.com/apache/datafusion/issues/17681
### Describe the bug
When using an `IN` clause containing NULL (e.g., `col IN (1, NULL)`),
`ExprSimplifier` appears to return incorrect results.
### To Reproduce
Reproduce test:
```rust
mod tests {
    use std::sync::Arc;

    use arrow_schema::{DataType, Field, SchemaRef};
    use datafusion_common::{Column, DFSchema, ScalarValue};
    use datafusion_expr::execution_props::ExecutionProps;
    use datafusion_expr::simplify::SimplifyContext;
    use datafusion_expr::{BinaryExpr, Cast, Expr, Operator};
    use datafusion_optimizer::simplify_expressions::ExprSimplifier;

    /// Runs `expr` through `ExprSimplifier::simplify` and then
    /// `ExprSimplifier::coerce`, both against a `DFSchema` built from `schema`.
    ///
    /// # Errors
    ///
    /// Propagates any error from converting `schema` into a `DFSchema`, from
    /// the simplify pass, or from the coerce pass.
    pub fn optimize_expr(
        schema: SchemaRef,
        expr: Expr,
    ) -> datafusion_common::Result<Expr> {
        let df_schema = Arc::new(DFSchema::try_from(schema.as_ref().clone())?);

        // DataFusion needs the simplify and coerce passes to be applied before
        // expressions can be handled by the physical planner.
        let props = ExecutionProps::default();
        let simplify_context =
            SimplifyContext::new(&props).with_schema(Arc::clone(&df_schema));
        let simplifier = ExprSimplifier::new(simplify_context);

        // Order matters for the reproduction: simplify first, then coerce.
        let expr = simplifier.simplify(expr)?;
        simplifier.coerce(expr, &df_schema)
    }

    /// Reproducer: prints the expression before and after `optimize_expr` so
    /// the two `Debug` dumps can be compared by eye (there are no assertions).
    #[test]
    fn test() {
        // Single nullable Int32 column named "x".
        let schema = Arc::new(arrow_schema::Schema::new(vec![Field::new(
            "x",
            DataType::Int32,
            true,
        )]));

        // `x IN (1, NULL)`, written out in its expanded form:
        // `x = 1 OR x = CAST(NULL AS Int32)`.
        let x_eq_one = Expr::BinaryExpr(BinaryExpr::new(
            Box::new(Expr::Column(Column::new(None::<String>, "x"))),
            Operator::Eq,
            Box::new(Expr::Literal(ScalarValue::Int32(Some(1)), None)),
        ));
        let x_eq_null = Expr::BinaryExpr(BinaryExpr::new(
            Box::new(Expr::Column(Column::new(None::<String>, "x"))),
            Operator::Eq,
            Box::new(Expr::Cast(Cast::new(
                Box::new(Expr::Literal(ScalarValue::Null, None)),
                DataType::Int32,
            ))),
        ));
        let expr = Expr::BinaryExpr(BinaryExpr::new(
            Box::new(x_eq_one),
            Operator::Or,
            Box::new(x_eq_null),
        ));

        println!("expr: {:#?}", expr);
        // `schema` is not needed afterwards, so it is moved rather than cloned.
        let logical_expr = optimize_expr(schema, expr).unwrap();
        println!("logical_expr: {:#?}", logical_expr);
    }
}
```
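The output (note that in `logical_expr` the right-hand `x = CAST(NULL AS Int32)` comparison has been replaced by the literal `Boolean(NULL)`):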
```
expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Cast(
Cast {
expr: Literal(
NULL,
None,
),
data_type: Int32,
},
),
},
),
},
)
logical_expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: Literal(
Boolean(NULL),
None,
),
},
)
```
For comparison, the output of DataFusion 49.0.2, which keeps the comparison as `x = Int32(NULL)`, is:
```
logical_expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(NULL),
None,
),
},
),
},
)
```
### Expected behavior
_No response_
### Additional context
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]