alamb commented on code in PR #3636:
URL: https://github.com/apache/arrow-datafusion/pull/3636#discussion_r982803361
##########
datafusion/core/tests/sql/subqueries.rs:
##########
@@ -336,10 +336,10 @@ order by s_name;
Projection: #part.p_partkey AS p_partkey, alias=__sq_1
Filter: #part.p_name LIKE Utf8("forest%")
TableScan: part projection=[p_partkey, p_name],
partial_filters=[#part.p_name LIKE Utf8("forest%")]
- Projection: #lineitem.l_partkey, #lineitem.l_suppkey,
Decimal128(Some(50000000000000000),38,17) * CAST(#SUM(lineitem.l_quantity) AS
Decimal128(38, 17)) AS __value, alias=__sq_3
+ Projection: #lineitem.l_partkey, #lineitem.l_suppkey,
CAST(Float64(0.5) AS Decimal128(38, 17)) * CAST(#SUM(lineitem.l_quantity) AS
Decimal128(38, 17)) AS __value, alias=__sq_3
Aggregate: groupBy=[[#lineitem.l_partkey, #lineitem.l_suppkey]],
aggr=[[SUM(#lineitem.l_quantity)]]
- Filter: #lineitem.l_shipdate >= Date32("8766")
- TableScan: lineitem projection=[l_partkey, l_suppkey,
l_quantity, l_shipdate], partial_filters=[#lineitem.l_shipdate >=
Date32("8766")]"#
+ Filter: #lineitem.l_shipdate >= CAST(Utf8("1994-01-01") AS
Date32)
Review Comment:
likewise, this isn't right because it should have been evaluated to a
constant
##########
datafusion/core/src/execution/context.rs:
##########
@@ -1466,10 +1466,9 @@ impl SessionState {
}
let mut rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
- // Simplify expressions first to maximize the chance
- // of applying other optimizations
- Arc::new(SimplifyExpressions::new()),
Arc::new(PreCastLitInComparisonExpressions::new()),
Review Comment:
Now that we have `TypeCoercion` I wonder if we still need
`PreCastLitInComparisonExpressions`
https://github.com/apache/arrow-datafusion/blob/d16457a0ba129b077935078e5cf89d028f598e0b/datafusion/optimizer/src/pre_cast_lit_in_comparison.rs#L31-L50
Appears to be a subset of what TypeCoercion is doing
##########
datafusion/core/tests/sql/subqueries.rs:
##########
@@ -336,10 +336,10 @@ order by s_name;
Projection: #part.p_partkey AS p_partkey, alias=__sq_1
Filter: #part.p_name LIKE Utf8("forest%")
TableScan: part projection=[p_partkey, p_name],
partial_filters=[#part.p_name LIKE Utf8("forest%")]
- Projection: #lineitem.l_partkey, #lineitem.l_suppkey,
Decimal128(Some(50000000000000000),38,17) * CAST(#SUM(lineitem.l_quantity) AS
Decimal128(38, 17)) AS __value, alias=__sq_3
+ Projection: #lineitem.l_partkey, #lineitem.l_suppkey,
CAST(Float64(0.5) AS Decimal128(38, 17)) * CAST(#SUM(lineitem.l_quantity) AS
Decimal128(38, 17)) AS __value, alias=__sq_3
Review Comment:
This seems like a regression somehow -- the `CAST` hasn't been evaluated
into a constant
##########
datafusion/core/src/execution/context.rs:
##########
@@ -1466,10 +1466,9 @@ impl SessionState {
}
let mut rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
- // Simplify expressions first to maximize the chance
- // of applying other optimizations
- Arc::new(SimplifyExpressions::new()),
Arc::new(PreCastLitInComparisonExpressions::new()),
Review Comment:
Though perhaps this is something similar to what you have described in
https://github.com/apache/arrow-datafusion/issues/3622
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -50,56 +51,70 @@ impl OptimizerRule for TypeCoercion {
plan: &LogicalPlan,
optimizer_config: &mut OptimizerConfig,
) -> Result<LogicalPlan> {
- // optimize child plans first
- let new_inputs = plan
- .inputs()
- .iter()
- .map(|p| self.optimize(p, optimizer_config))
- .collect::<Result<Vec<_>>>()?;
-
- // get schema representing all available input fields. This is used
for data type
- // resolution only, so order does not matter here
- let schema = new_inputs.iter().map(|input| input.schema()).fold(
- DFSchema::empty(),
- |mut lhs, rhs| {
- lhs.merge(rhs);
- lhs
- },
- );
+ optimize_internal(&DFSchema::empty(), plan, optimizer_config)
+ }
+}
- let mut expr_rewrite = TypeCoercionRewriter {
- schema: Arc::new(schema),
- };
+fn optimize_internal(
+ // use the external schema to handle the correlated subqueries case
+ externel_schema: &DFSchema,
Review Comment:
```suggestion
external_schema: &DFSchema,
```
##########
datafusion/core/tests/sql/explain_analyze.rs:
##########
@@ -792,7 +811,6 @@ async fn csv_explain() {
\n CsvExec:
files=[ARROW_TEST_DATA/csv/aggregate_test_100.csv], has_header=true,
limit=None, projection=[c1, c2]\
\n"
]];
- assert_eq!(expected, actual);
// Also, expect same result with lowercase explain
Review Comment:
This comment seems to be out of date -- the different is not just just
`explain` vs `EXPLAIN` it is also the explicit cast that was added
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -119,6 +134,41 @@ impl ExprRewriter for TypeCoercionRewriter {
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
+ Expr::ScalarSubquery(Subquery { subquery }) => {
+ let mut optimizer_config = OptimizerConfig::new();
+ let new_plan =
+ optimize_internal(&self.schema, &subquery, &mut
optimizer_config)?;
+ Ok(Expr::ScalarSubquery(Subquery::new(new_plan)))
+ }
+ Expr::Exists { subquery, negated } => {
+ let mut optimizer_config = OptimizerConfig::new();
+ let new_plan = optimize_internal(
Review Comment:
I don't understand why the outer query's schema needs to be passed while
optimizing the subquery. I would expect that we would recursively optimize the
`subquery`. The subquery doesn't have the same schema as its containing query 🤔
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -119,6 +134,41 @@ impl ExprRewriter for TypeCoercionRewriter {
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
+ Expr::ScalarSubquery(Subquery { subquery }) => {
+ let mut optimizer_config = OptimizerConfig::new();
+ let new_plan =
+ optimize_internal(&self.schema, &subquery, &mut
optimizer_config)?;
+ Ok(Expr::ScalarSubquery(Subquery::new(new_plan)))
+ }
+ Expr::Exists { subquery, negated } => {
+ let mut optimizer_config = OptimizerConfig::new();
+ let new_plan = optimize_internal(
Review Comment:
As in I think this should be more like:
```rust
let optimizer = TypeCoercion::new();
let new_plan = optimizer.optimize(&subquery.subquery, &mut optimizer_config)
```
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -119,6 +134,41 @@ impl ExprRewriter for TypeCoercionRewriter {
fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
+ Expr::ScalarSubquery(Subquery { subquery }) => {
+ let mut optimizer_config = OptimizerConfig::new();
Review Comment:
I think we should pass through the same OptimizerConfig (so it has the same
context needed to evaluate `now()` rather than creating new ones
##########
datafusion/optimizer/tests/integration-test.rs:
##########
@@ -109,10 +109,9 @@ fn test_sql(sql: &str) -> Result<LogicalPlan> {
// TODO should make align with rules in the context
// https://github.com/apache/arrow-datafusion/issues/3524
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
- // Simplify expressions first to maximize the chance
- // of applying other optimizations
- Arc::new(SimplifyExpressions::new()),
Arc::new(PreCastLitInComparisonExpressions::new()),
+ Arc::new(TypeCoercion::new()),
Review Comment:
👍
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -735,4 +786,27 @@ mod test {
),
}))
}
+
+ #[test]
+ fn test_type_coercion_rewrite() -> Result<()> {
+ let schema = Arc::new(
+ DFSchema::new_with_metadata(
+ vec![DFField::new(None, "a", DataType::Int64, true)],
+ std::collections::HashMap::new(),
+ )
+ .unwrap(),
+ );
+ let mut rewriter = TypeCoercionRewriter::new(schema);
+ let expr = is_true(
+
lit(ScalarValue::Int32(Some(12))).eq(lit(ScalarValue::Int64(Some(13)))),
+ );
Review Comment:
I wonder if you can write these tests like this to make them easier to read?
```suggestion
let expr = is_true(
lit(12i32).eq(lit(12i64),
);
```
##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -735,4 +786,27 @@ mod test {
),
}))
}
+
+ #[test]
+ fn test_type_coercion_rewrite() -> Result<()> {
+ let schema = Arc::new(
+ DFSchema::new_with_metadata(
+ vec![DFField::new(None, "a", DataType::Int64, true)],
+ std::collections::HashMap::new(),
+ )
+ .unwrap(),
+ );
+ let mut rewriter = TypeCoercionRewriter::new(schema);
+ let expr = is_true(
+
lit(ScalarValue::Int32(Some(12))).eq(lit(ScalarValue::Int64(Some(13)))),
+ );
+ let expected = is_true(
+ cast(lit(ScalarValue::Int32(Some(12))), DataType::Int64)
+ .eq(lit(ScalarValue::Int64(Some(13)))),
+ );
+ let result = expr.rewrite(&mut rewriter)?;
+ assert_eq!(expected, result);
+ Ok(())
+ // TODO add more test for this
Review Comment:
I agree some more tests would be good
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]