gruuya commented on code in PR #12943:
URL: https://github.com/apache/datafusion/pull/12943#discussion_r1820766069
##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1615,6 +1659,91 @@ mod tests {
assert_optimized_plan_eq(plan, expected)
}
+ #[test]
+ fn distinct_on() -> Result<()> {
Review Comment:
I think we also need a "false negative" test here, i.e. a filter on the same
subquery as the distinct (so without an `alias`) but not on a distinct column.
Something like (not tested):
```rust
fn distinct_on_() -> Result<()> {
let table_scan = test_table_scan()?;
let plan = LogicalPlanBuilder::from(table_scan)
.distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
.filter(col("b").eq(lit(1i64)))?
.build()?;
// filter is on the same subquery as the distinct, so it should be
pushed down even though it isn't in the ON clause
let expected = "\
DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\
\n TableScan: test, full_filters=[b = Int64(1)]";
assert_optimized_plan_eq(plan, expected)
}
```
##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1615,6 +1659,91 @@ mod tests {
assert_optimized_plan_eq(plan, expected)
}
+ #[test]
+ fn distinct_on() -> Result<()> {
+ let table_scan = test_table_scan()?;
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+ .filter(col("a").eq(lit(1i64)))?
+ .build()?;
+ // filter is on the same subquery as the distinct, so it should be
pushed down
+ let expected = "\
+ DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\
+ \n TableScan: test, full_filters=[a = Int64(1)]";
+ assert_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn subquery_distinct_on_filter_on_distinct_column() -> Result<()> {
+ let table_scan = test_table_scan()?;
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+ .alias("test2")?
+ .filter(col("a").eq(lit(1i64)))?
+ .build()?;
+ // filter is on the distinct column, so it can be pushed down
+ let expected = "\
+ SubqueryAlias: test2\
+ \n DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]],
sort_expr=[[]]\
+ \n TableScan: test, full_filters=[a = Int64(1)]";
+ assert_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn subquery_distinct_on_filter_on_volatile_function() -> Result<()> {
+ let table_scan = test_table_scan()?;
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+ .alias("test2")?
+ .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf(
+ random(),
+ vec![],
+ ))))?
+ .build()?;
+ // filter is on volatile function, so it should not be pushed down
+ let expected = "\
+ Filter: test2.a = random()\
+ \n SubqueryAlias: test2\
+ \n DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]],
sort_expr=[[]]\
+ \n TableScan: test";
+ assert_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn subquery_distinct_on_filter_on_non_volatile_function() -> Result<()> {
+ let table_scan = test_table_scan()?;
+ let plan = LogicalPlanBuilder::from(table_scan)
+ .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+ .alias("test2")?
+ .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf(
+ abs(),
+ vec![col("a")],
+ ))))?
+ .build()?;
+ // filter is on volatile function, so it should not be pushed down
Review Comment:
```suggestion
// filter is on a non-volatile function, so it should be pushed down
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]