gruuya commented on code in PR #12943: URL: https://github.com/apache/datafusion/pull/12943#discussion_r1820766069
########## datafusion/optimizer/src/push_down_filter.rs: ########## @@ -1615,6 +1659,91 @@ mod tests { assert_optimized_plan_eq(plan, expected) } + #[test] + fn distinct_on() -> Result<()> { Review Comment: I think we also need a "false negative" test here, i.e. a filter on the same subquery as the distinct (so without an `alias`) but not on a distinct column. Something like (not tested): ```rust fn distinct_on_() -> Result<()> { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(table_scan) .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)? .filter(col("b").eq(lit(1i64)))? .build()?; // filter is on the same subquery as the distinct, so it should be pushed down even though it isn't in the ON clause let expected = "\ DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\ \n TableScan: test, full_filters=[b = Int64(1)]"; assert_optimized_plan_eq(plan, expected) } ``` ########## datafusion/optimizer/src/push_down_filter.rs: ########## @@ -1615,6 +1659,91 @@ mod tests { assert_optimized_plan_eq(plan, expected) } + #[test] + fn distinct_on() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)? + .filter(col("a").eq(lit(1i64)))? + .build()?; + // filter is on the same subquery as the distinct, so it should be pushed down + let expected = "\ + DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\ + \n TableScan: test, full_filters=[a = Int64(1)]"; + assert_optimized_plan_eq(plan, expected) + } + + #[test] + fn subquery_distinct_on_filter_on_distinct_column() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)? + .alias("test2")? + .filter(col("a").eq(lit(1i64)))? + .build()?; + // filter is on the distinct column, so it can be pushed down + let expected = "\ + SubqueryAlias: test2\ + \n DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\ + \n TableScan: test, full_filters=[a = Int64(1)]"; + assert_optimized_plan_eq(plan, expected) + } + + #[test] + fn subquery_distinct_on_filter_on_volatile_function() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)? + .alias("test2")? + .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf( + random(), + vec![], + ))))? + .build()?; + // filter is on volatile function, so it should not be pushed down + let expected = "\ + Filter: test2.a = random()\ + \n SubqueryAlias: test2\ + \n DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\ + \n TableScan: test"; + assert_optimized_plan_eq(plan, expected) + } + + #[test] + fn subquery_distinct_on_filter_on_non_volatile_function() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)? + .alias("test2")? + .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf( + abs(), + vec![col("a")], + ))))? + .build()?; + // filter is on volatile function, so it should not be pushed down Review Comment: ```suggestion // filter is on a non-volatile function, so it should be pushed down ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org