gruuya commented on code in PR #12943:
URL: https://github.com/apache/datafusion/pull/12943#discussion_r1820766069


##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1615,6 +1659,91 @@ mod tests {
         assert_optimized_plan_eq(plan, expected)
     }
 
+    #[test]
+    fn distinct_on() -> Result<()> {

Review Comment:
   I think we also need a "false negative" test here, i.e. a filter on the same 
subquery as the distinct (so without an `alias`) but not on a distinct column.
   
   Something like (not tested):
   ```rust
       fn distinct_on_() -> Result<()> {
           let table_scan = test_table_scan()?;
           let plan = LogicalPlanBuilder::from(table_scan)
               .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
               .filter(col("b").eq(lit(1i64)))?
               .build()?;
           // filter is on the same subquery as the distinct, so it should be 
pushed down even though it isn't in the ON clause
           let expected = "\
           DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\
           \n  TableScan: test, full_filters=[b = Int64(1)]";
           assert_optimized_plan_eq(plan, expected)
       }
   ```



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1615,6 +1659,91 @@ mod tests {
         assert_optimized_plan_eq(plan, expected)
     }
 
+    #[test]
+    fn distinct_on() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+            .filter(col("a").eq(lit(1i64)))?
+            .build()?;
+        // filter is on the same subquery as the distinct, so it should be 
pushed down
+        let expected = "\
+        DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], sort_expr=[[]]\
+        \n  TableScan: test, full_filters=[a = Int64(1)]";
+        assert_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn subquery_distinct_on_filter_on_distinct_column() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+            .alias("test2")?
+            .filter(col("a").eq(lit(1i64)))?
+            .build()?;
+        // filter is on the distinct column, so it can be pushed down
+        let expected = "\
+        SubqueryAlias: test2\
+        \n  DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], 
sort_expr=[[]]\
+        \n    TableScan: test, full_filters=[a = Int64(1)]";
+        assert_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn subquery_distinct_on_filter_on_volatile_function() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+            .alias("test2")?
+            .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf(
+                random(),
+                vec![],
+            ))))?
+            .build()?;
+        // filter is on volatile function, so it should not be pushed down
+        let expected = "\
+        Filter: test2.a = random()\
+        \n  SubqueryAlias: test2\
+        \n    DistinctOn: on_expr=[[test.a]], select_expr=[[a, b]], 
sort_expr=[[]]\
+        \n      TableScan: test";
+        assert_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn subquery_distinct_on_filter_on_non_volatile_function() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?
+            .alias("test2")?
+            .filter(col("a").eq(Expr::ScalarFunction(ScalarFunction::new_udf(
+                abs(),
+                vec![col("a")],
+            ))))?
+            .build()?;
+        // filter is on volatile function, so it should not be pushed down

Review Comment:
   ```suggestion
           // filter is on a non-volatile function, so it should be pushed down
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to