xudong963 commented on a change in pull request #1339: URL: https://github.com/apache/arrow-datafusion/pull/1339#discussion_r753655676
########## File path: datafusion/tests/sql.rs ########## @@ -5999,3 +5999,86 @@ async fn test_expect_distinct() -> Result<()> { assert_batches_eq!(expected, &actual); Ok(()) } + +#[tokio::test] +async fn test_predicate_push_down_with_unsafe_null() -> Result<()> { + let mut ctx = ExecutionContext::new(); + let create_table1_sql = + "create table table1 as SELECT * FROM (VALUES (1), (2), (null)) as t"; + ctx.sql(create_table1_sql).await.unwrap(); + let create_table2_sql = + "create table table2 as SELECT * FROM (VALUES (1), (3), (null)) as t"; + ctx.sql(create_table2_sql).await.unwrap(); + // left join with is_not_null filter + let sql = "SELECT * FROM table1 LEFT JOIN table2 ON table1.column1 = table2.column1 WHERE table2.column1 IS NOT NULL"; + let actual = execute_to_batches(&mut ctx, sql).await; + let expected = vec![ + "+---------+---------+", + "| column1 | column1 |", + "+---------+---------+", + "| 1 | 1 |", + "+---------+---------+", + ]; + assert_batches_eq!(expected, &actual); + // left join with is_null filter + let sql = "SELECT * FROM table1 LEFT JOIN table2 ON table1.column1 = table2.column1 WHERE table2.column1 IS NULL ORDER BY table1.column1"; + let actual = execute_to_batches(&mut ctx, sql).await; + let expected = vec![ + "+---------+---------+", + "| column1 | column1 |", + "+---------+---------+", + "| | |", + "| 2 | |", + "+---------+---------+", Review comment: This is because DataFusion (DF) currently sorts nulls first by default, to be consistent with Spark. I'll open a pull request to make it consistent with PostgreSQL instead. FYI, the PostgreSQL rule: https://www.postgresql.org/docs/current/queries-order.html > The NULLS FIRST and NULLS LAST options can be used to determine whether nulls appear before or after non-null values in the sort ordering. By default, null values sort as if larger than any non-null value; that is, NULLS FIRST is the default for DESC order, and NULLS LAST otherwise. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org