SubhamSinghal commented on code in PR #21303:
URL: https://github.com/apache/datafusion/pull/21303#discussion_r3027721541
##########
datafusion/optimizer/src/eliminate_outer_join.rs:
##########
@@ -436,6 +454,221 @@ mod tests {
")
}
+ #[test]
+ fn eliminate_left_with_in_list() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // t2.b IN (1, 2, 3) rejects nulls — if t2.b is NULL the IN returns
+ // NULL which is filtered out. So Left Join should become Inner Join.
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Left,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(col("t2.b").in_list(vec![lit(1u32), lit(2u32), lit(3u32)],
false))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t2.b IN ([UInt32(1), UInt32(2), UInt32(3)])
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_left_with_in_list_containing_null() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // IN list with NULL still rejects null input columns:
+ // if t2.b is NULL, NULL IN (1, NULL) evaluates to NULL, which is
filtered out
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Left,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(
+ col("t2.b")
+ .in_list(vec![lit(1u32), lit(ScalarValue::UInt32(None))],
false),
+ )?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t2.b IN ([UInt32(1), UInt32(NULL)])
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_left_with_not_in_list() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // NOT IN also rejects nulls: if t2.b is NULL, NOT (NULL IN (...))
+ // evaluates to NULL, which is filtered out
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Left,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(col("t2.b").in_list(vec![lit(1u32), lit(2u32)], true))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t2.b NOT IN ([UInt32(1), UInt32(2)])
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_left_with_between() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // BETWEEN rejects nulls: if t2.b is NULL, NULL BETWEEN 1 AND 10
+ // evaluates to NULL, which is filtered out
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Left,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(col("t2.b").between(lit(1u32), lit(10u32)))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t2.b BETWEEN UInt32(1) AND UInt32(10)
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_right_with_between() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // Right join: filter on left (nullable) side with BETWEEN should
convert to Inner
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Right,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(col("t1.b").between(lit(1u32), lit(10u32)))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t1.b BETWEEN UInt32(1) AND UInt32(10)
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_full_with_between() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // Full join with BETWEEN on both sides should become Inner
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Full,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(binary_expr(
+ col("t1.b").between(lit(1u32), lit(10u32)),
+ And,
+ col("t2.b").between(lit(5u32), lit(20u32)),
+ ))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t1.b BETWEEN UInt32(1) AND UInt32(10) AND t2.b BETWEEN
UInt32(5) AND UInt32(20)
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn eliminate_full_with_in_list() -> Result<()> {
+ let t1 = test_table_scan_with_name("t1")?;
+ let t2 = test_table_scan_with_name("t2")?;
+
+ // Full join with IN filters on both sides should become Inner
+ let plan = LogicalPlanBuilder::from(t1)
+ .join(
+ t2,
+ JoinType::Full,
+ (vec![Column::from_name("a")], vec![Column::from_name("a")]),
+ None,
+ )?
+ .filter(binary_expr(
+ col("t1.b").in_list(vec![lit(1u32), lit(2u32)], false),
+ And,
+ col("t2.b").in_list(vec![lit(3u32), lit(4u32)], false),
+ ))?
+ .build()?;
+
+ assert_optimized_plan_equal!(plan, @r"
+ Filter: t1.b IN ([UInt32(1), UInt32(2)]) AND t2.b IN ([UInt32(3),
UInt32(4)])
+ Inner Join: t1.a = t2.a
+ TableScan: t1
+ TableScan: t2
+ ")
+ }
+
+ #[test]
+ fn no_eliminate_left_with_in_list_or_is_null() -> Result<()> {
Review Comment:
added slt tests
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]