Copilot commented on code in PR #22612:
URL: https://github.com/apache/datafusion/pull/22612#discussion_r3322924520
##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -207,6 +218,25 @@ impl OptimizerRule for EliminateCrossJoin {
}
}
+/// Returns `true` if `plan` contains at least one [`LogicalPlan::Join`]
+/// node anywhere in its tree.
+///
+/// Used as a fast-path gate at the top of [`EliminateCrossJoin::rewrite`]
+/// so that join-free plans skip the full recursive rewrite — read-only
+/// `apply` walk with early stop on first match, no allocations.
+fn plan_has_joins(plan: &LogicalPlan) -> bool {
+ let mut found = false;
+ let _ = plan.apply(|node| {
+ if matches!(node, LogicalPlan::Join(_)) {
+ found = true;
+ Ok(TreeNodeRecursion::Stop)
+ } else {
+ Ok(TreeNodeRecursion::Continue)
+ }
+ });
+ found
+}
Review Comment:
`plan_has_joins` uses `LogicalPlan::apply`, which only walks direct inputs
and does **not** traverse embedded subquery plans (e.g. `Expr::ScalarSubquery`,
`IN (SELECT ...)`). This can incorrectly return `false` for an outer join-free
plan that contains joins inside an uncorrelated subquery, causing
`EliminateCrossJoin::rewrite` to short-circuit and skip optimizing those
subqueries.
##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -1418,4 +1448,72 @@ mod tests {
Ok(())
}
+
+ // ---------------- fast-path tests ----------------
+
+ /// `plan_has_joins` detects a `Join` at the root of the plan.
+ #[test]
+ fn plan_has_joins_detects_root_join() -> Result<()> {
+ let plan = LogicalPlanBuilder::from(test_table_scan_with_name("t1")?)
+ .cross_join(test_table_scan_with_name("t2")?)?
+ .build()?;
+ assert!(plan_has_joins(&plan));
+ Ok(())
+ }
+
+ /// `plan_has_joins` detects a `Join` nested under other operators.
+ #[test]
+ fn plan_has_joins_detects_nested_join() -> Result<()> {
+ let plan = LogicalPlanBuilder::from(test_table_scan_with_name("t1")?)
+ .cross_join(test_table_scan_with_name("t2")?)?
+ .filter(col("t1.a").eq(col("t2.a")))?
+ .project(vec![col("t1.a")])?
+ .build()?;
+ assert!(plan_has_joins(&plan));
+ Ok(())
+ }
+
+ /// Join-free plans return `false` so the fast-path in `rewrite` can
+ /// bail out before doing any recursion.
+ #[test]
+ fn plan_has_joins_returns_false_for_join_free_plan() -> Result<()> {
+ let plan = LogicalPlanBuilder::from(test_table_scan_with_name("t1")?)
+ .filter(col("a").gt(lit(0_i32)))?
+ .project(vec![col("a"), col("b")])?
+ .build()?;
+ assert!(!plan_has_joins(&plan));
+ Ok(())
+ }
+
+ /// `EliminateCrossJoin::rewrite` short-circuits on join-free plans:
+ /// no recursion into `rewrite_children`, no `Transformed::yes`,
+ /// the plan comes back identical.
+ #[test]
+ fn rewrite_short_circuits_when_plan_has_no_joins() -> Result<()> {
+ let plan = LogicalPlanBuilder::from(test_table_scan_with_name("t1")?)
+ .filter(col("a").gt(lit(0_i32)))?
+ .project(vec![col("a"), col("b")])?
+ .build()?;
+
+ let starting_display = plan.display_indent_schema().to_string();
+ let starting_schema = Arc::clone(plan.schema());
+
+ let rule = EliminateCrossJoin::new();
+ let Transformed {
+ transformed,
+ data: optimized_plan,
+ ..
+ } = rule.rewrite(plan, &OptimizerContext::new())?;
+
+ assert!(
+ !transformed,
+ "join-free plan should not be marked as transformed"
+ );
+ assert_eq!(&starting_schema, optimized_plan.schema());
+ assert_eq!(
+ starting_display,
+ optimized_plan.display_indent_schema().to_string()
+ );
+ Ok(())
+ }
}
Review Comment:
The new fast-path tests don't cover the important case of a join that exists
only inside an embedded subquery plan. Adding a regression test here helps
ensure `plan_has_joins` (and thus the rewrite short-circuit) continues to
consider joins in subqueries.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]