mihaibudiu commented on code in PR #4280:
URL: https://github.com/apache/calcite/pull/4280#discussion_r2036363334
##########
core/src/main/java/org/apache/calcite/rel/rules/JoinConditionOrExpansionRule.java:
##########
@@ -55,40 +62,92 @@ protected JoinConditionOrExpansionRule(Config config) {
@Override public void onMatch(RelOptRuleCall call) {
Join join = call.rel(0);
RelBuilder relBuilder = call.builder();
- List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
-
- if (orConds.size() <= 1) {
- return;
- }
RelNode converted;
switch (join.getJoinType()) {
case INNER:
- converted = expandInnerJoin(join, orConds, relBuilder);
- break;
- case SEMI:
- converted =
- expandSemiOrAntiJoin(join, JoinRelType.SEMI, orConds, relBuilder);
+ converted = expandInnerJoin(join, relBuilder);
break;
case ANTI:
- converted =
- expandSemiOrAntiJoin(join, JoinRelType.ANTI, orConds, relBuilder);
+ converted = expandAntiJoin(join, relBuilder);
break;
case LEFT:
- converted = expandLeftJoin(join, orConds, relBuilder);
+ converted = expandLeftOrRightJoin(join, true, relBuilder);
break;
case RIGHT:
- converted = expandRightJoin(join, orConds, relBuilder);
+ converted = expandLeftOrRightJoin(join, false, relBuilder);
break;
case FULL:
- converted = expandFullJoin(join, orConds, relBuilder);
+ converted = expandFullJoin(join, relBuilder);
break;
default:
return;
}
+
+ if (converted == null) {
+ return;
+ }
call.transformTo(converted);
}
+ private boolean skip(Join join) {
+ List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
+ return orConds.size() <= 1;
+ }
+
+ private List<RexNode> splitCond(Join join) {
+ RexBuilder builder = join.getCluster().getRexBuilder();
+ List<RexNode> conds = new ArrayList<>();
+ List<RexNode> otherConds = new ArrayList<>();
+ List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
+ int leftFieldCount = join.getLeft().getRowType().getFieldCount();
+
+ for (RexNode cond : orConds) {
+ if (!isUsefulCond(cond, leftFieldCount)) {
Review Comment:
So you are not preserving the order...
This may cause problems with operations that cause runtime exceptions.
Note that almost anything can cause runtime exceptions, e.g., arithmetic
overflow.
##########
core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml:
##########
@@ -6613,26 +6613,26 @@ EnumerableProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL
<Resource name="sql">
<![CDATA[select *
from EMP as p1
-inner join EMP as p2 on p1.empno = p2.empno or p1.sal = p2.sal]]>
+inner join EMP as p2 on p1.empno = p2.empno or p1.mgr = p2.mgr]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EMPNO0=[$9], ENAME0=[$10],
JOB0=[$11], MGR0=[$12], HIREDATE0=[$13], SAL0=[$14], COMM0=[$15],
DEPTNO0=[$16], SLACKER0=[$17])
- LogicalJoin(condition=[OR(=($0, $9), =($5, $14))], joinType=[inner])
+ LogicalJoin(condition=[OR(=($0, $9), =($3, $12))], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
-EnumerableProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EMPNO0=[$9], ENAME0=[$10],
JOB0=[$11], MGR0=[$12], HIREDATE0=[$13], SAL0=[$14], COMM0=[$15],
DEPTNO0=[$16], SLACKER0=[$17])
- EnumerableUnion(all=[true])
- EnumerableHashJoin(condition=[=($0, $9)], joinType=[inner])
- EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
- EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
- EnumerableHashJoin(condition=[AND(=($5, $14), <>($0, $9))],
joinType=[inner])
- EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
- EnumerableTableScan(table=[[CATALOG, SALES, EMP]])
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EMPNO0=[$9], ENAME0=[$10],
JOB0=[$11], MGR0=[$12], HIREDATE0=[$13], SAL0=[$14], COMM0=[$15],
DEPTNO0=[$16], SLACKER0=[$17])
Review Comment:
this is better
##########
core/src/main/java/org/apache/calcite/rel/rules/JoinCommuteRule.java:
##########
@@ -137,6 +137,12 @@ public JoinCommuteRule(Class<? extends Join> clazz,
.build();
}
+ public static RexNode swapJoinCond(RexNode cond, Join join, RexBuilder
rexBuilder) {
Review Comment:
Does this have to be public or can it be moved to the file where it's used?
##########
core/src/main/java/org/apache/calcite/rel/rules/JoinConditionOrExpansionRule.java:
##########
@@ -256,26 +313,50 @@ private List<RelNode> expandInnerJoinToRelNodes(Join
join, List<RexNode> orConds
* │ └── TableScan[tbl]
* └── TableScan[tbl]
*/
- private RelNode expandSemiOrAntiJoin(Join join, JoinRelType joinType,
- List<RexNode> orConds, RelBuilder relBuilder) {
- RelNode left = join.getLeft();
+ private @Nullable RelNode expandAntiJoin(Join join, RelBuilder relBuilder) {
+ if (skip(join)) {
+ return null;
+ }
+ List<RexNode> orConds = splitCond(join);
+ return expandAntiJoinToRelNode(join, orConds, true, false, relBuilder);
+ }
+
+ private RelNode expandAntiJoinToRelNode(Join join, List<RexNode> orConds,
+ boolean isLeftAnti, boolean isAppendNulls, RelBuilder relBuilder) {
+ RelNode left = isLeftAnti ? join.getLeft() : join.getRight();
+ RelNode right = isLeftAnti ? join.getRight() : join.getLeft();
+
+ RelNode top = left;
for (int i = 0; i < orConds.size(); i++) {
RexNode orCond = orConds.get(i);
- relBuilder.push(left)
- .push(join.getRight())
- .join(joinType, orCond);
- left = relBuilder.build();
+ relBuilder.push(top)
+ .push(right)
+ .join(JoinRelType.ANTI,
+ isLeftAnti
+ ? orCond
+ : JoinCommuteRule.swapJoinCond(orCond, join,
relBuilder.getRexBuilder()));
+ top = relBuilder.build();
}
- relBuilder.push(left);
- List<RexNode> projects = new ArrayList<>(relBuilder.fields());
- int fieldCount = relBuilder.fields().size();
- while (fieldCount < join.getRowType().getFieldCount()) {
- projects.add(
+ if (!isAppendNulls) {
+ return top;
+ }
+
+ relBuilder.push(top);
+ int fieldCount = 0;
+ List<RexNode> fields = new ArrayList<>(relBuilder.fields());
+ List<RexNode> nulls = new ArrayList<>();
+ while (fieldCount < right.getRowType().getFieldCount()) {
Review Comment:
maybe a for loop is better
##########
core/src/main/java/org/apache/calcite/rel/rules/JoinConditionOrExpansionRule.java:
##########
@@ -55,40 +62,92 @@ protected JoinConditionOrExpansionRule(Config config) {
@Override public void onMatch(RelOptRuleCall call) {
Join join = call.rel(0);
RelBuilder relBuilder = call.builder();
- List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
-
- if (orConds.size() <= 1) {
- return;
- }
RelNode converted;
switch (join.getJoinType()) {
case INNER:
- converted = expandInnerJoin(join, orConds, relBuilder);
- break;
- case SEMI:
- converted =
- expandSemiOrAntiJoin(join, JoinRelType.SEMI, orConds, relBuilder);
+ converted = expandInnerJoin(join, relBuilder);
break;
case ANTI:
- converted =
- expandSemiOrAntiJoin(join, JoinRelType.ANTI, orConds, relBuilder);
+ converted = expandAntiJoin(join, relBuilder);
break;
case LEFT:
- converted = expandLeftJoin(join, orConds, relBuilder);
+ converted = expandLeftOrRightJoin(join, true, relBuilder);
break;
case RIGHT:
- converted = expandRightJoin(join, orConds, relBuilder);
+ converted = expandLeftOrRightJoin(join, false, relBuilder);
break;
case FULL:
- converted = expandFullJoin(join, orConds, relBuilder);
+ converted = expandFullJoin(join, relBuilder);
break;
default:
return;
}
+
+ if (converted == null) {
+ return;
+ }
call.transformTo(converted);
}
+ private boolean skip(Join join) {
+ List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
+ return orConds.size() <= 1;
+ }
+
+ private List<RexNode> splitCond(Join join) {
+ RexBuilder builder = join.getCluster().getRexBuilder();
+ List<RexNode> conds = new ArrayList<>();
+ List<RexNode> otherConds = new ArrayList<>();
+ List<RexNode> orConds = RelOptUtil.disjunctions(join.getCondition());
+ int leftFieldCount = join.getLeft().getRowType().getFieldCount();
+
+ for (RexNode cond : orConds) {
+ if (!isUsefulCond(cond, leftFieldCount)) {
Review Comment:
If you want to be conservative you have to:
- either detect whether an expression can cause a runtime exception and only
move the ones that never do
- preserve the order and stop at the first expression which is not useful
and put everything from that point on in a single big OR which is not expanded
Can you add a test for such a case so we can see what happens?
For example an expression such as `A <= 0 OR LN(A) <= 0`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]