suibianwanwank commented on PR #4332:
URL: https://github.com/apache/calcite/pull/4332#issuecomment-2853510060
To further identify the issue, I forcibly applied the
EnumerableNestedLoopJoin rule to make the execution plans identical for `=` and
`is not distinct from`. Here are some tests:
```
SELECT *
FROM (
SELECT EMPNO, COMM, COMM + 1000 AS e1
FROM EMP
) e_outer
LEFT JOIN (
SELECT e0, TRUE AS EXISTS_FLAG
FROM (
SELECT t1.EMPNO, t1.e1, t2.e0
FROM (
SELECT EMPNO, COMM + 100 AS e1, COMM
FROM EMP
) t1
JOIN (
SELECT COMM + 1000 AS e0
FROM EMP
GROUP BY COMM + 1000
) t2
ON t1.COMM IS NULL OR t1.e1 > t2.e0
) j
GROUP BY e0
) subq
ON e_outer.e1 = subq.e0;
EMPNO | COMM | E1 | E0 | EXISTS_FLAG
-------+---------+---------+---------+-------------
7369 | | | | null
7499 | 300.00 | 1300.00 | 1300.00 | true
7521 | 500.00 | 1500.00 | 1500.00 | true
7566 | | | | null
7654 | 1400.00 | 2400.00 | 2400.00 | true
7698 | | | | null
7782 | | | | null
7788 | | | | null
7839 | | | | null
7844 | 0.00 | 1000.00 | 1000.00 | true
7876 | | | | null
7900 | | | | null
7902 | | | | null
7934 | | | | null
(14 rows)
EnumerableNestedLoopJoin(condition=[=($2, $3)], joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)],
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
EnumerableAggregate(group=[{3}])
EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))],
joinType=[inner])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6,
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableAggregate(group=[{0}])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6,
$t8)], E0=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
```
Replace = with is not distinct from:
```
SELECT *
FROM (
SELECT EMPNO, COMM, COMM + 1000 AS e1
FROM EMP
) e_outer
LEFT JOIN (
SELECT e0, TRUE AS EXISTS_FLAG
FROM (
SELECT t1.EMPNO, t1.e1, t2.e0
FROM (
SELECT EMPNO, COMM + 100 AS e1, COMM
FROM EMP
) t1
JOIN (
SELECT COMM + 1000 AS e0
FROM EMP
GROUP BY COMM + 1000
) t2
ON t1.COMM IS NULL OR t1.e1 > t2.e0
) j
GROUP BY e0
) subq
ON e_outer.e1 is not distinct from subq.e0;
EMPNO | COMM | E1 | E0 | EXISTS_FLAG
-------+---------+---------+----+-------------
7369 | | | | true
7499 | 300.00 | 1300.00 | | null
7521 | 500.00 | 1500.00 | | null
7566 | | | | true
7654 | 1400.00 | 2400.00 | | null
7698 | | | | true
7782 | | | | true
7788 | | | | true
7839 | | | | true
7844 | 0.00 | 1000.00 | | null
7876 | | | | true
7900 | | | | true
7902 | | | | true
7934 | | | | true
(14 rows)
EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($2, $3)],
joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)],
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
EnumerableAggregate(group=[{3}])
EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))],
joinType=[inner])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6,
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableAggregate(group=[{0}])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6,
$t8)], E0=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
```
From the execution plan, aside from `IS NOT DISTINCT FROM`, the plans are
identical. However, the `IS NOT DISTINCT FROM` plan failed to join non-NULL
data successfully. I tried simplifying the SQL but couldn't reproduce the
issue—I suspect it might be type-related?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]