suibianwanwank commented on PR #4332:
URL: https://github.com/apache/calcite/pull/4332#issuecomment-2853510060

   To further identify the issue, I forcibly applied the 
EnumerableNestedLoopJoin rule to make the execution plans identical for `=` and 
`is not distinct from`. Here are some tests:
   ```
   SELECT *
   FROM (
     SELECT EMPNO, COMM, COMM + 1000 AS e1
     FROM EMP
   ) e_outer
   LEFT JOIN (
     SELECT e0, TRUE AS EXISTS_FLAG
     FROM (
       SELECT t1.EMPNO, t1.e1, t2.e0
       FROM (
         SELECT EMPNO, COMM + 100 AS e1, COMM
         FROM EMP
       ) t1
       JOIN (
         SELECT COMM + 1000 AS e0
         FROM EMP
         GROUP BY COMM + 1000
       ) t2
       ON t1.COMM IS NULL OR t1.e1 > t2.e0
     ) j
     GROUP BY e0
   ) subq
   ON e_outer.e1 = subq.e0;
   
    EMPNO | COMM    | E1      | E0      | EXISTS_FLAG
   -------+---------+---------+---------+-------------
     7369 |         |         |         | null
     7499 |  300.00 | 1300.00 | 1300.00 | true
     7521 |  500.00 | 1500.00 | 1500.00 | true
     7566 |         |         |         | null
     7654 | 1400.00 | 2400.00 | 2400.00 | true
     7698 |         |         |         | null
     7782 |         |         |         | null
     7788 |         |         |         | null
     7839 |         |         |         | null
     7844 |    0.00 | 1000.00 | 1000.00 | true
     7876 |         |         |         | null
     7900 |         |         |         | null
     7902 |         |         |         | null
     7934 |         |         |         | null
   (14 rows)
   
   EnumerableNestedLoopJoin(condition=[=($2, $3)], joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
       EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
       EnumerableAggregate(group=[{3}])
         EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
joinType=[inner])
           EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
             EnumerableTableScan(table=[[scott, EMP]])
           EnumerableAggregate(group=[{0}])
             EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
$t8)], E0=[$t9])
               EnumerableTableScan(table=[[scott, EMP]])
   ```
   Replace = with is not distinct from:
   ```
   SELECT *
   FROM (
     SELECT EMPNO, COMM, COMM + 1000 AS e1
     FROM EMP
   ) e_outer
   LEFT JOIN (
     SELECT e0, TRUE AS EXISTS_FLAG
     FROM (
       SELECT t1.EMPNO, t1.e1, t2.e0
       FROM (
         SELECT EMPNO, COMM + 100 AS e1, COMM
         FROM EMP
       ) t1
       JOIN (
         SELECT COMM + 1000 AS e0
         FROM EMP
         GROUP BY COMM + 1000
       ) t2
       ON t1.COMM IS NULL OR t1.e1 > t2.e0
     ) j
     GROUP BY e0
   ) subq
   ON e_outer.e1 is not distinct from subq.e0;
   
    EMPNO | COMM    | E1      | E0 | EXISTS_FLAG
   -------+---------+---------+----+-------------
     7369 |         |         |    | true
     7499 |  300.00 | 1300.00 |    | null
     7521 |  500.00 | 1500.00 |    | null
     7566 |         |         |    | true
     7654 | 1400.00 | 2400.00 |    | null
     7698 |         |         |    | true
     7782 |         |         |    | true
     7788 |         |         |    | true
     7839 |         |         |    | true
     7844 |    0.00 | 1000.00 |    | null
     7876 |         |         |    | true
     7900 |         |         |    | true
     7902 |         |         |    | true
     7934 |         |         |    | true
   (14 rows)
   
   EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($2, $3)], 
joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
       EnumerableTableScan(table=[[scott, EMP]])
     EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
       EnumerableAggregate(group=[{3}])
         EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
joinType=[inner])
           EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
             EnumerableTableScan(table=[[scott, EMP]])
           EnumerableAggregate(group=[{0}])
             EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
$t8)], E0=[$t9])
               EnumerableTableScan(table=[[scott, EMP]])
   ```
   
   From the execution plan, aside from `IS NOT DISTINCT FROM`, the plans are 
identical. However, the `IS NOT DISTINCT FROM` plan failed to join non-NULL 
data successfully. I tried simplifying the SQL but couldn't reproduce the 
issue—I suspect it might be type-related?  


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to