[ 
https://issues.apache.org/jira/browse/CALCITE-7006?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17950540#comment-17950540
 ] 

suibianwanwan commented on CALCITE-7006:
----------------------------------------

Here are the execution results from part of the database. For convenience, I 
used WITH clauses to mock tables and executed the following SQL:

 
{code:java}
with emp(empno, ename, job, mgr, hiredate, sal, comm, deptno) as (
  values
    (7369, 'SMITH',  'CLERK',     7902, date '1980-12-17',  800.00, null,     
20),
    (7499, 'ALLEN',  'SALESMAN',  7698, date '1981-02-20', 1600.00, 300.00,   
30),
    (7521, 'WARD',    'SALESMAN',  7698, date '1981-02-22', 1250.00, 500.00,   
30),
    (7566, 'JONES',  'MANAGER',   7839, date '1981-02-04', 2975.00, null,     
20),
    (7654, 'MARTIN', 'SALESMAN',  7698, date '1981-09-28', 1250.00, 1400.00,  
30),
    (7698, 'BLAKE',  'MANAGER',   7839, date '1981-01-05', 2850.00, null,     
30),
    (7782, 'CLARK',  'MANAGER',   7839, date '1981-06-09', 2450.00, null,     
10),
    (7788, 'SCOTT',  'ANALYST',   7566, date '1987-04-19', 3000.00, null,     
20),
    (7839, 'KING',   'PRESIDENT', null, date '1981-11-17', 5000.00, null,     
10),
    (7844, 'TURNER', 'SALESMAN',  7698, date '1981-09-08', 1500.00,   0.00,   
30),
    (7876, 'ADAMS',  'CLERK',     7788, date '1987-05-23', 1100.00, null,     
20),
    (7900, 'JAMES',  'CLERK',     7698, date '1981-12-03',  950.00, null,     
30),
    (7902, 'FORD',   'ANALYST',   7566, date '1981-12-03', 3000.00, null,     
20),
    (7934, 'MILLER', 'CLERK',     7782, date '1982-01-23', 1300.00, null,     
10)
)
SELECT *
FROM (
  SELECT EMPNO, COMM, COMM + 1000 AS e1
  FROM EMP
) e_outer
LEFT JOIN (
  SELECT e0, TRUE AS EXISTS_FLAG
  FROM (
    SELECT t1.EMPNO, t1.e1, t2.e0
    FROM (
      SELECT EMPNO, COMM + 100 AS e1, COMM
      FROM EMP
    ) t1
    JOIN (
      SELECT COMM + 1000 AS e0
      FROM EMP
      GROUP BY COMM + 1000
    ) t2
    ON t1.COMM IS NULL OR t1.e1 > t2.e0
  ) j
  GROUP BY e0
) subq
ON e_outer.e1 is not distinct from subq.e0;{code}
PG:

 

 
{code:java}
empno|comm   |e1     |e0     |exists_flag|
-----+-------+-------+-------+-----------+
 7369|       |       |       |true       |
 7499| 300.00|1300.00|1300.00|true       |
 7521| 500.00|1500.00|1500.00|true       |
 7566|       |       |       |true       |
 7654|1400.00|2400.00|2400.00|true       |
 7698|       |       |       |true       |
 7782|       |       |       |true       |
 7788|       |       |       |true       |
 7839|       |       |       |true       |
 7844|   0.00|1000.00|1000.00|true       |
 7876|       |       |       |true       |
 7900|       |       |       |true       |
 7902|       |       |       |true       |
 7934|       |       |       |true       | {code}
DuckDB:

 

 
{code:java}
------------
empno | comm | e1 | e0 | EXISTS_FLAG
------------
7369 | null | null | null | true
7499 | 300.00 | 1300.00 | 1300.00 | true
7521 | 500.00 | 1500.00 | 1500.00 | true
7566 | null | null | null | true
7654 | 1400.00 | 2400.00 | 2400.00 | true
7698 | null | null | null | true
7782 | null | null | null | true
7788 | null | null | null | true
7839 | null | null | null | true
7844 | 0.00 | 1000.00 | 1000.00 | true
7876 | null | null | null | true
7900 | null | null | null | true
7902 | null | null | null | true
7934 | null | null | null | true
------------ {code}
 

> Incorrect left join results with IS NOT DISTINCT FROM under specific plan
> -------------------------------------------------------------------------
>
>                 Key: CALCITE-7006
>                 URL: https://issues.apache.org/jira/browse/CALCITE-7006
>             Project: Calcite
>          Issue Type: Bug
>            Reporter: suibianwanwan
>            Priority: Major
>
>  I forcibly applied the EnumerableNestedLoopJoin rule to make the execution 
> plans identical for {{=}} and {{{}is not distinct from{}}}. Here are test in 
> Quidem:
> {code:java}
> SELECT *
> FROM (
>   SELECT EMPNO, COMM, COMM + 1000 AS e1
>   FROM EMP
> ) e_outer
> LEFT JOIN (
>   SELECT e0, TRUE AS EXISTS_FLAG
>   FROM (
>     SELECT t1.EMPNO, t1.e1, t2.e0
>     FROM (
>       SELECT EMPNO, COMM + 100 AS e1, COMM
>       FROM EMP
>     ) t1
>     JOIN (
>       SELECT COMM + 1000 AS e0
>       FROM EMP
>       GROUP BY COMM + 1000
>     ) t2
>     ON t1.COMM IS NULL OR t1.e1 > t2.e0
>   ) j
>   GROUP BY e0
> ) subq
> ON e_outer.e1 = subq.e0;
>  EMPNO | COMM    | E1      | E0      | EXISTS_FLAG
> -------+---------+---------+---------+-------------
>   7369 |         |         |         | null
>   7499 |  300.00 | 1300.00 | 1300.00 | true
>   7521 |  500.00 | 1500.00 | 1500.00 | true
>   7566 |         |         |         | null
>   7654 | 1400.00 | 2400.00 | 2400.00 | true
>   7698 |         |         |         | null
>   7782 |         |         |         | null
>   7788 |         |         |         | null
>   7839 |         |         |         | null
>   7844 |    0.00 | 1000.00 | 1000.00 | true
>   7876 |         |         |         | null
>   7900 |         |         |         | null
>   7902 |         |         |         | null
>   7934 |         |         |         | null
> (14 rows)
> EnumerableNestedLoopJoin(condition=[=($2, $3)], joinType=[left])
>   EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
> EMPNO=[$t0], COMM=[$t6], E1=[$t9])
>     EnumerableTableScan(table=[[scott, EMP]])
>   EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
>     EnumerableAggregate(group=[{3}])
>       EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
> joinType=[inner])
>         EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
> $t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
>           EnumerableTableScan(table=[[scott, EMP]])
>         EnumerableAggregate(group=[{0}])
>           EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
> $t8)], E0=[$t9])
>             EnumerableTableScan(table=[[scott, EMP]]) {code}
> Replace = with is not distinct from:
> {code:java}
> SELECT *
> FROM (
>   SELECT EMPNO, COMM, COMM + 1000 AS e1
>   FROM EMP
> ) e_outer
> LEFT JOIN (
>   SELECT e0, TRUE AS EXISTS_FLAG
>   FROM (
>     SELECT t1.EMPNO, t1.e1, t2.e0
>     FROM (
>       SELECT EMPNO, COMM + 100 AS e1, COMM
>       FROM EMP
>     ) t1
>     JOIN (
>       SELECT COMM + 1000 AS e0
>       FROM EMP
>       GROUP BY COMM + 1000
>     ) t2
>     ON t1.COMM IS NULL OR t1.e1 > t2.e0
>   ) j
>   GROUP BY e0
> ) subq
> ON e_outer.e1 is not distinct from subq.e0;
>  EMPNO | COMM    | E1      | E0 | EXISTS_FLAG
> -------+---------+---------+----+-------------
>   7369 |         |         |    | true
>   7499 |  300.00 | 1300.00 |    | null
>   7521 |  500.00 | 1500.00 |    | null
>   7566 |         |         |    | true
>   7654 | 1400.00 | 2400.00 |    | null
>   7698 |         |         |    | true
>   7782 |         |         |    | true
>   7788 |         |         |    | true
>   7839 |         |         |    | true
>   7844 |    0.00 | 1000.00 |    | null
>   7876 |         |         |    | true
>   7900 |         |         |    | true
>   7902 |         |         |    | true
>   7934 |         |         |    | true
> (14 rows)
> EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($2, $3)], 
> joinType=[left])
>   EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
> EMPNO=[$t0], COMM=[$t6], E1=[$t9])
>     EnumerableTableScan(table=[[scott, EMP]])
>   EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
>     EnumerableAggregate(group=[{3}])
>       EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
> joinType=[inner])
>         EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
> $t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
>           EnumerableTableScan(table=[[scott, EMP]])
>         EnumerableAggregate(group=[{0}])
>           EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
> $t8)], E0=[$t9])
>             EnumerableTableScan(table=[[scott, EMP]]) {code}
> From the plan, aside from IS NOT DISTINCT FROM, the plans are identical. 
> However, the IS NOT DISTINCT FROM plan failed to join non-NULL data 
> successfully. I tried simplifying the SQL but couldn't reproduce the issue.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to