[
https://issues.apache.org/jira/browse/CALCITE-4887?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17445754#comment-17445754
]
bingfeng.guo edited comment on CALCITE-4887 at 11/18/21, 9:14 AM:
------------------------------------------------------------------
https://issues.apache.org/jira/browse/CALCITE-2071 I know from this issue.
The extra part in the plan is to prevent incorrect results, such as null
values. However, the currently generated plan has an unconditional inner join,
which is equivalent to a cross join, which has a great impact on performance.
The following is an example in my UT:
sql:
{quote}select count(1)
from struct.t t1 left join emp on t1.c0 = emp.deptno
where c0 = 1 or c0 in (
select deptno from emp where deptno = 1)
{quote}
expected:
{quote}LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalProject($f0=[1])
LogicalFilter(condition=[OR(=($4, 1), true)])
LogicalJoin(condition=[=($4, $18)], joinType=[left])
LogicalJoin(condition=[=($4, $16)], joinType=[left])
LogicalTableScan(table=[[CATALOG, STRUCT, T]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[\\{0}])
LogicalProject(DEPTNO=[$7])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
{quote}
actual:
{quote}LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalProject($f0=[1])
LogicalFilter(condition=[OR(=($4, 1), CAST(CASE(=($18, 0), false, IS NOT
NULL($22), true, IS NULL($20), null:BOOLEAN, <($19, $18), null:BOOLEAN,
false)):BOOLEAN NOT NULL)])
LogicalJoin(condition=[=($20, $21)], joinType=[left])
LogicalProject("K0"=[$0], "C1"=[$1], "F1"."A0"=[$2], "F2"."A0"=[$3],
"F0"."C0"=[$4], "F1"."C0"=[$5], "F0"."C1"=[$6], "F1"."C2"=[$7], "F2"."C3"=[$8],
EMPNO=[$9], ENAME=[$10], JOB=[$11], MGR=[$12], HIREDATE=[$13], SAL=[$14],
COMM=[$15], DEPTNO=[$16], SLACKER=[$17], $f0=[$18], $f1=[$19], "F0"."C0"0=[$4])
LogicalJoin(condition=[true], joinType=[inner])
LogicalJoin(condition=[=($4, $16)], joinType=[left])
LogicalTableScan(table=[[CATALOG, STRUCT, T]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
LogicalProject(DEPTNO=[$7], $f1=[true])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[\\{0}], agg#0=[MIN($1)])
LogicalProject(DEPTNO=[$7], $f1=[true])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
{quote}
was (Author: bingfeng):
https://issues.apache.org/jira/browse/CALCITE-2071
I know that from this issue, the extra part of the plan is to prevent incorrect
null values from being detected. But the current generated plan is not only
more than this, but also inner join without condition, which is equivalent to
cross join, which will affect performance more.
The following is an example in my UT:
sql:
{quote}select count(1)
from struct.t t1 left join emp on t1.c0 = emp.deptno
where c0 = 1 or c0 in (
select deptno from emp where deptno = 1)
{quote}
expected:
{quote}LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalProject($f0=[1])
LogicalFilter(condition=[OR(=($4, 1), true)])
LogicalJoin(condition=[=($4, $18)], joinType=[left])
LogicalJoin(condition=[=($4, $16)], joinType=[left])
LogicalTableScan(table=[[CATALOG, STRUCT, T]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[\{0}])
LogicalProject(DEPTNO=[$7])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
{quote}
actual:
{quote}
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalProject($f0=[1])
LogicalFilter(condition=[OR(=($4, 1), CAST(CASE(=($18, 0), false, IS NOT
NULL($22), true, IS NULL($20), null:BOOLEAN, <($19, $18), null:BOOLEAN,
false)):BOOLEAN NOT NULL)])
LogicalJoin(condition=[=($20, $21)], joinType=[left])
LogicalProject("K0"=[$0], "C1"=[$1], "F1"."A0"=[$2], "F2"."A0"=[$3],
"F0"."C0"=[$4], "F1"."C0"=[$5], "F0"."C1"=[$6], "F1"."C2"=[$7], "F2"."C3"=[$8],
EMPNO=[$9], ENAME=[$10], JOB=[$11], MGR=[$12], HIREDATE=[$13], SAL=[$14],
COMM=[$15], DEPTNO=[$16], SLACKER=[$17], $f0=[$18], $f1=[$19], "F0"."C0"0=[$4])
LogicalJoin(condition=[true], joinType=[inner])
LogicalJoin(condition=[=($4, $16)], joinType=[left])
LogicalTableScan(table=[[CATALOG, STRUCT, T]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
LogicalProject(DEPTNO=[$7], $f1=[true])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[\{0}], agg#0=[MIN($1)])
LogicalProject(DEPTNO=[$7], $f1=[true])
LogicalFilter(condition=[=($7, 1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
{quote}
> Filter subQuery remove not fully optimized
> ------------------------------------------
>
> Key: CALCITE-4887
> URL: https://issues.apache.org/jira/browse/CALCITE-4887
> Project: Calcite
> Issue Type: Improvement
> Reporter: bingfeng.guo
> Assignee: bingfeng.guo
> Priority: Major
>
> for example:
> {quote}SELECT
> count(1)
> FROM
> SSB.P_LINEORDER as P_LINEORDER
> LEFT JOIN SSB.PART as PART ON P_LINEORDER.LO_PARTKEY = PART.P_PARTKEY
> LEFT JOIN SSB.DATES as DATES ON P_LINEORDER.LO_ORDERDATE = DATES.D_DATEKEY
> LEFT JOIN SSB.CUSTOMER as CUSTOMER ON P_LINEORDER.LO_CUSTKEY =
> CUSTOMER.C_CUSTKEY
> LEFT JOIN SSB.SUPPLIER as SUPPLIER ON P_LINEORDER.LO_SUPPKEY =
> SUPPLIER.S_SUPPKEY
> WHERE
> P_LINEORDER.LO_ORDERDATE = '1992-01-01'
> or (
> P_LINEORDER.LO_ORDERDATE in (
> select
> D_DATEKEY
> from
> SSB.DATES
> where
> DATES.D_DATEKEY = '1992-01-01'
> )
> )
> LIMIT
> 500
> {quote}
>
> plan is
>
> {quote}LimitRel(ctx=[], fetch=[500])
> AggregateRel(group-set=[[]], groups=[null], EXPR$0=[COUNT()])
> ProjectRel(ctx=[])
> FilterRel(condition=[OR(=($5, 1992-01-01), CAST(AND(IS NOT NULL($65),
> <>($62, 0))):BOOLEAN)])
> JoinRel(condition=[=($5, $64)], joinType=[left])
> JoinRel(condition=[true], joinType=[inner])
> JoinRel(condition=[=($4, $55)], joinType=[left])
> JoinRel(condition=[=($2, $47)], joinType=[left])
> JoinRel(condition=[=($5, $30)], joinType=[left])
> JoinRel(condition=[=($3, $21)], joinType=[left])
> TableScan(table=[[SSB, P_LINEORDER]], fields=[[0, 1, 2,
> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])
> TableScan(table=[[SSB, PART]], fields=[[0, 1, 2, 3, 4,
> 5, 6, 7, 8]])
> TableScan(table=[[SSB, DATES]], fields=[[0, 1, 2, 3, 4,
> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
> TableScan(table=[[SSB, CUSTOMER]], fields=[[0, 1, 2, 3, 4,
> 5, 6, 7]])
> TableScan(table=[[SSB, SUPPLIER]], fields=[[0, 1, 2, 3, 4, 5,
> 6]])
> AggregateRel(group-set=[[]], groups=[null], c=[COUNT()],
> ck=[COUNT($0)])
> ProjectRel(D_DATEKEY=[$0])
> FilterRel(condition=[=($0, 1992-01-01)])
> TableScan(table=[[SSB, DATES]], fields=[[0, 1, 2, 3, 4,
> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
> AggregateRel(group-set=[[0, 1]], groups=[null])
> ProjectRel(D_DATEKEY=[$0], i=[true])
> FilterRel(condition=[=($0, 1992-01-01)])
> TableScan(table=[[SSB, DATES]], fields=[[0, 1, 2, 3, 4, 5,
> 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
> {quote}
>
>
> But the optimal plan should be
> {quote}
> LimitRel(fetch=[500])
> AggregateRel(group-set=[[]], groups=[null], EXPR$0=[COUNT()])
> ProjectRel(ctx=[])
> FilterRel(condition=[=($5, 1992-01-01)])
> JoinRel(condition=[=($5, $62)], joinType=[inner])
> JoinRel(condition=[=($4, $55)], joinType=[left])
> JoinRel(condition=[=($2, $47)], joinType=[left])
> JoinRel(condition=[=($5, $30)], joinType=[left])
> JoinRel(condition=[=($3, $21)], joinType=[left])
> TableScan(table=[[SSB, P_LINEORDER]], fields=[[0, 1, 2,
> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])
> TableScan(table=[[SSB, PART]], fields=[[0, 1, 2, 3, 4, 5,
> 6, 7, 8]])
> TableScan(table=[[SSB, DATES]], fields=[[0, 1, 2, 3, 4, 5,
> 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
> TableScan(table=[[SSB, CUSTOMER]], fields=[[0, 1, 2, 3, 4, 5,
> 6, 7]])
> TableScan(table=[[SSB, SUPPLIER]], fields=[[0, 1, 2, 3, 4, 5,
> 6]])
> AggregateRel(group-set=[[0]], groups=[null])
> ProjectRel(D_DATEKEY=[$0])
> FilterRel(condition=[=($0, 1992-01-01)])
> TableScan(table=[[SSB, DATES]], fields=[[0, 1, 2, 3, 4, 5,
> 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
>
> {quote}
>
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)