[
https://issues.apache.org/jira/browse/ASTERIXDB-1266?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yingyi Bu updated ASTERIXDB-1266:
---------------------------------
Description:
Translating TPCH Q17 from AQL to Sqlpp causes the optimizer to pick NLJ
(instead of HHJ) which drops the performance of the query.
Below you can find the AQL and Sqlpp versions of the query along with their
corresponding optimized plans:
Q17-SQL-PP
{noformat}
use tpch_1g;
declare function tmp() {
(
select element {'t_partkey':l_partkey,'t_avg_quantity':(0.2 * avg((
select element i.l_quantity
from l as i
)))}
from LineItem as l
group by l.l_partkey as l_partkey
)
};
select element ( sum((
select element l.l_extendedprice
from LineItem as l,
Part as p,
tmp() as t
where (((p.p_partkey = l.l_partkey) and (p.p_brand = 'Brand#23') and
(p.p_container = 'MED BOX')) and ((l.l_partkey = t.t_partkey) and (l.l_quantity
< t.t_avg_quantity)))
)) / 7.0);
{noformat}
Query plan:
{noformat}
distribute result [%0->$$54]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
project ([$$54])
-- STREAM_PROJECT |UNPARTITIONED|
assign [$$54] <- [function-call: asterix:numeric-divide, Args:[%0->$$55,
ADouble: {7.0}]]
-- ASSIGN |UNPARTITIONED|
aggregate [$$55] <- [function-call: asterix:agg-sum, Args:[%0->$$72]]
-- AGGREGATE |UNPARTITIONED|
exchange
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
aggregate [$$72] <- [function-call: asterix:agg-local-sum,
Args:[%0->$$51]]
-- AGGREGATE |PARTITIONED|
project ([$$51])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$65, %0->$$66])
-- STREAM_SELECT |PARTITIONED|
project ([$$65, $$66, $$51])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:and, Args:[function-call:
algebricks:eq, Args:[%0->$$57, %0->$$14], function-call: algebricks:eq,
Args:[%0->$$60, %0->$$57]])
-- NESTED_LOOP |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$65, $$51, $$57])
-- STREAM_PROJECT |PARTITIONED|
assign [$$51, $$65, $$57] <- [function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {5}], function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {4}], function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
project ([$$11])
-- STREAM_PROJECT |PARTITIONED|
assign [$$11] <- [%0->$$15]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$61, $$62, $$15] <-
tpch_1g:LineItem
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (TRUE)
-- NESTED_LOOP |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$60])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:and,
Args:[function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$16, AInt32: {6}], AString: {MED
BOX}], function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$16, AInt32: {3}], AString:
{Brand#23}]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$60, $$16] <- tpch_1g:Part
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$66, $$14])
-- STREAM_PROJECT |PARTITIONED|
assign [$$66] <- [function-call:
asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$63]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$14 := %0->$$71]) decor ([]) {
aggregate [$$63] <-
[function-call: asterix:agg-global-avg, Args:[%0->$$70]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- PRE_CLUSTERED_GROUP_BY[$$71]
|PARTITIONED|
exchange
-- HASH_PARTITION_MERGE_EXCHANGE
MERGE:[$$71(ASC)] HASH:[$$71] |PARTITIONED|
group by ([$$71 := %0->$$56]) decor
([]) {
aggregate [$$70] <-
[function-call: asterix:agg-local-avg, Args:[%0->$$29]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE
|LOCAL|
}
-- SORT_GROUP_BY[$$56] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$56, $$29])
-- STREAM_PROJECT |PARTITIONED|
assign [$$29, $$56] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$15])
-- STREAM_PROJECT
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$61,
$$62, $$15] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
--
ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
--
EMPTY_TUPLE_SOURCE |PARTITIONED|
{noformat}
Q17-AQL
{noformat}
declare function tmp(){
for $l in dataset('LineItem')
group by $l_partkey := $l.l_partkey with $l
return {
"t_partkey": $l_partkey,
"t_avg_quantity": 0.2 * avg(for $i in $l return $i.l_quantity)
}
}
sum(
for $l in dataset('LineItem')
for $p in dataset('Part')
where $p.p_partkey = $l.l_partkey
and $p.p_brand = 'Brand#23'
and $p.p_container = 'MED BOX'
for $t in tmp()
where $l.l_partkey = $t.t_partkey
and $l.l_quantity < $t.t_avg_quantity
return $l.l_extendedprice
)/7.0
{noformat}
{noformat}
distribute result [%0->$$7]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
project ([$$7])
-- STREAM_PROJECT |UNPARTITIONED|
assign [$$7] <- [function-call: asterix:numeric-divide, Args:[%0->$$41,
ADouble: {7.0}]]
-- ASSIGN |UNPARTITIONED|
aggregate [$$41] <- [function-call: asterix:agg-sum, Args:[%0->$$58]]
-- AGGREGATE |UNPARTITIONED|
exchange
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
aggregate [$$58] <- [function-call: asterix:agg-local-sum,
Args:[%0->$$38]]
-- AGGREGATE |PARTITIONED|
project ([$$38])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$50, %0->$$51])
-- STREAM_SELECT |PARTITIONED|
project ([$$50, $$51, $$38])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$43,
%0->$$9])
-- HYBRID_HASH_JOIN [$$43][$$9] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$50, $$38, $$43])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq,
Args:[%0->$$46, %0->$$43])
-- HYBRID_HASH_JOIN [$$43][$$46] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$43] |PARTITIONED|
project ([$$50, $$38, $$43])
-- STREAM_PROJECT |PARTITIONED|
assign [$$38, $$50, $$43] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {5}],
function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
project ([$$4])
-- STREAM_PROJECT |PARTITIONED|
assign [$$4] <- [%0->$$8]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$8])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$47, $$48,
$$8] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$46])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:and,
Args:[function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$5, AInt32: {3}], AString:
{Brand#23}], function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$5, AInt32: {6}], AString: {MED
BOX}]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$46, $$5] <-
tpch_1g:Part
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
project ([$$51, $$9])
-- STREAM_PROJECT |PARTITIONED|
assign [$$51] <- [function-call:
asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$53]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$9 := %0->$$57]) decor ([]) {
aggregate [$$53] <- [function-call:
asterix:agg-global-avg, Args:[%0->$$56]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- PRE_CLUSTERED_GROUP_BY[$$57] |PARTITIONED|
exchange
-- HASH_PARTITION_MERGE_EXCHANGE
MERGE:[$$57(ASC)] HASH:[$$57] |PARTITIONED|
group by ([$$57 := %0->$$42]) decor ([]) {
aggregate [$$56] <-
[function-call: asterix:agg-local-avg, Args:[%0->$$27]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- SORT_GROUP_BY[$$42] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$42, $$27])
-- STREAM_PROJECT |PARTITIONED|
assign [$$27, $$42] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$8])
-- STREAM_PROJECT
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$47,
$$48, $$8] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
{noformat}
was:
Translating TPCH Q17 from AQL to Sqlpp causes the optimizer to pick NLJ
(instead of HHJ) which drops the performance of the query.
Below you can find the AQL and Sqlpp versions of the query along with their
corresponding optimized plans:
Q17-SQL-PP
use tpch_1g;
declare function tmp() {
(
select element {'t_partkey':l_partkey,'t_avg_quantity':(0.2 * avg((
select element i.l_quantity
from l as i
)))}
from LineItem as l
group by l.l_partkey as l_partkey
)
};
select element ( sum((
select element l.l_extendedprice
from LineItem as l,
Part as p,
tmp() as t
where (((p.p_partkey = l.l_partkey) and (p.p_brand = 'Brand#23') and
(p.p_container = 'MED BOX')) and ((l.l_partkey = t.t_partkey) and (l.l_quantity
< t.t_avg_quantity)))
)) / 7.0);
distribute result [%0->$$54]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
project ([$$54])
-- STREAM_PROJECT |UNPARTITIONED|
assign [$$54] <- [function-call: asterix:numeric-divide, Args:[%0->$$55,
ADouble: {7.0}]]
-- ASSIGN |UNPARTITIONED|
aggregate [$$55] <- [function-call: asterix:agg-sum, Args:[%0->$$72]]
-- AGGREGATE |UNPARTITIONED|
exchange
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
aggregate [$$72] <- [function-call: asterix:agg-local-sum,
Args:[%0->$$51]]
-- AGGREGATE |PARTITIONED|
project ([$$51])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$65, %0->$$66])
-- STREAM_SELECT |PARTITIONED|
project ([$$65, $$66, $$51])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:and, Args:[function-call:
algebricks:eq, Args:[%0->$$57, %0->$$14], function-call: algebricks:eq,
Args:[%0->$$60, %0->$$57]])
-- NESTED_LOOP |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$65, $$51, $$57])
-- STREAM_PROJECT |PARTITIONED|
assign [$$51, $$65, $$57] <- [function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {5}], function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {4}], function-call:
asterix:field-access-by-index, Args:[%0->$$11, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
project ([$$11])
-- STREAM_PROJECT |PARTITIONED|
assign [$$11] <- [%0->$$15]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$15])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$61, $$62, $$15] <-
tpch_1g:LineItem
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (TRUE)
-- NESTED_LOOP |PARTITIONED|
exchange
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$60])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:and,
Args:[function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$16, AInt32: {6}], AString: {MED
BOX}], function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$16, AInt32: {3}], AString:
{Brand#23}]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$60, $$16] <- tpch_1g:Part
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$66, $$14])
-- STREAM_PROJECT |PARTITIONED|
assign [$$66] <- [function-call:
asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$63]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$14 := %0->$$71]) decor ([]) {
aggregate [$$63] <-
[function-call: asterix:agg-global-avg, Args:[%0->$$70]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- PRE_CLUSTERED_GROUP_BY[$$71]
|PARTITIONED|
exchange
-- HASH_PARTITION_MERGE_EXCHANGE
MERGE:[$$71(ASC)] HASH:[$$71] |PARTITIONED|
group by ([$$71 := %0->$$56]) decor
([]) {
aggregate [$$70] <-
[function-call: asterix:agg-local-avg, Args:[%0->$$29]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE
|LOCAL|
}
-- SORT_GROUP_BY[$$56] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$56, $$29])
-- STREAM_PROJECT |PARTITIONED|
assign [$$29, $$56] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$15])
-- STREAM_PROJECT
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$61,
$$62, $$15] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
--
ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
--
EMPTY_TUPLE_SOURCE |PARTITIONED|
Q17-AQL
declare function tmp(){
for $l in dataset('LineItem')
group by $l_partkey := $l.l_partkey with $l
return {
"t_partkey": $l_partkey,
"t_avg_quantity": 0.2 * avg(for $i in $l return $i.l_quantity)
}
}
sum(
for $l in dataset('LineItem')
for $p in dataset('Part')
where $p.p_partkey = $l.l_partkey
and $p.p_brand = 'Brand#23'
and $p.p_container = 'MED BOX'
for $t in tmp()
where $l.l_partkey = $t.t_partkey
and $l.l_quantity < $t.t_avg_quantity
return $l.l_extendedprice
)/7.0
distribute result [%0->$$7]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
project ([$$7])
-- STREAM_PROJECT |UNPARTITIONED|
assign [$$7] <- [function-call: asterix:numeric-divide, Args:[%0->$$41,
ADouble: {7.0}]]
-- ASSIGN |UNPARTITIONED|
aggregate [$$41] <- [function-call: asterix:agg-sum, Args:[%0->$$58]]
-- AGGREGATE |UNPARTITIONED|
exchange
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
aggregate [$$58] <- [function-call: asterix:agg-local-sum,
Args:[%0->$$38]]
-- AGGREGATE |PARTITIONED|
project ([$$38])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$50, %0->$$51])
-- STREAM_SELECT |PARTITIONED|
project ([$$50, $$51, $$38])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$43,
%0->$$9])
-- HYBRID_HASH_JOIN [$$43][$$9] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$50, $$38, $$43])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq,
Args:[%0->$$46, %0->$$43])
-- HYBRID_HASH_JOIN [$$43][$$46] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$43] |PARTITIONED|
project ([$$50, $$38, $$43])
-- STREAM_PROJECT |PARTITIONED|
assign [$$38, $$50, $$43] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {5}],
function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
project ([$$4])
-- STREAM_PROJECT |PARTITIONED|
assign [$$4] <- [%0->$$8]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$8])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$47, $$48,
$$8] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$46])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:and,
Args:[function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$5, AInt32: {3}], AString:
{Brand#23}], function-call: algebricks:eq, Args:[function-call:
asterix:field-access-by-index, Args:[%0->$$5, AInt32: {6}], AString: {MED
BOX}]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan []<-[$$46, $$5] <-
tpch_1g:Part
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
project ([$$51, $$9])
-- STREAM_PROJECT |PARTITIONED|
assign [$$51] <- [function-call:
asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$53]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$9 := %0->$$57]) decor ([]) {
aggregate [$$53] <- [function-call:
asterix:agg-global-avg, Args:[%0->$$56]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- PRE_CLUSTERED_GROUP_BY[$$57] |PARTITIONED|
exchange
-- HASH_PARTITION_MERGE_EXCHANGE
MERGE:[$$57(ASC)] HASH:[$$57] |PARTITIONED|
group by ([$$57 := %0->$$42]) decor ([]) {
aggregate [$$56] <-
[function-call: asterix:agg-local-avg, Args:[%0->$$27]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
-- SORT_GROUP_BY[$$42] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$42, $$27])
-- STREAM_PROJECT |PARTITIONED|
assign [$$27, $$42] <-
[function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {4}],
function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {1}]]
-- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
replicate
-- SPLIT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
project ([$$8])
-- STREAM_PROJECT
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
data-scan []<-[$$47,
$$48, $$8] <- tpch_1g:LineItem
-- DATASOURCE_SCAN
|PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
empty-tuple-source
-- EMPTY_TUPLE_SOURCE
|PARTITIONED|
> Sqlpp query degrades the performance by picking NLJ instead of HHJ
> ------------------------------------------------------------------
>
> Key: ASTERIXDB-1266
> URL: https://issues.apache.org/jira/browse/ASTERIXDB-1266
> Project: Apache AsterixDB
> Issue Type: Bug
> Reporter: Pouria
> Assignee: Yingyi Bu
>
> Translating TPCH Q17 from AQL to Sqlpp causes the optimizer to pick NLJ
> (instead of HHJ) which drops the performance of the query.
> Below you can find the AQL and Sqlpp versions of the query along with their
> corresponding optimized plans:
> Q17-SQL-PP
> {noformat}
> use tpch_1g;
> declare function tmp() {
> (
> select element {'t_partkey':l_partkey,'t_avg_quantity':(0.2 * avg((
> select element i.l_quantity
> from l as i
> )))}
> from LineItem as l
> group by l.l_partkey as l_partkey
> )
> };
> select element ( sum((
> select element l.l_extendedprice
> from LineItem as l,
> Part as p,
> tmp() as t
> where (((p.p_partkey = l.l_partkey) and (p.p_brand = 'Brand#23') and
> (p.p_container = 'MED BOX')) and ((l.l_partkey = t.t_partkey) and
> (l.l_quantity < t.t_avg_quantity)))
> )) / 7.0);
> {noformat}
> Query plan:
> {noformat}
> distribute result [%0->$$54]
> -- DISTRIBUTE_RESULT |UNPARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
> project ([$$54])
> -- STREAM_PROJECT |UNPARTITIONED|
> assign [$$54] <- [function-call: asterix:numeric-divide,
> Args:[%0->$$55, ADouble: {7.0}]]
> -- ASSIGN |UNPARTITIONED|
> aggregate [$$55] <- [function-call: asterix:agg-sum, Args:[%0->$$72]]
> -- AGGREGATE |UNPARTITIONED|
> exchange
> -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
> aggregate [$$72] <- [function-call: asterix:agg-local-sum,
> Args:[%0->$$51]]
> -- AGGREGATE |PARTITIONED|
> project ([$$51])
> -- STREAM_PROJECT |PARTITIONED|
> select (function-call: algebricks:lt, Args:[%0->$$65,
> %0->$$66])
> -- STREAM_SELECT |PARTITIONED|
> project ([$$65, $$66, $$51])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> join (function-call: algebricks:and,
> Args:[function-call: algebricks:eq, Args:[%0->$$57, %0->$$14], function-call:
> algebricks:eq, Args:[%0->$$60, %0->$$57]])
> -- NESTED_LOOP |PARTITIONED|
> exchange
> -- BROADCAST_EXCHANGE |PARTITIONED|
> project ([$$65, $$51, $$57])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$51, $$65, $$57] <- [function-call:
> asterix:field-access-by-index, Args:[%0->$$11, AInt32: {5}], function-call:
> asterix:field-access-by-index, Args:[%0->$$11, AInt32: {4}], function-call:
> asterix:field-access-by-index, Args:[%0->$$11, AInt32: {1}]]
> -- ASSIGN |PARTITIONED|
> project ([$$11])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$11] <- [%0->$$15]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> replicate
> -- SPLIT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$15])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> data-scan []<-[$$61, $$62, $$15]
> <- tpch_1g:LineItem
> -- DATASOURCE_SCAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE
> |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> join (TRUE)
> -- NESTED_LOOP |PARTITIONED|
> exchange
> -- BROADCAST_EXCHANGE |PARTITIONED|
> project ([$$60])
> -- STREAM_PROJECT |PARTITIONED|
> select (function-call: algebricks:and,
> Args:[function-call: algebricks:eq, Args:[function-call:
> asterix:field-access-by-index, Args:[%0->$$16, AInt32: {6}], AString: {MED
> BOX}], function-call: algebricks:eq, Args:[function-call:
> asterix:field-access-by-index, Args:[%0->$$16, AInt32: {3}], AString:
> {Brand#23}]])
> -- STREAM_SELECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> data-scan []<-[$$60, $$16] <- tpch_1g:Part
> -- DATASOURCE_SCAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$66, $$14])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$66] <- [function-call:
> asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$63]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> group by ([$$14 := %0->$$71]) decor ([]) {
> aggregate [$$63] <-
> [function-call: asterix:agg-global-avg, Args:[%0->$$70]]
> -- AGGREGATE |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE
> |LOCAL|
> }
> -- PRE_CLUSTERED_GROUP_BY[$$71]
> |PARTITIONED|
> exchange
> -- HASH_PARTITION_MERGE_EXCHANGE
> MERGE:[$$71(ASC)] HASH:[$$71] |PARTITIONED|
> group by ([$$71 := %0->$$56]) decor
> ([]) {
> aggregate [$$70] <-
> [function-call: asterix:agg-local-avg, Args:[%0->$$29]]
> -- AGGREGATE |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE
> |LOCAL|
> }
> -- SORT_GROUP_BY[$$56] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> project ([$$56, $$29])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$29, $$56] <-
> [function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {4}],
> function-call: asterix:field-access-by-index, Args:[%0->$$15, AInt32: {1}]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> replicate
> -- SPLIT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> project ([$$15])
> -- STREAM_PROJECT
> |PARTITIONED|
> exchange
> --
> ONE_TO_ONE_EXCHANGE |PARTITIONED|
> data-scan
> []<-[$$61, $$62, $$15] <- tpch_1g:LineItem
> -- DATASOURCE_SCAN
> |PARTITIONED|
> exchange
> --
> ONE_TO_ONE_EXCHANGE |PARTITIONED|
>
> empty-tuple-source
> --
> EMPTY_TUPLE_SOURCE |PARTITIONED|
> {noformat}
> Q17-AQL
> {noformat}
> declare function tmp(){
> for $l in dataset('LineItem')
> group by $l_partkey := $l.l_partkey with $l
> return {
> "t_partkey": $l_partkey,
> "t_avg_quantity": 0.2 * avg(for $i in $l return $i.l_quantity)
> }
> }
> sum(
> for $l in dataset('LineItem')
> for $p in dataset('Part')
> where $p.p_partkey = $l.l_partkey
> and $p.p_brand = 'Brand#23'
> and $p.p_container = 'MED BOX'
> for $t in tmp()
> where $l.l_partkey = $t.t_partkey
> and $l.l_quantity < $t.t_avg_quantity
> return $l.l_extendedprice
> )/7.0
> {noformat}
> {noformat}
> distribute result [%0->$$7]
> -- DISTRIBUTE_RESULT |UNPARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
> project ([$$7])
> -- STREAM_PROJECT |UNPARTITIONED|
> assign [$$7] <- [function-call: asterix:numeric-divide, Args:[%0->$$41,
> ADouble: {7.0}]]
> -- ASSIGN |UNPARTITIONED|
> aggregate [$$41] <- [function-call: asterix:agg-sum, Args:[%0->$$58]]
> -- AGGREGATE |UNPARTITIONED|
> exchange
> -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
> aggregate [$$58] <- [function-call: asterix:agg-local-sum,
> Args:[%0->$$38]]
> -- AGGREGATE |PARTITIONED|
> project ([$$38])
> -- STREAM_PROJECT |PARTITIONED|
> select (function-call: algebricks:lt, Args:[%0->$$50,
> %0->$$51])
> -- STREAM_SELECT |PARTITIONED|
> project ([$$50, $$51, $$38])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> join (function-call: algebricks:eq, Args:[%0->$$43,
> %0->$$9])
> -- HYBRID_HASH_JOIN [$$43][$$9] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$50, $$38, $$43])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> join (function-call: algebricks:eq,
> Args:[%0->$$46, %0->$$43])
> -- HYBRID_HASH_JOIN [$$43][$$46] |PARTITIONED|
> exchange
> -- HASH_PARTITION_EXCHANGE [$$43]
> |PARTITIONED|
> project ([$$50, $$38, $$43])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$38, $$50, $$43] <-
> [function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {5}],
> function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {4}],
> function-call: asterix:field-access-by-index, Args:[%0->$$4, AInt32: {1}]]
> -- ASSIGN |PARTITIONED|
> project ([$$4])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$4] <- [%0->$$8]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> replicate
> -- SPLIT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> project ([$$8])
> -- STREAM_PROJECT
> |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> data-scan []<-[$$47,
> $$48, $$8] <- tpch_1g:LineItem
> -- DATASOURCE_SCAN
> |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE
> |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$46])
> -- STREAM_PROJECT |PARTITIONED|
> select (function-call: algebricks:and,
> Args:[function-call: algebricks:eq, Args:[function-call:
> asterix:field-access-by-index, Args:[%0->$$5, AInt32: {3}], AString:
> {Brand#23}], function-call: algebricks:eq, Args:[function-call:
> asterix:field-access-by-index, Args:[%0->$$5, AInt32: {6}], AString: {MED
> BOX}]])
> -- STREAM_SELECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> data-scan []<-[$$46, $$5] <-
> tpch_1g:Part
> -- DATASOURCE_SCAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE
> |PARTITIONED|
> exchange
> -- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
> project ([$$51, $$9])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$51] <- [function-call:
> asterix:numeric-multiply, Args:[ADouble: {0.2}, %0->$$53]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> group by ([$$9 := %0->$$57]) decor ([]) {
> aggregate [$$53] <- [function-call:
> asterix:agg-global-avg, Args:[%0->$$56]]
> -- AGGREGATE |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- PRE_CLUSTERED_GROUP_BY[$$57] |PARTITIONED|
> exchange
> -- HASH_PARTITION_MERGE_EXCHANGE
> MERGE:[$$57(ASC)] HASH:[$$57] |PARTITIONED|
> group by ([$$57 := %0->$$42]) decor ([]) {
> aggregate [$$56] <-
> [function-call: asterix:agg-local-avg, Args:[%0->$$27]]
> -- AGGREGATE |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE
> |LOCAL|
> }
> -- SORT_GROUP_BY[$$42] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$42, $$27])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$27, $$42] <-
> [function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {4}],
> function-call: asterix:field-access-by-index, Args:[%0->$$8, AInt32: {1}]]
> -- ASSIGN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> replicate
> -- SPLIT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> project ([$$8])
> -- STREAM_PROJECT
> |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE
> |PARTITIONED|
> data-scan []<-[$$47,
> $$48, $$8] <- tpch_1g:LineItem
> -- DATASOURCE_SCAN
> |PARTITIONED|
> exchange
> --
> ONE_TO_ONE_EXCHANGE |PARTITIONED|
> empty-tuple-source
> --
> EMPTY_TUPLE_SOURCE |PARTITIONED|
> {noformat}
>
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)