[ 
https://issues.apache.org/jira/browse/ASTERIXDB-1295?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Yingyi Bu updated ASTERIXDB-1295:
---------------------------------
    Issue Type: Improvement  (was: Bug)

> Unable to switch an implicit "sort-based" grouping into "hash-based"
> --------------------------------------------------------------------
>
>                 Key: ASTERIXDB-1295
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1295
>             Project: Apache AsterixDB
>          Issue Type: Improvement
>            Reporter: Pouria
>
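> Switching from the default "sort-based" grouping to "hash-based" grouping via 
> a hint requires an explicit "group by" clause in the query. However, there are 
> cases - such as left-outer joins (see below) - where an implicit grouping is 
> required and is added to the query plan by the optimizer; in those cases it is 
> impossible to provide the "hash" hint to switch the physical grouping operator.
> The example below lists the DDL, the query, and the resulting optimized plan, 
> in which the implicit grouping is executed as a STABLE_SORT followed by a 
> PRE_CLUSTERED_GROUP_BY.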
> drop dataverse test if exists;
> create dataverse test;
> use dataverse test;
> create type typeA as {
> id: int64
> }
> create type typeB as {
> id: int64,
> fk: int64
> }
> create dataset dsa(typeA) primary key id;
> create dataset dsb(typeB) primary key id;
> insert into dataset dsa ( {"id": 1} );
> insert into dataset dsa ( {"id": 2} );
>  
> insert into dataset dsb ( {"id": 1000, "fk": 1} );
> insert into dataset dsb ( {"id": 1001, "fk": 1} );
> insert into dataset dsb ( {"id": 2000, "fk": 2} );
> for $a in dataset dsa
> return {
> "aid" : $a.id,
> "val" : for $b in dataset dsb
>         where $b.fk = $a.id
>       return $b.id
> } 
> distribute result [%0->$$3]
> -- DISTRIBUTE_RESULT  |PARTITIONED|
>   exchange 
>   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>     project ([$$3])
>     -- STREAM_PROJECT  |PARTITIONED|
>       assign [$$3] <- [function-call: asterix:closed-record-constructor, 
> Args:[AString: {aid}, %0->$$17, AString: {val}, %0->$$10]]
>       -- ASSIGN  |PARTITIONED|
>         project ([$$17, $$10])
>         -- STREAM_PROJECT  |PARTITIONED|
>           exchange 
>           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>             group by ([$$11 := %0->$$14]) decor ([$$17 := %0->$$14]) {
>                       aggregate [$$10] <- [function-call: asterix:listify, 
> Args:[%0->$$12]]
>                       -- AGGREGATE  |LOCAL|
>                         select (function-call: algebricks:not, 
> Args:[function-call: algebricks:is-null, Args:[%0->$$13]])
>                         -- STREAM_SELECT  |LOCAL|
>                           nested tuple source
>                           -- NESTED_TUPLE_SOURCE  |LOCAL|
>                    }
>             -- PRE_CLUSTERED_GROUP_BY[$$14]  |PARTITIONED|
>               exchange 
>               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                 order (ASC, %0->$$14) 
>                 -- STABLE_SORT [$$14(ASC)]  |PARTITIONED|
>                   exchange 
>                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                     project ([$$12, $$13, $$14])
>                     -- STREAM_PROJECT  |PARTITIONED|
>                       exchange 
>                       -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                         left outer join (function-call: algebricks:eq, 
> Args:[%0->$$15, %0->$$14])
>                         -- HYBRID_HASH_JOIN [$$14][$$15]  |PARTITIONED|
>                           exchange 
>                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                             project ([$$14])
>                             -- STREAM_PROJECT  |PARTITIONED|
>                               exchange 
>                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                 data-scan []<-[$$14, $$0] <- test:dsa
>                                 -- DATASOURCE_SCAN  |PARTITIONED|
>                                   exchange 
>                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                     empty-tuple-source
>                                     -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
>                           exchange 
>                           -- HASH_PARTITION_EXCHANGE [$$15]  |PARTITIONED|
>                             project ([$$12, $$13, $$15])
>                             -- STREAM_PROJECT  |PARTITIONED|
>                               assign [$$13, $$15] <- [TRUE, function-call: 
> asterix:field-access-by-index, Args:[%0->$$1, AInt32: {1}]]
>                               -- ASSIGN  |PARTITIONED|
>                                 exchange 
>                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                   data-scan []<-[$$12, $$1] <- test:dsb
>                                   -- DATASOURCE_SCAN  |PARTITIONED|
>                                     exchange 
>                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                       empty-tuple-source
>                                       -- EMPTY_TUPLE_SOURCE  |PARTITIONED|



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to