Pouria created ASTERIXDB-1295:
---------------------------------

             Summary: Unable to switch an implicit "sort-based" grouping into "hash-based"
                 Key: ASTERIXDB-1295
                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1295
             Project: Apache AsterixDB
          Issue Type: Bug
            Reporter: Pouria


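Switching from the default "sort-based" grouping to "hash-based" grouping via 
a hint requires an explicit "group by" clause in the query. However, there are 
cases - such as left-outer joins (see the example below) - where an implicit 
grouping is required and the Optimizer adds it to the query plan on its own. 
In such cases the query has no "group by" clause to attach the "hash" hint to, 
so it is impossible to switch the physical grouping operator: in the plan 
below, the implicit group-by is always executed as a PRE_CLUSTERED_GROUP_BY on 
top of a STABLE_SORT.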


drop dataverse test if exists;
create dataverse test;

use dataverse test;

create type typeA as {
id: int64
}

create type typeB as {
id: int64,
fk: int64
}


create dataset dsa(typeA) primary key id;
create dataset dsb(typeB) primary key id;

insert into dataset dsa ( {"id": 1} );
insert into dataset dsa ( {"id": 2} );
 
insert into dataset dsb ( {"id": 1000, "fk": 1} );
insert into dataset dsb ( {"id": 1001, "fk": 1} );
insert into dataset dsb ( {"id": 2000, "fk": 2} );



for $a in dataset dsa
return {
"aid" : $a.id,
"val" : for $b in dataset dsb
        where $b.fk = $a.id
        return $b.id
} 

distribute result [%0->$$3]
-- DISTRIBUTE_RESULT  |PARTITIONED|
  exchange 
  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
    project ([$$3])
    -- STREAM_PROJECT  |PARTITIONED|
      assign [$$3] <- [function-call: asterix:closed-record-constructor, Args:[AString: {aid}, %0->$$17, AString: {val}, %0->$$10]]
      -- ASSIGN  |PARTITIONED|
        project ([$$17, $$10])
        -- STREAM_PROJECT  |PARTITIONED|
          exchange 
          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
            group by ([$$11 := %0->$$14]) decor ([$$17 := %0->$$14]) {
                      aggregate [$$10] <- [function-call: asterix:listify, Args:[%0->$$12]]
                      -- AGGREGATE  |LOCAL|
                        select (function-call: algebricks:not, Args:[function-call: algebricks:is-null, Args:[%0->$$13]])
                        -- STREAM_SELECT  |LOCAL|
                          nested tuple source
                          -- NESTED_TUPLE_SOURCE  |LOCAL|
                   }
            -- PRE_CLUSTERED_GROUP_BY[$$14]  |PARTITIONED|
              exchange 
              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                order (ASC, %0->$$14) 
                -- STABLE_SORT [$$14(ASC)]  |PARTITIONED|
                  exchange 
                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                    project ([$$12, $$13, $$14])
                    -- STREAM_PROJECT  |PARTITIONED|
                      exchange 
                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                        left outer join (function-call: algebricks:eq, Args:[%0->$$15, %0->$$14])
                        -- HYBRID_HASH_JOIN [$$14][$$15]  |PARTITIONED|
                          exchange 
                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                            project ([$$14])
                            -- STREAM_PROJECT  |PARTITIONED|
                              exchange 
                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                data-scan []<-[$$14, $$0] <- test:dsa
                                -- DATASOURCE_SCAN  |PARTITIONED|
                                  exchange 
                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                    empty-tuple-source
                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                          exchange 
                          -- HASH_PARTITION_EXCHANGE [$$15]  |PARTITIONED|
                            project ([$$12, $$13, $$15])
                            -- STREAM_PROJECT  |PARTITIONED|
                              assign [$$13, $$15] <- [TRUE, function-call: asterix:field-access-by-index, Args:[%0->$$1, AInt32: {1}]]
                              -- ASSIGN  |PARTITIONED|
                                exchange 
                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                  data-scan []<-[$$12, $$1] <- test:dsb
                                  -- DATASOURCE_SCAN  |PARTITIONED|
                                    exchange 
                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                      empty-tuple-source
                                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to