Pouria created ASTERIXDB-1296:
---------------------------------

             Summary: Common filtering predicate needs to be pushed down on 
both sides of a join
                 Key: ASTERIXDB-1296
                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1296
             Project: Apache AsterixDB
          Issue Type: Bug
            Reporter: Pouria


A common filtering predicate on the join attributes needs to be pushed down 
into both sides of the join.

As an example, in the following PK-FK join query the FK-side dataset is fully 
scanned and hash-partitioned during execution, even though only the records 
satisfying (fk > 2) are relevant. The secondary index on this attribute could 
be exploited to filter them (or, if there is no secondary index, they could be 
filtered out during the scan). From a performance perspective, such filtering 
would be helpful when the filtering predicate is highly selective. 
Currently, one has to add the filter on the FK side explicitly, alongside the 
join predicate, as in: 

(where $b.fk = $a.id and $b.fk > 2 )


drop dataverse test if exists;
create dataverse test;

use dataverse test;

create type typeA as {
id: int64
}

create type typeB as {
id: int64 ,
fk: int64
}


create dataset dsa(typeA) primary key id;
create dataset dsb(typeB) primary key id;

insert into dataset dsa ( {"id": 1} );
insert into dataset dsa ( {"id": 2} );
insert into dataset dsa ( {"id": 3} );

 
insert into dataset dsb ( {"id": 1000, "fk": 1} );
insert into dataset dsb ( {"id": 2000, "fk": 2} );
insert into dataset dsb ( {"id": 3000, "fk": 3} );

create index dsbFkIx on dsb(fk);

for $a in dataset dsa
where $a.id > 2
return {
"aid" : $a.id,
"val" : for $b in dataset dsb
        where $b.fk = $a.id
        return $b.id
} 

distribute result [%0->$$5]
-- DISTRIBUTE_RESULT  |PARTITIONED|
  exchange 
  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
    project ([$$5])
    -- STREAM_PROJECT  |PARTITIONED|
      assign [$$5] <- [function-call: asterix:closed-record-constructor, 
Args:[AString: {aid}, %0->$$14, AString: {val}, %0->$$12]]
      -- ASSIGN  |PARTITIONED|
        exchange 
        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
          group by ([$$14 := %0->$$17]) decor ([]) {
                    aggregate [$$12] <- [function-call: asterix:listify, 
Args:[%0->$$15]]
                    -- AGGREGATE  |LOCAL|
                      select (function-call: algebricks:not, 
Args:[function-call: algebricks:is-null, Args:[%0->$$16]])
                      -- STREAM_SELECT  |LOCAL|
                        nested tuple source
                        -- NESTED_TUPLE_SOURCE  |LOCAL|
                 }
          -- PRE_CLUSTERED_GROUP_BY[$$17]  |PARTITIONED|
            exchange 
            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
              order (ASC, %0->$$17) 
              -- STABLE_SORT [$$17(ASC)]  |PARTITIONED|
                exchange 
                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                  project ([$$16, $$17, $$15])
                  -- STREAM_PROJECT  |PARTITIONED|
                    exchange 
                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                      left outer join (function-call: algebricks:eq, 
Args:[%0->$$18, %0->$$17])
                      -- HYBRID_HASH_JOIN [$$17][$$18]  |PARTITIONED|
                        exchange 
                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                          project ([$$17])
                          -- STREAM_PROJECT  |PARTITIONED|
                            exchange 
                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                              unnest-map [$$17, $$0] <- function-call: 
asterix:index-search, Args:[AString: {dsa}, AInt32: {0}, AString: {test}, 
AString: {dsa}, ABoolean: {false}, ABoolean: {false}, ABoolean: {false}, 
AInt32: {1}, %0->$$20, AInt32: {0}, FALSE, TRUE, FALSE]
                              -- BTREE_SEARCH  |PARTITIONED|
                                exchange 
                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                  assign [$$20] <- [AInt64: {2}]
                                  -- ASSIGN  |PARTITIONED|
                                    empty-tuple-source
                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
                        exchange 
                        -- HASH_PARTITION_EXCHANGE [$$18]  |PARTITIONED|
                          project ([$$16, $$18, $$15])
                          -- STREAM_PROJECT  |PARTITIONED|
                            assign [$$16, $$18] <- [TRUE, function-call: 
asterix:field-access-by-index, Args:[%0->$$1, AInt32: {1}]]
                            -- ASSIGN  |PARTITIONED|
                              exchange 
                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                data-scan []<-[$$15, $$1] <- test:dsb
                                -- DATASOURCE_SCAN  |PARTITIONED|
                                  exchange 
                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                    empty-tuple-source
                                    -- EMPTY_TUPLE_SOURCE  |PARTITIONED|



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to