[ https://issues.apache.org/jira/browse/ASTERIXDB-1296?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Till updated ASTERIXDB-1296:
----------------------------
    Component/s: Optimizer

> Common filtering predicate needs to be pushed down on both sides of a join
> --------------------------------------------------------------------------
>
>                 Key: ASTERIXDB-1296
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1296
>             Project: Apache AsterixDB
>          Issue Type: Improvement
>          Components: Optimizer
>            Reporter: Pouria
>            Assignee: Yingyi Bu
>
> A common filtering predicate on the join attributes needs to be pushed down
> to both sides of the join.
> As an example, in the following PK-FK join query the dataset on the FK side
> is fully scanned and hash-partitioned during execution, although only the
> records with (fk > 2) are relevant (the join predicate $b.fk = $a.id together
> with $a.id > 2 implies $b.fk > 2). The secondary index on this attribute
> could be exploited to filter them out (or, if there is no secondary index,
> they could be filtered out during the scan). From a performance perspective
> such filtering is helpful when the filtering predicate is highly selective.
> Currently one has to add the filter on the FK side explicitly, next to the
> join predicate, i.e. (where $b.fk = $a.id and $b.fk > 2); the rewritten
> query is shown after the example below.
> drop dataverse test if exists;
> create dataverse test;
> use dataverse test;
>
> create type typeA as {
>   id: int64
> }
>
> create type typeB as {
>   id: int64,
>   fk: int64
> }
>
> create dataset dsa(typeA) primary key id;
> create dataset dsb(typeB) primary key id;
>
> insert into dataset dsa ( {"id": 1} );
> insert into dataset dsa ( {"id": 2} );
> insert into dataset dsa ( {"id": 3} );
>
> insert into dataset dsb ( {"id": 1000, "fk": 1} );
> insert into dataset dsb ( {"id": 2000, "fk": 2} );
> insert into dataset dsb ( {"id": 3000, "fk": 3} );
>
> create index dsbFkIx on dsb(fk);
>
> for $a in dataset dsa
> where $a.id > 2
> return {
>   "aid" : $a.id,
>   "val" : for $b in dataset dsb
>           where $b.fk = $a.id
>           return $b.id
> }
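>
> For reference, the workaround described above (the same query with the
> filtering predicate restated explicitly on the FK side, using the datasets
> and fields of this example) would look roughly like this:
>
> for $a in dataset dsa
> where $a.id > 2
> return {
>   "aid" : $a.id,
>   "val" : for $b in dataset dsb
>           where $b.fk = $a.id and $b.fk > 2
>           return $b.id
> }
>
> The optimized plan for the original query (without the explicit FK-side
> filter) follows:
>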
> distribute result [%0->$$5]
> -- DISTRIBUTE_RESULT  |PARTITIONED|
>   exchange 
>   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>     project ([$$5])
>     -- STREAM_PROJECT  |PARTITIONED|
>       assign [$$5] <- [function-call: asterix:closed-record-constructor, Args:[AString: {aid}, %0->$$14, AString: {val}, %0->$$12]]
>       -- ASSIGN  |PARTITIONED|
>         exchange 
>         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>           group by ([$$14 := %0->$$17]) decor ([]) {
>                     aggregate [$$12] <- [function-call: asterix:listify, Args:[%0->$$15]]
>                     -- AGGREGATE  |LOCAL|
>                       select (function-call: algebricks:not, Args:[function-call: algebricks:is-null, Args:[%0->$$16]])
>                       -- STREAM_SELECT  |LOCAL|
>                         nested tuple source
>                         -- NESTED_TUPLE_SOURCE  |LOCAL|
>                  }
>           -- PRE_CLUSTERED_GROUP_BY[$$17]  |PARTITIONED|
>             exchange 
>             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>               order (ASC, %0->$$17) 
>               -- STABLE_SORT [$$17(ASC)]  |PARTITIONED|
>                 exchange 
>                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                   project ([$$16, $$17, $$15])
>                   -- STREAM_PROJECT  |PARTITIONED|
>                     exchange 
>                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                       left outer join (function-call: algebricks:eq, Args:[%0->$$18, %0->$$17])
>                       -- HYBRID_HASH_JOIN [$$17][$$18]  |PARTITIONED|
>                         exchange 
>                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                           project ([$$17])
>                           -- STREAM_PROJECT  |PARTITIONED|
>                             exchange 
>                             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                               unnest-map [$$17, $$0] <- function-call: asterix:index-search, Args:[AString: {dsa}, AInt32: {0}, AString: {test}, AString: {dsa}, ABoolean: {false}, ABoolean: {false}, ABoolean: {false}, AInt32: {1}, %0->$$20, AInt32: {0}, FALSE, TRUE, FALSE]
>                               -- BTREE_SEARCH  |PARTITIONED|
>                                 exchange 
>                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                   assign [$$20] <- [AInt64: {2}]
>                                   -- ASSIGN  |PARTITIONED|
>                                     empty-tuple-source
>                                     -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
>                         exchange 
>                         -- HASH_PARTITION_EXCHANGE [$$18]  |PARTITIONED|
>                           project ([$$16, $$18, $$15])
>                           -- STREAM_PROJECT  |PARTITIONED|
>                             assign [$$16, $$18] <- [TRUE, function-call: asterix:field-access-by-index, Args:[%0->$$1, AInt32: {1}]]
>                             -- ASSIGN  |PARTITIONED|
>                               exchange 
>                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                 data-scan []<-[$$15, $$1] <- test:dsb
>                                 -- DATASOURCE_SCAN  |PARTITIONED|
>                                   exchange 
>                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                     empty-tuple-source
>                                     -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
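>
> Note that in the plan above the dsb branch of the HYBRID_HASH_JOIN is a plain
> DATASOURCE_SCAN followed by a HASH_PARTITION_EXCHANGE on $$18 (the fk field);
> the dsbFkIx index is not used and no select on fk is applied before the join,
> which is exactly the full scan and repartitioning described above.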


