yjhjstz commented on code in PR #724:
URL: https://github.com/apache/cloudberry/pull/724#discussion_r1853703384


##########
src/backend/executor/nodeHashjoin.c:
##########
@@ -2157,3 +2174,225 @@ ExecHashJoinInitializeWorker(HashJoinState *state,
                ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
        }
 }
+
+/*
+ * Find "inner var = outer var" in hj->hashclauses and create runtime filter
+ * for it.
+ */
+void
+CreateRuntimeFilter(HashJoinState* hjstate)
+{
+       AttrNumber lattno, rattno;
+       Expr            *expr;
+       JoinType        jointype;
+       HashJoin        *hj;
+       HashState       *hstate;
+       PlanState       *target;
+       AttrFilter      *attr_filter;
+       ListCell        *lc;
+
+       /*
+        * Only applicatable for inner, right and semi join,
+        */
+       jointype = hjstate->js.jointype;
+       if (jointype != JOIN_INNER
+               && jointype != JOIN_RIGHT
+               && jointype != JOIN_SEMI
+          )
+               return;
+
+       hstate = castNode(HashState, innerPlanState(hjstate));
+       hstate->filters = NIL;
+
+       /*
+        * check and initialize the runtime filter for all hash conds in
+        * hj->hashclauses
+        */
+       hj = castNode(HashJoin, hjstate->js.ps.plan);
+       foreach (lc, hj->hashclauses)
+       {
+               expr = (Expr *)lfirst(lc);
+
+               if (!IsEqualOp(expr))
+                       continue;
+
+               lattno = -1;
+               rattno = -1;
+               if (!CheckEqualArgs(expr, &lattno, &rattno))
+                       continue;
+
+               if (lattno < 1 || rattno < 1)
+                       continue;
+
+               target = FindTargetAttr(hjstate, lattno, &lattno);
+               if (lattno == -1 || target == NULL || IsA(target, 
HashJoinState))
+                       continue;
+               Assert(IsA(target, SeqScanState));
+
+               attr_filter = CreateAttrFilter(target, lattno, rattno,
+                                                                          
hstate->ps.plan->plan_rows);
+               if (attr_filter->blm_filter)
+                       hstate->filters = lappend(hstate->filters, attr_filter);
+               else
+                       pfree(attr_filter);
+       }
+}
+
+static bool
+IsEqualOp(Expr *expr)
+{
+       Oid funcid = InvalidOid;
+
+       if (!IsA(expr, OpExpr) && !IsA(expr, FuncExpr))
+               return false;
+
+       if (IsA(expr, OpExpr))
+               funcid = ((OpExpr *)expr)->opfuncid;
+       else if (IsA(expr, FuncExpr))
+               funcid = ((FuncExpr *)expr)->funcid;
+       else
+               return false;
+
+       if (funcid == F_INT2EQ  || funcid == F_INT4EQ  || funcid == F_INT8EQ
+               || funcid == F_INT24EQ || funcid == F_INT42EQ
+               || funcid == F_INT28EQ || funcid == F_INT82EQ
+               || funcid == F_INT48EQ || funcid == F_INT84EQ
+          )
+               return true;
+
+       return false;
+}
+
+/*
+ * runtime filters which can be pushed down:
+ * 1. hash expr MUST BE equal op;
+ * 2. args MUST BE Var node;
+ * 3. the data type MUST BE integer;
+ */
+static bool
+CheckEqualArgs(Expr *expr, AttrNumber *lattno, AttrNumber *rattno)
+{
+       Var             *var;
+       bool    match;
+       List    *args;
+       ListCell *lc;
+
+       if (lattno == NULL || rattno == NULL)
+               return false;
+
+       if (!IsA(expr, OpExpr) && !IsA(expr, FuncExpr))
+               return false;
+
+       if (IsA(expr, OpExpr))
+               args = ((OpExpr *)expr)->args;
+       else if (IsA(expr, FuncExpr))
+               args = ((FuncExpr *)expr)->args;
+       else
+               return false;
+
+       if (!args || list_length(args) != 2)
+               return false;
+
+       match = false;
+       foreach (lc, args)
+       {
+               match = false;
+
+               if (!IsA(lfirst(lc), Var))
+                       break;
+
+               var = lfirst(lc);
+               if (var->varno == INNER_VAR)
+                       *rattno = var->varattno;
+               else if (var->varno == OUTER_VAR)
+                       *lattno = var->varattno;
+               else
+                       break;
+
+               match = true;
+       }
+
+       return match;
+}
+
+/*
+ * it's just allowed like this:
+ *   HashJoin
+ *      ... a series of HashJoin nodes
+ *        HashJoin
+ *          SeqScan <- target
+ */
+static PlanState *
+FindTargetAttr(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno)
+{
+       Var *var;
+       PlanState *child, *parent;
+       TargetEntry *te;
+
+       parent = (PlanState *)hjstate;
+       child  = outerPlanState(hjstate);
+       Assert(child);
+
+       *lattno = -1;
+       while (child)
+       {
+               /* target is seqscan */
+               if (IsA(child, SeqScanState))
+               {
+                       te = (TargetEntry *)list_nth(child->plan->targetlist, 
attno - 1);
+                       if (!IsA(te->expr, Var))
+                               return NULL;
+
+                       var = castNode(Var, te->expr);
+
+                       /* system column is not allowed */
+                       if (var->varattno <= 0)
+                               return NULL;
+
+                       *lattno = var->varattno;
+                       return child;
+               }
+
+               /*
+                * hashjoin
+                *   result (hash filter)
+                *     seqscan on t1, t1 is replicated table
+                */
+               if (!IsA(child, HashJoinState) && !IsA(child, ResultState))

Review Comment:
   ```text
   Hash Join  (cost=0.00..4019.55 rows=37 width=9) (actual 
time=3203.012..9927.435 rows=1399 loops=1)
                                                      Hash Cond: 
(web_sales_1_prt_2.ws_item_sk = item.i_item_sk)
                                                      Join Filter: 
(web_sales_1_prt_2.ws_ext_discount_amt > ((1.3 * 
avg(web_sales_1_prt_2_1.ws_ext_discount_amt))))
                                                      Rows Removed by Join 
Filter: 4763
                                                      Extra Text: (seg2)   Hash 
chain length 1.0 avg, 1 max, using 198 of 2097152 buckets.
                                                      ->  Append  
(cost=0.00..676.44 rows=2399189 width=13) (actual time=16.899..5572.473 
rows=3090021 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_2  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=16.895..1138.267 rows=662
   149 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_3  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=8.947..1102.409 rows=6621
   36 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_4  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=8.822..1100.839 rows=6621
   48 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_5  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=11.391..1083.785 rows=662
   179 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_6  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=13.030..649.141 rows=4414
   09 loops=1)
                                                            ->  Seq Scan on 
web_sales_1_prt_7  (cost=0.00..676.44 rows=2399189 width=13) (never executed)
                                                            ->  Seq Scan on 
web_sales_1_prt_others  (cost=0.00..676.44 rows=2399189 width=13) (actual 
time=1.213..3.203 rows=17
   88 loops=1)
                                                      ->  Hash  
(cost=2432.09..2432.09 rows=109 width=12) (actual time=3177.768..3177.770 
rows=198 loops=1)
                                                            Buckets: 2097152  
Batches: 1  Memory Usage: 16392kB
                                                            ->  Broadcast 
Motion 3:3  (slice3; segments: 3)  (cost=
   ```
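   We need to consider partitioned tables: in the plan above, the outer child of the Hash Join is an Append over the per-partition Seq Scans rather than a single Seq Scan.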



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to