fanfuxiaoran commented on code in PR #724:
URL: https://github.com/apache/cloudberry/pull/724#discussion_r1870569206
##########
src/backend/executor/nodeHashjoin.c:
##########
@@ -2157,3 +2174,225 @@ ExecHashJoinInitializeWorker(HashJoinState *state,
ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
}
}
+
+/*
+ * Find "inner var = outer var" in hj->hashclauses and create runtime filter
+ * for it.
+ */
+void
+CreateRuntimeFilter(HashJoinState* hjstate)
+{
+ AttrNumber lattno, rattno;
+ Expr *expr;
+ JoinType jointype;
+ HashJoin *hj;
+ HashState *hstate;
+ PlanState *target;
+ AttrFilter *attr_filter;
+ ListCell *lc;
+
+ /*
+ * Only applicatable for inner, right and semi join,
+ */
+ jointype = hjstate->js.jointype;
+ if (jointype != JOIN_INNER
+ && jointype != JOIN_RIGHT
+ && jointype != JOIN_SEMI
+ )
+ return;
+
+ hstate = castNode(HashState, innerPlanState(hjstate));
+ hstate->filters = NIL;
+
+ /*
+ * check and initialize the runtime filter for all hash conds in
+ * hj->hashclauses
+ */
+ hj = castNode(HashJoin, hjstate->js.ps.plan);
+ foreach (lc, hj->hashclauses)
+ {
+ expr = (Expr *)lfirst(lc);
+
+ if (!IsEqualOp(expr))
+ continue;
+
+ lattno = -1;
+ rattno = -1;
+ if (!CheckEqualArgs(expr, &lattno, &rattno))
+ continue;
+
+ if (lattno < 1 || rattno < 1)
+ continue;
+
+ target = FindTargetAttr(hjstate, lattno, &lattno);
+ if (lattno == -1 || target == NULL || IsA(target,
HashJoinState))
+ continue;
+ Assert(IsA(target, SeqScanState));
+
+ attr_filter = CreateAttrFilter(target, lattno, rattno,
+
hstate->ps.plan->plan_rows);
+ if (attr_filter->blm_filter)
+ hstate->filters = lappend(hstate->filters, attr_filter);
+ else
+ pfree(attr_filter);
+ }
+}
+
+static bool
+IsEqualOp(Expr *expr)
+{
+ Oid funcid = InvalidOid;
+
+ if (!IsA(expr, OpExpr) && !IsA(expr, FuncExpr))
+ return false;
+
+ if (IsA(expr, OpExpr))
+ funcid = ((OpExpr *)expr)->opfuncid;
+ else if (IsA(expr, FuncExpr))
+ funcid = ((FuncExpr *)expr)->funcid;
+ else
+ return false;
+
+ if (funcid == F_INT2EQ || funcid == F_INT4EQ || funcid == F_INT8EQ
+ || funcid == F_INT24EQ || funcid == F_INT42EQ
+ || funcid == F_INT28EQ || funcid == F_INT82EQ
+ || funcid == F_INT48EQ || funcid == F_INT84EQ
+ )
+ return true;
+
+ return false;
+}
+
+/*
+ * runtime filters which can be pushed down:
+ * 1. hash expr MUST BE equal op;
+ * 2. args MUST BE Var node;
+ * 3. the data type MUST BE integer;
+ */
+static bool
+CheckEqualArgs(Expr *expr, AttrNumber *lattno, AttrNumber *rattno)
+{
+ Var *var;
+ bool match;
+ List *args;
+ ListCell *lc;
+
+ if (lattno == NULL || rattno == NULL)
+ return false;
+
+ if (!IsA(expr, OpExpr) && !IsA(expr, FuncExpr))
+ return false;
+
+ if (IsA(expr, OpExpr))
+ args = ((OpExpr *)expr)->args;
+ else if (IsA(expr, FuncExpr))
+ args = ((FuncExpr *)expr)->args;
+ else
+ return false;
+
+ if (!args || list_length(args) != 2)
+ return false;
+
+ match = false;
+ foreach (lc, args)
+ {
+ match = false;
+
+ if (!IsA(lfirst(lc), Var))
Review Comment:
Sorry, I didn't make it clear. I didn't mean a predicate on the var;
I meant a case like the SQL below:
```
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT t1.c3 FROM t1, t2 WHERE t1.c2 = (t2.c2 + 10);
QUERY PLAN
-------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (actual rows=0 loops=1)
-> Hash Join (actual rows=0 loops=1)
Hash Cond: (t1.c2 = (t2.c2 + 10))
Extra Text: (seg2) Hash chain length 8.0 avg, 8 max, using 4 of 524288 buckets.
-> Seq Scan on t1 (actual rows=128 loops=1)
-> Hash (actual rows=32 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4098kB
-> Seq Scan on t2 (actual rows=32 loops=1)
Optimizer: Postgres query optimizer
(9 rows)
```
Since `t2.c2 + 10` is not a `Var` but a `T_OpExpr`, the runtime filter cannot
handle it.
Could we just walk the expression tree and check that it contains only `Var`
and `Const` nodes?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]