From e155dc70e0370435061da70362175255d83a36ea Mon Sep 17 00:00:00 2001
From: Amit Langote <amitlan@postgresql.org>
Date: Mon, 26 Jan 2026 11:01:44 +0900
Subject: [PATCH v5 4/5] WIP: Add ExecQualBatch() for batched qual evaluation

Introduce batched qual evaluation for SeqScan when quals are simple
AND-trees of Var op Const, Var op Var, or NullTest expressions.
The batch is evaluated using a bitmask, avoiding per-tuple ExecQual()
overhead.
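
For example, given integer columns a, b, and c, a qual such as

    WHERE a < 10 AND b IS NOT NULL AND a = c

can be evaluated in batch mode, while quals containing OR, NOT,
function calls, or non-leakproof operators fall back to per-tuple
ExecQual().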

Only leakproof operators are eligible for batching, since batching
changes evaluation order, which could otherwise leak data through
side channels before security barrier quals filter rows.

Add supporting infrastructure: an EEOP_SCAN_FETCHSOME_BATCH step that
deforms all tuples in a batch, and an ExprContext.scan_batch field.

The postgres_fdw regression test is updated to set executor_batch_rows
to 1 (effectively disabling batching) for a query with LIMIT, since
batching processes entire batches before checking LIMIT, resulting in
different "Rows Removed by Filter" counts in EXPLAIN ANALYZE output.
---
 .../postgres_fdw/expected/postgres_fdw.out    |   1 +
 contrib/postgres_fdw/sql/postgres_fdw.sql     |   1 +
 src/backend/executor/execExpr.c               | 335 ++++++++++++++++++
 src/backend/executor/execExprInterp.c         | 224 ++++++++++++
 src/backend/executor/execTuples.c             |  32 ++
 src/backend/executor/nodeSeqscan.c            |  28 +-
 src/backend/jit/llvm/llvmjit_expr.c           |  35 ++
 src/backend/jit/llvm/llvmjit_types.c          |   3 +
 src/include/executor/execExpr.h               |  84 ++++-
 src/include/executor/execScan.h               |  46 +++
 src/include/executor/executor.h               |   3 +
 src/include/executor/tuptable.h               |   2 +
 src/include/nodes/execnodes.h                 |  11 +-
 13 files changed, 802 insertions(+), 3 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 6066510c7c0..67df4233235 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -12208,6 +12208,7 @@ SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
                Filter: (t1_3.b === 505)
 (14 rows)
 
+SET executor_batch_rows = 1;
 EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
 SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
                                  QUERY PLAN                                 
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 4f7ab2ed0ac..daffc545a5c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -4126,6 +4126,7 @@ SELECT * FROM local_tbl t1 LEFT JOIN (SELECT *, (SELECT count(*) FROM async_pt W
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
+SET executor_batch_rows = 1;
 EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
 SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
 SELECT * FROM async_pt t1 WHERE t1.b === 505 LIMIT 1;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 088eca24021..cc76b760ee7 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -104,6 +104,16 @@ static void ExecInitJsonCoercion(ExprState *state, JsonReturning *returning,
 								 bool exists_coerce,
 								 Datum *resv, bool *resnull);
 
+/* private context for qual_batchable_walker() */
+typedef struct QualBatchContext
+{
+	List	   *leaves;		/* accepted leaf nodes (NullTest/OpExpr) */
+	Bitmapset  *attnos;		/* attnos of Vars referenced by accepted leaves */
+	bool		ok;			/* stays true while the qual remains batchable */
+	AttrNumber	last_scan;	/* highest attno needed from the scan slot */
+} QualBatchContext;
+
+static bool qual_batchable_walker(Node *node, void *context);
 
 /*
  * ExecInitExpr: prepare an expression tree for execution
@@ -5064,3 +5074,328 @@ ExecInitJsonCoercion(ExprState *state, JsonReturning *returning,
 		DomainHasConstraints(returning->typid);
 	ExprEvalPushStep(state, &scratch);
 }
+
+/*
+ * Extract the Var attno from an expression, unwrapping RelabelType and
+ * TargetEntry nodes.  Returns attno > 0 on success, or 0 on failure (not a
+ * Var, or a system or whole-row column).
+ */
+static AttrNumber
+extract_var_attno(Expr *expr)
+{
+	if (expr == NULL)
+		return 0;
+	if (IsA(expr, TargetEntry))
+		return extract_var_attno(((TargetEntry *) expr)->expr);
+	if (IsA(expr, RelabelType))
+		return extract_var_attno((Expr *) ((RelabelType *) expr)->arg);
+	if (IsA(expr, Var) && ((Var *) expr)->varattno > 0)
+		return ((Var *) expr)->varattno;
+	return 0;
+}
+
+/*
+ * qual_batchable_walker
+ *		Check if a qual tree is eligible for batched evaluation.
+ *
+ * Walks the qual tree and validates that it consists only of:
+ *   - AND expressions (OR/NOT disqualify)
+ *   - NullTest on simple Vars
+ *   - Binary OpExpr with Var op Const or Var op Var arguments
+ *
+ * For OpExpr, the operator's function must be:
+ *   - Strict: a NULL input yields a NULL result, which the batch code treats
+ *     as "row fails", matching WHERE semantics
+ *   - Leakproof: batching evaluates quals on all rows of a batch before any
+ *     filtering, which could otherwise leak data through a non-leakproof
+ *     operator before security barrier quals filter the rows
+ *
+ * On success, populates cxt->leaves with the leaf nodes and cxt->attnos with
+ * the referenced attribute numbers. Sets cxt->ok = false if any node fails
+ * validation.
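+ *
+ * For example, "a < 10 AND b IS NOT NULL" is accepted (assuming "<" resolves
+ * to a strict, leakproof operator), while "a < 10 OR b = 1" and "f(a) = 1"
+ * are rejected.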
+ */
+static bool
+qual_batchable_walker(Node *node, void *context)
+{
+	QualBatchContext *cxt = (QualBatchContext *) context;
+
+	if (node == NULL || !cxt->ok)
+		return false;
+
+	switch (nodeTag(node))
+	{
+		case T_List:
+			return expression_tree_walker(node, qual_batchable_walker, cxt);
+
+		case T_BoolExpr:
+		{
+			BoolExpr *b = (BoolExpr *) node;
+
+			/* Only AND trees are allowed */
+			if (b->boolop != AND_EXPR)
+			{
+				cxt->ok = false;
+				return true;
+			}
+			/* Recurse normally over children */
+			return expression_tree_walker(node, qual_batchable_walker, cxt);
+		}
+
+		case T_NullTest:
+		{
+			NullTest *nt = (NullTest *) node;
+			AttrNumber  attno = extract_var_attno(nt->arg);
+
+			if (attno == 0)
+			{
+				cxt->ok = false;
+				return true;
+			}
+
+			cxt->attnos = bms_add_member(cxt->attnos, attno);
+			if (attno > cxt->last_scan)
+				cxt->last_scan = attno;
+			cxt->leaves = lappend(cxt->leaves, node);
+
+			/* Do NOT recurse into leaf */
+			return false;
+		}
+
+		case T_OpExpr:
+		{
+			OpExpr *op = (OpExpr *) node;
+			List   *args = op->args;
+			AttrNumber	lattno,
+						rattno;
+
+			/* Only binary operators */
+			if (list_length(args) != 2)
+			{
+				cxt->ok = false;
+				return true;
+			}
+			/* Must be strict (NULL input -> NULL/false result) */
+			if (!func_strict(op->opfuncid))
+			{
+				cxt->ok = false;
+				return true;
+			}
+			/*
+			 * Must be leakproof. Batching changes evaluation order, which
+			 * could leak data through side channels before security barrier
+			 * quals filter rows.
+			 */
+			if (!get_func_leakproof(op->opfuncid))
+			{
+				cxt->ok = false;
+				return true;
+			}
+
+			/* Left arg must be a Var */
+			lattno = extract_var_attno(linitial(op->args));
+			if (lattno == 0)
+			{
+				cxt->ok = false;
+				return true;
+			}
+			cxt->attnos = bms_add_member(cxt->attnos, lattno);
+			if (lattno > cxt->last_scan)
+				cxt->last_scan = lattno;
+
+			/* Right arg must be Const or Var */
+			if (!IsA(lsecond(op->args), Const))
+			{
+				rattno = extract_var_attno(lsecond(op->args));
+				if (rattno == 0)
+				{
+					cxt->ok = false;
+					return true;
+				}
+				cxt->attnos = bms_add_member(cxt->attnos, rattno);
+				if (rattno > cxt->last_scan)
+					cxt->last_scan = rattno;
+			}
+
+			cxt->leaves = lappend(cxt->leaves, node);
+
+			return false;	/* leaf; don't recurse */
+		}
+
+		/* Unhandled node type; fall back to per-tuple evaluation */
+		default:
+			cxt->ok = false;
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Build a BatchQualTerm from a leaf already validated by
+ * qual_batchable_walker().  Returns NULL if the leaf has an unexpected
+ * shape, which shouldn't happen for a validated qual.
+ */
+static BatchQualTerm *
+build_term_from_leaf(Node *n)
+{
+	BatchQualTerm *term;
+	BatchQualTermKind kind;
+	bool		strict;
+	AttrNumber	l_attno;
+	AttrNumber	r_attno;
+	Datum		r_const = (Datum) 0;
+	bool		r_isnull = false;
+	FmgrInfo   *finfo = NULL;
+	Oid			collation;
+
+	if (IsA(n, NullTest))
+	{
+		NullTest *nt = (NullTest *) n;
+
+		kind = nt->nulltesttype == IS_NULL ? BQTK_IS_NULL : BQTK_IS_NOT_NULL;
+		l_attno = extract_var_attno(nt->arg);
+		r_attno = 0;
+		strict = false;
+		collation = InvalidOid;
+
+		if (l_attno == 0)
+			return NULL;
+	}
+	else if (IsA(n, OpExpr))
+	{
+		OpExpr *op = (OpExpr *) n;
+		Expr   *l  = linitial(op->args);
+		Expr   *r  = lsecond(op->args);
+
+		l_attno = extract_var_attno(l);
+		if (l_attno == 0)
+			return NULL;
+
+		if (IsA(r, Const))
+		{
+			Const *c = (Const *) r;
+
+			kind = BQTK_VAR_CONST;
+			r_const = c->constvalue;
+			r_isnull = c->constisnull;
+			r_attno = 0;
+		}
+		else
+		{
+			r_attno = extract_var_attno(r);
+			if (r_attno == 0)
+				return NULL;
+			kind = BQTK_VAR_VAR;
+		}
+
+		strict = func_strict(op->opfuncid);
+		collation = exprInputCollation((Node *) op);
+		finfo = palloc(sizeof(FmgrInfo));
+		fmgr_info(op->opfuncid, finfo);
+	}
+	else
+		return NULL;
+
+	term = palloc(sizeof(BatchQualTerm));
+	term->kind = kind;
+	term->strict = strict;
+	term->l_attno = l_attno;
+	term->r_attno = r_attno;
+	term->r_const = r_const;
+	term->r_isnull = r_isnull;
+	term->finfo = finfo;
+	term->collation = collation;
+
+	return term;
+}
+
+/*
+ * ExecInitQualBatch
+ *		Build a batched-qual ExprState for evaluating scan quals over a TupleBatch.
+ *
+ * Returns a dedicated ExprState that evaluates the plan's quals in batch mode,
+ * or NULL if the quals are not eligible for batching. The caller should retain
+ * the regular ps->qual for fallback when batching is not used.
+ *
+ * Batching is only possible when the qual tree consists of:
+ *	- Top-level AND of simple clauses (no OR, NOT)
+ *	- NullTest on a simple Var
+ *	- Binary OpExpr with (Var op Const) or (Var op Var), where the operator
+ *	  is both strict (for proper NULL handling) and leakproof (to avoid
+ *	  leaking data when evaluation order changes vs. security barrier quals)
+ *
+ * The generated EEOP program:
+ *	1. EEOP_SCAN_FETCHSOME_BATCH - deforms all slots in the batch
+ *	2. EEOP_QUAL_BATCH_INITMASK - initializes bitmask to all-pass
+ *	3. EEOP_QUAL_BATCH_TERM (per leaf) - evaluates term, clears failing bits
+ *
+ * The result bitmask is stored in BatchQualRuntime (via ExprState.batch_private)
+ * for the caller to use when populating output slots.
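+ *
+ * For example, "WHERE a < 10 AND b IS NOT NULL" (a = attno 1, b = attno 2,
+ * with a strict, leakproof "<" operator) compiles to:
+ *		EEOP_SCAN_FETCHSOME_BATCH (last_var = 2)
+ *		EEOP_QUAL_BATCH_INITMASK
+ *		EEOP_QUAL_BATCH_TERM (a < 10)
+ *		EEOP_QUAL_BATCH_TERM (b IS NOT NULL)
+ *		EEOP_DONE_NO_RETURN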
+ */
+ExprState *
+ExecInitQualBatch(PlanState *ps)
+{
+	Node	   *qual = (Node *) ps->plan->qual;
+	QualBatchContext cxt = {NIL, NULL, true, 0};
+	BatchQualRuntime *rt;
+	ExprState  *state;
+	int			maxrows = executor_batch_rows;
+	uint64	   *mask;
+	int			mask_words;
+	ListCell   *lc;
+	ExprEvalStep scratch = {0};
+
+	if (qual == NULL)
+		return NULL;
+
+	/*
+	 * Check if qual tree is batchable; collect leaf nodes and referenced
+	 * attnos.
+	 */
+	(void) qual_batchable_walker(qual, &cxt);
+	if (!cxt.ok || cxt.leaves == NIL || bms_is_empty(cxt.attnos))
+		return NULL;
+
+	/* Allocate bitmask: one bit per row, rounded up to 64-bit words */
+	mask_words = (maxrows + 63) >> 6;
+	mask = (uint64 *) palloc0(sizeof(uint64) * mask_words);
+
+	/* Bundle runtime state; attached to ExprState for access during execution */
+	rt = palloc0(sizeof(BatchQualRuntime));
+	rt->mask = mask;
+	rt->mask_words = mask_words;
+
+	/* Create ExprState for the batched program */
+	state = makeNode(ExprState);
+	state->expr = (Expr *) qual;
+	state->parent = ps;
+	state->ext_params = NULL;
+	state->flags = EEO_FLAG_IS_QUAL;
+	state->batch_private = (void *) rt;
+
+	/* Step 1: deform all slots in batch up to highest referenced attribute */
+	scratch.opcode = EEOP_SCAN_FETCHSOME_BATCH;
+	scratch.d.fetch_batch.last_var = cxt.last_scan;
+	ExprEvalPushStep(state, &scratch);
+
+	/* Step 2: initialize mask to all-ones (all rows pass initially) */
+	scratch.opcode = EEOP_QUAL_BATCH_INITMASK;
+	scratch.d.qualbatch_init.mask = mask;
+	scratch.d.qualbatch_init.mask_words = mask_words;
+	ExprEvalPushStep(state, &scratch);
+
+	/* Step 3: one TERM per qual leaf; each clears mask bits for failing rows */
+	foreach(lc, cxt.leaves)
+	{
+		BatchQualTerm *term = build_term_from_leaf((Node *) lfirst(lc));
+
+		if (term == NULL)
+			return NULL;
+
+		scratch.opcode = EEOP_QUAL_BATCH_TERM;
+		scratch.d.qualbatch_term.term = term;	/* step keeps the pointer */
+		ExprEvalPushStep(state, &scratch);
+	}
+
+	/* Done; mask now indicates which rows survived all quals */
+	scratch.opcode = EEOP_DONE_NO_RETURN;
+	ExprEvalPushStep(state, &scratch);
+
+	ExecReadyExpr(state);
+
+	return state;
+}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index a7a5ac1e83b..304c7f4e0fb 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -59,6 +59,7 @@
 #include "access/heaptoast.h"
 #include "catalog/pg_type.h"
 #include "commands/sequence.h"
+#include "executor/execBatch.h"
 #include "executor/execExpr.h"
 #include "executor/nodeSubplan.h"
 #include "funcapi.h"
@@ -466,6 +467,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 	TupleTableSlot *scanslot;
 	TupleTableSlot *oldslot;
 	TupleTableSlot *newslot;
+	TupleBatch *scanbatch;
 
 	/*
 	 * This array has to be in the same order as enum ExprEvalOp.
@@ -592,6 +594,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
 		&&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
 		&&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
+		&&CASE_EEOP_SCAN_FETCHSOME_BATCH,
+		&&CASE_EEOP_QUAL_BATCH_INITMASK,
+		&&CASE_EEOP_QUAL_BATCH_TERM,
 		&&CASE_EEOP_LAST
 	};
 
@@ -612,6 +617,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 	scanslot = econtext->ecxt_scantuple;
 	oldslot = econtext->ecxt_oldtuple;
 	newslot = econtext->ecxt_newtuple;
+	scanbatch = econtext->scan_batch;
 
 #if defined(EEO_USE_COMPUTED_GOTO)
 	EEO_DISPATCH();
@@ -2265,6 +2271,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_SCAN_FETCHSOME_BATCH)
+		{
+			/*
+			 * CheckOpSlotCompatibility() is not applicable here: this step
+			 * uses the d.fetch_batch union member, so op->d.fetch would not
+			 * be valid to inspect.
+			 */
+			Assert(scanbatch != NULL);
+			slot_getsomeattrs_batch(scanbatch, op->d.fetch_batch.last_var);
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_QUAL_BATCH_INITMASK)
+		{
+			ExecQualBatchInitMask(state, op, econtext);
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_QUAL_BATCH_TERM)
+		{
+			ExecQualBatchTerm(state, op, econtext);
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_LAST)
 		{
 			/* unreachable */
@@ -5914,3 +5942,199 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 
 	MemoryContextSwitchTo(oldContext);
 }
+
+/*
+ * Set mask bits [0..nvalid_bits) to 1, clearing any padding bits in the
+ * last word.  For example, with nvalid_bits = 70 and nwords = 2, the second
+ * word keeps only its low 6 bits set.
+ */
+static inline void
+mask_init_all_ones(uint64 *a, int nwords, int nvalid_bits)
+{
+	for (int i = 0; i < nwords; i++)
+		a[i] = ~UINT64CONST(0);
+
+	if ((nvalid_bits & 63) != 0)
+	{
+		int rem = nvalid_bits & 63;
+
+		a[nwords - 1] &= (~UINT64CONST(0)) >> (64 - rem);
+	}
+}
+
+static inline void
+mask_clear_bit(uint64 *a, int i)
+{
+	a[i >> 6] &= ~(UINT64CONST(1) << (i & 63));
+}
+
+static inline bool
+mask_is_empty(const uint64 *mask, int nwords)
+{
+	for (int i = 0; i < nwords; i++)
+	{
+		if (mask[i] != 0)
+			return false;
+	}
+	return true;
+}
+
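+/*
+ * ExecQualBatchInitMask
+ *		Set the qual bitmask to all-pass for the current batch size.
+ */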
+void
+ExecQualBatchInitMask(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	TupleBatch *b = econtext->scan_batch;
+	uint64	   *mask = op->d.qualbatch_init.mask;
+	int			nwords = op->d.qualbatch_init.mask_words;
+	int			n = b->ntuples;
+
+	/* initialize to all-pass for the current batch size */
+	mask_init_all_ones(mask, nwords, n);
+}
+
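+/*
+ * ExecQualBatchTerm
+ *		Apply one compiled qual term to every row in the current batch,
+ *		clearing mask bits for rows that fail.
+ *
+ * Each term ANDs its result into the mask, so a row's bit survives only if
+ * the row passes every term; terms can therefore be applied in any order.
+ */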
+void
+ExecQualBatchTerm(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	BatchQualRuntime *rt = ExecGetBatchQualRuntime(state);
+	TupleBatch *b = econtext->scan_batch;
+	TupleTableSlot **slots = b->activeslots;
+	uint64	   *mask = rt->mask;
+	int			mask_words = rt->mask_words;
+	BatchQualTerm *t = op->d.qualbatch_term.term;
+	int			n = b->ntuples;
+
+	/* Early exit if no rows remain */
+	if (mask_is_empty(mask, mask_words))
+		return;
+
+	switch (t->kind)
+	{
+		case BQTK_IS_NULL:
+		{
+			/* keep bit set only if value IS NULL; clear otherwise */
+			for (int i = 0; i < n; i++)
+			{
+				if (!slots[i]->tts_isnull[t->l_attno - 1])
+					mask_clear_bit(mask, i);
+			}
+			break;
+		}
+
+		case BQTK_IS_NOT_NULL:
+		{
+			/* keep bit set only if value IS NOT NULL; clear if NULL */
+			for (int i = 0; i < n; i++)
+			{
+				if (slots[i]->tts_isnull[t->l_attno - 1])
+					mask_clear_bit(mask, i);
+			}
+			break;
+		}
+
+		case BQTK_VAR_CONST:
+		{
+			const bool	r_isnull = t->r_isnull;
+			const Datum r_const = t->r_const;
+			const bool	strict = t->strict;
+			const Oid	coll = t->collation;
+			FmgrInfo   *finfo = t->finfo;
+
+			for (int i = 0; i < n; i++)
+			{
+				bool		ln = slots[i]->tts_isnull[t->l_attno - 1];
+				bool		pass;
+
+				/* WHERE treats NULL as false; strict ops short-circuit */
+				if (strict && (ln || r_isnull))
+					pass = false;
+				else
+				{
+					Datum		lv = slots[i]->tts_values[t->l_attno - 1];
+
+					pass = DatumGetBool(FunctionCall2Coll(finfo, coll, lv, r_const));
+				}
+
+				if (!pass)
+					mask_clear_bit(mask, i);
+			}
+			break;
+		}
+
+		case BQTK_VAR_VAR:
+		{
+			const bool	strict = t->strict;
+			const Oid	coll = t->collation;
+			FmgrInfo   *finfo = t->finfo;
+
+			for (int i = 0; i < n; i++)
+			{
+				bool		ln = slots[i]->tts_isnull[t->l_attno - 1];
+				bool		rn = slots[i]->tts_isnull[t->r_attno - 1];
+				bool		pass;
+
+				if (strict && (ln || rn))
+					pass = false;
+				else
+				{
+					Datum		lv = slots[i]->tts_values[t->l_attno - 1];
+					Datum		rv = slots[i]->tts_values[t->r_attno - 1];
+
+					pass = DatumGetBool(FunctionCall2Coll(finfo, coll, lv, rv));
+				}
+
+				if (!pass)
+					mask_clear_bit(mask, i);
+			}
+			break;
+		}
+
+		default:
+			/* can't happen for programs built by ExecInitQualBatch() */
+			Assert(false);
+			break;
+	}
+}
+
+/*
+ * ExecQualBatch
+ *		Evaluate a batched qual over all rows in a TupleBatch.
+ *
+ * Runs the EEOP program built by ExecInitQualBatch, which produces a bitmask
+ * indicating which rows pass the qual. Rows that pass are copied to the
+ * batch's output slots (b->outslots).
+ *
+ * Returns the number of qualifying rows. The caller should then call
+ * TupleBatchUseOutput(b, qualified) to switch the batch to return from
+ * outslots.
+ *
+ * The batch must be materialized (slots populated) before calling this.
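+ *
+ * A typical caller (see ExecScanExtendedBatch()) does roughly:
+ *
+ *		qualified = ExecQualBatch(qual_batch, econtext, b);
+ *		TupleBatchUseOutput(b, qualified);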
+ */
+int
+ExecQualBatch(ExprState *state, ExprContext *econtext, TupleBatch *b)
+{
+	int			i;
+	uint64	   *mask;
+	int			kept = 0;
+	BatchQualRuntime *rt = ExecGetBatchQualRuntime(state);
+
+	/* verify that the expression was built by ExecInitQualBatch() */
+	Assert(state->flags & EEO_FLAG_IS_QUAL);
+	Assert(rt != NULL && rt->mask != NULL && rt->mask_words > 0);
+
+	/* run the batched EEOP program once */
+	econtext->scan_batch = b;
+	ExecEvalExprNoReturn(state, econtext);
+
+	mask = rt->mask;
+	if (mask_is_empty(mask, rt->mask_words))
+		return 0;
+
+	/* Add survivors into outslots */
+	TupleBatchRewind(b);
+	i = 0;
+	while (TupleBatchHasMore(b))
+	{
+		TupleTableSlot *slot = TupleBatchGetNextSlot(b);
+
+		/* mask bit set => row survives */
+		if (mask[i >> 6] & (UINT64CONST(1) << (i & 63)))
+			TupleBatchStoreInOut(b, kept++, slot);
+		i++;
+	}
+
+	return kept;
+}
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index b768eae9e53..5082d8ecd3b 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -2111,6 +2111,38 @@ slot_getsomeattrs_int(TupleTableSlot *slot, int attnum)
 	}
 }
 
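+/*
+ * slot_getsomeattrs_batch
+ *		Batch variant of slot_getsomeattrs(): ensure attributes up to attnum
+ *		are deformed in every slot of the given batch.
+ *
+ * Used by EEOP_SCAN_FETCHSOME_BATCH.
+ */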
+void
+slot_getsomeattrs_batch(struct TupleBatch *b, int attnum)
+{
+	while (TupleBatchHasMore(b))
+	{
+		TupleTableSlot *slot = TupleBatchGetNextSlot(b);
+
+		/* Check for caller errors */
+		Assert(attnum > 0);
+
+		if (unlikely(attnum > slot->tts_tupleDescriptor->natts))
+			elog(ERROR, "invalid attribute number %d", attnum);
+
+		/* XXX - there should perhaps also be a batch-level att_nvalid */
+		if (attnum <= slot->tts_nvalid)
+			continue;
+
+		/* Fetch as many attributes as possible from the underlying tuple. */
+		slot->tts_ops->getsomeattrs(slot, attnum);
+
+		/*
+		 * If the underlying tuple doesn't have enough attributes, tuple
+		 * descriptor must have the missing attributes.
+		 */
+		if (unlikely(slot->tts_nvalid < attnum))
+		{
+			slot_getmissingattrs(slot, slot->tts_nvalid, attnum);
+			slot->tts_nvalid = attnum;
+		}
+	}
+}
+
 /* ----------------------------------------------------------------
  *		ExecTypeFromTL
  *
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index f36b31d4fbb..16f15ed68aa 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -281,6 +281,28 @@ ExecSeqScanBatchSlot(PlanState *pstate)
 									 NULL, NULL);
 }
 
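+/*
+ * Like ExecSeqScanBatchSlotWithQual, but quals are evaluated batch-at-a-time
+ * via ExecScanExtendedBatch() rather than tuple-at-a-time.
+ */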
+static TupleTableSlot *
+ExecSeqScanBatchSlotWithBatchQual(PlanState *pstate)
+{
+	SeqScanState *node = castNode(SeqScanState, pstate);
+	TupleBatch *b = pstate->ps_Batch;
+
+	/*
+	 * Use pg_assume() for the != NULL test to make the compiler realize no
+	 * runtime check for the field is needed in ExecScanExtendedBatch().
+	 */
+	Assert(pstate->state->es_epq_active == NULL);
+	pg_assume(pstate->qual_batch != NULL);
+	Assert(pstate->ps_ProjInfo == NULL);
+
+	if (!TupleBatchHasMore(b))
+		b = ExecScanExtendedBatch(&node->ss,
+								  (ExecScanAccessBatchMtd) SeqNextBatchMaterialize,
+								  pstate->qual_batch, NULL);
+
+	return b ? TupleBatchGetNextSlot(b) : NULL;
+}
+
 static TupleTableSlot *
 ExecSeqScanBatchSlotWithQual(PlanState *pstate)
 {
@@ -344,6 +366,7 @@ SeqScanInitBatching(SeqScanState *scanstate, int eflags)
 	bool track_stats = estate->es_instrument && (estate->es_instrument & INSTRUMENT_BATCHES);
 
 	scanstate->ss.ps.ps_Batch = TupleBatchCreate(scandesc, cap, track_stats);
+	scanstate->ss.ps.qual_batch = ExecInitQualBatch((PlanState *) scanstate);
 
 	/* Choose batch variant to preserve your specialization matrix */
 	if (scanstate->ss.ps.qual == NULL)
@@ -361,7 +384,10 @@ SeqScanInitBatching(SeqScanState *scanstate, int eflags)
 	{
 		if (scanstate->ss.ps.ps_ProjInfo == NULL)
 		{
-			scanstate->ss.ps.ExecProcNode = ExecSeqScanBatchSlotWithQual;
+			if (scanstate->ss.ps.qual_batch == NULL)
+				scanstate->ss.ps.ExecProcNode = ExecSeqScanBatchSlotWithQual;
+			else
+				scanstate->ss.ps.ExecProcNode = ExecSeqScanBatchSlotWithBatchQual;
 		}
 		else
 		{
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 650f1d42a93..847f265df3b 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -109,6 +109,9 @@ llvm_compile_expr(ExprState *state)
 	LLVMValueRef v_newslot;
 	LLVMValueRef v_resultslot;
 
+	/* batches */
+	LLVMValueRef v_scanbatch;
+
 	/* nulls/values of slots */
 	LLVMValueRef v_innervalues;
 	LLVMValueRef v_innernulls;
@@ -221,6 +224,11 @@ llvm_compile_expr(ExprState *state)
 									 v_state,
 									 FIELDNO_EXPRSTATE_RESULTSLOT,
 									 "v_resultslot");
+	v_scanbatch = l_load_struct_gep(b,
+									StructExprContext,
+									v_econtext,
+									FIELDNO_EXPRCONTEXT_SCANBATCH,
+									"v_scanbatch");
 
 	/* build global values/isnull pointers */
 	v_scanvalues = l_load_struct_gep(b,
@@ -2940,6 +2948,33 @@ llvm_compile_expr(ExprState *state)
 				LLVMBuildBr(b, opblocks[opno + 1]);
 				break;
 
+			case EEOP_SCAN_FETCHSOME_BATCH:
+				{
+					LLVMValueRef params[2];
+
+					params[0] = v_scanbatch;
+					params[1] = l_int32_const(lc, op->d.fetch_batch.last_var);
+
+					l_call(b,
+						   llvm_pg_var_func_type("slot_getsomeattrs_batch"),
+						   llvm_pg_func(mod, "slot_getsomeattrs_batch"),
+						   params, lengthof(params), "");
+
+					LLVMBuildBr(b, opblocks[opno + 1]);
+					break;
+				}
+
+			case EEOP_QUAL_BATCH_INITMASK:
+				build_EvalXFunc(b, mod, "ExecQualBatchInitMask",
+								v_state, op, v_econtext);
+				LLVMBuildBr(b, opblocks[opno + 1]);
+				break;
+
+			case EEOP_QUAL_BATCH_TERM:
+				build_EvalXFunc(b, mod, "ExecQualBatchTerm",
+								v_state, op, v_econtext);
+				LLVMBuildBr(b, opblocks[opno + 1]);
+				break;
+
 			case EEOP_LAST:
 				Assert(false);
 				break;
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c
index 4636b90cd0f..5ba9920f3fd 100644
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -179,7 +179,10 @@ void	   *referenced_functions[] =
 	MakeExpandedObjectReadOnlyInternal,
 	slot_getmissingattrs,
 	slot_getsomeattrs_int,
+	slot_getsomeattrs_batch,
 	strlen,
 	varsize_any,
 	ExecInterpExprStillValid,
+	ExecQualBatchInitMask,
+	ExecQualBatchTerm,
 };
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index aa9b361fa31..2672d2674cc 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -292,11 +292,29 @@ typedef enum ExprEvalOp
 	EEOP_AGG_ORDERED_TRANS_DATUM,
 	EEOP_AGG_ORDERED_TRANS_TUPLE,
 
+	/*
+	 * Batched qual evaluation opcodes
+	 *
+	 * These opcodes implement batch-mode qual evaluation where an entire
+	 * TupleBatch is processed at once rather than tuple-by-tuple.
+	 *
+	 * EEOP_SCAN_FETCHSOME_BATCH: Call slot_getsomeattrs() on all slots in
+	 *     the batch to ensure needed attributes are deformed.
+	 *
+	 * EEOP_QUAL_BATCH_INITMASK: Initialize the result bitmask to all-ones
+	 *     (all rows initially pass).
+	 *
+	 * EEOP_QUAL_BATCH_TERM: Evaluate one qual leaf (NullTest or OpExpr) over
+	 *     all rows, clearing mask bits for rows that fail.
+	 */
+	EEOP_SCAN_FETCHSOME_BATCH,
+	EEOP_QUAL_BATCH_INITMASK,
+	EEOP_QUAL_BATCH_TERM,
+
 	/* non-existent operation, used e.g. to check array lengths */
 	EEOP_LAST
 } ExprEvalOp;
 
-
 typedef struct ExprEvalStep
 {
 	/*
@@ -331,6 +349,12 @@ typedef struct ExprEvalStep
 			const TupleTableSlotOps *kind;
 		}			fetch;
 
+		/* for EEOP_SCAN_FETCHSOME_BATCH */
+		struct
+		{
+			/* attribute number up to which to fetch (inclusive) */
+			int			last_var;
+		}			fetch_batch;
+
 		/* for EEOP_INNER/OUTER/SCAN/OLD/NEW_[SYS]VAR */
 		struct
 		{
@@ -769,6 +793,17 @@ typedef struct ExprEvalStep
 			void	   *json_coercion_cache;
 			ErrorSaveContext *escontext;
 		}			jsonexpr_coercion;
+
+		struct
+		{
+			uint64	   *mask;		/* shared mask buffer for this program */
+			int			mask_words; /* ceil(executor_batch_rows / 64) */
+		}			qualbatch_init; /* EEOP_QUAL_BATCH_INITMASK */
+
+		struct
+		{
+			struct BatchQualTerm *term;		/* compiled leaf */
+		}			qualbatch_term;			/* EEOP_QUAL_BATCH_TERM */
 	}			d;
 } ExprEvalStep;
 
@@ -917,4 +952,51 @@ extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op,
 extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op,
 										 ExprContext *econtext);
 
+/* See ExecQualBatchTerm(). */
+typedef enum BatchQualTermKind
+{
+	BQTK_VAR_CONST,
+	BQTK_VAR_VAR,
+	BQTK_IS_NULL,
+	BQTK_IS_NOT_NULL,
+} BatchQualTermKind;
+
+typedef struct BatchQualTerm
+{
+	BatchQualTermKind kind;
+	bool		strict;		/* follow strict NULL semantics if true */
+	AttrNumber	l_attno;	/* left VAR column */
+	AttrNumber	r_attno;	/* right VAR column, or 0 if Const */
+	Datum		r_const;	/* for VAR_CONST */
+	bool		r_isnull;	/* for VAR_CONST */
+	FmgrInfo   *finfo;		/* fmgr for generic binary ops */
+	Oid			collation;	/* op collation */
+} BatchQualTerm;
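+
+/*
+ * For example, "a < 10" on an int4 column at attno 1 becomes a
+ * BQTK_VAR_CONST term with l_attno = 1, r_const = Int32GetDatum(10),
+ * strict = true, and finfo pointing at int4lt.
+ */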
+
+/*
+ * BatchQualRuntime - execution state for batched qual evaluation
+ *
+ * Attached to ExprState.batch_private for the batched qual program.
+ * Contains the bitmask that tracks which rows pass the qual (bit set =
+ * pass), sized for up to executor_batch_rows rows.
+ *
+ * The mask uses standard bit addressing: word = i / 64, bit = i % 64.
+ * It is initialized to all-ones by EEOP_QUAL_BATCH_INITMASK, then each
+ * EEOP_QUAL_BATCH_TERM clears bits for failing rows.
+ */
+typedef struct BatchQualRuntime
+{
+	uint64		   *mask;
+	int				mask_words;
+} BatchQualRuntime;
+
+static inline BatchQualRuntime *
+ExecGetBatchQualRuntime(ExprState *batch_qual)
+{
+	return (BatchQualRuntime *) batch_qual->batch_private;
+}
+
+extern void ExecQualBatchInitMask(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
+extern void ExecQualBatchTerm(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
+
 #endif							/* EXEC_EXPR_H */
diff --git a/src/include/executor/execScan.h b/src/include/executor/execScan.h
index d9185331e22..008780ea230 100644
--- a/src/include/executor/execScan.h
+++ b/src/include/executor/execScan.h
@@ -320,4 +320,50 @@ ExecScanExtendedBatchSlot(ScanState *node,
 	}
 }
 
+/*
+ * ExecScanExtendedBatch
+ *		Batch-driven scan with batched qual evaluation.
+ *
+ * Unlike ExecScanExtendedBatchSlot which evaluates quals tuple-at-a-time,
+ * this function uses ExecQualBatch() to evaluate the entire batch at once
+ * using a bitmask. Qualifying tuples are collected into b->outslots.
+ *
+ * Returns the TupleBatch with nvalid set to the number of qualifying rows,
+ * or NULL at end-of-scan. Caller iterates b->outslots[0..nvalid-1].
+ *
+ * Note: EPQ is not supported; projection is not yet implemented.
+ */
+static inline TupleBatch *
+ExecScanExtendedBatch(ScanState *node,
+					  ExecScanAccessBatchMtd accessBatchMtd,
+					  ExprState *qual_batch, ProjectionInfo *projInfo)
+{
+	ExprContext *econtext = node->ps.ps_ExprContext;
+	TupleBatch *b = node->ps.ps_Batch;
+	int			qualified;
+
+	/* Batch path does not support EPQ */
+	Assert(node->ps.state->es_epq_active == NULL);
+	Assert(TupleBatchIsValid(b));
+
+	for (;;)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/* Get next batch from the AM */
+		if (!accessBatchMtd(node))
+			return NULL;
+
+		ResetExprContext(econtext);
+		qualified = ExecQualBatch(qual_batch, econtext, b);
+		InstrCountFiltered1(node, b->nvalid - qualified);
+		/* Update count and start using b->outslots. */
+		TupleBatchUseOutput(b, qualified);
+
+		if (qualified > 0)
+			return b;
+		/* else get the next batch from the AM */
+	}
+}
+
 #endif							/* EXECSCAN_H */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index e82fd6c0c8a..8cded15dec6 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -326,6 +326,7 @@ ExecProcNode(PlanState *node)
 extern ExprState *ExecInitExpr(Expr *node, PlanState *parent);
 extern ExprState *ExecInitExprWithParams(Expr *node, ParamListInfo ext_params);
 extern ExprState *ExecInitQual(List *qual, PlanState *parent);
+extern ExprState *ExecInitQualBatch(PlanState *ps);
 extern ExprState *ExecInitCheck(List *qual, PlanState *parent);
 extern List *ExecInitExprList(List *nodes, PlanState *parent);
 extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase,
@@ -553,6 +554,8 @@ ExecQualAndReset(ExprState *state, ExprContext *econtext)
 }
 #endif
 
+extern int ExecQualBatch(ExprState *state, ExprContext *econtext, TupleBatch *b);
+
 extern bool ExecCheck(ExprState *state, ExprContext *econtext);
 
 /*
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index a2dfd707e78..b06be83b141 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -346,6 +346,8 @@ extern Datum ExecFetchSlotHeapTupleDatum(TupleTableSlot *slot);
 extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum,
 								 int lastAttNum);
 extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
+struct TupleBatch;
+extern void slot_getsomeattrs_batch(struct TupleBatch *b, int attnum);
 
 
 #ifndef FRONTEND
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 6a191202ced..c79ee965372 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -148,6 +148,9 @@ typedef struct ExprState
 	 * ExecInitExprRec().
 	 */
 	ErrorSaveContext *escontext;
+
+	/* batched-program runtime state (e.g., BatchQualRuntime) */
+	void	   *batch_private;
 } ExprState;
 
 
@@ -314,6 +317,10 @@ typedef struct ExprContext
 #define FIELDNO_EXPRCONTEXT_NEWTUPLE 15
 	TupleTableSlot *ecxt_newtuple;
 
+	/* For batched evaluation using batch-aware EEOPs */
+#define FIELDNO_EXPRCONTEXT_SCANBATCH 16
+	TupleBatch	   *scan_batch;
+
 	/* Link to containing EState (NULL if a standalone ExprContext) */
 	struct EState *ecxt_estate;
 
@@ -1186,7 +1193,9 @@ typedef struct PlanState
 	 * state trees parallel links in the associated plan tree (except for the
 	 * subPlan list, which does not exist in the plan tree).
 	 */
-	ExprState  *qual;			/* boolean qual condition */
+	ExprState  *qual;			/* boolean qual condition (per tuple) */
+	ExprState  *qual_batch;		/* batched qual program, NULL if qual not
+								 * batchable */
 	PlanState  *lefttree;		/* input plan tree(s) */
 	PlanState  *righttree;
 
-- 
2.47.3

