Hi,

On 2024-01-22 13:01:31 +0700, John Naylor wrote:
> On Mon, Jul 17, 2023 at 9:58 PM Andres Freund <and...@anarazel.de> wrote:
> > And 397->320ms
> > for something as core as this, is imo worth considering on its own.
> 
> Hi Andres, this interesting work seems to have fallen off the radar --
> are you still planning to move forward with this for v17?

I had completely forgotten about this patch, but some discussion around
streaming read reminded me of it.  Here's a rebased version, with conflicts
resolved, very light comment polish, and a commit message added. Given that
there have been no other changes in the last few months, I'm inclined to push
it in a few hours.
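
For anyone who hasn't followed the thread, the gist of the patch is to move
the per-tuple loop into an always-inline helper that takes the loop-invariant
conditions as boolean parameters, and to dispatch to it with constant
arguments so the compiler can emit a specialized copy of the loop for each
combination. A rough, self-contained sketch of the pattern (plain C with
made-up names, not the actual PostgreSQL code):

#include <stdbool.h>
#include <stdio.h>

/*
 * Per-item loop with the invariant conditions passed as parameters.  The
 * always_inline attribute (GCC/Clang) ensures each constant-argument call
 * below gets its own specialized copy of the loop.
 */
static inline __attribute__((always_inline)) int
collect(const int *items, int nitems, int *out,
        bool all_visible, bool check_extra)
{
    int     n = 0;

    for (int i = 0; i < nitems; i++)
    {
        bool    valid = all_visible ? true : (items[i] % 2 == 0);

        if (check_extra && items[i] < 0)
            continue;           /* stands in for the serializability check */

        if (valid)
            out[n++] = items[i];
    }

    return n;
}

/*
 * Evaluate the conditions once per batch and dispatch with constant
 * arguments, analogous to what the patched function does once per page.
 */
static int
collect_dispatch(const int *items, int nitems, int *out,
                 bool all_visible, bool check_extra)
{
    if (all_visible)
        return check_extra
            ? collect(items, nitems, out, true, true)
            : collect(items, nitems, out, true, false);
    else
        return check_extra
            ? collect(items, nitems, out, false, true)
            : collect(items, nitems, out, false, false);
}

int
main(void)
{
    int     items[] = {1, 2, 3, 4, -6};
    int     out[5];

    printf("%d\n", collect_dispatch(items, 5, out, false, true));
    return 0;
}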

Greetings,

Andres Freund
From f2158455ac1a10eb393e25f7ddf87433a98e2ab0 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sat, 6 Apr 2024 20:51:07 -0700
Subject: [PATCH v2] Reduce branches in heapgetpage()'s per-tuple loop

Until now, heapgetpage()'s loop over all tuples performed some conditional
checks for each tuple, even though the conditions did not change across the
loop.
This commit fixes that by moving the loop into an inline function that is
called with different constant arguments, allowing the compiler to generate
an optimized loop for each combination of conditions, at the price of two
per-page checks.

For cases of all-visible tables and an isolation level other than
serializable, speedups of up to 25% have been measured.

Reviewed-by: John Naylor <johncnaylo...@gmail.com>
Discussion: https://postgr.es/m/20230716015656.xjvemfbp5fysj...@awork3.anarazel.de
---
 src/backend/access/heap/heapam.c | 102 ++++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 28 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index dada2ecd1e3..cbbc87dec9a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -364,6 +364,56 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
 	scan->rs_numblocks = numBlks;
 }
 
+/*
+ * Per-tuple loop for heapgetpage() in pagemode. Pulled out so it can be
+ * called multiple times, with constant arguments for all_visible,
+ * check_serializable.
+ */
+pg_attribute_always_inline
+static int
+heapgetpage_collect(HeapScanDesc scan, Snapshot snapshot,
+					Page page, Buffer buffer,
+					BlockNumber block, int lines,
+					bool all_visible, bool check_serializable)
+{
+	int			ntup = 0;
+	OffsetNumber lineoff;
+
+	for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
+	{
+		ItemId		lpp = PageGetItemId(page, lineoff);
+		bool		valid;
+		HeapTupleData loctup;
+
+		if (!ItemIdIsNormal(lpp))
+			continue;
+
+		loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
+		loctup.t_len = ItemIdGetLength(lpp);
+		loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
+		ItemPointerSet(&(loctup.t_self), block, lineoff);
+
+		if (all_visible)
+			valid = true;
+		else
+			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+
+		if (check_serializable)
+			HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
+												&loctup, buffer, snapshot);
+
+		if (valid)
+		{
+			scan->rs_vistuples[ntup] = lineoff;
+			ntup++;
+		}
+	}
+
+	Assert(ntup <= MaxHeapTuplesPerPage);
+
+	return ntup;
+}
+
 /*
  * heap_prepare_pagescan - Prepare current scan page to be scanned in pagemode
  *
@@ -379,9 +429,8 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	Snapshot	snapshot;
 	Page		page;
 	int			lines;
-	int			ntup;
-	OffsetNumber lineoff;
 	bool		all_visible;
+	bool		check_serializable;
 
 	Assert(BufferGetBlockNumber(buffer) == block);
 
@@ -403,7 +452,6 @@ heap_prepare_pagescan(TableScanDesc sscan)
 
 	page = BufferGetPage(buffer);
 	lines = PageGetMaxOffsetNumber(page);
-	ntup = 0;
 
 	/*
 	 * If the all-visible flag indicates that all tuples on the page are
@@ -426,37 +474,35 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	 * tuple for visibility the hard way.
 	 */
 	all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+	check_serializable =
+		CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
 
-	for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
+	/*
+	 * We call heapgetpage_collect() with constant arguments, for faster code,
+	 * without having to duplicate too much logic. The separate calls with
+	 * constant arguments are needed on several compilers to actually perform
+	 * constant folding.
+	 */
+	if (likely(all_visible))
 	{
-		ItemId		lpp = PageGetItemId(page, lineoff);
-		HeapTupleData loctup;
-		bool		valid;
-
-		if (!ItemIdIsNormal(lpp))
-			continue;
-
-		loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
-		loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
-		loctup.t_len = ItemIdGetLength(lpp);
-		ItemPointerSet(&(loctup.t_self), block, lineoff);
-
-		if (all_visible)
-			valid = true;
+		if (likely(!check_serializable))
+			scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+												   block, lines, true, false);
 		else
-			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
-
-		HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
-											&loctup, buffer, snapshot);
-
-		if (valid)
-			scan->rs_vistuples[ntup++] = lineoff;
+			scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+												   block, lines, true, true);
+	}
+	else
+	{
+		if (likely(!check_serializable))
+			scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+												   block, lines, false, false);
+		else
+			scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+												   block, lines, false, true);
 	}
 
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-
-	Assert(ntup <= MaxHeapTuplesPerPage);
-	scan->rs_ntuples = ntup;
 }
 
 /*
-- 
2.44.0.279.g3bd955d269
