On Wed, Jan 14, 2026 at 1:31 AM Andreas Karlsson <[email protected]> wrote:
> Yeah, that was a quite big thinko. I have a attached a patch with the
> thinko fixed but I am still not happy with it. I think I will try to use
> atomics.h and see if that makes the code nicer to read.
>
> Also will after that see what I can do about pageinspect.

I believe that pageinspect's heap_page_items function needs to use
get_page_from_raw -- see commit 14e9b18fed.

Attached patch 0002-* shows what's required. I'm including your
original 0001-* patch from your v2 here, to keep CFTester happy.

We (Tomas Vondra and I) are treating this as a dependency for our
index prefetching patch. It'd be good to get this done soon.
--
Peter Geoghegan
From a4536e8478eb4a5afc3f56f1eb4b3152e7a54696 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <[email protected]>
Date: Thu, 5 Feb 2026 01:19:01 -0500
Subject: [PATCH v3 2/2] Make pageinspect's heap_page_items use
 get_page_from_raw.

---
 contrib/pageinspect/heapfuncs.c | 13 +++----------
 contrib/pageinspect/rawpage.c   |  8 +++-----
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 8277fa256..761d24336 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -32,6 +32,7 @@
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "pageinspect.h"
 #include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -132,25 +133,17 @@ heap_page_items(PG_FUNCTION_ARGS)
 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
 	heap_page_items_state *inter_call_data = NULL;
 	FuncCallContext *fctx;
-	int			raw_page_size;
 
 	if (!superuser())
 		ereport(ERROR,
 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 				 errmsg("must be superuser to use raw page functions")));
 
-	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
-
 	if (SRF_IS_FIRSTCALL())
 	{
 		TupleDesc	tupdesc;
 		MemoryContext mctx;
 
-		if (raw_page_size < SizeOfPageHeaderData)
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("input page too small (%d bytes)", raw_page_size)));
-
 		fctx = SRF_FIRSTCALL_INIT();
 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
 
@@ -163,7 +156,7 @@ heap_page_items(PG_FUNCTION_ARGS)
 		inter_call_data->tupd = tupdesc;
 
 		inter_call_data->offset = FirstOffsetNumber;
-		inter_call_data->page = VARDATA(raw_page);
+		inter_call_data->page = get_page_from_raw(raw_page);
 
 		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
 		fctx->user_fctx = inter_call_data;
@@ -209,7 +202,7 @@ heap_page_items(PG_FUNCTION_ARGS)
 		if (ItemIdHasStorage(id) &&
 			lp_len >= MinHeapTupleSize &&
 			lp_offset == MAXALIGN(lp_offset) &&
-			lp_offset + lp_len <= raw_page_size)
+			lp_offset + lp_len <= BLCKSZ)
 		{
 			HeapTupleHeader tuphdr;
 
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
index af54afe5b..86fe245ca 100644
--- a/contrib/pageinspect/rawpage.c
+++ b/contrib/pageinspect/rawpage.c
@@ -208,11 +208,9 @@ get_raw_page_internal(text *relname, ForkNumber forknum, BlockNumber blkno)
  * Get a palloc'd, maxalign'ed page image from the result of get_raw_page()
  *
  * On machines with MAXALIGN = 8, the payload of a bytea is not maxaligned,
- * since it will start 4 bytes into a palloc'd value.  On alignment-picky
- * machines, this will cause failures in accesses to 8-byte-wide values
- * within the page.  We don't need to worry if accessing only 4-byte or
- * smaller fields, but when examining a struct that contains 8-byte fields,
- * use this function for safety.
+ * since it will start 4 bytes into a palloc'd value.  PageHeaderData requires
+ * 8 byte alignment, so always use this function when accessing page header
+ * fields from a raw page bytea.
  */
 Page
 get_page_from_raw(bytea *raw_page)
-- 
2.51.0

From 4b6df3e3944aea238b4bc329a9d595cda6da608d Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <[email protected]>
Date: Fri, 21 Nov 2025 10:15:06 +0100
Subject: [PATCH v3 1/2] Do not lock in BufferGetLSNAtomic() on archs with 8
 byte atomic reads

On platforms where we can read or write the whole LSN to a buffer page
we do not need to lock the buffer header to be sure we do not get a torn
LSN. To achieve this we need to make sure we always read and write the
whole LSN and do not write it field by field.

While working on this I converted PageXLogRecPtr from a struct with two
uint32 fields into a uint64 to make the intent more clear.

Idea by Andres Freund.
---
 src/include/access/gist.h           |  4 +--
 src/include/storage/bufpage.h       | 48 +++++++++++++++++++++--------
 src/backend/access/common/bufmask.c |  2 +-
 src/backend/storage/buffer/bufmgr.c | 12 ++++++--
 4 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 8fa30126f..9b385b13a 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -186,8 +186,8 @@ typedef struct GISTENTRY
 #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT)
 #define GistClearFollowRight(page)	( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT)
 
-#define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn))
-#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val))
+#define GistPageGetNSN(page) ( PageXLogRecPtrGet(&GistPageGetOpaque(page)->nsn))
+#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(&GistPageGetOpaque(page)->nsn, val))
 
 
 /*
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index ae3725b3b..2dbd9f19e 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -91,23 +91,45 @@ typedef uint16 LocationIndex;
 
 
 /*
- * For historical reasons, the 64-bit LSN value is stored as two 32-bit
- * values.
+ * For historical reasons, the storage of 64-bit LSN values depends on the
+ * endianness because the field used to be stored as two 32-bit values. When
+ * reading and writing the LSN we need to convert between the two formats.
+ *
+ * We are careful to try to treat the LSN as a single uint64 so callers like
+ * BufferGetLSNAtomic() can be sure there are no torn reads or writes.
  */
-typedef struct
-{
-	uint32		xlogid;			/* high bits */
-	uint32		xrecoff;		/* low bits */
-} PageXLogRecPtr;
+typedef uint64 PageXLogRecPtr;
+
+#ifdef WORDS_BIGENDIAN
 
 static inline XLogRecPtr
-PageXLogRecPtrGet(PageXLogRecPtr val)
+PageXLogRecPtrGet(const PageXLogRecPtr *val)
 {
-	return (uint64) val.xlogid << 32 | val.xrecoff;
+	return *val;
 }
 
-#define PageXLogRecPtrSet(ptr, lsn) \
-	((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
+static inline void
+PageXLogRecPtrSet(PageXLogRecPtr *ptr, XLogRecPtr lsn)
+{
+	*ptr = lsn;
+}
+
+#else
+
+static inline XLogRecPtr
+PageXLogRecPtrGet(const volatile PageXLogRecPtr *val)
+{
+	PageXLogRecPtr tmp = *val;
+	return (tmp << 32) | (tmp >> 32);
+}
+
+static inline void
+PageXLogRecPtrSet(volatile PageXLogRecPtr *ptr, XLogRecPtr lsn)
+{
+	*ptr = (lsn << 32) | (lsn >> 32);
+}
+
+#endif
 
 /*
  * disk page organization
@@ -385,12 +407,12 @@ PageGetMaxOffsetNumber(const PageData *page)
 static inline XLogRecPtr
 PageGetLSN(const PageData *page)
 {
-	return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
+	return PageXLogRecPtrGet(&((const PageHeaderData *) page)->pd_lsn);
 }
 static inline void
 PageSetLSN(Page page, XLogRecPtr lsn)
 {
-	PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
+	PageXLogRecPtrSet(&((PageHeader) page)->pd_lsn, lsn);
 }
 
 static inline bool
diff --git a/src/backend/access/common/bufmask.c b/src/backend/access/common/bufmask.c
index 1a9e7bea5..8a67bfa1a 100644
--- a/src/backend/access/common/bufmask.c
+++ b/src/backend/access/common/bufmask.c
@@ -32,7 +32,7 @@ mask_page_lsn_and_checksum(Page page)
 {
 	PageHeader	phdr = (PageHeader) page;
 
-	PageXLogRecPtrSet(phdr->pd_lsn, (uint64) MASK_MARKER);
+	PageXLogRecPtrSet(&phdr->pd_lsn, (uint64) MASK_MARKER);
 	phdr->pd_checksum = MASK_MARKER;
 }
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7241477ca..290239484 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4626,13 +4626,18 @@ BufferIsPermanent(Buffer buffer)
 
 /*
  * BufferGetLSNAtomic
- *		Retrieves the LSN of the buffer atomically using a buffer header lock.
- *		This is necessary for some callers who may not have an exclusive lock
- *		on the buffer.
+ *		Retrieves the LSN of the buffer atomically.
+ *
+ *		On platforms without 8 byte atomic reads/writes we need to take a
+ *		header lock. This is necessary for some callers who may not have an
+ *		exclusive lock on the buffer.
  */
 XLogRecPtr
 BufferGetLSNAtomic(Buffer buffer)
 {
+#ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
+	return PageGetLSN(BufferGetPage(buffer));
+#else
 	char	   *page = BufferGetPage(buffer);
 	BufferDesc *bufHdr;
 	XLogRecPtr	lsn;
@@ -4653,6 +4658,7 @@ BufferGetLSNAtomic(Buffer buffer)
 	UnlockBufHdr(bufHdr);
 
 	return lsn;
+#endif
 }
 
 /* ---------------------------------------------------------------------
-- 
2.51.0

Reply via email to