Hello hackers

Recently, we were given access to a test server: an IBM 9119-MHE with 8 CPUs 
* 8 cores * 8 threads. We decided to take advantage of this and look for 
read-scalability bottlenecks (pgbench -S).

Full details are here: 
http://www.postgrespro.ru/blog/pgsql/2015/08/30/p8scaling

On 9.4, performance stopped growing after 100 clients; on 9.5/9.6 it stopped 
after 150 (on 4 NUMA nodes). Profiling with perf pointed at ProcArrayLock 
contention in GetSnapshotData, but substituting a stub for GetSnapshotData 
did not significantly improve scalability. Hunting for the bottleneck with 
gdb, we found another hot spot: s_lock in PinBuffer and UnpinBuffer. As an 
experiment we rewrote PinBuffer and UnpinBuffer to use atomic operations, and 
we liked the result: the performance degradation almost completely 
disappeared, and throughput kept scaling up to 400 clients (4 NUMA nodes with 
256 "CPUs").
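
For reference, here is a minimal standalone sketch of the pattern we used 
(C11 <stdatomic.h> in place of pg_atomic_* so it compiles outside the tree; 
BM_MAX_USAGE_COUNT, BM_PIN_COUNT_WAITER and FakeBufferDesc are stand-ins for 
the real bufmgr definitions). The full patch is attached below.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define BM_MAX_USAGE_COUNT	5
#define BM_PIN_COUNT_WAITER	(1U << 0)

typedef struct
{
	_Atomic uint32_t refcount;		/* # of pins */
	_Atomic uint32_t usage_count;	/* clock-sweep counter */
	_Atomic uint32_t flags;			/* BM_* bits */
} FakeBufferDesc;

static void
pin(FakeBufferDesc *buf)
{
	uint32_t	expect;

	atomic_fetch_add(&buf->refcount, 1);

	/* bump usage_count without the header spinlock: CAS loop, capped */
	expect = atomic_load(&buf->usage_count);
	while (expect < BM_MAX_USAGE_COUNT)
	{
		/* on failure, expect is reloaded with the current value */
		if (atomic_compare_exchange_weak(&buf->usage_count,
										 &expect, expect + 1))
			break;
	}
}

static void
unpin(FakeBufferDesc *buf)
{
	/* fetch_sub returns the old value, so subtract 1 to get the new one */
	uint32_t	refcount = atomic_fetch_sub(&buf->refcount, 1) - 1;

	if (refcount == 1 &&
		(atomic_load(&buf->flags) & BM_PIN_COUNT_WAITER))
	{
		/*
		 * fetch-and returns the old flags, so only the one backend that
		 * actually clears the bit signals the waiter -- this replaces
		 * the mutual exclusion the spinlock used to provide.
		 */
		uint32_t	old = atomic_fetch_and(&buf->flags,
										   ~BM_PIN_COUNT_WAITER);

		if (old & BM_PIN_COUNT_WAITER)
			printf("last pin released: would ProcSendSignal() here\n");
	}
}

int
main(void)
{
	FakeBufferDesc buf = {0};

	pin(&buf);
	unpin(&buf);
	return 0;
}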

To scale Postgres on large NUMA machines, bufmgr must be ported to atomic 
operations. We found no errors in our patch during testing, but most likely 
some remain; the patch is intended for testing only.

Any thoughts?

-- 
Yuriy Zhuravlev
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848..5fdaca7 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -95,9 +95,9 @@ InitBufferPool(void)
 			BufferDesc *buf = GetBufferDescriptor(i);
 
 			CLEAR_BUFFERTAG(buf->tag);
-			buf->flags = 0;
-			buf->usage_count = 0;
-			buf->refcount = 0;
+			buf->flags.value = 0;
+			buf->usage_count.value = 0;
+			buf->refcount.value = 0;
 			buf->wait_backend_pid = 0;
 
 			SpinLockInit(&buf->buf_hdr_lock);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index cd3aaad..8cf97cb 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -714,8 +714,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		if (isLocalBuf)
 		{
 			/* Only need to adjust flags */
-			Assert(bufHdr->flags & BM_VALID);
-			bufHdr->flags &= ~BM_VALID;
+			Assert(bufHdr->flags.value & BM_VALID);
+			bufHdr->flags.value &= ~BM_VALID;
 		}
 		else
 		{
@@ -727,8 +727,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			do
 			{
 				LockBufHdr(bufHdr);
-				Assert(bufHdr->flags & BM_VALID);
-				bufHdr->flags &= ~BM_VALID;
+				Assert(bufHdr->flags.value & BM_VALID);
+				bufHdr->flags.value &= ~BM_VALID;
 				UnlockBufHdr(bufHdr);
 			} while (!StartBufferIO(bufHdr, true));
 		}
@@ -746,7 +746,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 * it's not been recycled) but come right back here to try smgrextend
 	 * again.
 	 */
-	Assert(!(bufHdr->flags & BM_VALID));		/* spinlock not needed */
+	Assert(!(bufHdr->flags.value & BM_VALID));		/* spinlock not needed */
 
 	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
 
@@ -824,7 +824,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (isLocalBuf)
 	{
 		/* Only need to adjust flags */
-		bufHdr->flags |= BM_VALID;
+		bufHdr->flags.value |= BM_VALID;
 	}
 	else
 	{
@@ -952,10 +952,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 */
 		buf = StrategyGetBuffer(strategy);
 
-		Assert(buf->refcount == 0);
+		Assert(buf->refcount.value == 0);
 
 		/* Must copy buffer flags while we still hold the spinlock */
-		oldFlags = buf->flags;
+		oldFlags = buf->flags.value;
 
 		/* Pin the buffer and then release the buffer spinlock */
 		PinBuffer_Locked(buf);
@@ -1149,8 +1149,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * recycle this buffer; we must undo everything we've done and start
 		 * over with a new victim buffer.
 		 */
-		oldFlags = buf->flags;
-		if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
+		oldFlags = buf->flags.value;
+		if (buf->refcount.value == 1 && !(oldFlags & BM_DIRTY))
 			break;
 
 		UnlockBufHdr(buf);
@@ -1171,12 +1171,12 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 * 1 so that the buffer can survive one clock-sweep pass.)
 	 */
 	buf->tag = newTag;
-	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
+	buf->flags.value &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
 	if (relpersistence == RELPERSISTENCE_PERMANENT)
-		buf->flags |= BM_TAG_VALID | BM_PERMANENT;
+		buf->flags.value |= BM_TAG_VALID | BM_PERMANENT;
 	else
-		buf->flags |= BM_TAG_VALID;
-	buf->usage_count = 1;
+		buf->flags.value |= BM_TAG_VALID;
+	buf->usage_count.value = 1;
 
 	UnlockBufHdr(buf);
 
@@ -1268,7 +1268,7 @@ retry:
 	 * yet done StartBufferIO, WaitIO will fall through and we'll effectively
 	 * be busy-looping here.)
 	 */
-	if (buf->refcount != 0)
+	if (buf->refcount.value != 0)
 	{
 		UnlockBufHdr(buf);
 		LWLockRelease(oldPartitionLock);
@@ -1283,10 +1283,10 @@ retry:
 	 * Clear out the buffer's tag and flags.  We must do this to ensure that
 	 * linear scans of the buffer array don't think the buffer is valid.
 	 */
-	oldFlags = buf->flags;
+	oldFlags = buf->flags.value;
 	CLEAR_BUFFERTAG(buf->tag);
-	buf->flags = 0;
-	buf->usage_count = 0;
+	buf->flags.value = 0;
+	buf->usage_count.value = 0;
 
 	UnlockBufHdr(buf);
 
@@ -1338,12 +1338,12 @@ MarkBufferDirty(Buffer buffer)
 
 	LockBufHdr(bufHdr);
 
-	Assert(bufHdr->refcount > 0);
+	Assert(bufHdr->refcount.value > 0);
 
 	/*
 	 * If the buffer was not dirty already, do vacuum accounting.
 	 */
-	if (!(bufHdr->flags & BM_DIRTY))
+	if (!(bufHdr->flags.value & BM_DIRTY))
 	{
 		VacuumPageDirty++;
 		pgBufferUsage.shared_blks_dirtied++;
@@ -1351,7 +1351,7 @@ MarkBufferDirty(Buffer buffer)
 			VacuumCostBalance += VacuumCostPageDirty;
 	}
 
-	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+	bufHdr->flags.value |= (BM_DIRTY | BM_JUST_DIRTIED);
 
 	UnlockBufHdr(bufHdr);
 }
@@ -1437,20 +1437,23 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
 		ReservePrivateRefCountEntry();
 		ref = NewPrivateRefCountEntry(b);
 
-		LockBufHdr(buf);
-		buf->refcount++;
+		pg_atomic_add_fetch_u32(&buf->refcount, 1);
+
 		if (strategy == NULL)
 		{
-			if (buf->usage_count < BM_MAX_USAGE_COUNT)
-				buf->usage_count++;
+			uint32 expect = buf->usage_count.value;
+			while (expect < BM_MAX_USAGE_COUNT)
+			{
+				if (pg_atomic_compare_exchange_u32(&buf->usage_count, &expect, expect+1))
+					break;
+			}
 		}
 		else
 		{
-			if (buf->usage_count == 0)
-				buf->usage_count = 1;
+			uint32 expect = 0;
+			pg_atomic_compare_exchange_u32(&buf->usage_count, &expect, 1);
 		}
-		result = (buf->flags & BM_VALID) != 0;
-		UnlockBufHdr(buf);
+		result = (buf->flags.value & BM_VALID) != 0;
 	}
 	else
 	{
@@ -1497,7 +1500,7 @@ PinBuffer_Locked(volatile BufferDesc *buf)
 	 */
 	Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
 
-	buf->refcount++;
+	buf->refcount.value++;
 	UnlockBufHdr(buf);
 
 	b = BufferDescriptorGetBuffer(buf);
@@ -1521,6 +1524,7 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
 {
 	PrivateRefCountEntry *ref;
 	Buffer		b = BufferDescriptorGetBuffer(buf);
+	uint32		refcount;
 
 	/* not moving as we're likely deleting it soon anyway */
 	ref = GetPrivateRefCountEntry(b, false);
@@ -1537,25 +1541,23 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
 		Assert(!LWLockHeldByMe(buf->content_lock));
 		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
 
-		LockBufHdr(buf);
-
 		/* Decrement the shared reference count */
-		Assert(buf->refcount > 0);
-		buf->refcount--;
+		Assert(buf->refcount.value > 0);
+		refcount = pg_atomic_sub_fetch_u32(&buf->refcount, 1);
 
 		/* Support LockBufferForCleanup() */
-		if ((buf->flags & BM_PIN_COUNT_WAITER) &&
-			buf->refcount == 1)
+		if (refcount == 1 &&
+			(buf->flags.value & BM_PIN_COUNT_WAITER))
 		{
 			/* we just released the last pin other than the waiter's */
 			int			wait_backend_pid = buf->wait_backend_pid;
+			uint32		flags;
 
-			buf->flags &= ~BM_PIN_COUNT_WAITER;
-			UnlockBufHdr(buf);
-			ProcSendSignal(wait_backend_pid);
+			flags = pg_atomic_fetch_and_u32(&buf->flags, ~BM_PIN_COUNT_WAITER);
+
+			if (flags & BM_PIN_COUNT_WAITER)
+				ProcSendSignal(wait_backend_pid);
 		}
-		else
-			UnlockBufHdr(buf);
 
 		ForgetPrivateRefCountEntry(ref);
 	}
@@ -1619,9 +1621,9 @@ BufferSync(int flags)
 		 */
 		LockBufHdr(bufHdr);
 
-		if ((bufHdr->flags & mask) == mask)
+		if ((bufHdr->flags.value & mask) == mask)
 		{
-			bufHdr->flags |= BM_CHECKPOINT_NEEDED;
+			bufHdr->flags.value |= BM_CHECKPOINT_NEEDED;
 			num_to_write++;
 		}
 
@@ -1660,7 +1662,7 @@ BufferSync(int flags)
 		 * write the buffer though we didn't need to.  It doesn't seem worth
 		 * guarding against this, though.
 		 */
-		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
+		if (bufHdr->flags.value & BM_CHECKPOINT_NEEDED)
 		{
 			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
 			{
@@ -2034,7 +2036,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 	 */
 	LockBufHdr(bufHdr);
 
-	if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
+	if (bufHdr->refcount.value == 0 && bufHdr->usage_count.value == 0)
 		result |= BUF_REUSABLE;
 	else if (skip_recently_used)
 	{
@@ -2043,7 +2045,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 		return result;
 	}
 
-	if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
+	if (!(bufHdr->flags.value & BM_VALID) || !(bufHdr->flags.value & BM_DIRTY))
 	{
 		/* It's clean, so nothing to do */
 		UnlockBufHdr(bufHdr);
@@ -2216,8 +2218,8 @@ PrintBufferLeakWarning(Buffer buffer)
 		 "buffer refcount leak: [%03d] "
 		 "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
 		 buffer, path,
-		 buf->tag.blockNum, buf->flags,
-		 buf->refcount, loccount);
+		 buf->tag.blockNum, buf->flags.value,
+		 buf->refcount.value, loccount);
 	pfree(path);
 }
 
@@ -2363,7 +2365,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 	recptr = BufferGetLSN(buf);
 
 	/* To check if block content changes while flushing. - vadim 01/17/97 */
-	buf->flags &= ~BM_JUST_DIRTIED;
+	buf->flags.value &= ~BM_JUST_DIRTIED;
 	UnlockBufHdr(buf);
 
 	/*
@@ -2383,7 +2385,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 	 * disastrous system-wide consequences.  To make sure that can't happen,
 	 * skip the flush if the buffer isn't permanent.
 	 */
-	if (buf->flags & BM_PERMANENT)
+	if (buf->flags.value & BM_PERMANENT)
 		XLogFlush(recptr);
 
 	/*
@@ -2477,7 +2479,7 @@ BufferIsPermanent(Buffer buffer)
 	 * old value or the new value, but not random garbage.
 	 */
 	bufHdr = GetBufferDescriptor(buffer - 1);
-	return (bufHdr->flags & BM_PERMANENT) != 0;
+	return (bufHdr->flags.value & BM_PERMANENT) != 0;
 }
 
 /*
@@ -2747,8 +2749,8 @@ PrintBufferDescs(void)
 			 "blockNum=%u, flags=0x%x, refcount=%u %d)",
 			 i, buf->freeNext,
 		  relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum),
-			 buf->tag.blockNum, buf->flags,
-			 buf->refcount, GetPrivateRefCount(b));
+			 buf->tag.blockNum, buf->flags.value,
+			 buf->refcount.value, GetPrivateRefCount(b));
 	}
 }
 #endif
@@ -2772,8 +2774,8 @@ PrintPinnedBufs(void)
 				 "blockNum=%u, flags=0x%x, refcount=%u %d)",
 				 i, buf->freeNext,
 				 relpathperm(buf->tag.rnode, buf->tag.forkNum),
-				 buf->tag.blockNum, buf->flags,
-				 buf->refcount, GetPrivateRefCount(b));
+				 buf->tag.blockNum, buf->flags.value,
+				 buf->refcount.value, GetPrivateRefCount(b));
 		}
 	}
 }
@@ -2813,7 +2815,7 @@ FlushRelationBuffers(Relation rel)
 		{
 			bufHdr = GetLocalBufferDescriptor(i);
 			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
-				(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+				(bufHdr->flags.value & BM_VALID) && (bufHdr->flags.value & BM_DIRTY))
 			{
 				ErrorContextCallback errcallback;
 				Page		localpage;
@@ -2834,7 +2836,7 @@ FlushRelationBuffers(Relation rel)
 						  localpage,
 						  false);
 
-				bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
+				bufHdr->flags.value &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 
 				/* Pop the error context stack */
 				error_context_stack = errcallback.previous;
@@ -2862,7 +2864,7 @@ FlushRelationBuffers(Relation rel)
 
 		LockBufHdr(bufHdr);
 		if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
-			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+			(bufHdr->flags.value & BM_VALID) && (bufHdr->flags.value & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -2914,7 +2916,7 @@ FlushDatabaseBuffers(Oid dbid)
 
 		LockBufHdr(bufHdr);
 		if (bufHdr->tag.rnode.dbNode == dbid &&
-			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+			(bufHdr->flags.value & BM_VALID) && (bufHdr->flags.value & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -3032,7 +3034,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 	 * is only intended to be used in cases where failing to write out the
 	 * data would be harmless anyway, it doesn't really matter.
 	 */
-	if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
+	if ((bufHdr->flags.value & (BM_DIRTY | BM_JUST_DIRTIED)) !=
 		(BM_DIRTY | BM_JUST_DIRTIED))
 	{
 		XLogRecPtr	lsn = InvalidXLogRecPtr;
@@ -3048,7 +3050,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 		 * We don't check full_page_writes here because that logic is included
 		 * when we call XLogInsert() since the value changes dynamically.
 		 */
-		if (XLogHintBitIsNeeded() && (bufHdr->flags & BM_PERMANENT))
+		if (XLogHintBitIsNeeded() && (bufHdr->flags.value & BM_PERMANENT))
 		{
 			/*
 			 * If we're in recovery we cannot dirty a page because of a hint.
@@ -3088,8 +3090,8 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 		}
 
 		LockBufHdr(bufHdr);
-		Assert(bufHdr->refcount > 0);
-		if (!(bufHdr->flags & BM_DIRTY))
+		Assert(bufHdr->refcount.value > 0);
+		if (!(bufHdr->flags.value & BM_DIRTY))
 		{
 			dirtied = true;		/* Means "will be dirtied by this action" */
 
@@ -3109,7 +3111,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 			if (!XLogRecPtrIsInvalid(lsn))
 				PageSetLSN(page, lsn);
 		}
-		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+		bufHdr->flags.value |= (BM_DIRTY | BM_JUST_DIRTIED);
 		UnlockBufHdr(bufHdr);
 
 		if (delayChkpt)
@@ -3147,9 +3149,9 @@ UnlockBuffers(void)
 		 * Don't complain if flag bit not set; it could have been reset but we
 		 * got a cancel/die interrupt before getting the signal.
 		 */
-		if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
+		if ((buf->flags.value & BM_PIN_COUNT_WAITER) != 0 &&
 			buf->wait_backend_pid == MyProcPid)
-			buf->flags &= ~BM_PIN_COUNT_WAITER;
+			buf->flags.value &= ~BM_PIN_COUNT_WAITER;
 
 		UnlockBufHdr(buf);
 
@@ -3246,22 +3248,22 @@ LockBufferForCleanup(Buffer buffer)
 		/* Try to acquire lock */
 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		LockBufHdr(bufHdr);
-		Assert(bufHdr->refcount > 0);
-		if (bufHdr->refcount == 1)
+		Assert(bufHdr->refcount.value > 0);
+		if (bufHdr->refcount.value == 1)
 		{
 			/* Successfully acquired exclusive lock with pincount 1 */
 			UnlockBufHdr(bufHdr);
 			return;
 		}
 		/* Failed, so mark myself as waiting for pincount 1 */
-		if (bufHdr->flags & BM_PIN_COUNT_WAITER)
+		if (bufHdr->flags.value & BM_PIN_COUNT_WAITER)
 		{
 			UnlockBufHdr(bufHdr);
 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 			elog(ERROR, "multiple backends attempting to wait for pincount 1");
 		}
 		bufHdr->wait_backend_pid = MyProcPid;
-		bufHdr->flags |= BM_PIN_COUNT_WAITER;
+		bufHdr->flags.value |= BM_PIN_COUNT_WAITER;
 		PinCountWaitBuf = bufHdr;
 		UnlockBufHdr(bufHdr);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -3288,9 +3290,9 @@ LockBufferForCleanup(Buffer buffer)
 		 * better be safe.
 		 */
 		LockBufHdr(bufHdr);
-		if ((bufHdr->flags & BM_PIN_COUNT_WAITER) != 0 &&
+		if ((bufHdr->flags.value & BM_PIN_COUNT_WAITER) != 0 &&
 			bufHdr->wait_backend_pid == MyProcPid)
-			bufHdr->flags &= ~BM_PIN_COUNT_WAITER;
+			bufHdr->flags.value &= ~BM_PIN_COUNT_WAITER;
 		UnlockBufHdr(bufHdr);
 
 		PinCountWaitBuf = NULL;
@@ -3356,8 +3358,8 @@ ConditionalLockBufferForCleanup(Buffer buffer)
 
 	bufHdr = GetBufferDescriptor(buffer - 1);
 	LockBufHdr(bufHdr);
-	Assert(bufHdr->refcount > 0);
-	if (bufHdr->refcount == 1)
+	Assert(bufHdr->refcount.value > 0);
+	if (bufHdr->refcount.value == 1)
 	{
 		/* Successfully acquired exclusive lock with pincount 1 */
 		UnlockBufHdr(bufHdr);
@@ -3403,7 +3405,7 @@ WaitIO(volatile BufferDesc *buf)
 		 * play it safe.
 		 */
 		LockBufHdr(buf);
-		sv_flags = buf->flags;
+		sv_flags = buf->flags.value;
 		UnlockBufHdr(buf);
 		if (!(sv_flags & BM_IO_IN_PROGRESS))
 			break;
@@ -3445,7 +3447,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 
 		LockBufHdr(buf);
 
-		if (!(buf->flags & BM_IO_IN_PROGRESS))
+		if (!(buf->flags.value & BM_IO_IN_PROGRESS))
 			break;
 
 		/*
@@ -3461,7 +3463,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 
 	/* Once we get here, there is definitely no I/O active on this buffer */
 
-	if (forInput ? (buf->flags & BM_VALID) : !(buf->flags & BM_DIRTY))
+	if (forInput ? (buf->flags.value & BM_VALID) : !(buf->flags.value & BM_DIRTY))
 	{
 		/* someone else already did the I/O */
 		UnlockBufHdr(buf);
@@ -3469,7 +3471,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 		return false;
 	}
 
-	buf->flags |= BM_IO_IN_PROGRESS;
+	buf->flags.value |= BM_IO_IN_PROGRESS;
 
 	UnlockBufHdr(buf);
 
@@ -3504,11 +3506,11 @@ TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
 
 	LockBufHdr(buf);
 
-	Assert(buf->flags & BM_IO_IN_PROGRESS);
-	buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
-	if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
-		buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
-	buf->flags |= set_flag_bits;
+	Assert(buf->flags.value & BM_IO_IN_PROGRESS);
+	buf->flags.value &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
+	if (clear_dirty && !(buf->flags.value & BM_JUST_DIRTIED))
+		buf->flags.value &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
+	buf->flags.value |= set_flag_bits;
 
 	UnlockBufHdr(buf);
 
@@ -3542,19 +3544,19 @@ AbortBufferIO(void)
 		LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
 
 		LockBufHdr(buf);
-		Assert(buf->flags & BM_IO_IN_PROGRESS);
+		Assert(buf->flags.value & BM_IO_IN_PROGRESS);
 		if (IsForInput)
 		{
-			Assert(!(buf->flags & BM_DIRTY));
+			Assert(!(buf->flags.value & BM_DIRTY));
 			/* We'd better not think buffer is valid yet */
-			Assert(!(buf->flags & BM_VALID));
+			Assert(!(buf->flags.value & BM_VALID));
 			UnlockBufHdr(buf);
 		}
 		else
 		{
 			BufFlags	sv_flags;
 
-			sv_flags = buf->flags;
+			sv_flags = buf->flags.value;
 			Assert(sv_flags & BM_DIRTY);
 			UnlockBufHdr(buf);
 			/* Issue notice if this is not the first failure... */
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index bc2c773..6277a2a 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -280,7 +280,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
 			 * of 8.3, but we'd better check anyway.)
 			 */
 			LockBufHdr(buf);
-			if (buf->refcount == 0 && buf->usage_count == 0)
+			if (buf->refcount.value == 0 && buf->usage_count.value == 0)
 			{
 				if (strategy != NULL)
 					AddBufferToRing(strategy, buf);
@@ -303,11 +303,11 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
 		 * it; decrement the usage_count (unless pinned) and keep scanning.
 		 */
 		LockBufHdr(buf);
-		if (buf->refcount == 0)
+		if (buf->refcount.value == 0)
 		{
-			if (buf->usage_count > 0)
+			if (buf->usage_count.value > 0)
 			{
-				buf->usage_count--;
+				buf->usage_count.value--;
 				trycounter = NBuffers;
 			}
 			else
@@ -617,7 +617,7 @@ GetBufferFromRing(BufferAccessStrategy strategy)
 	 */
 	buf = GetBufferDescriptor(bufnum - 1);
 	LockBufHdr(buf);
-	if (buf->refcount == 0 && buf->usage_count <= 1)
+	if (buf->refcount.value == 0 && buf->usage_count.value <= 1)
 	{
 		strategy->current_was_in_ring = true;
 		return buf;
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 3144afe..650ea00 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -131,13 +131,13 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		/* this part is equivalent to PinBuffer for a shared buffer */
 		if (LocalRefCount[b] == 0)
 		{
-			if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
-				bufHdr->usage_count++;
+			if (bufHdr->usage_count.value < BM_MAX_USAGE_COUNT)
+				bufHdr->usage_count.value++;
 		}
 		LocalRefCount[b]++;
 		ResourceOwnerRememberBuffer(CurrentResourceOwner,
 									BufferDescriptorGetBuffer(bufHdr));
-		if (bufHdr->flags & BM_VALID)
+		if (bufHdr->flags.value & BM_VALID)
 			*foundPtr = TRUE;
 		else
 		{
@@ -169,9 +169,9 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 
 		if (LocalRefCount[b] == 0)
 		{
-			if (bufHdr->usage_count > 0)
+			if (bufHdr->usage_count.value > 0)
 			{
-				bufHdr->usage_count--;
+				bufHdr->usage_count.value--;
 				trycounter = NLocBuffer;
 			}
 			else
@@ -193,7 +193,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	 * this buffer is not referenced but it might still be dirty. if that's
 	 * the case, write it out before reusing it!
 	 */
-	if (bufHdr->flags & BM_DIRTY)
+	if (bufHdr->flags.value & BM_DIRTY)
 	{
 		SMgrRelation oreln;
 		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);
@@ -211,7 +211,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 				  false);
 
 		/* Mark not-dirty now in case we error out below */
-		bufHdr->flags &= ~BM_DIRTY;
+		bufHdr->flags.value &= ~BM_DIRTY;
 
 		pgBufferUsage.local_blks_written++;
 	}
@@ -228,7 +228,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	/*
 	 * Update the hash table: remove old entry, if any, and make new one.
 	 */
-	if (bufHdr->flags & BM_TAG_VALID)
+	if (bufHdr->flags.value & BM_TAG_VALID)
 	{
 		hresult = (LocalBufferLookupEnt *)
 			hash_search(LocalBufHash, (void *) &bufHdr->tag,
@@ -237,7 +237,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 			elog(ERROR, "local buffer hash table corrupted");
 		/* mark buffer invalid just in case hash insert fails */
 		CLEAR_BUFFERTAG(bufHdr->tag);
-		bufHdr->flags &= ~(BM_VALID | BM_TAG_VALID);
+		bufHdr->flags.value &= ~(BM_VALID | BM_TAG_VALID);
 	}
 
 	hresult = (LocalBufferLookupEnt *)
@@ -250,9 +250,9 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	 * it's all ours now.
 	 */
 	bufHdr->tag = newTag;
-	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
-	bufHdr->flags |= BM_TAG_VALID;
-	bufHdr->usage_count = 1;
+	bufHdr->flags.value &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
+	bufHdr->flags.value |= BM_TAG_VALID;
+	bufHdr->usage_count.value = 1;
 
 	*foundPtr = FALSE;
 	return bufHdr;
@@ -280,10 +280,10 @@ MarkLocalBufferDirty(Buffer buffer)
 
 	bufHdr = GetLocalBufferDescriptor(bufid);
 
-	if (!(bufHdr->flags & BM_DIRTY))
+	if (!(bufHdr->flags.value & BM_DIRTY))
 		pgBufferUsage.local_blks_dirtied++;
 
-	bufHdr->flags |= BM_DIRTY;
+	bufHdr->flags.value |= BM_DIRTY;
 }
 
 /*
@@ -308,7 +308,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
 		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
 		LocalBufferLookupEnt *hresult;
 
-		if ((bufHdr->flags & BM_TAG_VALID) &&
+		if ((bufHdr->flags.value & BM_TAG_VALID) &&
 			RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
 			bufHdr->tag.forkNum == forkNum &&
 			bufHdr->tag.blockNum >= firstDelBlock)
@@ -327,8 +327,8 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
 				elog(ERROR, "local buffer hash table corrupted");
 			/* Mark buffer invalid */
 			CLEAR_BUFFERTAG(bufHdr->tag);
-			bufHdr->flags = 0;
-			bufHdr->usage_count = 0;
+			bufHdr->flags.value = 0;
+			bufHdr->usage_count.value = 0;
 		}
 	}
 }
@@ -350,7 +350,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
 		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
 		LocalBufferLookupEnt *hresult;
 
-		if ((bufHdr->flags & BM_TAG_VALID) &&
+		if ((bufHdr->flags.value & BM_TAG_VALID) &&
 			RelFileNodeEquals(bufHdr->tag.rnode, rnode))
 		{
 			if (LocalRefCount[i] != 0)
@@ -367,8 +367,8 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
 				elog(ERROR, "local buffer hash table corrupted");
 			/* Mark buffer invalid */
 			CLEAR_BUFFERTAG(bufHdr->tag);
-			bufHdr->flags = 0;
-			bufHdr->usage_count = 0;
+			bufHdr->flags.value = 0;
+			bufHdr->usage_count.value = 0;
 		}
 	}
 }
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 521ee1c..aa65bfb 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -137,9 +137,9 @@ typedef struct buftag
 typedef struct BufferDesc
 {
 	BufferTag	tag;			/* ID of page contained in buffer */
-	BufFlags	flags;			/* see bit definitions above */
-	uint16		usage_count;	/* usage counter for clock sweep code */
-	unsigned	refcount;		/* # of backends holding pins on buffer */
+	pg_atomic_uint32		usage_count;	/* usage counter for clock sweep code */
+	pg_atomic_uint32		flags;			/* see bit definitions above */
+	pg_atomic_uint32		refcount;		/* # of backends holding pins on buffer */
 	int			wait_backend_pid;		/* backend PID of pin-count waiter */
 
 	slock_t		buf_hdr_lock;	/* protects the above fields */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
