Hi, 

> Hi,
>
> On 2024-01-22 15:18:35 +0800, Andy Fan wrote:
>> I used sigismember(&BlockSig, SIGQUIT) to detect if a process is doing a
>> quickdie, however this is bad not only because it doesn't work on
>> Windows, but also it has too poor performance even it impacts on
>> USE_ASSERT_CHECKING build only. In v8, I introduced a new global
>> variable quickDieInProgress to handle this.
>
> For reasons you already noted, using sigismember() isn't viable. But I am not
> convinced by quickDieInProgress either. I think we could just reset the state
> for the spinlock check in the code handling PANIC, perhaps via a helper
> function in spin.c.

Handled with the action for your suggestion #3. 

>> +void
>> +VerifyNoSpinLocksHeld(void)
>> +{
>> +#ifdef USE_ASSERT_CHECKING
>> +    if (last_spin_lock_file != NULL)
>> +            elog(PANIC, "A spin lock has been held at %s:%d",
>> +                     last_spin_lock_file, last_spin_lock_lineno);
>> +#endif
>> +}
>
> I think the #ifdef for this needs to be in the header, not here. Otherwise we
> add a pointless external function call to a bunch of performance critical
> code.
>
Done.

>> diff --git a/src/backend/storage/buffer/bufmgr.c 
>> b/src/backend/storage/buffer/bufmgr.c
>> index 7d601bef6d..c600a113cf 100644
>> --- a/src/backend/storage/buffer/bufmgr.c
>> +++ b/src/backend/storage/buffer/bufmgr.c
>> @@ -5409,6 +5409,7 @@ LockBufHdr(BufferDesc *desc)
>>  
>>      init_local_spin_delay(&delayStatus);
>>  
>> +    START_SPIN_LOCK();
>>      while (true)
>>      {
>>              /* set BM_LOCKED flag */
>
> Seems pretty odd that we now need init_local_spin_delay() and
> START_SPIN_LOCK(). Note that init_local_spin_delay() also wraps handling of
> __FILE__, __LINE__ etc, so it seems we're duplicating state here.

Thanks for catching this! Based on the feedbacks so far, it is not OK to
acquire another spin lock when holding one already. So I refactor the
code like this:

 /*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
+ * Support for spin delay and spin misuse detection purpose.
+ *
+ * spin delay which is useful in various places where spinlock-like
+ * procedures take place.
+ *
+ * spin misuse is based on global spinStatus to know if a spin lock
+ * is held when a heavy operation is taking.
  */
 typedef struct
 {
@@ -846,22 +854,40 @@ typedef struct
        const char *file;
        int                     line;
        const char *func;
-} SpinDelayStatus;
+       bool            in_panic; /* works for spin lock misuse purpose. */
+} SpinLockStatus;

+extern PGDLLIMPORT SpinLockStatus spinStatus;

Now all the init_local_spin_delay, perform_spin_delay, finish_spin_delay
access the same global variable spinStatus. and just 2 extra functions
added (previously have 3). they are:

extern void VerifyNoSpinLocksHeld(bool check_in_panic);
extern void ResetSpinLockStatus(void);

The panic check stuff is still added into spinLockStatus. 

v9 attached.

-- 
Best Regards
Andy Fan

>From 4c8fd0ab71299e57fbeb18dec70051bd1d035c7c Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi....@alibaba-inc.com>
Date: Thu, 25 Jan 2024 15:19:49 +0800
Subject: [PATCH v9 1/1] Detect misuse of spin lock automatically

Spin lock are intended for very short-term locks, but it is possible
to be misused in many cases. e.g. Acquiring another LWLocks or regular
locks, memory allocation, errstart when holding a spin lock. this patch
would detect such misuse automatically in a USE_ASSERT_CHECKING build.

CHECK_FOR_INTERRUPTS should be avoided as well when holding a spin lock.
Depends on what signals are left to handle, PG may raise error/fatal
which would cause the code jump to some other places which is hardly to
release the spin lock anyway.
---
 src/backend/storage/buffer/bufmgr.c | 24 +++++++----
 src/backend/storage/lmgr/lock.c     |  6 +++
 src/backend/storage/lmgr/lwlock.c   | 21 +++++++---
 src/backend/storage/lmgr/s_lock.c   | 63 ++++++++++++++++++++---------
 src/backend/tcop/postgres.c         |  6 +++
 src/backend/utils/error/elog.c      | 10 +++++
 src/backend/utils/mmgr/mcxt.c       | 16 ++++++++
 src/include/miscadmin.h             | 21 +++++++++-
 src/include/storage/buf_internals.h |  1 +
 src/include/storage/s_lock.h        | 56 ++++++++++++++++++-------
 src/tools/pgindent/typedefs.list    |  2 +-
 11 files changed, 176 insertions(+), 50 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7d601bef6d..739a94209b 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5402,12 +5402,11 @@ rlocator_comparator(const void *p1, const void *p2)
 uint32
 LockBufHdr(BufferDesc *desc)
 {
-	SpinDelayStatus delayStatus;
 	uint32		old_buf_state;
 
 	Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
 
-	init_local_spin_delay(&delayStatus);
+	init_local_spin_delay();
 
 	while (true)
 	{
@@ -5416,9 +5415,9 @@ LockBufHdr(BufferDesc *desc)
 		/* if it wasn't set before we're OK */
 		if (!(old_buf_state & BM_LOCKED))
 			break;
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 	}
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
 	return old_buf_state | BM_LOCKED;
 }
 
@@ -5432,20 +5431,29 @@ LockBufHdr(BufferDesc *desc)
 static uint32
 WaitBufHdrUnlocked(BufferDesc *buf)
 {
-	SpinDelayStatus delayStatus;
 	uint32		buf_state;
 
-	init_local_spin_delay(&delayStatus);
+	/*
+	 * Suppose the buf will not be locked for a long time, setup a spin on
+	 * this.
+	 */
+	init_local_spin_delay();
 
 	buf_state = pg_atomic_read_u32(&buf->state);
 
 	while (buf_state & BM_LOCKED)
 	{
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 		buf_state = pg_atomic_read_u32(&buf->state);
 	}
 
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
+
+	/*
+	 * This just a 'Wait', there is no spin lock is held now semantically
+	 * since the work which the spin is for is done already.
+	 */
+	ResetSpinLockStatus();
 
 	return buf_state;
 }
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c70a1adb9a..5383335671 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -776,6 +776,12 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	bool		found_conflict;
 	bool		log_lock = false;
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 2f2de5a562..fbc933ccbd 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -902,19 +902,24 @@ LWLockWaitListLock(LWLock *lock)
 
 		/* and then spin without atomic operations until lock is released */
 		{
-			SpinDelayStatus delayStatus;
-
-			init_local_spin_delay(&delayStatus);
+			init_local_spin_delay();
 
 			while (old_state & LW_FLAG_LOCKED)
 			{
-				perform_spin_delay(&delayStatus);
+				perform_spin_delay();
 				old_state = pg_atomic_read_u32(&lock->state);
 			}
 #ifdef LWLOCK_STATS
 			delays += delayStatus.delays;
 #endif
-			finish_spin_delay(&delayStatus);
+			finish_spin_delay();
+
+			/*
+			 * This just a 'Wait', there is no spin lock is held now
+			 * semantically since the work which the spin is for is done
+			 * already.
+			 */
+			ResetSpinLockStatus();
 		}
 
 		/*
@@ -1209,6 +1214,12 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	PRINT_LWDEBUG("LWLockAcquire", lock, mode);
 
 #ifdef LWLOCK_STATS
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 0e5f7ab0b9..fd1cd0eb70 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -65,7 +65,7 @@
 slock_t		dummy_spinlock;
 
 static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
-
+SpinLockStatus spinStatus = {0, 0, 0, NULL, -1, NULL, false};
 
 /*
  * s_lock_stuck() - complain about a stuck spinlock
@@ -92,18 +92,17 @@ s_lock_stuck(const char *file, int line, const char *func)
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 {
-	SpinDelayStatus delayStatus;
 
-	init_spin_delay(&delayStatus, file, line, func);
+	init_spin_delay(file, line, func);
 
 	while (TAS_SPIN(lock))
 	{
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 	}
 
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
 
-	return delayStatus.delays;
+	return spinStatus.delays;
 }
 
 #ifdef USE_DEFAULT_S_UNLOCK
@@ -123,19 +122,19 @@ s_unlock(volatile slock_t *lock)
  * Wait while spinning on a contended spinlock.
  */
 void
-perform_spin_delay(SpinDelayStatus *status)
+perform_spin_delay()
 {
 	/* CPU-specific delay each time through the loop */
 	SPIN_DELAY();
 
 	/* Block the process every spins_per_delay tries */
-	if (++(status->spins) >= spins_per_delay)
+	if (++(spinStatus.spins) >= spins_per_delay)
 	{
-		if (++(status->delays) > NUM_DELAYS)
-			s_lock_stuck(status->file, status->line, status->func);
+		if (++(spinStatus.delays) > NUM_DELAYS)
+			s_lock_stuck(spinStatus.file, spinStatus.line, spinStatus.func);
 
-		if (status->cur_delay == 0) /* first time to delay? */
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay == 0)	/* first time to delay? */
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
 		/*
 		 * Once we start sleeping, the overhead of reporting a wait event is
@@ -146,7 +145,7 @@ perform_spin_delay(SpinDelayStatus *status)
 		 * this is better than nothing.
 		 */
 		pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
-		pg_usleep(status->cur_delay);
+		pg_usleep(spinStatus.cur_delay);
 		pgstat_report_wait_end();
 
 #if defined(S_LOCK_TEST)
@@ -155,13 +154,13 @@ perform_spin_delay(SpinDelayStatus *status)
 #endif
 
 		/* increase delay by a random fraction between 1X and 2X */
-		status->cur_delay += (int) (status->cur_delay *
-									pg_prng_double(&pg_global_prng_state) + 0.5);
+		spinStatus.cur_delay += (int) (spinStatus.cur_delay *
+									   pg_prng_double(&pg_global_prng_state) + 0.5);
 		/* wrap back to minimum delay when max is exceeded */
-		if (status->cur_delay > MAX_DELAY_USEC)
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay > MAX_DELAY_USEC)
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
-		status->spins = 0;
+		spinStatus.spins = 0;
 	}
 }
 
@@ -183,9 +182,9 @@ perform_spin_delay(SpinDelayStatus *status)
  * is handled by the two routines below.
  */
 void
-finish_spin_delay(SpinDelayStatus *status)
+finish_spin_delay()
 {
-	if (status->cur_delay == 0)
+	if (spinStatus.cur_delay == 0)
 	{
 		/* we never had to delay */
 		if (spins_per_delay < MAX_SPINS_PER_DELAY)
@@ -322,3 +321,27 @@ main()
 }
 
 #endif							/* S_LOCK_TEST */
+
+#ifdef USE_ASSERT_CHECKING
+void
+VerifyNoSpinLocksHeld(bool check_in_panic)
+{
+	if (!check_in_panic && spinStatus.in_panic)
+		return;
+
+	if (spinStatus.func != NULL)
+	{
+		spinStatus.in_panic = true;
+		elog(PANIC, "A spin lock has been held at %s:%d",
+			 spinStatus.func, spinStatus.line);
+	}
+}
+
+void
+ResetSpinLockStatus(void)
+{
+	spinStatus.func = NULL;
+	spinStatus.line = -1;
+	spinStatus.file = NULL;
+}
+#endif
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1a34bd3715..2fc8e6d3e6 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2876,6 +2876,12 @@ quickdie(SIGNAL_ARGS)
 	sigaddset(&BlockSig, SIGQUIT);	/* prevent nested calls */
 	sigprocmask(SIG_SETMASK, &BlockSig, NULL);
 
+	/*
+	 * tell the spin lock system that we are dying. Since this backend will
+	 * exit very soon, so there is no need to reset the variable back.
+	 */
+	spinStatus.in_panic = true;
+
 	/*
 	 * Prevent interrupts while exiting; though we just blocked signals that
 	 * would queue new interrupts, one may have been pending.  We don't want a
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 2c7a20e3d3..ee75a78d54 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -79,6 +79,7 @@
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
+#include "storage/s_lock.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc_hooks.h"
 #include "utils/memutils.h"
@@ -351,6 +352,15 @@ errstart(int elevel, const char *domain)
 	bool		output_to_client = false;
 	int			i;
 
+	/*
+	 * Logging likely happens in many places without a outstanding attention,
+	 * and it's far more than a few dozen instructions, so it should be only
+	 * called when there is no spin lock is held. However it is allowed in the
+	 * quickdie process where it is possible that some spin lock being held,
+	 * we allow the errstart here since quickdie needs some logging.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	/*
 	 * Check some cases in which we want to promote an error into a more
 	 * severe error.  None of this logic applies for non-error messages.
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 1336944084..c27adbfc94 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -1028,6 +1028,13 @@ MemoryContextAlloc(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/*
+	 * Memory allocation likely happens in many places without a outstanding
+	 * attention, and it's far more than a few dozen instructions, so it
+	 * should be only called when there is no spin lock is held.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1071,6 +1078,9 @@ MemoryContextAllocZero(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1197,6 +1207,9 @@ palloc(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1228,6 +1241,9 @@ palloc0(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0b01c1f093..9ac725cd57 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -107,6 +107,13 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount;
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);
 
+/* in storage/s_lock.h */
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#endif
+
 /* Test whether an interrupt is pending */
 #ifndef WIN32
 #define INTERRUPTS_PENDING_CONDITION() \
@@ -117,9 +124,21 @@ extern void ProcessInterrupts(void);
 	 unlikely(InterruptPending))
 #endif
 
-/* Service interrupt, if one is pending and it's safe to service it now */
+/*
+ * Service interrupt, if one is pending and it's safe to service it now
+ *
+ * Spin lock doesn't have a overall infrastructure to release all the locks
+ * on ERROR. and ProcessInterrupts likely raises some ERROR or FATAL which
+ * makes the code jump to some other places, then spin lock is leaked.
+ * Let's make sure no spin lock can be held at this point.
+ *
+ * However it is possible that when a spin lock is being held, but it get a
+ * signal. The known signal handler quickdie needs using elog system so we
+ * need to telling this to spin lock detection system to bypass some check.
+ */
 #define CHECK_FOR_INTERRUPTS() \
 do { \
+	VerifyNoSpinLocksHeld(false); \
 	if (INTERRUPTS_PENDING_CONDITION()) \
 		ProcessInterrupts(); \
 } while(0)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index f190e6e5e4..69e17a9d61 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -363,6 +363,7 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
 {
 	pg_write_barrier();
 	pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED));
+	ResetSpinLockStatus();
 }
 
 /* in bufmgr.c */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index aa06e49da2..c3fe75a41d 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -652,7 +652,10 @@ tas(volatile slock_t *lock)
  */
 #if !defined(S_UNLOCK)
 #define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
+	do { __asm__ __volatile__("" : : : "memory"); \
+		ResetSpinLockStatus(); \
+		*(lock) = 0; \
+} while (0)
 #endif
 
 #endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
@@ -835,8 +838,13 @@ extern void set_spins_per_delay(int shared_spins_per_delay);
 extern int	update_spins_per_delay(int shared_spins_per_delay);
 
 /*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
+ * Support for spin delay and spin misuse detection purpose.
+ *
+ * spin delay which is useful in various places where spinlock-like
+ * procedures take place.
+ *
+ * spin misuse is based on global spinStatus to know if a spin lock
+ * is held when a heavy operation is taking.
  */
 typedef struct
 {
@@ -846,22 +854,40 @@ typedef struct
 	const char *file;
 	int			line;
 	const char *func;
-} SpinDelayStatus;
+	bool		in_panic; /* works for spin lock misuse purpose. */
+} SpinLockStatus;
 
+extern PGDLLIMPORT SpinLockStatus spinStatus;
+
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+extern void ResetSpinLockStatus(void);
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#define ResetSpinLockStatus() ((void) true)
+#endif
+
+/*
+ * start the spin delay logic and record the places where the spin lock
+ * is held which is also helpful for spin lock misuse detection purpose.
+ * init_spin_delay should be called with ResetSpinLockStatus in pair.
+ */
 static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
+init_spin_delay(const char *file, int line, const char *func)
 {
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
+	/* it is not allowed to spin another lock when holding one already. */
+	VerifyNoSpinLocksHeld(true);
+	spinStatus.spins = 0;
+	spinStatus.delays = 0;
+	spinStatus.cur_delay = 0;
+	spinStatus.file = file;
+	spinStatus.line = line;
+	spinStatus.func = func;
+	spinStatus.in_panic = false;
 }
 
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
+#define init_local_spin_delay() init_spin_delay( __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(void);
+extern void finish_spin_delay(void);
 
 #endif	 /* S_LOCK_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a200e5eb12..10bc6094ff 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2635,7 +2635,7 @@ SpGistSearchItem
 SpGistState
 SpGistTypeDesc
 SpecialJoinInfo
-SpinDelayStatus
+SpinLockStatus
 SplitInterval
 SplitLR
 SplitPageLayout
-- 
2.34.1

Reply via email to