While looking at bug #7969, it occurred to me that it would be nice if we could catch resource leaks in WAL redo routines better. It would be useful during development, to catch bugs earlier, and it could've turned that replay-stopping error into a warning.

For regular transactions, we use ResourceOwners to track buffer pins (like in #7969) and other resources. There's no fundamental reason we couldn't use one during replay. After running a redo routine, there should be no buffer pins held or other resources held.

Lwlocks are not tracked by resource owners, but we could still easily warn if any are held after the redo routine exits.

- Heikki
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index e62286f..1558a8a 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2282,7 +2282,7 @@ AbortTransaction(void)
 	 * Releasing LW locks is critical since we might try to grab them again
 	 * while cleaning up!
 	 */
-	LWLockReleaseAll();
+	LWLockReleaseAll(false);
 
 	/* Clean up buffer I/O and buffer context locks, too */
 	AbortBufferIO();
@@ -4194,7 +4194,7 @@ AbortSubTransaction(void)
 	 * FIXME This may be incorrect --- Are there some locks we should keep?
 	 * Buffer locks, for example?  I don't think so but I'm not sure.
 	 */
-	LWLockReleaseAll();
+	LWLockReleaseAll(false);
 
 	AbortBufferIO();
 	UnlockBuffers();
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2f91bc8..0bf87f2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5515,6 +5515,7 @@ StartupXLOG(void)
 			bool		recoveryApply = true;
 			ErrorContextCallback errcallback;
 			TimestampTz xtime;
+			ResourceOwner redoOwner;
 
 			InRedo = true;
 
@@ -5523,6 +5524,13 @@ StartupXLOG(void)
 							(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr)));
 
 			/*
+			 * Create a resource owner for running redo functions, to catch
+			 * resource leaks.
+			 */
+			redoOwner = ResourceOwnerCreate(NULL, "WAL redo");
+			CurrentResourceOwner = redoOwner;
+
+			/*
 			 * main redo apply loop
 			 */
 			do
@@ -5671,6 +5679,21 @@ StartupXLOG(void)
 				/* Now apply the WAL record itself */
 				RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
 
+				/*
+				 * The redo routine should've released all lwlocks and
+				 * other resources.
+				 */
+				LWLockReleaseAll(true);
+				ResourceOwnerRelease(redoOwner, RESOURCE_RELEASE_BEFORE_LOCKS,
+									 true, true);
+				/*
+				 * Locks are *not* released. In hot standby mode, the startup
+				 * process holds locks on behalf of transactions running in
+				 * the master.
+				 */
+				ResourceOwnerRelease(redoOwner, RESOURCE_RELEASE_AFTER_LOCKS,
+									 true, true);
+
 				/* Pop the error context stack */
 				error_context_stack = errcallback.previous;
 
@@ -5704,6 +5727,9 @@ StartupXLOG(void)
 				record = ReadRecord(xlogreader, InvalidXLogRecPtr, LOG, false);
 			} while (record != NULL);
 
+			CurrentResourceOwner = NULL;
+			ResourceOwnerDelete(redoOwner);
+
 			/*
 			 * end of main redo apply loop
 			 */
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 287f19b..03554a1 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -582,7 +582,7 @@ bootstrap_signals(void)
 static void
 ShutdownAuxiliaryProcess(int code, Datum arg)
 {
-	LWLockReleaseAll();
+	LWLockReleaseAll(false);
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 286ae86..ea0e59d 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -175,7 +175,7 @@ BackgroundWriterMain(void)
 		 * AbortTransaction().	We don't have very many resources to worry
 		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
 		 */
-		LWLockReleaseAll();
+		LWLockReleaseAll(false);
 		AbortBufferIO();
 		UnlockBuffers();
 		/* buffer pins are released here: */
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 5fb2d81..a29fc54 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -283,7 +283,7 @@ CheckpointerMain(void)
 		 * about in checkpointer, but we do have LWLocks, buffers, and temp
 		 * files.
 		 */
-		LWLockReleaseAll();
+		LWLockReleaseAll(false);
 		AbortBufferIO();
 		UnlockBuffers();
 		/* buffer pins are released here: */
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 8359da6..fe58e8f 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -179,7 +179,7 @@ WalWriterMain(void)
 		 * AbortTransaction().	We don't have very many resources to worry
 		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
 		 */
-		LWLockReleaseAll();
+		LWLockReleaseAll(false);
 		AbortBufferIO();
 		UnlockBuffers();
 		/* buffer pins are released here: */
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 4f88d3f..b09135b 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -856,17 +856,21 @@ LWLockRelease(LWLockId lockid)
  * decrement it below zero if we allow it to drop for each released lock!
  */
 void
-LWLockReleaseAll(void)
+LWLockReleaseAll(bool warn)
 {
 	while (num_held_lwlocks > 0)
 	{
+		LWLockId lockid = held_lwlocks[num_held_lwlocks - 1];
+
 		HOLD_INTERRUPTS();		/* match the upcoming RESUME_INTERRUPTS */
 
-		LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
+		if (warn)
+			elog(WARNING, "lwlock leak: %d", lockid);
+
+		LWLockRelease(lockid);
 	}
 }
 
-
 /*
  * LWLockHeldByMe - test whether my process currently holds a lock
  *
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 5809a79..d635411 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -796,7 +796,7 @@ ProcKill(int code, Datum arg)
 	 * it's cheap to check again before we cut the knees off the LWLock
 	 * facility by releasing our PGPROC ...
 	 */
-	LWLockReleaseAll();
+	LWLockReleaseAll(false);
 
 	/* Release ownership of the process's latch, too */
 	DisownLatch(&MyProc->procLatch);
@@ -859,7 +859,7 @@ AuxiliaryProcKill(int code, Datum arg)
 	Assert(MyProc == auxproc);
 
 	/* Release any LW locks I am holding (see notes above) */
-	LWLockReleaseAll();
+	LWLockReleaseAll(false);
 
 	/* Release ownership of the process's latch, too */
 	DisownLatch(&MyProc->procLatch);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index d8f7e9d..e678bda 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -110,7 +110,7 @@ extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
 extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
 extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
 extern void LWLockRelease(LWLockId lockid);
-extern void LWLockReleaseAll(void);
+extern void LWLockReleaseAll(bool warn);
 extern bool LWLockHeldByMe(LWLockId lockid);
 
 extern int	NumLWLocks(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to