On Tue, 2006-12-05 at 17:26 -0500, Tom Lane wrote:
> "Simon Riggs" <[EMAIL PROTECTED]> writes:
> > On Tue, 2006-12-05 at 16:24 -0500, Tom Lane wrote:
> >> Sure, what would happen is that every backend passing through this code
> >> would execute the several lines of computation needed to decide whether
> >> to call RequestCheckpoint.
> 
> > Right, but the calculation uses RedoRecPtr, which may not be completely
> > up to date. So presumably you want to re-read the shared memory value
> > again to make sure we are exactly accurate and allow only one person to
> > call checkpoint? Either way we have to take a lock. Insert lock causes
> > deadlock, so we would need to use infolock. 
> 
> Not at all.  It's highly unlikely that RedoRecPtr would be so out of
> date as to result in a false request for a checkpoint, and if it does,
> so what?  Worst case is we perform an extra checkpoint.

On its own, I wouldn't normally agree...

> Also, given the current structure of the routine, this is probably not
> the best place for that code at all --- it'd make more sense for it to
> be in the just-finished-a-segment code stretch, which would ensure that
> it's only done by one backend once per segment.

But thats a much better plan since it requires no locking.

There's a lot more changes there for such a simple fix though and lots
more potential bugs, but I've coded it as you suggest and removed the
fields from pg_control.

Patch passes make check, applies cleanly on HEAD.
pg_resetxlog and pgcontroldata tested.


-- 
  Simon Riggs             
  EnterpriseDB   http://www.enterprisedb.com

Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.258
diff -c -r1.258 xlog.c
*** src/backend/access/transam/xlog.c	30 Nov 2006 18:29:11 -0000	1.258
--- src/backend/access/transam/xlog.c	6 Dec 2006 00:42:38 -0000
***************
*** 1538,1591 ****
  			openLogFile = XLogFileInit(openLogId, openLogSeg,
  									   &use_existent, true);
  			openLogOff = 0;
- 
- 			/* update pg_control, unless someone else already did */
- 			LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
- 			if (ControlFile->logId < openLogId ||
- 				(ControlFile->logId == openLogId &&
- 				 ControlFile->logSeg < openLogSeg + 1))
- 			{
- 				ControlFile->logId = openLogId;
- 				ControlFile->logSeg = openLogSeg + 1;
- 				ControlFile->time = time(NULL);
- 				UpdateControlFile();
- 
- 				/*
- 				 * Signal bgwriter to start a checkpoint if it's been too long
- 				 * since the last one.	(We look at local copy of RedoRecPtr
- 				 * which might be a little out of date, but should be close
- 				 * enough for this purpose.)
- 				 *
- 				 * A straight computation of segment number could overflow 32
- 				 * bits.  Rather than assuming we have working 64-bit
- 				 * arithmetic, we compare the highest-order bits separately,
- 				 * and force a checkpoint immediately when they change.
- 				 */
- 				if (IsUnderPostmaster)
- 				{
- 					uint32		old_segno,
- 								new_segno;
- 					uint32		old_highbits,
- 								new_highbits;
- 
- 					old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
- 						(RedoRecPtr.xrecoff / XLogSegSize);
- 					old_highbits = RedoRecPtr.xlogid / XLogSegSize;
- 					new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
- 						openLogSeg;
- 					new_highbits = openLogId / XLogSegSize;
- 					if (new_highbits != old_highbits ||
- 						new_segno >= old_segno + (uint32) CheckPointSegments)
- 					{
- #ifdef WAL_DEBUG
- 						if (XLOG_DEBUG)
- 							elog(LOG, "time for a checkpoint, signaling bgwriter");
- #endif
- 						RequestCheckpoint(false, true);
- 					}
- 				}
- 			}
- 			LWLockRelease(ControlFileLock);
  		}
  
  		/* Make sure we have the current logfile open */
--- 1538,1543 ----
***************
*** 1670,1675 ****
--- 1622,1629 ----
  			 * This is also the right place to notify the Archiver that the
  			 * segment is ready to copy to archival storage, and to update the
  			 * timer for archive_timeout.
+              *
+              * We also check whether we need to signal for a checkpoint
  			 */
  			if (finishing_seg || (xlog_switch && last_iteration))
  			{
***************
*** 1680,1685 ****
--- 1634,1674 ----
  					XLogArchiveNotifySeg(openLogId, openLogSeg);
  
  				Write->lastSegSwitchTime = time(NULL);
+ 
+ 				/*
+ 				 * Signal bgwriter to start a checkpoint if it's been too long
+ 				 * since the last one.	(We look at local copy of RedoRecPtr
+ 				 * which might be a little out of date, but should be close
+ 				 * enough for this purpose.)
+ 				 *
+ 				 * A straight computation of segment number could overflow 32
+ 				 * bits.  Rather than assuming we have working 64-bit
+ 				 * arithmetic, we compare the highest-order bits separately,
+ 				 * and force a checkpoint immediately when they change.
+ 				 */
+ 				if (IsUnderPostmaster)
+ 				{
+ 					uint32		old_segno,
+ 								new_segno;
+ 					uint32		old_highbits,
+ 								new_highbits;
+ 
+ 					old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
+ 						(RedoRecPtr.xrecoff / XLogSegSize);
+ 					old_highbits = RedoRecPtr.xlogid / XLogSegSize;
+ 					new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
+ 						openLogSeg;
+ 					new_highbits = openLogId / XLogSegSize;
+ 					if (new_highbits != old_highbits ||
+ 						new_segno >= old_segno + (uint32) CheckPointSegments)
+ 					{
+ #ifdef WAL_DEBUG
+ 						if (XLOG_DEBUG)
+ 							elog(LOG, "time for a checkpoint, signaling bgwriter");
+ #endif
+ 						RequestCheckpoint(false, true);
+ 					}
+ 				}
  			}
  		}
  
***************
*** 4199,4206 ****
  	ControlFile->system_identifier = sysidentifier;
  	ControlFile->state = DB_SHUTDOWNED;
  	ControlFile->time = checkPoint.time;
- 	ControlFile->logId = 0;
- 	ControlFile->logSeg = 1;
  	ControlFile->checkPoint = checkPoint.redo;
  	ControlFile->checkPointCopy = checkPoint;
  	/* some additional ControlFile fields are set in WriteControlFile() */
--- 4188,4193 ----
***************
*** 4659,4666 ****
  	 */
  	ReadControlFile();
  
! 	if (ControlFile->logSeg == 0 ||
! 		ControlFile->state < DB_SHUTDOWNED ||
  		ControlFile->state > DB_IN_PRODUCTION ||
  		!XRecOffIsValid(ControlFile->checkPoint.xrecoff))
  		ereport(FATAL,
--- 4646,4652 ----
  	 */
  	ReadControlFile();
  
! 	if (ControlFile->state < DB_SHUTDOWNED ||
  		ControlFile->state > DB_IN_PRODUCTION ||
  		!XRecOffIsValid(ControlFile->checkPoint.xrecoff))
  		ereport(FATAL,
***************
*** 5064,5071 ****
  	openLogSeg = endLogSeg;
  	openLogFile = XLogFileOpen(openLogId, openLogSeg);
  	openLogOff = 0;
- 	ControlFile->logId = openLogId;
- 	ControlFile->logSeg = openLogSeg + 1;
  	Insert = &XLogCtl->Insert;
  	Insert->PrevRecord = LastRec;
  	XLogCtl->xlblocks[0].xlogid = openLogId;
--- 5050,5055 ----
Index: src/bin/pg_controldata/pg_controldata.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/bin/pg_controldata/pg_controldata.c,v
retrieving revision 1.31
diff -c -r1.31 pg_controldata.c
*** src/bin/pg_controldata/pg_controldata.c	21 Aug 2006 16:16:31 -0000	1.31
--- src/bin/pg_controldata/pg_controldata.c	6 Dec 2006 00:42:39 -0000
***************
*** 159,168 ****
  		   dbState(ControlFile.state));
  	printf(_("pg_control last modified:             %s\n"),
  		   pgctime_str);
- 	printf(_("Current log file ID:                  %u\n"),
- 		   ControlFile.logId);
- 	printf(_("Next log file segment:                %u\n"),
- 		   ControlFile.logSeg);
  	printf(_("Latest checkpoint location:           %X/%X\n"),
  		   ControlFile.checkPoint.xlogid,
  		   ControlFile.checkPoint.xrecoff);
--- 159,164 ----
Index: src/bin/pg_resetxlog/pg_resetxlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v
retrieving revision 1.53
diff -c -r1.53 pg_resetxlog.c
*** src/bin/pg_resetxlog/pg_resetxlog.c	4 Oct 2006 00:30:05 -0000	1.53
--- src/bin/pg_resetxlog/pg_resetxlog.c	6 Dec 2006 00:42:40 -0000
***************
*** 305,316 ****
  	if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
  		ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
  
! 	if (minXlogId > ControlFile.logId ||
! 		(minXlogId == ControlFile.logId &&
! 		 minXlogSeg > ControlFile.logSeg))
  	{
! 		ControlFile.logId = minXlogId;
! 		ControlFile.logSeg = minXlogSeg;
  	}
  
  	/*
--- 305,323 ----
  	if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
  		ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
  
!     newXlogId = ControlFile.checkPointCopy.redo.xlogid = newXlogId;
!     newXlogSeg = (ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size) + 1;
! 
! 	/* be sure we wrap around correctly at end of a logfile */
! 	NextLogSeg(newXlogId, newXlogSeg);
! 
!     /* Now adjust, if requested */
! 	if (minXlogId > newXlogId ||
! 		(minXlogId == newXlogId &&
! 		 minXlogSeg > newXlogSeg))
  	{
! 		newXlogId = minXlogId;
! 		newXlogSeg = minXlogSeg;
  	}
  
  	/*
***************
*** 469,476 ****
  
  	ControlFile.state = DB_SHUTDOWNED;
  	ControlFile.time = time(NULL);
- 	ControlFile.logId = 0;
- 	ControlFile.logSeg = 1;
  	ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
  
  	ControlFile.maxAlign = MAXIMUM_ALIGNOF;
--- 476,481 ----
***************
*** 539,548 ****
  		   ControlFile.catalog_version_no);
  	printf(_("Database system identifier:           %s\n"),
  		   sysident_str);
- 	printf(_("Current log file ID:                  %u\n"),
- 		   ControlFile.logId);
- 	printf(_("Next log file segment:                %u\n"),
- 		   ControlFile.logSeg);
  	printf(_("Latest checkpoint's TimeLineID:       %u\n"),
  		   ControlFile.checkPointCopy.ThisTimeLineID);
  	printf(_("Latest checkpoint's NextXID:          %u/%u\n"),
--- 544,549 ----
***************
*** 589,601 ****
  	int			fd;
  	char		buffer[PG_CONTROL_SIZE];		/* need not be aligned */
  
- 	/*
- 	 * Adjust fields as needed to force an empty XLOG starting at the next
- 	 * available segment.
- 	 */
- 	newXlogId = ControlFile.logId;
- 	newXlogSeg = ControlFile.logSeg;
- 
  	/* adjust in case we are changing segment size */
  	newXlogSeg *= ControlFile.xlog_seg_size;
  	newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
--- 590,595 ----
***************
*** 614,621 ****
  
  	ControlFile.state = DB_SHUTDOWNED;
  	ControlFile.time = time(NULL);
- 	ControlFile.logId = newXlogId;
- 	ControlFile.logSeg = newXlogSeg + 1;
  	ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
  	ControlFile.prevCheckPoint.xlogid = 0;
  	ControlFile.prevCheckPoint.xrecoff = 0;
--- 608,613 ----
Index: src/include/catalog/pg_control.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_control.h,v
retrieving revision 1.33
diff -c -r1.33 pg_control.h
*** src/include/catalog/pg_control.h	4 Oct 2006 00:30:07 -0000	1.33
--- src/include/catalog/pg_control.h	6 Dec 2006 00:42:41 -0000
***************
*** 102,109 ****
  	 */
  	DBState		state;			/* see enum above */
  	time_t		time;			/* time stamp of last pg_control update */
- 	uint32		logId;			/* current log file id */
- 	uint32		logSeg;			/* current log file segment, + 1 */
  	XLogRecPtr	checkPoint;		/* last check point record ptr */
  	XLogRecPtr	prevCheckPoint; /* previous check point record ptr */
  
--- 102,107 ----
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend

Reply via email to