On Tue, 2006-12-05 at 17:26 -0500, Tom Lane wrote:
> "Simon Riggs" <[EMAIL PROTECTED]> writes:
> > On Tue, 2006-12-05 at 16:24 -0500, Tom Lane wrote:
> >> Sure, what would happen is that every backend passing through this code
> >> would execute the several lines of computation needed to decide whether
> >> to call RequestCheckpoint.
>
> > Right, but the calculation uses RedoRecPtr, which may not be completely
> > up to date. So presumably you want to re-read the shared memory value
> > again to make sure we are exactly accurate and allow only one person to
> > call checkpoint? Either way we have to take a lock. Insert lock causes
> > deadlock, so we would need to use infolock.
>
> Not at all. It's highly unlikely that RedoRecPtr would be so out of
> date as to result in a false request for a checkpoint, and if it does,
> so what? Worst case is we perform an extra checkpoint.
On its own, I wouldn't normally agree...
> Also, given the current structure of the routine, this is probably not
> the best place for that code at all --- it'd make more sense for it to
> be in the just-finished-a-segment code stretch, which would ensure that
> it's only done by one backend once per segment.
But thats a much better plan since it requires no locking.
There's a lot more changes there for such a simple fix though and lots
more potential bugs, but I've coded it as you suggest and removed the
fields from pg_control.
Patch passes make check, applies cleanly on HEAD.
pg_resetxlog and pgcontroldata tested.
--
Simon Riggs
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.258
diff -c -r1.258 xlog.c
*** src/backend/access/transam/xlog.c 30 Nov 2006 18:29:11 -0000 1.258
--- src/backend/access/transam/xlog.c 6 Dec 2006 00:42:38 -0000
***************
*** 1538,1591 ****
openLogFile = XLogFileInit(openLogId, openLogSeg,
&use_existent, true);
openLogOff = 0;
-
- /* update pg_control, unless someone else already did */
- LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
- if (ControlFile->logId < openLogId ||
- (ControlFile->logId == openLogId &&
- ControlFile->logSeg < openLogSeg + 1))
- {
- ControlFile->logId = openLogId;
- ControlFile->logSeg = openLogSeg + 1;
- ControlFile->time = time(NULL);
- UpdateControlFile();
-
- /*
- * Signal bgwriter to start a checkpoint if it's been too long
- * since the last one. (We look at local copy of RedoRecPtr
- * which might be a little out of date, but should be close
- * enough for this purpose.)
- *
- * A straight computation of segment number could overflow 32
- * bits. Rather than assuming we have working 64-bit
- * arithmetic, we compare the highest-order bits separately,
- * and force a checkpoint immediately when they change.
- */
- if (IsUnderPostmaster)
- {
- uint32 old_segno,
- new_segno;
- uint32 old_highbits,
- new_highbits;
-
- old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
- (RedoRecPtr.xrecoff / XLogSegSize);
- old_highbits = RedoRecPtr.xlogid / XLogSegSize;
- new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
- openLogSeg;
- new_highbits = openLogId / XLogSegSize;
- if (new_highbits != old_highbits ||
- new_segno >= old_segno + (uint32) CheckPointSegments)
- {
- #ifdef WAL_DEBUG
- if (XLOG_DEBUG)
- elog(LOG, "time for a checkpoint, signaling bgwriter");
- #endif
- RequestCheckpoint(false, true);
- }
- }
- }
- LWLockRelease(ControlFileLock);
}
/* Make sure we have the current logfile open */
--- 1538,1543 ----
***************
*** 1670,1675 ****
--- 1622,1629 ----
* This is also the right place to notify the Archiver that the
* segment is ready to copy to archival storage, and to update the
* timer for archive_timeout.
+ *
+ * We also check whether we need to signal for a checkpoint
*/
if (finishing_seg || (xlog_switch && last_iteration))
{
***************
*** 1680,1685 ****
--- 1634,1674 ----
XLogArchiveNotifySeg(openLogId, openLogSeg);
Write->lastSegSwitchTime = time(NULL);
+
+ /*
+ * Signal bgwriter to start a checkpoint if it's been too long
+ * since the last one. (We look at local copy of RedoRecPtr
+ * which might be a little out of date, but should be close
+ * enough for this purpose.)
+ *
+ * A straight computation of segment number could overflow 32
+ * bits. Rather than assuming we have working 64-bit
+ * arithmetic, we compare the highest-order bits separately,
+ * and force a checkpoint immediately when they change.
+ */
+ if (IsUnderPostmaster)
+ {
+ uint32 old_segno,
+ new_segno;
+ uint32 old_highbits,
+ new_highbits;
+
+ old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
+ (RedoRecPtr.xrecoff / XLogSegSize);
+ old_highbits = RedoRecPtr.xlogid / XLogSegSize;
+ new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
+ openLogSeg;
+ new_highbits = openLogId / XLogSegSize;
+ if (new_highbits != old_highbits ||
+ new_segno >= old_segno + (uint32) CheckPointSegments)
+ {
+ #ifdef WAL_DEBUG
+ if (XLOG_DEBUG)
+ elog(LOG, "time for a checkpoint, signaling bgwriter");
+ #endif
+ RequestCheckpoint(false, true);
+ }
+ }
}
}
***************
*** 4199,4206 ****
ControlFile->system_identifier = sysidentifier;
ControlFile->state = DB_SHUTDOWNED;
ControlFile->time = checkPoint.time;
- ControlFile->logId = 0;
- ControlFile->logSeg = 1;
ControlFile->checkPoint = checkPoint.redo;
ControlFile->checkPointCopy = checkPoint;
/* some additional ControlFile fields are set in WriteControlFile() */
--- 4188,4193 ----
***************
*** 4659,4666 ****
*/
ReadControlFile();
! if (ControlFile->logSeg == 0 ||
! ControlFile->state < DB_SHUTDOWNED ||
ControlFile->state > DB_IN_PRODUCTION ||
!XRecOffIsValid(ControlFile->checkPoint.xrecoff))
ereport(FATAL,
--- 4646,4652 ----
*/
ReadControlFile();
! if (ControlFile->state < DB_SHUTDOWNED ||
ControlFile->state > DB_IN_PRODUCTION ||
!XRecOffIsValid(ControlFile->checkPoint.xrecoff))
ereport(FATAL,
***************
*** 5064,5071 ****
openLogSeg = endLogSeg;
openLogFile = XLogFileOpen(openLogId, openLogSeg);
openLogOff = 0;
- ControlFile->logId = openLogId;
- ControlFile->logSeg = openLogSeg + 1;
Insert = &XLogCtl->Insert;
Insert->PrevRecord = LastRec;
XLogCtl->xlblocks[0].xlogid = openLogId;
--- 5050,5055 ----
Index: src/bin/pg_controldata/pg_controldata.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/bin/pg_controldata/pg_controldata.c,v
retrieving revision 1.31
diff -c -r1.31 pg_controldata.c
*** src/bin/pg_controldata/pg_controldata.c 21 Aug 2006 16:16:31 -0000 1.31
--- src/bin/pg_controldata/pg_controldata.c 6 Dec 2006 00:42:39 -0000
***************
*** 159,168 ****
dbState(ControlFile.state));
printf(_("pg_control last modified: %s\n"),
pgctime_str);
- printf(_("Current log file ID: %u\n"),
- ControlFile.logId);
- printf(_("Next log file segment: %u\n"),
- ControlFile.logSeg);
printf(_("Latest checkpoint location: %X/%X\n"),
ControlFile.checkPoint.xlogid,
ControlFile.checkPoint.xrecoff);
--- 159,164 ----
Index: src/bin/pg_resetxlog/pg_resetxlog.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v
retrieving revision 1.53
diff -c -r1.53 pg_resetxlog.c
*** src/bin/pg_resetxlog/pg_resetxlog.c 4 Oct 2006 00:30:05 -0000 1.53
--- src/bin/pg_resetxlog/pg_resetxlog.c 6 Dec 2006 00:42:40 -0000
***************
*** 305,316 ****
if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
! if (minXlogId > ControlFile.logId ||
! (minXlogId == ControlFile.logId &&
! minXlogSeg > ControlFile.logSeg))
{
! ControlFile.logId = minXlogId;
! ControlFile.logSeg = minXlogSeg;
}
/*
--- 305,323 ----
if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
! newXlogId = ControlFile.checkPointCopy.redo.xlogid = newXlogId;
! newXlogSeg = (ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size) + 1;
!
! /* be sure we wrap around correctly at end of a logfile */
! NextLogSeg(newXlogId, newXlogSeg);
!
! /* Now adjust, if requested */
! if (minXlogId > newXlogId ||
! (minXlogId == newXlogId &&
! minXlogSeg > newXlogSeg))
{
! newXlogId = minXlogId;
! newXlogSeg = minXlogSeg;
}
/*
***************
*** 469,476 ****
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = time(NULL);
- ControlFile.logId = 0;
- ControlFile.logSeg = 1;
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
ControlFile.maxAlign = MAXIMUM_ALIGNOF;
--- 476,481 ----
***************
*** 539,548 ****
ControlFile.catalog_version_no);
printf(_("Database system identifier: %s\n"),
sysident_str);
- printf(_("Current log file ID: %u\n"),
- ControlFile.logId);
- printf(_("Next log file segment: %u\n"),
- ControlFile.logSeg);
printf(_("Latest checkpoint's TimeLineID: %u\n"),
ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u/%u\n"),
--- 544,549 ----
***************
*** 589,601 ****
int fd;
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
- /*
- * Adjust fields as needed to force an empty XLOG starting at the next
- * available segment.
- */
- newXlogId = ControlFile.logId;
- newXlogSeg = ControlFile.logSeg;
-
/* adjust in case we are changing segment size */
newXlogSeg *= ControlFile.xlog_seg_size;
newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
--- 590,595 ----
***************
*** 614,621 ****
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = time(NULL);
- ControlFile.logId = newXlogId;
- ControlFile.logSeg = newXlogSeg + 1;
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
ControlFile.prevCheckPoint.xlogid = 0;
ControlFile.prevCheckPoint.xrecoff = 0;
--- 608,613 ----
Index: src/include/catalog/pg_control.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_control.h,v
retrieving revision 1.33
diff -c -r1.33 pg_control.h
*** src/include/catalog/pg_control.h 4 Oct 2006 00:30:07 -0000 1.33
--- src/include/catalog/pg_control.h 6 Dec 2006 00:42:41 -0000
***************
*** 102,109 ****
*/
DBState state; /* see enum above */
time_t time; /* time stamp of last pg_control update */
- uint32 logId; /* current log file id */
- uint32 logSeg; /* current log file segment, + 1 */
XLogRecPtr checkPoint; /* last check point record ptr */
XLogRecPtr prevCheckPoint; /* previous check point record ptr */
--- 102,107 ----
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend