This patch adds an archiver lock file so that the server refuses to start while another archiver is still running.
There may be some debate over the exact behaviour, but I am publishing early...
--
Simon Riggs
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/postmaster/pgarch.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/pgarch.c,v
retrieving revision 1.20
diff -c -r1.20 pgarch.c
*** src/backend/postmaster/pgarch.c 5 Mar 2006 15:58:35 -0000 1.20
--- src/backend/postmaster/pgarch.c 22 May 2006 16:05:53 -0000
***************
*** 118,124 ****
* Note: if fail, we will be called again from the postmaster main loop.
*/
int
! pgarch_start(void)
{
time_t curtime;
pid_t pgArchPid;
--- 118,124 ----
* Note: if fail, we will be called again from the postmaster main loop.
*/
int
! pgarch_start(int previousArchPid)
{
time_t curtime;
pid_t pgArchPid;
***************
*** 141,146 ****
--- 141,157 ----
return 0;
last_pgarch_start_time = curtime;
+ /*
+ * If we've not started an archiver before, then abort if an archiver
+ * lock file exists. If we've been running an archiver but it died,
+ * then clean up so we can restart it again.
+ */
+ if (previousArchPid == 0)
+ CreateArchiverLockFile(XLOGSTATUS, true, true);
+ else
+ UnlinkArchiverLockFile(XLOGSTATUS);
+
+
#ifdef EXEC_BACKEND
switch ((pgArchPid = pgarch_forkexec()))
#else
***************
*** 222,227 ****
--- 233,243 ----
MyProcPid = getpid(); /* reset MyProcPid */
+ /*
+ * Now create the lock file from within the archiver
+ */
+ CreateArchiverLockFile(XLOGSTATUS, true, false);
+
/*
* Ignore all signals usually bound to some action in the postmaster,
* except for SIGHUP, SIGUSR1 and SIGQUIT.
***************
*** 257,266 ****
pgarch_exit(SIGNAL_ARGS)
{
/*
! * For now, we just nail the doors shut and get out of town. It might
* seem cleaner to finish up any pending archive copies, but there's a
* nontrivial risk that init will kill us partway through.
*/
exit(0);
}
--- 273,284 ----
pgarch_exit(SIGNAL_ARGS)
{
/*
! * Zap the archiver lockfile then go. It might
* seem cleaner to finish up any pending archive copies, but there's a
* nontrivial risk that init will kill us partway through.
*/
+ UnlinkArchiverLockFile(XLOGSTATUS);
+
exit(0);
}
***************
*** 498,504 ****
struct dirent *rlde;
bool found = false;
! snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
rldir = AllocateDir(XLogArchiveStatusDir);
if (rldir == NULL)
ereport(ERROR,
--- 516,522 ----
struct dirent *rlde;
bool found = false;
! snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGSTATUS);
rldir = AllocateDir(XLogArchiveStatusDir);
if (rldir == NULL)
ereport(ERROR,
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.484
diff -c -r1.484 postmaster.c
*** src/backend/postmaster/postmaster.c 19 May 2006 15:15:37 -0000 1.484
--- src/backend/postmaster/postmaster.c 22 May 2006 16:05:57 -0000
***************
*** 1316,1322 ****
/* If we have lost the archiver, try to start a new one */
if (XLogArchivingActive() && PgArchPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
! PgArchPID = pgarch_start();
/* If we have lost the stats collector, try to start a new one */
if (PgStatPID == 0 &&
--- 1316,1322 ----
/* If we have lost the archiver, try to start a new one */
if (XLogArchivingActive() && PgArchPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
! PgArchPID = pgarch_start(PgArchPID);
/* If we have lost the stats collector, try to start a new one */
if (PgStatPID == 0 &&
***************
*** 2132,2138 ****
else if (Shutdown == NoShutdown)
{
if (XLogArchivingActive() && PgArchPID == 0)
! PgArchPID = pgarch_start();
if (PgStatPID == 0)
PgStatPID = pgstat_start();
}
--- 2132,2138 ----
else if (Shutdown == NoShutdown)
{
if (XLogArchivingActive() && PgArchPID == 0)
! PgArchPID = pgarch_start(0);
if (PgStatPID == 0)
PgStatPID = pgstat_start();
}
***************
*** 2198,2210 ****
*/
if (PgArchPID != 0 && pid == PgArchPID)
{
- PgArchPID = 0;
if (exitstatus != 0)
LogChildExit(LOG, _("archiver process"),
pid, exitstatus);
if (XLogArchivingActive() &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
! PgArchPID = pgarch_start();
continue;
}
--- 2198,2209 ----
*/
if (PgArchPID != 0 && pid == PgArchPID)
{
if (exitstatus != 0)
LogChildExit(LOG, _("archiver process"),
pid, exitstatus);
if (XLogArchivingActive() &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
! PgArchPID = pgarch_start(PgArchPID);
continue;
}
Index: src/backend/utils/init/miscinit.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/init/miscinit.c,v
retrieving revision 1.154
diff -c -r1.154 miscinit.c
*** src/backend/utils/init/miscinit.c 5 Mar 2006 15:58:46 -0000 1.154
--- src/backend/utils/init/miscinit.c 22 May 2006 16:05:58 -0000
***************
*** 44,56 ****
#include "utils/syscache.h"
! #define DIRECTORY_LOCK_FILE "postmaster.pid"
ProcessingMode Mode = InitProcessing;
/* Note: we rely on this to initialize as zeroes */
static char socketLockFile[MAXPGPATH];
/* ----------------------------------------------------------------
* ignoring system indexes support stuff
--- 44,64 ----
#include "utils/syscache.h"
! #define DIRECTORY_LOCK_FILENAME "postmaster.pid"
! #define ARCHIVER_LOCK_FILENAME "archiver.pid"
ProcessingMode Mode = InitProcessing;
/* Note: we rely on this to initialize as zeroes */
static char socketLockFile[MAXPGPATH];
+ typedef enum LockFileType
+ {
+ LOCK_FILE_DATA_DIR,
+ LOCK_FILE_COMM_SOCKET,
+ LOCK_FILE_ARCHIVER
+ } LockFileType;
+
/* ----------------------------------------------------------------
* ignoring system indexes support stuff
***************
*** 608,616 ****
/*-------------------------------------------------------------------------
* Interlock-file support
*
! * These routines are used to create both a data-directory lockfile
! * ($DATADIR/postmaster.pid) and a Unix-socket-file lockfile ($SOCKFILE.lock).
! * Both kinds of files contain the same info:
*
* Owning process' PID
* Data directory path
--- 616,627 ----
/*-------------------------------------------------------------------------
* Interlock-file support
*
! * These routines are used to create
! * - a data-directory lockfile ($DATADIR/postmaster.pid)
! * - a Unix-socket-file lockfile ($SOCKFILE.lock)
! * - an archiver lockfile ($DATADIR/pg_xlog/archive_status/archiver.pid)
! *
! * All three kinds of lock file contain the same info:
*
* Owning process' PID
* Data directory path
***************
*** 623,628 ****
--- 634,642 ----
* A data-directory lockfile can optionally contain a third line, containing
* the key and ID for the shared memory block used by this postmaster.
*
+ * We keep a lock file for the archiver because it may outlive the postmaster
+ * and we cannot allow multiple archivers to co-exist.
+ *
* On successful lockfile creation, a proc_exit callback to remove the
* lockfile is automatically created.
*-------------------------------------------------------------------------
***************
*** 655,661 ****
*/
static void
CreateLockFile(const char *filename, bool amPostmaster,
! bool isDDLock, const char *refName)
{
int fd;
char buffer[MAXPGPATH + 100];
--- 669,675 ----
*/
static void
CreateLockFile(const char *filename, bool amPostmaster,
! LockFileType lockFileType, const char *refName, bool checkOnly)
{
int fd;
char buffer[MAXPGPATH + 100];
***************
*** 670,695 ****
* (for example, a non-writable $PGDATA directory might cause a failure
* that won't go away). 100 tries seems like plenty.
*/
for (ntries = 0;; ntries++)
{
! /*
! * Try to create the lock file --- O_EXCL makes this atomic.
! *
! * Think not to make the file protection weaker than 0600. See
! * comments below.
! */
! fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
! if (fd >= 0)
! break; /* Success; exit the retry loop */
!
! /*
! * Couldn't create the pid file. Probably it already exists.
! */
! if ((errno != EEXIST && errno != EACCES) || ntries > 100)
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not create lock file \"%s\": %m",
! filename)));
/*
* Read the file to get the old owner's PID. Note race condition
--- 684,713 ----
* (for example, a non-writable $PGDATA directory might cause a failure
* that won't go away). 100 tries seems like plenty.
*/
+ elog(DEBUG2, "attempting to create lock file %s in %s", filename, refName);
for (ntries = 0;; ntries++)
{
! if (!checkOnly)
! {
! /*
! * Try to create the lock file --- O_EXCL makes this atomic.
! *
! * Think not to make the file protection weaker than 0600. See
! * comments below.
! */
! fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
! if (fd >= 0)
! break; /* Success; exit the retry loop */
!
! /*
! * Couldn't create the pid file. Probably it already exists.
! */
! if ((errno != EEXIST && errno != EACCES) || ntries > 100)
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not create lock file \"%s\": %m",
! filename)));
! }
/*
* Read the file to get the old owner's PID. Note race condition
***************
*** 698,703 ****
--- 716,723 ----
fd = open(filename, O_RDONLY, 0600);
if (fd < 0)
{
+ if (checkOnly)
+ break;
if (errno == ENOENT)
continue; /* race condition; try again */
ereport(FATAL,
***************
*** 769,789 ****
(errno != ESRCH && errno != EPERM))
{
/* lockfile belongs to a live process */
! ereport(FATAL,
! (errcode(ERRCODE_LOCK_FILE_EXISTS),
! errmsg("lock file \"%s\" already exists",
! filename),
! isDDLock ?
! (encoded_pid < 0 ?
! errhint("Is another postgres (PID %d) running in data directory \"%s\"?",
! (int) other_pid, refName) :
! errhint("Is another postmaster (PID %d) running in data directory \"%s\"?",
! (int) other_pid, refName)) :
! (encoded_pid < 0 ?
! errhint("Is another postgres (PID %d) using socket file \"%s\"?",
! (int) other_pid, refName) :
! errhint("Is another postmaster (PID %d) using socket file \"%s\"?",
! (int) other_pid, refName))));
}
}
--- 789,833 ----
(errno != ESRCH && errno != EPERM))
{
/* lockfile belongs to a live process */
! switch (lockFileType)
! {
! case LOCK_FILE_DATA_DIR:
! ereport(FATAL,
! (errcode(ERRCODE_LOCK_FILE_EXISTS),
! errmsg("lock file \"%s\" already exists",
! filename),
! (encoded_pid < 0 ?
! errhint("Is another postgres (PID %d) running in data directory \"%s\"?",
! (int) other_pid, refName) :
! errhint("Is another postmaster (PID %d) running in data directory \"%s\"?",
! (int) other_pid, refName))));
! break;
! case LOCK_FILE_COMM_SOCKET:
! ereport(FATAL,
! (errcode(ERRCODE_LOCK_FILE_EXISTS),
! errmsg("lock file \"%s\" already exists",
! filename),
! (encoded_pid < 0 ?
! errhint("Is another postgres (PID %d) using socket file \"%s\"?",
! (int) other_pid, refName) :
! errhint("Is another postmaster (PID %d) using socket file \"%s\"?",
! (int) other_pid, refName))));
! break;
! case LOCK_FILE_ARCHIVER:
! ereport(FATAL,
! (errcode(ERRCODE_LOCK_FILE_EXISTS),
! errmsg("lock file \"%s\" already exists",
! filename),
! errhint("Is another archiver (PID %d) reading archive status directory \"%s\"?",
! (int) other_pid, refName)));
! break;
! default:
! ereport(FATAL,
! (errcode(ERRCODE_LOCK_FILE_EXISTS),
! errmsg("lock file type %d does not exist",
! lockFileType)));
! break;
! }
}
}
***************
*** 794,800 ****
* looking to see if there is an associated shmem segment that is
* still in use.
*/
! if (isDDLock)
{
char *ptr;
unsigned long id1,
--- 838,845 ----
* looking to see if there is an associated shmem segment that is
* still in use.
*/
! if (lockFileType == LOCK_FILE_DATA_DIR ||
! lockFileType == LOCK_FILE_ARCHIVER)
{
char *ptr;
unsigned long id1,
***************
*** 838,877 ****
"by hand and try again.")));
}
! /*
! * Successfully created the file, now fill it.
! */
! snprintf(buffer, sizeof(buffer), "%d\n%s\n",
! amPostmaster ? (int) my_pid : -((int) my_pid),
! DataDir);
! errno = 0;
! if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
! {
! int save_errno = errno;
!
! close(fd);
! unlink(filename);
! /* if write didn't set errno, assume problem is no disk space */
! errno = save_errno ? save_errno : ENOSPC;
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not write lock file \"%s\": %m", filename)));
! }
! if (close(fd))
! {
! int save_errno = errno;
!
! unlink(filename);
! errno = save_errno;
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not write lock file \"%s\": %m", filename)));
! }
!
! /*
! * Arrange for automatic removal of lockfile at proc_exit.
! */
! on_proc_exit(UnlinkLockFile, PointerGetDatum(strdup(filename)));
}
/*
--- 883,927 ----
"by hand and try again.")));
}
! if (!checkOnly)
! {
! /*
! * Successfully created the file, now fill it.
! */
! snprintf(buffer, sizeof(buffer), "%d\n%s\n",
! amPostmaster ? (int) my_pid : -((int) my_pid),
! DataDir);
! errno = 0;
! if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
! {
! int save_errno = errno;
!
! close(fd);
! unlink(filename);
! /* if write didn't set errno, assume problem is no disk space */
! errno = save_errno ? save_errno : ENOSPC;
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not write lock file \"%s\": %m", filename)));
! }
! if (close(fd))
! {
! int save_errno = errno;
!
! unlink(filename);
! errno = save_errno;
! ereport(FATAL,
! (errcode_for_file_access(),
! errmsg("could not write lock file \"%s\": %m", filename)));
! }
!
! /*
! * Arrange for automatic removal of lockfile at proc_exit.
! */
! if (lockFileType == LOCK_FILE_DATA_DIR ||
! lockFileType == LOCK_FILE_COMM_SOCKET)
! on_proc_exit(UnlinkLockFile, PointerGetDatum(strdup(filename)));
! }
}
/*
***************
*** 884,890 ****
void
CreateDataDirLockFile(bool amPostmaster)
{
! CreateLockFile(DIRECTORY_LOCK_FILE, amPostmaster, true, DataDir);
}
/*
--- 934,941 ----
void
CreateDataDirLockFile(bool amPostmaster)
{
! CreateLockFile(DIRECTORY_LOCK_FILENAME, amPostmaster, LOCK_FILE_DATA_DIR,
! DataDir, false);
}
/*
***************
*** 896,906 ****
char lockfile[MAXPGPATH];
snprintf(lockfile, sizeof(lockfile), "%s.lock", socketfile);
! CreateLockFile(lockfile, amPostmaster, false, socketfile);
/* Save name of lockfile for TouchSocketLockFile */
strcpy(socketLockFile, lockfile);
}
/*
* TouchSocketLockFile -- mark socket lock file as recently accessed
*
--- 947,981 ----
char lockfile[MAXPGPATH];
snprintf(lockfile, sizeof(lockfile), "%s.lock", socketfile);
! CreateLockFile(lockfile, amPostmaster, LOCK_FILE_COMM_SOCKET,
! socketfile, false);
/* Save name of lockfile for TouchSocketLockFile */
strcpy(socketLockFile, lockfile);
}
+ void
+ CreateArchiverLockFile(const char *archiveDir, bool amPostmaster, bool checkOnly)
+ {
+ char lockfile[MAXPGPATH];
+
+ /*
+ * Use relative names since we are already in DataDir
+ */
+ snprintf(lockfile, sizeof(lockfile), "%s/" ARCHIVER_LOCK_FILENAME, archiveDir);
+ CreateLockFile(lockfile, amPostmaster, LOCK_FILE_ARCHIVER,
+ archiveDir, checkOnly);
+ }
+
+ void
+ UnlinkArchiverLockFile(const char *archiveDir)
+ {
+ char lockfile[MAXPGPATH];
+
+ snprintf(lockfile, sizeof(lockfile), "%s/" ARCHIVER_LOCK_FILENAME, archiveDir);
+ unlink(lockfile);
+ }
+
+
/*
* TouchSocketLockFile -- mark socket lock file as recently accessed
*
***************
*** 959,971 ****
char *ptr;
char buffer[BLCKSZ];
! fd = open(DIRECTORY_LOCK_FILE, O_RDWR | PG_BINARY, 0);
if (fd < 0)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
! DIRECTORY_LOCK_FILE)));
return;
}
len = read(fd, buffer, sizeof(buffer) - 100);
--- 1034,1046 ----
char *ptr;
char buffer[BLCKSZ];
! fd = open(DIRECTORY_LOCK_FILENAME, O_RDWR | PG_BINARY, 0);
if (fd < 0)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
! DIRECTORY_LOCK_FILENAME)));
return;
}
len = read(fd, buffer, sizeof(buffer) - 100);
***************
*** 974,980 ****
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not read from file \"%s\": %m",
! DIRECTORY_LOCK_FILE)));
close(fd);
return;
}
--- 1049,1055 ----
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not read from file \"%s\": %m",
! DIRECTORY_LOCK_FILENAME)));
close(fd);
return;
}
***************
*** 987,993 ****
if (ptr == NULL ||
(ptr = strchr(ptr + 1, '\n')) == NULL)
{
! elog(LOG, "bogus data in \"%s\"", DIRECTORY_LOCK_FILE);
close(fd);
return;
}
--- 1062,1068 ----
if (ptr == NULL ||
(ptr = strchr(ptr + 1, '\n')) == NULL)
{
! elog(LOG, "bogus data in \"%s\"", DIRECTORY_LOCK_FILENAME);
close(fd);
return;
}
***************
*** 1014,1020 ****
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
! DIRECTORY_LOCK_FILE)));
close(fd);
return;
}
--- 1089,1095 ----
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
! DIRECTORY_LOCK_FILENAME)));
close(fd);
return;
}
***************
*** 1023,1029 ****
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
! DIRECTORY_LOCK_FILE)));
}
}
--- 1098,1104 ----
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
! DIRECTORY_LOCK_FILENAME)));
}
}
Index: src/include/miscadmin.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/miscadmin.h,v
retrieving revision 1.186
diff -c -r1.186 miscadmin.h
*** src/include/miscadmin.h 5 Mar 2006 15:58:53 -0000 1.186
--- src/include/miscadmin.h 22 May 2006 16:06:03 -0000
***************
*** 313,318 ****
--- 313,321 ----
extern bool ReindexIsProcessingIndex(Oid indexOid);
extern void CreateDataDirLockFile(bool amPostmaster);
extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster);
+ extern void CreateArchiverLockFile(const char *archiveDir, bool amPostmaster, bool checkOnly);
+ extern void UnlinkArchiverLockFile(const char *archiveDir);
+
extern void TouchSocketLockFile(void);
extern void RecordSharedMemoryInLockFile(unsigned long id1,
unsigned long id2);
Index: src/include/access/xlog_internal.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/access/xlog_internal.h,v
retrieving revision 1.13
diff -c -r1.13 xlog_internal.h
*** src/include/access/xlog_internal.h 5 Apr 2006 03:34:05 -0000 1.13
--- src/include/access/xlog_internal.h 22 May 2006 16:06:03 -0000
***************
*** 190,195 ****
--- 190,201 ----
* The XLog directory and control file (relative to $PGDATA)
*/
#define XLOGDIR "pg_xlog"
+
+ /*
+ * Definition must also be copied to miscinit.c
+ */
+ #define XLOGSTATUS "pg_xlog/archive_status"
+
#define XLOG_CONTROL_FILE "global/pg_control"
/*
***************
*** 211,217 ****
snprintf(path, MAXPGPATH, XLOGDIR "/%08X.history", tli)
#define StatusFilePath(path, xlog, suffix) \
! snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix)
#define BackupHistoryFileName(fname, tli, log, seg, offset) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, offset)
--- 217,223 ----
snprintf(path, MAXPGPATH, XLOGDIR "/%08X.history", tli)
#define StatusFilePath(path, xlog, suffix) \
! snprintf(path, MAXPGPATH, XLOGSTATUS "/%s%s", xlog, suffix)
#define BackupHistoryFileName(fname, tli, log, seg, offset) \
snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, offset)
Index: src/include/postmaster/pgarch.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/postmaster/pgarch.h,v
retrieving revision 1.4
diff -c -r1.4 pgarch.h
*** src/include/postmaster/pgarch.h 5 Mar 2006 15:58:58 -0000 1.4
--- src/include/postmaster/pgarch.h 22 May 2006 16:06:04 -0000
***************
*** 17,23 ****
* Functions called from postmaster
* ----------
*/
! extern int pgarch_start(void);
#ifdef EXEC_BACKEND
extern void PgArchiverMain(int argc, char *argv[]);
--- 17,23 ----
* Functions called from postmaster
* ----------
*/
! extern int pgarch_start(int previousArchPid);
#ifdef EXEC_BACKEND
extern void PgArchiverMain(int argc, char *argv[]);
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly