Alvaro Herrera wrote:
> Alvaro Herrera wrote:
>
> > Turns out that this problem is not serious at all, because if that
> > palloc() fails, the whole postmaster will exit with a FATAL out of
> > memory message.
> >
> > The problems in the worker code after fork are still an issue though.
>
> It _is_ still an issue -- and a very serious issue at that. If a worker
> fails before getting its entry from the startingWorker pointer, then
> (patched) autovacuum will no longer run any task.
I figured that I could keep the old check there for when the worker
failed, but still add the new signalling mechanism so that a fork()
failure (which I would think is the most likely of all) is taken care of
in a more timely manner.
I've also improved the rest of the code and comments a bit, and the new
code does seem better now. So I'll go ahead and commit it later today.
--
Alvaro Herrera http://www.flickr.com/photos/alvherre/
"Uno puede defenderse de los ataques; contra los elogios se esta indefenso"
Index: src/backend/postmaster/autovacuum.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/postmaster/autovacuum.c,v
retrieving revision 1.50
diff -c -p -r1.50 autovacuum.c
*** src/backend/postmaster/autovacuum.c 13 Jun 2007 21:24:55 -0000 1.50
--- src/backend/postmaster/autovacuum.c 20 Jun 2007 17:28:22 -0000
***************
*** 4,9 ****
--- 4,45 ----
*
* PostgreSQL Integrated Autovacuum Daemon
*
+ * The autovacuum system is structured in two different kinds of processes: the
+ * autovacuum launcher and the autovacuum worker. The launcher is an
+ * always-running process, started by the postmaster when the autovacuum GUC
+ * parameter is set. The launcher schedules autovacuum workers to be started
+ * when appropriate. The workers are the processes which execute the actual
+ * vacuuming; they connect to a database as determined in the launcher, and
+ * once connected they examine the catalogs to select the tables to vacuum.
+ *
+ * The autovacuum launcher cannot start the worker processes by itself,
+ * because doing so would cause robustness issues (namely, failure to shut
+ * them down on exceptional conditions; and apart from that, since the launcher
+ * is connected to shared memory and is thus subject to corruption there, it is
+ * not as robust as the postmaster which is not). So it leaves that task to
+ * the postmaster.
+ *
+ * There is an autovacuum shared memory area, where the launcher stores
+ * information about the database it wants vacuumed. When it wants a new
+ * worker to start, it sets a flag in shared memory and sends a special signal
+ * to the postmaster. Then postmaster knows nothing more than it must start a
+ * worker; so it forks a new child, which turns into a worker and examines
+ * shared memory to know what database it must connect to.
+ *
+ * If the fork() call fails, the postmaster sets a flag in the shared memory
+ * area, and it sends a signal to the launcher. The launcher, upon noticing
+ * the flag, can try starting the worker again. Note that the failure can only
+ * be transient (fork failure due to high load, memory pressure, too many
+ * processes, etc); more permanent problems, like failure to connect to a
+ * database, are detected later in the worker and dealt with in a different
+ * way.
+ *
+ * When the worker is done vacuuming it sends SIGUSR1 to the launcher. The
+ * launcher then wakes up and is able to launch another worker, if the schedule
+ * is too tight. Otherwise it will go back to sleeping and awake normally
+ * according to schedule. At this time the launcher can also balance the
+ * settings for the various remaining workers' cost-based vacuum delay feature.
+ *
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*************** typedef struct WorkerInfoData
*** 158,185 ****
typedef struct WorkerInfoData *WorkerInfo;
/*-------------
* The main autovacuum shmem struct. On shared memory we store this main
* struct and the array of WorkerInfo structs. This struct keeps:
*
* av_launcherpid the PID of the autovacuum launcher
* av_freeWorkers the WorkerInfo freelist
* av_runningWorkers the WorkerInfo non-free queue
* av_startingWorker pointer to WorkerInfo currently being started (cleared by
* the worker itself as soon as it's up and running)
- * av_rebalance true when a worker determines that cost limits must be
- * rebalanced
*
! * This struct is protected by AutovacuumLock.
*-------------
*/
typedef struct
{
pid_t av_launcherpid;
SHMEM_OFFSET av_freeWorkers;
SHM_QUEUE av_runningWorkers;
SHMEM_OFFSET av_startingWorker;
- bool av_rebalance;
} AutoVacuumShmemStruct;
static AutoVacuumShmemStruct *AutoVacuumShmem;
--- 194,233 ----
typedef struct WorkerInfoData *WorkerInfo;
+ /*
+ * Possible signals received by the launcher from remote processes. These are
+ * stored atomically in shared memory so that other processes can set them
+ * without locking.
+ */
+ typedef enum
+ {
+ AutoVacForkFailed, /* failed trying to start a worker */
+ AutoVacRebalance, /* rebalance the cost limits */
+ AutoVacNumSignals = AutoVacRebalance /* must be last */
+ } AutoVacuumSignal;
+
/*-------------
* The main autovacuum shmem struct. On shared memory we store this main
* struct and the array of WorkerInfo structs. This struct keeps:
*
+ * av_signal set by other processes to indicate various conditions
* av_launcherpid the PID of the autovacuum launcher
* av_freeWorkers the WorkerInfo freelist
* av_runningWorkers the WorkerInfo non-free queue
* av_startingWorker pointer to WorkerInfo currently being started (cleared by
* the worker itself as soon as it's up and running)
*
! * This struct is protected by AutovacuumLock, except for av_signal and parts
! * of the worker list (see above).
*-------------
*/
typedef struct
{
+ sig_atomic_t av_signal[AutoVacNumSignals];
pid_t av_launcherpid;
SHMEM_OFFSET av_freeWorkers;
SHM_QUEUE av_runningWorkers;
SHMEM_OFFSET av_startingWorker;
} AutoVacuumShmemStruct;
static AutoVacuumShmemStruct *AutoVacuumShmem;
*************** StartAutoVacLauncher(void)
*** 316,344 ****
/*
* Main loop for the autovacuum launcher process.
- *
- * The signalling between launcher and worker is as follows:
- *
- * When the worker has finished starting up, it stores its PID in wi_workerpid
- * and sends a SIGUSR1 signal to the launcher. The launcher then knows that
- * the postmaster is ready to start a new worker. We do it this way because
- * otherwise we risk calling SendPostmasterSignal() when the postmaster hasn't
- * yet processed the last one, in which case the second signal would be lost.
- * This is only useful when two workers need to be started close to one
- * another, which should be rare but it's possible.
- *
- * When a worker exits, it resets the WorkerInfo struct and puts it back into
- * the free list. If there is no free worker slot, it will also signal the
- * launcher, which then wakes up and can launch a new worker if it needs to.
- * Note that we only need to do it when there's no free worker slot, because
- * otherwise there is no need -- the launcher would be awakened normally per
- * schedule.
- *
- * There is a potential problem if, for some reason, a worker starts and is not
- * able to bootstrap itself correctly. To prevent this situation from starving
- * the whole system, the launcher checks the launch time of the "starting
- * worker". If it's too old (older than autovacuum_naptime seconds), it resets
- * the worker entry and puts it back into the free list.
*/
NON_EXEC_STATIC void
AutoVacLauncherMain(int argc, char *argv[])
--- 364,369 ----
*************** AutoVacLauncherMain(int argc, char *argv
*** 547,552 ****
--- 572,581 ----
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
+ /* shutdown requested in config file */
+ if (!autovacuum_start_daemon)
+ break;
+
/* rebalance in case the default cost parameters changed */
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
autovac_balance_cost();
*************** AutoVacLauncherMain(int argc, char *argv
*** 556,574 ****
rebuild_database_list(InvalidOid);
}
! /* a worker started up or finished */
if (got_SIGUSR1)
{
got_SIGUSR1 = false;
/* rebalance cost limits, if needed */
! if (AutoVacuumShmem->av_rebalance)
{
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
! AutoVacuumShmem->av_rebalance = false;
autovac_balance_cost();
LWLockRelease(AutovacuumLock);
}
}
/*
--- 585,624 ----
rebuild_database_list(InvalidOid);
}
! /*
! * a worker finished, or postmaster signalled failure to start a
! * worker
! */
if (got_SIGUSR1)
{
got_SIGUSR1 = false;
/* rebalance cost limits, if needed */
! if (AutoVacuumShmem->av_signal[AutoVacRebalance])
{
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
! AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
autovac_balance_cost();
LWLockRelease(AutovacuumLock);
}
+
+ if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
+ {
+ /*
+ * If the postmaster failed to start a new worker, we sleep
+ * for a little while and resend the signal. The new worker's
+ * state is still in memory, so this is sufficient. After
+ * that, we restart the main loop.
+ *
+ * XXX should we put a limit to the number of times we retry?
+ * I don't think it makes much sense, because a future start
+ * of a worker will continue to fail in the same way.
+ */
+ AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
+ pg_usleep(100000L); /* 100ms */
+ SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
+ continue;
+ }
}
/*
*************** AutoVacLauncherMain(int argc, char *argv
*** 578,606 ****
* worker is still starting up.
*/
LWLockAcquire(AutovacuumLock, LW_SHARED);
can_launch = (AutoVacuumShmem->av_freeWorkers != INVALID_OFFSET);
! if (can_launch && AutoVacuumShmem->av_startingWorker != INVALID_OFFSET)
{
! WorkerInfo worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
! if (current_time == 0)
! current_time = GetCurrentTimestamp();
/*
* We can't launch another worker when another one is still
* starting up, so just sleep for a bit more; that worker will wake
* us up again as soon as it's ready. We will only wait
! * autovacuum_naptime seconds for this to happen however. Note
! * that failure to connect to a particular database is not a
! * problem here, because the worker removes itself from the
! * startingWorker pointer before trying to connect; only low-level
! * problems, like fork() failure, can get us here.
*/
if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
! autovacuum_naptime * 1000))
{
LWLockRelease(AutovacuumLock);
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
--- 628,661 ----
* worker is still starting up.
*/
+ current_time = GetCurrentTimestamp();
LWLockAcquire(AutovacuumLock, LW_SHARED);
can_launch = (AutoVacuumShmem->av_freeWorkers != INVALID_OFFSET);
! if (AutoVacuumShmem->av_startingWorker != INVALID_OFFSET)
{
! int waittime;
! WorkerInfo worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
/*
* We can't launch another worker when another one is still
* starting up, so just sleep for a bit more; that worker will wake
* us up again as soon as it's ready. We will only wait
! * autovacuum_naptime seconds (up to a maximum of 60 seconds) for
! * this to happen however. Note that failure to connect to a
! * particular database is not a problem here, because the worker
! * removes itself from the startingWorker pointer before trying to
! * connect. Problems detected by the postmaster (like fork()
! * failure) are also reported and handled differently. The only
! * problems that may cause this code to fire are errors in the
! * earlier sections of AutoVacWorkerMain, before the worker removes
! * the WorkerInfo from the startingWorker pointer.
*/
+ waittime = Min(autovacuum_naptime, 60) * 1000;
if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
! waittime))
{
LWLockRelease(AutovacuumLock);
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
*************** AutoVacLauncherMain(int argc, char *argv
*** 620,650 ****
worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
AutoVacuumShmem->av_startingWorker = INVALID_OFFSET;
}
}
else
- {
- /*
- * maybe the postmaster neglected this start signal --
- * resend it. Note: the constraints in
- * launcher_determine_sleep keep us from delivering signals too
- * quickly (at most once every 100ms).
- */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
can_launch = false;
- }
}
LWLockRelease(AutovacuumLock); /* either shared or exclusive */
if (can_launch)
{
Dlelem *elem;
elem = DLGetTail(DatabaseList);
- if (current_time == 0)
- current_time = GetCurrentTimestamp();
-
if (elem != NULL)
{
avl_dbase *avdb = DLE_VAL(elem);
--- 675,695 ----
worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
AutoVacuumShmem->av_startingWorker = INVALID_OFFSET;
+ elog(WARNING, "worker took too long to start; cancelled");
}
}
else
can_launch = false;
}
LWLockRelease(AutovacuumLock); /* either shared or exclusive */
+ /* We're OK to start a new worker */
if (can_launch)
{
Dlelem *elem;
elem = DLGetTail(DatabaseList);
if (elem != NULL)
{
avl_dbase *avdb = DLE_VAL(elem);
*************** launch_worker(TimestampTz now)
*** 1197,1202 ****
--- 1242,1258 ----
}
}
+ /*
+ * Called from postmaster to signal a failure to fork a process to become
+ * worker. The postmaster should kill(SIGUSR1) the launcher shortly
+ * after calling this function.
+ */
+ void
+ AutoVacWorkerFailed(void)
+ {
+ AutoVacuumShmem->av_signal[AutoVacForkFailed] = true;
+ }
+
/* SIGHUP: set flag to re-read config file at next convenient time */
static void
avl_sighup_handler(SIGNAL_ARGS)
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1435,1442 ****
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/*
! * beware of startingWorker being INVALID; this could happen if the
! * launcher thinks we've taking too long to start.
*/
if (AutoVacuumShmem->av_startingWorker != INVALID_OFFSET)
{
--- 1491,1499 ----
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/*
! * beware of startingWorker being INVALID; this should normally not happen,
! * but if a worker fails after forking and before this, the launcher might
! * have decided to remove it from the queue and start again.
*/
if (AutoVacuumShmem->av_startingWorker != INVALID_OFFSET)
{
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1463,1468 ****
--- 1520,1526 ----
else
{
/* no worker entry for me, go away */
+ elog(WARNING, "autovacuum worker started without a worker entry");
dbid = InvalidOid;
LWLockRelease(AutovacuumLock);
}
*************** FreeWorkerInfo(int code, Datum arg)
*** 1525,1543 ****
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/*
! * If this worker shuts down when there is no free worker slot, wake
! * the launcher up so that he can launch a new worker immediately if
! * required. We only save the launcher's PID in local memory here --
! * the actual signal will be sent when the PGPROC is recycled, because
! * that is when the new worker can actually be launched.
*
* We somewhat ignore the risk that the launcher changes its PID
* between we reading it and the actual kill; we expect ProcKill to be
* called shortly after us, and we assume that PIDs are not reused too
* quickly after a process exits.
*/
! if (AutoVacuumShmem->av_freeWorkers == INVALID_OFFSET)
! AutovacuumLauncherPid = AutoVacuumShmem->av_launcherpid;
SHMQueueDelete(&MyWorkerInfo->wi_links);
MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
--- 1583,1600 ----
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/*
! * Wake the launcher up so that he can launch a new worker immediately
! * if required. We only save the launcher's PID in local memory here;
! * the actual signal will be sent when the PGPROC is recycled. Note
! * that we always do this, so that the launcher can rebalance the cost
! * limit setting of the remaining workers.
*
* We somewhat ignore the risk that the launcher changes its PID
* between we reading it and the actual kill; we expect ProcKill to be
* called shortly after us, and we assume that PIDs are not reused too
* quickly after a process exits.
*/
! AutovacuumLauncherPid = AutoVacuumShmem->av_launcherpid;
SHMQueueDelete(&MyWorkerInfo->wi_links);
MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
*************** FreeWorkerInfo(int code, Datum arg)
*** 1556,1562 ****
* now that we're inactive, cause a rebalancing of the surviving
* workers
*/
! AutoVacuumShmem->av_rebalance = true;
LWLockRelease(AutovacuumLock);
}
}
--- 1613,1619 ----
* now that we're inactive, cause a rebalancing of the surviving
* workers
*/
! AutoVacuumShmem->av_signal[AutoVacRebalance] = true;
LWLockRelease(AutovacuumLock);
}
}
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.527
diff -c -p -r1.527 postmaster.c
*** src/backend/postmaster/postmaster.c 22 Mar 2007 19:53:30 -0000 1.527
--- src/backend/postmaster/postmaster.c 20 Jun 2007 15:44:31 -0000
*************** StartAutovacuumWorker(void)
*** 3830,3861 ****
return;
bn = (Backend *) malloc(sizeof(Backend));
! if (!bn)
{
! ereport(LOG,
! (errcode(ERRCODE_OUT_OF_MEMORY),
! errmsg("out of memory")));
! return;
! }
!
! bn->pid = StartAutoVacWorker();
! bn->is_autovacuum = true;
! /* we don't need a cancel key */
! if (bn->pid > 0)
! {
! DLAddHead(BackendList, DLNewElem(bn));
#ifdef EXEC_BACKEND
! ShmemBackendArrayAdd(bn);
#endif
! }
! else
! {
! /* not much we can do */
! ereport(LOG,
! (errmsg("could not fork new process for autovacuum: %m")));
free(bn);
}
}
/*
--- 3830,3864 ----
return;
bn = (Backend *) malloc(sizeof(Backend));
! if (bn)
{
! bn->pid = StartAutoVacWorker();
! bn->is_autovacuum = true;
! /* we don't need a cancel key */
! if (bn->pid > 0)
! {
! DLAddHead(BackendList, DLNewElem(bn));
#ifdef EXEC_BACKEND
! ShmemBackendArrayAdd(bn);
#endif
! /* all OK */
! return;
! }
!
! /*
! * fork failed, fall through to report -- actual error message was
! * logged by StartAutoVacWorker
! */
free(bn);
}
+ else
+ elog(LOG, "out of memory");
+
+ /* report the failure to the launcher */
+ AutoVacWorkerFailed();
+ if (AutoVacPID != 0)
+ kill(AutoVacPID, SIGUSR1);
}
/*
Index: src/include/postmaster/autovacuum.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/postmaster/autovacuum.h,v
retrieving revision 1.10
diff -c -p -r1.10 autovacuum.h
*** src/include/postmaster/autovacuum.h 18 Apr 2007 16:44:18 -0000 1.10
--- src/include/postmaster/autovacuum.h 18 Jun 2007 18:37:30 -0000
*************** extern bool IsAutoVacuumWorkerProcess(vo
*** 42,47 ****
--- 42,49 ----
extern void autovac_init(void);
extern int StartAutoVacLauncher(void);
extern int StartAutoVacWorker(void);
+ /* called from postmaster when a worker could not be forked */
+ extern void AutoVacWorkerFailed(void);
/* autovacuum cost-delay balancer */
extern void AutoVacuumUpdateDelay(void);
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org