On Thu, Sep 26, 2019 at 9:48 PM Alvaro Herrera <alvhe...@2ndquadrant.com> wrote:
> On 2019-Aug-26, Magnus Hagander wrote: > > > OK, let's try this again :) > > > > This is work mainly based on the first version of the online checksums > > patch, but based on top of Andres' WIP patchset for global barriers ( > > > https://www.postgresql.org/message-id/20181030051643.elbxjww5jjgnjaxg%40alap3.anarazel.de > > ) > > > > Andres' patch has been enhanced with wait events per > > > https://www.postgresql.org/message-id/CABUevEwy4LUFqePC5YzanwtzyDDpYvgrj6R5WNznwrO5ouVg1w%40mail.gmail.com > > . > > Travis says your SGML doesn't compile (maybe you just forgot to "git > add" and edit allfiles.sgml?): > Nope, even easier -- the reference pgVerifyChecksums was renamed to pgChecksums and for some reason we missed that in the merge. I've rebased again on top of today's master, but that was the only change I had to make. > Other than bots, this patch doesn't seem to have attracted any reviewers > this time around. Perhaps you need to bribe someone? (Maybe "how sad > your committer SSH key stopped working" would do?) > Hmm. I don't think that's a bribe, that's a threat. However, maybe it will work. -- Magnus Hagander Me: https://www.hagander.net/ <http://www.hagander.net/> Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/>
From a52a74193015c8701bf40bb87321dfd56e0dab76 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Mon, 29 Oct 2018 10:14:15 -0700 Subject: [PATCH 1/2] WIP: global barriers This is a squash of three patches from Andres: * Use procsignal_sigusr1_handler for all shmem connected bgworkers. * Use procsignal_sigusr1_handler in all auxiliary processes. * WIP: global barriers. And one from Magnus: * Wait event for global barriers --- src/backend/postmaster/autovacuum.c | 3 +- src/backend/postmaster/bgworker.c | 31 +++++--- src/backend/postmaster/bgwriter.c | 24 ++---- src/backend/postmaster/checkpointer.c | 19 ++--- src/backend/postmaster/pgstat.c | 3 + src/backend/postmaster/startup.c | 18 ++--- src/backend/postmaster/walwriter.c | 17 +--- src/backend/replication/walreceiver.c | 20 +---- src/backend/storage/buffer/bufmgr.c | 4 + src/backend/storage/ipc/procsignal.c | 141 ++++++++++++++++++++++++++++++++++ src/backend/storage/lmgr/proc.c | 20 +++++ src/backend/tcop/postgres.c | 7 ++ src/include/pgstat.h | 1 + src/include/storage/proc.h | 9 +++ src/include/storage/procsignal.h | 23 +++++- 15 files changed, 255 insertions(+), 85 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 073f313337..24e28dd3a3 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -649,8 +649,9 @@ AutoVacLauncherMain(int argc, char *argv[]) ResetLatch(MyLatch); - /* Process sinval catchup interrupts that happened while sleeping */ + /* Process pending interrupts. */ ProcessCatchupInterrupt(); + ProcessGlobalBarrierIntterupt(); /* the normal shutdown case */ if (got_SIGTERM) diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index b66b517aca..f300f9285b 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -734,23 +734,32 @@ StartBackgroundWorker(void) /* * Set up signal handlers. 
*/ + + + /* + * SIGINT is used to signal canceling the current action for processes + * able to run queries. + */ if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) - { - /* - * SIGINT is used to signal canceling the current action - */ pqsignal(SIGINT, StatementCancelHandler); - pqsignal(SIGUSR1, procsignal_sigusr1_handler); - pqsignal(SIGFPE, FloatExceptionHandler); - - /* XXX Any other handlers needed here? */ - } else - { pqsignal(SIGINT, SIG_IGN); + + /* + * Everything with a PGPROC should be able to receive procsignal.h style + * signals. + */ + if (worker->bgw_flags & (BGWORKER_BACKEND_DATABASE_CONNECTION | + BGWORKER_SHMEM_ACCESS)) + pqsignal(SIGUSR1, procsignal_sigusr1_handler); + else pqsignal(SIGUSR1, bgworker_sigusr1_handler); + + if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) + pqsignal(SIGFPE, FloatExceptionHandler); + else pqsignal(SIGFPE, SIG_IGN); - } + pqsignal(SIGTERM, bgworker_die); pqsignal(SIGHUP, SIG_IGN); diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 8ec16a3fb8..80a8e3cf4b 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -51,6 +51,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/proc.h" +#include "storage/procsignal.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" @@ -97,7 +98,6 @@ static volatile sig_atomic_t shutdown_requested = false; static void bg_quickdie(SIGNAL_ARGS); static void BgSigHupHandler(SIGNAL_ARGS); static void ReqShutdownHandler(SIGNAL_ARGS); -static void bgwriter_sigusr1_handler(SIGNAL_ARGS); /* @@ -115,10 +115,7 @@ BackgroundWriterMain(void) WritebackContext wb_context; /* - * Properly accept or ignore signals the postmaster might send us. - * - * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1 - * handler is still needed for latch wakeups. + * Properly accept or ignore signals that might be sent to us. 
*/ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); @@ -126,7 +123,7 @@ BackgroundWriterMain(void) pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, bgwriter_sigusr1_handler); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* @@ -261,6 +258,10 @@ BackgroundWriterMain(void) proc_exit(0); /* done */ } + /* Process all pending interrupts. */ + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); + /* * Do one cycle of dirty-buffer writing. */ @@ -428,14 +429,3 @@ ReqShutdownHandler(SIGNAL_ARGS) errno = save_errno; } - -/* SIGUSR1: used for latch wakeups */ -static void -bgwriter_sigusr1_handler(SIGNAL_ARGS) -{ - int save_errno = errno; - - latch_sigusr1_handler(); - - errno = save_errno; -} diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 61544f65ad..def9aa87d8 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -54,6 +54,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/proc.h" +#include "storage/procsignal.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" @@ -179,7 +180,6 @@ static void UpdateSharedMemoryConfig(void); static void chkpt_quickdie(SIGNAL_ARGS); static void ChkptSigHupHandler(SIGNAL_ARGS); static void ReqCheckpointHandler(SIGNAL_ARGS); -static void chkpt_sigusr1_handler(SIGNAL_ARGS); static void ReqShutdownHandler(SIGNAL_ARGS); @@ -211,7 +211,7 @@ CheckpointerMain(void) pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, chkpt_sigusr1_handler); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */ /* @@ -346,6 +346,10 @@ CheckpointerMain(void) /* Clear any already-pending wakeups */ 
ResetLatch(MyLatch); + /* Process all pending interrupts. */ + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); + /* * Process any requests or signals received recently. */ @@ -853,17 +857,6 @@ ReqCheckpointHandler(SIGNAL_ARGS) errno = save_errno; } -/* SIGUSR1: used for latch wakeups */ -static void -chkpt_sigusr1_handler(SIGNAL_ARGS) -{ - int save_errno = errno; - - latch_sigusr1_handler(); - - errno = save_errno; -} - /* SIGUSR2: set flag to run a shutdown checkpoint and exit */ static void ReqShutdownHandler(SIGNAL_ARGS) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 011076c3e3..819381a2ae 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3765,6 +3765,9 @@ pgstat_get_wait_ipc(WaitEventIPC w) case WAIT_EVENT_EXECUTE_GATHER: event_name = "ExecuteGather"; break; + case WAIT_EVENT_GLOBAL_BARRIER: + event_name = "GlobalBarrier"; + break; case WAIT_EVENT_HASH_BATCH_ALLOCATING: event_name = "Hash/Batch/Allocating"; break; diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 5048a2c2aa..da0a670bdf 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -30,6 +30,7 @@ #include "storage/ipc.h" #include "storage/latch.h" #include "storage/pmsignal.h" +#include "storage/procsignal.h" #include "storage/standby.h" #include "utils/guc.h" #include "utils/timeout.h" @@ -50,7 +51,6 @@ static volatile sig_atomic_t in_restore_command = false; /* Signal handlers */ static void startupproc_quickdie(SIGNAL_ARGS); -static void StartupProcSigUsr1Handler(SIGNAL_ARGS); static void StartupProcTriggerHandler(SIGNAL_ARGS); static void StartupProcSigHupHandler(SIGNAL_ARGS); @@ -87,17 +87,6 @@ startupproc_quickdie(SIGNAL_ARGS) } -/* SIGUSR1: let latch facility handle the signal */ -static void -StartupProcSigUsr1Handler(SIGNAL_ARGS) -{ - int save_errno = errno; - - latch_sigusr1_handler(); - - errno = save_errno; -} - /* SIGUSR2: set 
flag to finish recovery */ static void StartupProcTriggerHandler(SIGNAL_ARGS) @@ -162,6 +151,9 @@ HandleStartupProcInterrupts(void) */ if (IsUnderPostmaster && !PostmasterIsAlive()) exit(1); + + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); } @@ -181,7 +173,7 @@ StartupProcessMain(void) pqsignal(SIGQUIT, startupproc_quickdie); /* hard crash time */ InitializeTimeouts(); /* establishes SIGALRM handler */ pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, StartupProcSigUsr1Handler); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, StartupProcTriggerHandler); /* diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index a6fdba3f41..19120aa6e1 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -55,6 +55,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/proc.h" +#include "storage/procsignal.h" #include "storage/smgr.h" #include "utils/guc.h" #include "utils/hsearch.h" @@ -86,7 +87,6 @@ static volatile sig_atomic_t shutdown_requested = false; static void wal_quickdie(SIGNAL_ARGS); static void WalSigHupHandler(SIGNAL_ARGS); static void WalShutdownHandler(SIGNAL_ARGS); -static void walwriter_sigusr1_handler(SIGNAL_ARGS); /* * Main entry point for walwriter process @@ -114,7 +114,7 @@ WalWriterMain(void) pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, walwriter_sigusr1_handler); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* not used */ /* @@ -255,6 +255,8 @@ WalWriterMain(void) /* Normal exit from the walwriter is here */ proc_exit(0); /* done */ } + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); /* * Do what we're here for; then, if XLogBackgroundFlush() found useful @@ -337,14 +339,3 @@ WalShutdownHandler(SIGNAL_ARGS) errno = save_errno; } - -/* SIGUSR1: used for latch wakeups */ -static void 
-walwriter_sigusr1_handler(SIGNAL_ARGS) -{ - int save_errno = errno; - - latch_sigusr1_handler(); - - errno = save_errno; -} diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 6abc780778..9acdbdd7c9 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -63,6 +63,7 @@ #include "storage/ipc.h" #include "storage/pmsignal.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/pg_lsn.h" @@ -125,7 +126,6 @@ static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); /* Signal handlers */ static void WalRcvSigHupHandler(SIGNAL_ARGS); -static void WalRcvSigUsr1Handler(SIGNAL_ARGS); static void WalRcvShutdownHandler(SIGNAL_ARGS); static void WalRcvQuickDieHandler(SIGNAL_ARGS); @@ -147,9 +147,8 @@ void ProcessWalRcvInterrupts(void) { /* - * Although walreceiver interrupt handling doesn't use the same scheme as - * regular backends, call CHECK_FOR_INTERRUPTS() to make sure we receive - * any incoming signals on Win32. + * The CHECK_FOR_INTERRUPTS() call ensures global barriers are handled, + * and incoming signals on Win32 are received. 
*/ CHECK_FOR_INTERRUPTS(); @@ -252,7 +251,7 @@ WalReceiverMain(void) pqsignal(SIGQUIT, WalRcvQuickDieHandler); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, WalRcvSigUsr1Handler); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* Reset some signals that are accepted by postmaster but not here */ @@ -766,17 +765,6 @@ WalRcvSigHupHandler(SIGNAL_ARGS) } -/* SIGUSR1: used by latch mechanism */ -static void -WalRcvSigUsr1Handler(SIGNAL_ARGS) -{ - int save_errno = errno; - - latch_sigusr1_handler(); - - errno = save_errno; -} - /* SIGTERM: set flag for ProcessWalRcvInterrupts */ static void WalRcvShutdownHandler(SIGNAL_ARGS) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 483f705305..c8c48d8497 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1885,6 +1885,10 @@ BufferSync(int flags) cur_tsid = CkptBufferIds[i].tsId; + /* XXX: need a more principled approach here */ + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); + /* * Grow array of per-tablespace status structs, every time a new * tablespace is found. 
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 7605b2c367..9aed52df4a 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -18,8 +18,10 @@ #include <unistd.h> #include "access/parallel.h" +#include "access/twophase.h" #include "commands/async.h" #include "miscadmin.h" +#include "pgstat.h" #include "replication/walsender.h" #include "storage/latch.h" #include "storage/ipc.h" @@ -62,9 +64,11 @@ typedef struct static ProcSignalSlot *ProcSignalSlots = NULL; static volatile ProcSignalSlot *MyProcSignalSlot = NULL; +volatile sig_atomic_t GlobalBarrierInterruptPending = false; static bool CheckProcSignal(ProcSignalReason reason); static void CleanupProcSignalState(int status, Datum arg); +static void HandleGlobalBarrierSignal(void); /* * ProcSignalShmemSize @@ -262,6 +266,8 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) { int save_errno = errno; + pg_read_barrier(); + if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT)) HandleCatchupInterrupt(); @@ -292,9 +298,144 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_GLOBAL_BARRIER)) + HandleGlobalBarrierSignal(); + SetLatch(MyLatch); latch_sigusr1_handler(); errno = save_errno; } + +/* + * + */ +uint64 +EmitGlobalBarrier(GlobalBarrierKind kind) +{ + uint64 generation; + + /* + * Broadcast flag, without incrementing generation. This ensures that all + * backends could know about this. + * + * It's OK if the to-be-signalled backend enters after our check here. A + * new backend should have current settings. 
+ */ + for (int i = 0; i < (MaxBackends + max_prepared_xacts); i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + + if (proc->pid == 0) + continue; + + pg_atomic_fetch_or_u32(&proc->barrierFlags, (uint32) kind); + + elog(LOG, "setting flags for %u", proc->pid); + } + + /* + * Broadcast flag generation. If any backend joins after this, it's either + * going to be signalled below, or has read a new enough generation that + * WaitForGlobalBarrier() will not wait for it. + */ + generation = pg_atomic_add_fetch_u64(&ProcGlobal->globalBarrierGen, 1); + + /* Wake up each backend (including ours) */ + for (int i = 0; i < NumProcSignalSlots; i++) + { + ProcSignalSlot *slot = &ProcSignalSlots[i]; + + if (slot->pss_pid == 0) + continue; + + /* Atomically set the proper flag */ + slot->pss_signalFlags[PROCSIG_GLOBAL_BARRIER] = true; + + pg_write_barrier(); + + /* Send signal */ + kill(slot->pss_pid, SIGUSR1); + } + + return generation; +} + +/* + * Wait for all barriers to be absorbed. This guarantees that all changes + * requested by a specific EmitGlobalBarrier() have taken effect. + */ +void +WaitForGlobalBarrier(uint64 generation) +{ + pgstat_report_wait_start(WAIT_EVENT_GLOBAL_BARRIER); + for (int i = 0; i < (MaxBackends + max_prepared_xacts); i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + uint64 oldval; + + pg_memory_barrier(); + oldval = pg_atomic_read_u64(&proc->barrierGen); + + /* + * Unused proc slots get their barrierGen set to UINT64_MAX, so we + * need not care about that. + */ + while (oldval < generation) + { + CHECK_FOR_INTERRUPTS(); + pg_usleep(10000); + + pg_memory_barrier(); + oldval = pg_atomic_read_u64(&proc->barrierGen); + } + } + pgstat_report_wait_end(); +} + +/* + * Absorb the global barrier procsignal. + */ +static void +HandleGlobalBarrierSignal(void) +{ + InterruptPending = true; + GlobalBarrierInterruptPending = true; + SetLatch(MyLatch); +} + +/* + * Perform global barrier related interrupt checking. 
If CHECK_FOR_INTERRUPTS + * is used, it'll be called by that, if a backend type doesn't do so, it has + * to be called explicitly. + */ +void +ProcessGlobalBarrierIntterupt(void) +{ + if (GlobalBarrierInterruptPending) + { + uint64 generation; + uint32 flags; + + GlobalBarrierInterruptPending = false; + + generation = pg_atomic_read_u64(&ProcGlobal->globalBarrierGen); + pg_memory_barrier(); + flags = pg_atomic_exchange_u32(&MyProc->barrierFlags, 0); + pg_memory_barrier(); + + if (flags & GLOBBAR_CHECKSUM) + { + /* + * By virtue of getting here (i.e. interrupts being processed), we + * know that this backend won't have any in-progress writes (which + * might have missed the checksum change). + */ + } + + pg_atomic_write_u64(&MyProc->barrierGen, generation); + + elog(LOG, "processed interrupts for %u", MyProcPid); + } +} diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 498373fd0e..ae52b9e9ac 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -190,6 +190,7 @@ InitProcGlobal(void) ProcGlobal->checkpointerLatch = NULL; pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PGPROCNO); pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PGPROCNO); + pg_atomic_init_u64(&ProcGlobal->globalBarrierGen, 1); /* * Create and initialize all the PGPROC structures we'll need. 
There are @@ -284,6 +285,9 @@ InitProcGlobal(void) */ pg_atomic_init_u32(&(procs[i].procArrayGroupNext), INVALID_PGPROCNO); pg_atomic_init_u32(&(procs[i].clogGroupNext), INVALID_PGPROCNO); + + pg_atomic_init_u32(&procs[i].barrierFlags, 0); + pg_atomic_init_u64(&procs[i].barrierGen, PG_UINT64_MAX); } /* @@ -442,6 +446,12 @@ InitProcess(void) MyProc->clogGroupMemberLsn = InvalidXLogRecPtr; Assert(pg_atomic_read_u32(&MyProc->clogGroupNext) == INVALID_PGPROCNO); + /* pairs with globalBarrierGen increase */ + pg_memory_barrier(); + pg_atomic_write_u32(&MyProc->barrierFlags, 0); + pg_atomic_write_u64(&MyProc->barrierGen, + pg_atomic_read_u64(&ProcGlobal->globalBarrierGen)); + /* * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch * on it. That allows us to repoint the process latch, which so far @@ -585,6 +595,13 @@ InitAuxiliaryProcess(void) MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; + + /* pairs with globalBarrierGen increase */ + pg_memory_barrier(); + pg_atomic_write_u32(&MyProc->barrierFlags, 0); + pg_atomic_write_u64(&MyProc->barrierGen, + pg_atomic_read_u64(&ProcGlobal->globalBarrierGen)); + #ifdef USE_ASSERT_CHECKING { int i; @@ -883,6 +900,9 @@ ProcKill(int code, Datum arg) LWLockRelease(leader_lwlock); } + pg_atomic_write_u32(&MyProc->barrierFlags, 0); + pg_atomic_write_u64(&MyProc->barrierGen, PG_UINT64_MAX); + /* * Reset MyLatch to the process local one. 
This is so that signal * handlers et al can continue using the latch after the shared latch diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index e8d8e6f828..976e966565 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -612,6 +612,10 @@ ProcessClientWriteInterrupt(bool blocked) SetLatch(MyLatch); } + /* safe to handle during client communication */ + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); + errno = save_errno; } @@ -3159,6 +3163,9 @@ ProcessInterrupts(void) if (ParallelMessagePending) HandleParallelMessages(); + + if (GlobalBarrierInterruptPending) + ProcessGlobalBarrierIntterupt(); } diff --git a/src/include/pgstat.h b/src/include/pgstat.h index fe076d823d..c997add881 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -824,6 +824,7 @@ typedef enum WAIT_EVENT_CHECKPOINT_DONE, WAIT_EVENT_CHECKPOINT_START, WAIT_EVENT_EXECUTE_GATHER, + WAIT_EVENT_GLOBAL_BARRIER, WAIT_EVENT_HASH_BATCH_ALLOCATING, WAIT_EVENT_HASH_BATCH_ELECTING, WAIT_EVENT_HASH_BATCH_LOADING, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 281e1db725..f108ac52c6 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -203,6 +203,13 @@ struct PGPROC PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ dlist_head lockGroupMembers; /* list of members, if I'm a leader */ dlist_node lockGroupLink; /* my member link, if I'm a member */ + + /* + * Support for "super barriers". These can be used to e.g. make sure that + * all backends have acknowledged a configuration change. + */ + pg_atomic_uint64 barrierGen; + pg_atomic_uint32 barrierFlags; }; /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. 
*/ @@ -272,6 +279,8 @@ typedef struct PROC_HDR int startupProcPid; /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ int startupBufferPinWaitBufId; + + pg_atomic_uint64 globalBarrierGen; } PROC_HDR; extern PGDLLIMPORT PROC_HDR *ProcGlobal; diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 05b186a05c..a978db9b24 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -14,8 +14,9 @@ #ifndef PROCSIGNAL_H #define PROCSIGNAL_H -#include "storage/backendid.h" +#include <signal.h> +#include "storage/backendid.h" /* * Reasons for signalling a Postgres child process (a backend or an auxiliary @@ -42,6 +43,8 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_GLOBAL_BARRIER, + NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; @@ -57,4 +60,22 @@ extern int SendProcSignal(pid_t pid, ProcSignalReason reason, extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +/* + * These collapse. The flag values better be distinct bits. + */ +typedef enum GlobalBarrierKind +{ + /* + * Guarantee that all processes have the correct view of whether checksums + * enabled/disabled, and no writes are in-progress with previous value(s). + */ + GLOBBAR_CHECKSUM = 1 << 0 +} GlobalBarrierKind; + +extern uint64 EmitGlobalBarrier(GlobalBarrierKind kind); +extern void WaitForGlobalBarrier(uint64 generation); +extern void ProcessGlobalBarrierIntterupt(void); + +extern PGDLLIMPORT volatile sig_atomic_t GlobalBarrierInterruptPending; + #endif /* PROCSIGNAL_H */ -- 2.11.0
From 08ea831dc3e6b22ce914fea50436e0dba0345816 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson <dan...@yesql.se> Date: Wed, 10 Jul 2019 11:25:25 +0200 Subject: [PATCH 2/2] Online checksums patch for v13 Updated from previous patches and now using the global barriers --- doc/src/sgml/func.sgml | 65 ++ doc/src/sgml/ref/initdb.sgml | 7 +- doc/src/sgml/wal.sgml | 81 +++ src/backend/access/rmgrdesc/xlogdesc.c | 16 + src/backend/access/transam/xlog.c | 131 +++- src/backend/access/transam/xlogfuncs.c | 57 ++ src/backend/catalog/system_views.sql | 5 + src/backend/postmaster/Makefile | 5 +- src/backend/postmaster/bgworker.c | 7 + src/backend/postmaster/checksumhelper.c | 909 ++++++++++++++++++++++++++++ src/backend/postmaster/pgstat.c | 6 + src/backend/replication/basebackup.c | 2 +- src/backend/replication/logical/decode.c | 1 + src/backend/storage/ipc/ipci.c | 2 + src/backend/storage/lmgr/lwlocknames.txt | 1 + src/backend/storage/page/README | 3 +- src/backend/storage/page/bufpage.c | 6 +- src/backend/utils/adt/pgstatfuncs.c | 4 +- src/backend/utils/misc/guc.c | 36 +- src/bin/pg_upgrade/controldata.c | 9 + src/bin/pg_upgrade/pg_upgrade.h | 2 +- src/include/access/xlog.h | 10 +- src/include/access/xlog_internal.h | 7 + src/include/catalog/pg_control.h | 1 + src/include/catalog/pg_proc.dat | 16 + src/include/pgstat.h | 4 +- src/include/postmaster/checksumhelper.h | 31 + src/include/storage/bufpage.h | 1 + src/include/storage/checksum.h | 7 + src/test/Makefile | 3 +- src/test/checksum/.gitignore | 2 + src/test/checksum/Makefile | 24 + src/test/checksum/README | 22 + src/test/checksum/t/001_standby_checksum.pl | 104 ++++ 34 files changed, 1552 insertions(+), 35 deletions(-) create mode 100644 src/backend/postmaster/checksumhelper.c create mode 100644 src/include/postmaster/checksumhelper.h create mode 100644 src/test/checksum/.gitignore create mode 100644 src/test/checksum/Makefile create mode 100644 src/test/checksum/README create mode 100644 
src/test/checksum/t/001_standby_checksum.pl diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 0aa399dc2f..bc1f128574 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -21272,6 +21272,71 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); </sect2> + <sect2 id="functions-admin-checksum"> + <title>Data Checksum Functions</title> + + <para> + The functions shown in <xref linkend="functions-checksums-table" /> can + be used to enable or disable data checksums in a running cluster. + See <xref linkend="checksums" /> for details. + </para> + + <table id="functions-checksums-table"> + <title>Checksum <acronym>SQL</acronym> Functions</title> + <tgroup cols="3"> + <thead> + <row> + <entry>Function</entry> + <entry>Return Type</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry> + <indexterm> + <primary>pg_enable_data_checksums</primary> + </indexterm> + <literal><function>pg_enable_data_checksums(<optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional>)</function></literal> + </entry> + <entry> + void + </entry> + <entry> + <para> + Initiates data checksums for the cluster. This will switch the data checksums mode + to <literal>in progress</literal> and start a background worker that will process + all data in the database and enable checksums for it. When all data pages have had + checksums enabled, the cluster will automatically switch to checksums + <literal>on</literal>. + </para> + <para> + If <parameter>cost_delay</parameter> and <parameter>cost_limit</parameter> are + specified, the speed of the process is throttled using the same principles as + <link linkend="runtime-config-resource-vacuum-cost">Cost-based Vacuum Delay</link>. 
+ </para> + </entry> + </row> + <row> + <entry> + <indexterm> + <primary>pg_disable_data_checksums</primary> + </indexterm> + <literal><function>pg_disable_data_checksums()</function></literal> + </entry> + <entry> + void + </entry> + <entry> + Disables data checksums for the cluster. + </entry> + </row> + </tbody> + </tgroup> + </table> + + </sect2> + <sect2 id="functions-admin-dbobject"> <title>Database Object Management Functions</title> diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index da5c8f5307..b545ad73cb 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -217,9 +217,10 @@ PostgreSQL documentation Use checksums on data pages to help detect corruption by the I/O system that would otherwise be silent. Enabling checksums may incur a noticeable performance penalty. If set, checksums - are calculated for all objects, in all databases. All checksum - failures will be reported in the - <xref linkend="pg-stat-database-view"/> view. + are calculated for all objects, in all databases. All + checksum failures will be reported in the <xref + linkend="pg-stat-database-view"/> view. + See <xref linkend="checksums" /> for details. </para> </listitem> </varlistentry> diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 4eb8feb903..7838f3616a 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -230,6 +230,87 @@ </para> </sect1> + <sect1 id="checksums"> + <title>Data checksums</title> + <indexterm> + <primary>checksums</primary> + </indexterm> + + <para> + Data pages are not checksum protected by default, but this can optionally be enabled for a cluster. + When enabled, each data page will be assigned a checksum that is updated when the page is + written and verified every time the page is read. Only data pages are protected by checksums, + internal data structures and temporary files are not. 
+ </para> + + <para> + Checksums are normally enabled when the cluster is initialized using + <link linkend="app-initdb-data-checksums"><application>initdb</application></link>. They + can also be enabled or disabled at runtime. In all cases, checksums are enabled or disabled + at the full cluster level, and cannot be specified individually for databases or tables. + </para> + + <para> + The current state of checksums in the cluster can be verified by viewing the value + of the read-only configuration variable <xref linkend="guc-data-checksums" /> by + issuing the command <command>SHOW data_checksums</command>. + </para> + + <para> + When attempting to recover from corrupt data it may be necessary to bypass the checksum + protection in order to recover data. To do this, temporarily set the configuration parameter + <xref linkend="guc-ignore-checksum-failure" />. + </para> + + <sect2 id="checksums-enable-disable"> + <title>On-line enabling of checksums</title> + + <para> + Checksums can be enabled or disabled online, by calling the appropriate + <link linkend="functions-admin-checksum">functions</link>. + Disabling of checksums takes effect immediately when the function is called. + </para> + + <para> + Enabling checksums will put the cluster in <literal>inprogress</literal> mode. + During this time, checksums will be written but not verified. In addition to + this, a background worker process is started that enables checksums on all + existing data in the cluster. Once this worker has completed processing all + databases in the cluster, the checksum mode will automatically switch to + <literal>on</literal>. + </para> + + <para> + The process will initially wait for all open transactions to finish before + it starts, so that it can be certain that there are no tables that have been + created inside a transaction that has not committed yet and thus would not + be visible to the process enabling checksums. 
It will also, for each database, + wait for all pre-existing temporary tables to get removed before it finishes. + If long-lived temporary tables are used in the application it may be necessary + to terminate these application connections to allow the process to complete. + Information about open transactions and connections with temporary tables is + written to log. + </para> + + <para> + If the cluster is stopped while in <literal>inprogress</literal> mode, for + any reason, then this process must be restarted manually. To do this, + re-execute the function <function>pg_enable_data_checksums()</function> + once the cluster has been restarted. It is not possible to resume the work, + the process has to start from scratch. + </para> + + <note> + <para> + Enabling checksums can cause significant I/O to the system, as most of the + database pages will need to be rewritten, and will be written both to the + data files and the WAL. + </para> + </note> + + </sect2> + </sect1> + <sect1 id="wal-intro"> <title>Write-Ahead Logging (<acronym>WAL</acronym>)</title> diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 33060f3042..ced4ab6d78 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -18,6 +18,7 @@ #include "access/xlog.h" #include "access/xlog_internal.h" #include "catalog/pg_control.h" +#include "storage/bufpage.h" #include "utils/guc.h" #include "utils/timestamp.h" @@ -140,6 +141,18 @@ xlog_desc(StringInfo buf, XLogReaderState *record) xlrec.ThisTimeLineID, xlrec.PrevTimeLineID, timestamptz_to_str(xlrec.end_time)); } + else if (info == XLOG_CHECKSUMS) + { + xl_checksum_state xlrec; + + memcpy(&xlrec, rec, sizeof(xl_checksum_state)); + if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_VERSION) + appendStringInfo(buf, "on"); + else if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_INPROGRESS_VERSION) + appendStringInfo(buf, "inprogress"); + else + appendStringInfo(buf, "off"); + } 
} const char * @@ -185,6 +198,9 @@ xlog_identify(uint8 info) case XLOG_FPI_FOR_HINT: id = "FPI_FOR_HINT"; break; + case XLOG_CHECKSUMS: + id = "CHECKSUMS"; + break; } return id; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 61ba6b852e..a7adae44dd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -867,6 +867,7 @@ static void SetLatestXTime(TimestampTz xtime); static void SetCurrentChunkStartTime(TimestampTz xtime); static void CheckRequiredParameterValues(void); static void XLogReportParameters(void); +static void XlogChecksums(ChecksumType new_type); static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI); static void LocalSetXLogInsertAllowed(void); @@ -1049,7 +1050,7 @@ XLogInsertRecord(XLogRecData *rdata, Assert(RedoRecPtr < Insert->RedoRecPtr); RedoRecPtr = Insert->RedoRecPtr; } - doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); + doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites || DataChecksumsInProgress()); if (doPageWrites && (!prevDoPageWrites || @@ -4779,10 +4780,6 @@ ReadControlFile(void) (SizeOfXLogLongPHD - SizeOfXLogShortPHD); CalculateCheckpointSegments(); - - /* Make the initdb settings visible as GUC variables, too */ - SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no", - PGC_INTERNAL, PGC_S_OVERRIDE); } /* @@ -4819,12 +4816,93 @@ GetMockAuthenticationNonce(void) * Are checksums enabled for data pages? */ bool -DataChecksumsEnabled(void) +DataChecksumsNeedWrite(void) { Assert(ControlFile != NULL); return (ControlFile->data_checksum_version > 0); } +bool +DataChecksumsNeedVerify(void) +{ + Assert(ControlFile != NULL); + + /* + * Only verify checksums if they are fully enabled in the cluster. In + * inprogress state they are only updated, not verified. 
+ */ + return (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION); +} + +bool +DataChecksumsInProgress(void) +{ + Assert(ControlFile != NULL); + return (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION); +} + +void +SetDataChecksumsInProgress(void) +{ + Assert(ControlFile != NULL); + if (ControlFile->data_checksum_version > 0) + return; + + XlogChecksums(PG_DATA_CHECKSUM_INPROGRESS_VERSION); + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_VERSION; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM)); +} + +void +SetDataChecksumsOn(void) +{ + Assert(ControlFile != NULL); + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + + if (ControlFile->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_VERSION) + { + LWLockRelease(ControlFileLock); + elog(ERROR, "Checksums not in inprogress mode"); + } + + ControlFile->data_checksum_version = PG_DATA_CHECKSUM_VERSION; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM)); + + XlogChecksums(PG_DATA_CHECKSUM_VERSION); +} + +void +SetDataChecksumsOff(void) +{ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + + ControlFile->data_checksum_version = 0; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + WaitForGlobalBarrier(EmitGlobalBarrier(GLOBBAR_CHECKSUM)); + + XlogChecksums(0); +} + +/* guc hook */ +const char * +show_data_checksums(void) +{ + if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION) + return "on"; + else if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION) + return "inprogress"; + else + return "off"; +} + /* * Returns a fake LSN for unlogged relations. 
* @@ -7761,6 +7839,18 @@ StartupXLOG(void) CompleteCommitTsInitialization(); /* + * If we reach this point with checksums in inprogress state, we notify + * the user that they need to manually restart the process to enable + * checksums. + * This is because we cannot launch a dynamic background worker directly + * from here, it has to be launched from a regular backend. + */ + if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION) + ereport(WARNING, + (errmsg("checksum state is \"inprogress\" with no worker"), + errhint("Either disable or enable checksums by calling the pg_disable_data_checksums() or pg_enable_data_checksums() functions."))); + + /* * All done with end-of-recovery actions. * * Now allow backends to write WAL and update the control file status in @@ -9484,6 +9574,24 @@ XLogReportParameters(void) } /* + * Log the new state of checksums + */ +static void +XlogChecksums(ChecksumType new_type) +{ + xl_checksum_state xlrec; + XLogRecPtr recptr; + + xlrec.new_checksumtype = new_type; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xl_checksum_state)); + + recptr = XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS); + XLogFlush(recptr); +} + +/* * Update full_page_writes in shared memory, and write an * XLOG_FPW_CHANGE record if necessary. 
* @@ -9934,6 +10042,17 @@ xlog_redo(XLogReaderState *record) /* Keep track of full_page_writes */ lastFullPageWrites = fpw; } + else if (info == XLOG_CHECKSUMS) + { + xl_checksum_state state; + + memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state)); + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->data_checksum_version = state.new_checksumtype; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + } } #ifdef WAL_DEBUG diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 8f179887ab..4bd87e887c 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -25,6 +25,7 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" +#include "postmaster/checksumhelper.h" #include "pgstat.h" #include "replication/walreceiver.h" #include "storage/smgr.h" @@ -785,3 +786,59 @@ pg_promote(PG_FUNCTION_ARGS) (errmsg("server did not promote within %d seconds", wait_seconds))); PG_RETURN_BOOL(false); } + +/* + * Disables checksums for the cluster, unless already disabled. + * + * Has immediate effect - the checksums are set to off right away. + */ +Datum +disable_data_checksums(PG_FUNCTION_ARGS) +{ + /* + * If we don't need to write new checksums, then clearly they are already + * disabled. + */ + if (!DataChecksumsNeedWrite()) + ereport(ERROR, + (errmsg("data checksums already disabled"))); + + ShutdownChecksumHelperIfRunning(); + + SetDataChecksumsOff(); + + PG_RETURN_VOID(); +} + +/* + * Enables checksums for the cluster, unless already enabled. + * + * Supports vacuum-like cost-based throttling, to limit system load. + * Starts a background worker that updates checksums on existing data. 
+ */ +Datum +enable_data_checksums(PG_FUNCTION_ARGS) +{ + int cost_delay = PG_GETARG_INT32(0); + int cost_limit = PG_GETARG_INT32(1); + + if (cost_delay < 0) + ereport(ERROR, + (errmsg("cost delay cannot be less than zero"))); + if (cost_limit <= 0) + ereport(ERROR, + (errmsg("cost limit must be a positive value"))); + + /* + * Allow state change from "off" or from "inprogress", since this is how + * we restart the worker if necessary. + */ + if (DataChecksumsNeedVerify()) + ereport(ERROR, + (errmsg("data checksums already enabled"))); + + SetDataChecksumsInProgress(); + StartChecksumHelperLauncher(cost_delay, cost_limit); + + PG_RETURN_VOID(); +} diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 9fe4a4794a..6649c9afe2 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1155,6 +1155,11 @@ CREATE OR REPLACE FUNCTION RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote' PARALLEL SAFE; +CREATE OR REPLACE FUNCTION pg_enable_data_checksums ( + cost_delay int DEFAULT 0, cost_limit int DEFAULT 100) + RETURNS void STRICT VOLATILE LANGUAGE internal AS 'enable_data_checksums' + PARALLEL RESTRICTED; + -- legacy definition for compatibility with 9.3 CREATE OR REPLACE FUNCTION json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index 71c23211b2..ee8f8c1cd3 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -12,7 +12,8 @@ subdir = src/backend/postmaster top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o fork_process.o \ - pgarch.o pgstat.o postmaster.o startup.o syslogger.o walwriter.o +OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o checksumhelper.o \ + fork_process.o pgarch.o pgstat.o postmaster.o startup.o syslogger.o \ + walwriter.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index f300f9285b..f40b7044bd 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -20,6 +20,7 @@ #include "pgstat.h" #include "port/atomics.h" #include "postmaster/bgworker_internals.h" +#include "postmaster/checksumhelper.h" #include "postmaster/postmaster.h" #include "replication/logicallauncher.h" #include "replication/logicalworker.h" @@ -129,6 +130,12 @@ static const struct }, { "ApplyWorkerMain", ApplyWorkerMain + }, + { + "ChecksumHelperLauncherMain", ChecksumHelperLauncherMain + }, + { + "ChecksumHelperWorkerMain", ChecksumHelperWorkerMain } }; diff --git a/src/backend/postmaster/checksumhelper.c b/src/backend/postmaster/checksumhelper.c new file mode 100644 index 0000000000..06db05979c --- /dev/null +++ b/src/backend/postmaster/checksumhelper.c @@ -0,0 +1,909 @@ +/*------------------------------------------------------------------------- + * + * checksumhelper.c + * Background worker to walk the database and write checksums to pages + * + * When enabling data checksums on a cluster at initdb time, no extra process + * is required as each page is checksummed, and verified, on every access. When + * enabling checksums on an already running cluster, which was not initialized + * with checksums, this helper worker will ensure that all pages are + * checksummed before verification of the checksums is turned on. 
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/postmaster/checksumhelper.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/pg_database.h" +#include "commands/vacuum.h" +#include "common/relpath.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" +#include "postmaster/bgwriter.h" +#include "postmaster/checksumhelper.h" +#include "storage/bufmgr.h" +#include "storage/checksum.h" +#include "storage/lmgr.h" +#include "storage/ipc.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/ps_status.h" + + +typedef enum +{ + SUCCESSFUL = 0, + ABORTED, + FAILED +} ChecksumHelperResult; + +typedef struct ChecksumHelperShmemStruct +{ + /* + * Access to launcher_started and abort must be protected by + * ChecksumHelperLock. + */ + bool launcher_started; + bool abort; + + /* + * Access to other members can be done without a lock, as while they + * are in shared memory, they are never concurrently accessed. When + * a worker is running, the launcher is only waiting for that worker + * to finish. 
+ */ + ChecksumHelperResult success; + bool process_shared_catalogs; + /* Parameter values set on start */ + int cost_delay; + int cost_limit; +} ChecksumHelperShmemStruct; + +/* Shared memory segment for checksumhelper */ +static ChecksumHelperShmemStruct * ChecksumHelperShmem; + +/* Bookkeeping for work to do */ +typedef struct ChecksumHelperDatabase +{ + Oid dboid; + char *dbname; +} ChecksumHelperDatabase; + +typedef struct ChecksumHelperRelation +{ + Oid reloid; + char relkind; +} ChecksumHelperRelation; + +/* Prototypes */ +static List *BuildDatabaseList(void); +static List *BuildRelationList(bool include_shared); +static List *BuildTempTableList(void); +static ChecksumHelperResult ProcessDatabase(ChecksumHelperDatabase * db); +static void launcher_cancel_handler(SIGNAL_ARGS); + +/* + * Main entry point for checksumhelper launcher process. + */ +void +StartChecksumHelperLauncher(int cost_delay, int cost_limit) +{ + BackgroundWorker bgw; + BackgroundWorkerHandle *bgw_handle; + + LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE); + if (ChecksumHelperShmem->abort) + { + LWLockRelease(ChecksumHelperLock); + ereport(ERROR, + (errmsg("could not start checksumhelper: has been canceled"))); + } + + if (ChecksumHelperShmem->launcher_started) + { + /* Failed to set means somebody else started */ + LWLockRelease(ChecksumHelperLock); + ereport(ERROR, + (errmsg("could not start checksumhelper: already running"))); + } + + ChecksumHelperShmem->cost_delay = cost_delay; + ChecksumHelperShmem->cost_limit = cost_limit; + + memset(&bgw, 0, sizeof(bgw)); + bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + bgw.bgw_start_time = BgWorkerStart_RecoveryFinished; + snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres"); + snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ChecksumHelperLauncherMain"); + snprintf(bgw.bgw_name, BGW_MAXLEN, "checksumhelper launcher"); + snprintf(bgw.bgw_type, BGW_MAXLEN, "checksumhelper launcher"); + bgw.bgw_restart_time = 
BGW_NEVER_RESTART; + bgw.bgw_notify_pid = MyProcPid; + bgw.bgw_main_arg = (Datum) 0; + + ChecksumHelperShmem->launcher_started = true; + LWLockRelease(ChecksumHelperLock); + + if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle)) + { + LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE); + ChecksumHelperShmem->launcher_started = false; + LWLockRelease(ChecksumHelperLock); + ereport(ERROR, + (errmsg("failed to start checksum helper launcher"))); + } +} + +/* + * ShutdownChecksumHelperIfRunning + * Request shutdown of the checksumhelper + * + * This does not turn off processing immediately, it signals the checksum + * process to end when done with the current block. + */ +void +ShutdownChecksumHelperIfRunning(void) +{ + /* If the launcher isn't started, there is nothing to shut down */ + LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE); + if (ChecksumHelperShmem->launcher_started) + ChecksumHelperShmem->abort = true; + LWLockRelease(ChecksumHelperLock); +} + +/* + * ProcessSingleRelationFork + * Enable checksums in a single relation/fork. + * + * Returns true if successful, and false if *aborted*. On error, an actual + * error is raised in the lower levels. 
+ */ +static bool +ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy) +{ + BlockNumber numblocks = RelationGetNumberOfBlocksInFork(reln, forkNum); + BlockNumber b; + char activity[NAMEDATALEN * 2 + 128]; + + for (b = 0; b < numblocks; b++) + { + Buffer buf = ReadBufferExtended(reln, forkNum, b, RBM_NORMAL, strategy); + + /* + * Report to pgstat every 100 blocks (so as not to "spam") + */ + if ((b % 100) == 0) + { + snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s block %d/%d)", + get_namespace_name(RelationGetNamespace(reln)), RelationGetRelationName(reln), + forkNames[forkNum], b, numblocks); + pgstat_report_activity(STATE_RUNNING, activity); + } + + /* Need to get an exclusive lock before we can flag as dirty */ + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + /* + * Mark the buffer as dirty and force a full page write. We have to + * re-write the page to WAL even if the checksum hasn't changed, + * because if there is a replica it might have a slightly different + * version of the page with an invalid checksum, caused by unlogged + * changes (e.g. hintbits) on the master happening while checksums + * were off. This can happen if there was a valid checksum on the page + * at one point in the past, so only when checksums are first on, then + * off, and then turned on again. + */ + START_CRIT_SECTION(); + MarkBufferDirty(buf); + log_newpage_buffer(buf, false); + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); + + /* + * This is the only place where we check if we are asked to abort, the + * abortion will bubble up from here. + * It's safe to check this without a lock, because if we miss it being + * set, we will try again soon. + */ + if (ChecksumHelperShmem->abort) + return false; + + vacuum_delay_point(); + } + + return true; +} + +/* + * ProcessSingleRelationByOid + * Process a single relation based on oid. + * + * Returns true if successful, and false if *aborted*. 
On error, an actual error + * is raised in the lower levels. + */ +static bool +ProcessSingleRelationByOid(Oid relationId, BufferAccessStrategy strategy) +{ + Relation rel; + ForkNumber fnum; + bool aborted = false; + + StartTransactionCommand(); + + elog(DEBUG2, "Checksumhelper starting to process relation %d", relationId); + rel = try_relation_open(relationId, AccessShareLock); + if (rel == NULL) + { + /* + * Relation no longer exist. We consider this a success, since there are no + * pages in it that need checksums, and thus return true. + */ + elog(DEBUG1, "Checksumhelper skipping relation %d as it no longer exists", relationId); + CommitTransactionCommand(); + pgstat_report_activity(STATE_IDLE, NULL); + return true; + } + RelationOpenSmgr(rel); + + for (fnum = 0; fnum <= MAX_FORKNUM; fnum++) + { + if (smgrexists(rel->rd_smgr, fnum)) + { + if (!ProcessSingleRelationFork(rel, fnum, strategy)) + { + aborted = true; + break; + } + } + } + relation_close(rel, AccessShareLock); + elog(DEBUG2, "Checksumhelper done with relation %d: %s", + relationId, (aborted ? "aborted" : "finished")); + + CommitTransactionCommand(); + + pgstat_report_activity(STATE_IDLE, NULL); + + return !aborted; +} + +/* + * ProcessDatabase + * Enable checksums in a single database. + * + * We do this by launching a dynamic background worker into this database, and + * waiting for it to finish. We have to do this in a separate worker, since + * each process can only be connected to one database during its lifetime. 
+ */ +static ChecksumHelperResult +ProcessDatabase(ChecksumHelperDatabase * db) +{ + BackgroundWorker bgw; + BackgroundWorkerHandle *bgw_handle; + BgwHandleStatus status; + pid_t pid; + char activity[NAMEDATALEN + 64]; + + ChecksumHelperShmem->success = FAILED; + + memset(&bgw, 0, sizeof(bgw)); + bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + bgw.bgw_start_time = BgWorkerStart_RecoveryFinished; + snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres"); + snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ChecksumHelperWorkerMain"); + snprintf(bgw.bgw_name, BGW_MAXLEN, "checksumhelper worker"); + snprintf(bgw.bgw_type, BGW_MAXLEN, "checksumhelper worker"); + bgw.bgw_restart_time = BGW_NEVER_RESTART; + bgw.bgw_notify_pid = MyProcPid; + bgw.bgw_main_arg = ObjectIdGetDatum(db->dboid); + + if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle)) + { + ereport(LOG, + (errmsg("failed to start worker for checksumhelper in \"%s\"", + db->dbname))); + return FAILED; + } + + status = WaitForBackgroundWorkerStartup(bgw_handle, &pid); + if (status != BGWH_STARTED) + { + ereport(LOG, + (errmsg("failed to wait for worker startup for checksumhelper in \"%s\"", + db->dbname))); + return FAILED; + } + + ereport(DEBUG1, + (errmsg("started background worker for checksums in \"%s\"", + db->dbname))); + + snprintf(activity, sizeof(activity) - 1, + "Waiting for worker in database %s (pid %d)", db->dbname, pid); + pgstat_report_activity(STATE_RUNNING, activity); + + + status = WaitForBackgroundWorkerShutdown(bgw_handle); + if (status != BGWH_STOPPED) + { + ereport(LOG, + (errmsg("failed to wait for worker shutdown for checksumhelper in \"%s\"", + db->dbname))); + return FAILED; + } + + if (ChecksumHelperShmem->success == ABORTED) + ereport(LOG, + (errmsg("checksumhelper was aborted during processing in \"%s\"", + db->dbname))); + + ereport(DEBUG1, + (errmsg("background worker for checksums in \"%s\" completed", + db->dbname))); + + 
pgstat_report_activity(STATE_IDLE, NULL); + + return ChecksumHelperShmem->success; +} + +static void +launcher_exit(int code, Datum arg) +{ + LWLockAcquire(ChecksumHelperLock, LW_EXCLUSIVE); + ChecksumHelperShmem->abort = false; + ChecksumHelperShmem->launcher_started = false; + LWLockRelease(ChecksumHelperLock); +} + +/* + * launcher_cancel_handler + * SIGINT handler for the launcher: request an abort of the checksum run. + * + * This runs in signal context, so it must not take ChecksumHelperLock: + * LWLockAcquire() is not safe to call from a signal handler and could + * self-deadlock if the signal arrived while this process already held the + * lock. A plain store to the flag is sufficient; ProcessSingleRelationFork() + * already polls it without holding the lock. + */ +static void +launcher_cancel_handler(SIGNAL_ARGS) +{ + ChecksumHelperShmem->abort = true; +} + +/* + * WaitForAllTransactionsToFinish + * Block until every transaction older than "now" has ended. + * + * Samples the next transaction id to be assigned, then polls the oldest + * active transaction id every 5 seconds until it no longer precedes the + * sampled value. Progress is published through pgstat activity reporting. + */ +static void +WaitForAllTransactionsToFinish(void) +{ + TransactionId waitforxid; + + LWLockAcquire(XidGenLock, LW_SHARED); + waitforxid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); + LWLockRelease(XidGenLock); + + while (true) + { + TransactionId oldestxid = GetOldestActiveTransactionId(); + + elog(DEBUG1, "Checking old transactions"); + if (TransactionIdPrecedes(oldestxid, waitforxid)) + { + /* Sized so the fixed text plus a 10-digit xid is never truncated */ + char activity[128]; + + /* Oldest running xid is older than us, so wait */ + snprintf(activity, sizeof(activity), "Waiting for current transactions to finish (waiting for %u)", waitforxid); + pgstat_report_activity(STATE_RUNNING, activity); + + /* + * Retry every 5 seconds. Postmaster death must be handled by + * every WaitLatch() caller running under the postmaster. + */ + ResetLatch(MyLatch); + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + 5000, + WAIT_EVENT_PG_SLEEP); + + /* Let a pending SIGTERM take effect instead of looping on */ + CHECK_FOR_INTERRUPTS(); + } + else + { + pgstat_report_activity(STATE_IDLE, NULL); + return; + } + } +} + +void +ChecksumHelperLauncherMain(Datum arg) +{ + List *DatabaseList; + HTAB *ProcessedDatabases = NULL; + List *FailedDatabases = NIL; + ListCell *lc, + *lc2; + HASHCTL hash_ctl; + bool found_failed = false; + + on_shmem_exit(launcher_exit, 0); + + ereport(DEBUG1, + (errmsg("checksumhelper launcher started"))); + + pqsignal(SIGTERM, die); + pqsignal(SIGINT, launcher_cancel_handler); + + BackgroundWorkerUnblockSignals(); + + init_ps_display(pgstat_get_backend_desc(B_CHECKSUMHELPER_LAUNCHER), "", "", ""); + + memset(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(Oid); + hash_ctl.entrysize = 
sizeof(ChecksumHelperResult); + + /* + * The key is a binary Oid, so HASH_BLOBS is required: without it dynahash + * falls back to string hashing and comparison, which stops at the first + * zero byte and would treat distinct database oids as the same key. + */ + ProcessedDatabases = hash_create("Processed databases", + 64, + &hash_ctl, + HASH_ELEM | HASH_BLOBS); + + /* + * Initialize a connection to shared catalogs only. + */ + BackgroundWorkerInitializeConnection(NULL, NULL, 0); + + /* + * Set up so first run processes shared catalogs, but not once in every + * db. + */ + ChecksumHelperShmem->process_shared_catalogs = true; + + while (true) + { + int processed_databases; + + /* + * Get a list of all databases to process. This may include databases + * that were created during our runtime. + * + * Since a database can be created as a copy of any other database + * (which may not have existed in our last run), we have to repeat + * this loop until no new databases show up in the list. Since we wait + * for all pre-existing transactions to finish, this way we can be + * certain that there are no databases left without checksums. + */ + DatabaseList = BuildDatabaseList(); + + /* + * If there are no databases at all to checksum, we can exit + * immediately as there is no work to do. This can probably never + * happen, but just in case. + */ + if (DatabaseList == NIL || list_length(DatabaseList) == 0) + return; + + processed_databases = 0; + + foreach(lc, DatabaseList) + { + ChecksumHelperDatabase *db = (ChecksumHelperDatabase *) lfirst(lc); + ChecksumHelperResult result; + + /* Skip if this database has been processed already */ + if (hash_search(ProcessedDatabases, (void *) &db->dboid, HASH_FIND, NULL)) + { + pfree(db->dbname); + pfree(db); + continue; + } + + result = ProcessDatabase(db); + + /* + * Remember that this database has been handled. The hash table + * copies the key into its own entry, so db can be freed later + * without keeping a separate copy of the oid. + */ + hash_search(ProcessedDatabases, (void *) &db->dboid, HASH_ENTER, NULL); + processed_databases++; + + if (result == SUCCESSFUL) + { + /* + * If one database has completed shared catalogs, we + * don't have to process them again. 
+ */ + if (ChecksumHelperShmem->process_shared_catalogs) + ChecksumHelperShmem->process_shared_catalogs = false; + + /* Nothing refers to this entry any more */ + pfree(db->dbname); + pfree(db); + } + else if (result == FAILED) + { + /* + * Put failed databases on the remaining list. The entry must + * stay allocated: its oid and name are read again once the + * loop is done, to tell dropped databases from real failures. + */ + FailedDatabases = lappend(FailedDatabases, db); + } + else + /* Abort flag set, so exit the whole process */ + return; + } + + elog(DEBUG1, "Completed one loop of checksum enabling, %i databases processed", processed_databases); + + list_free(DatabaseList); + + /* + * If no databases were processed in this run of the loop, we have now + * finished all databases and no concurrently created ones can exist. + */ + if (processed_databases == 0) + break; + } + + /* + * FailedDatabases now has all databases that failed one way or another. + * This can be because they actually failed for some reason, or because the + * database was dropped between us getting the database list and trying to + * process it. Get a fresh list of databases to detect the second case + * where the database was dropped before we had started processing it. If a + * database still exists, but enabling checksums failed then we fail the + * entire checksumming process and exit with an error. 
+ */ + DatabaseList = BuildDatabaseList(); + + foreach(lc, FailedDatabases) + { + ChecksumHelperDatabase *db = (ChecksumHelperDatabase *) lfirst(lc); + bool found = false; + + foreach(lc2, DatabaseList) + { + ChecksumHelperDatabase *db2 = (ChecksumHelperDatabase *) lfirst(lc2); + + if (db->dboid == db2->dboid) + { + found = true; + ereport(WARNING, + (errmsg("failed to enable checksums in \"%s\"", + db->dbname))); + break; + } + } + + if (found) + found_failed = true; + else + { + ereport(LOG, + (errmsg("database \"%s\" has been dropped, skipping", + db->dbname))); + } + } + + if (found_failed) + { + /* Disable checksums on cluster, because we failed */ + SetDataChecksumsOff(); + ereport(ERROR, + (errmsg("checksumhelper failed to enable checksums in all databases, aborting"))); + } + + /* + * Force a checkpoint to get everything out to disk. XXX: this should + * probably not be an IMMEDIATE checkpoint, but leave it there for now + * for testing. + */ + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_IMMEDIATE); + + /* + * Everything has been processed, so flag checksums enabled. 
+ */ + SetDataChecksumsOn(); + + ereport(LOG, + (errmsg("checksums enabled, checksumhelper launcher shutting down"))); +} + +/* + * ChecksumHelperShmemSize + * Compute required space for checksumhelper-related shared memory + */ +Size +ChecksumHelperShmemSize(void) +{ + Size size; + + size = sizeof(ChecksumHelperShmemStruct); + size = MAXALIGN(size); + + return size; +} + +/* + * ChecksumHelperShmemInit + * Allocate and initialize checksumhelper-related shared memory + */ +void +ChecksumHelperShmemInit(void) +{ + bool found; + + ChecksumHelperShmem = (ChecksumHelperShmemStruct *) + ShmemInitStruct("ChecksumHelper Data", + ChecksumHelperShmemSize(), + &found); + + if (!found) + { + MemSet(ChecksumHelperShmem, 0, ChecksumHelperShmemSize()); + } +} + +/* + * BuildDatabaseList + * Compile a list of all currently available databases in the cluster + * + * This creates the list of databases for the checksumhelper workers to add + * checksums to. + */ +static List * +BuildDatabaseList(void) +{ + List *DatabaseList = NIL; + Relation rel; + TableScanDesc scan; + HeapTuple tup; + MemoryContext ctx = CurrentMemoryContext; + MemoryContext oldctx; + + StartTransactionCommand(); + + rel = table_open(DatabaseRelationId, AccessShareLock); + + /* + * Before we do this, wait for all pending transactions to finish. This + * will ensure there are no concurrently running CREATE DATABASE, which + * could cause us to miss the creation of a database that was copied + * without checksums. 
+ */ + WaitForAllTransactionsToFinish(); + + scan = table_beginscan_catalog(rel, 0, NULL); + + while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + { + Form_pg_database pgdb = (Form_pg_database) GETSTRUCT(tup); + ChecksumHelperDatabase *db; + + oldctx = MemoryContextSwitchTo(ctx); + + db = (ChecksumHelperDatabase *) palloc(sizeof(ChecksumHelperDatabase)); + + db->dboid = pgdb->oid; + db->dbname = pstrdup(NameStr(pgdb->datname)); + + DatabaseList = lappend(DatabaseList, db); + + MemoryContextSwitchTo(oldctx); + } + + table_endscan(scan); + table_close(rel, AccessShareLock); + + CommitTransactionCommand(); + + return DatabaseList; +} + +/* + * BuildRelationList + * Compile a list of all relations in the database + * + * If shared is true, both shared relations and local ones are returned, else + * all non-shared relations are returned. + * Temp tables are not included. + */ +static List * +BuildRelationList(bool include_shared) +{ + List *RelationList = NIL; + Relation rel; + TableScanDesc scan; + HeapTuple tup; + MemoryContext ctx = CurrentMemoryContext; + MemoryContext oldctx; + + StartTransactionCommand(); + + rel = table_open(RelationRelationId, AccessShareLock); + scan = table_beginscan_catalog(rel, 0, NULL); + + while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + { + Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup); + ChecksumHelperRelation *relentry; + + if (pgc->relpersistence == 't') + continue; + + if (pgc->relisshared && !include_shared) + continue; + + /* + * Only include relations types that have local storage + */ + if (pgc->relkind == RELKIND_VIEW || + pgc->relkind == RELKIND_COMPOSITE_TYPE || + pgc->relkind == RELKIND_FOREIGN_TABLE) + continue; + + oldctx = MemoryContextSwitchTo(ctx); + relentry = (ChecksumHelperRelation *) palloc(sizeof(ChecksumHelperRelation)); + + relentry->reloid = pgc->oid; + relentry->relkind = pgc->relkind; + + RelationList = lappend(RelationList, relentry); + + 
MemoryContextSwitchTo(oldctx); + } + + table_endscan(scan); + table_close(rel, AccessShareLock); + + CommitTransactionCommand(); + + return RelationList; +} + +/* + * BuildTempTableList + * Compile a list of all temporary tables in database + * + * Returns a List of oids. + */ +static List * +BuildTempTableList(void) +{ + List *RelationList = NIL; + Relation rel; + TableScanDesc scan; + HeapTuple tup; + MemoryContext ctx = CurrentMemoryContext; + MemoryContext oldctx; + + StartTransactionCommand(); + + rel = table_open(RelationRelationId, AccessShareLock); + scan = table_beginscan_catalog(rel, 0, NULL); + + while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + { + Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup); + + if (pgc->relpersistence != 't') + continue; + + oldctx = MemoryContextSwitchTo(ctx); + RelationList = lappend_oid(RelationList, pgc->oid); + MemoryContextSwitchTo(oldctx); + } + + table_endscan(scan); + table_close(rel, AccessShareLock); + + CommitTransactionCommand(); + + return RelationList; +} + +/* + * Main function for enabling checksums in a single database + */ +void +ChecksumHelperWorkerMain(Datum arg) +{ + Oid dboid = DatumGetObjectId(arg); + List *RelationList = NIL; + List *InitialTempTableList = NIL; + ListCell *lc; + BufferAccessStrategy strategy; + bool aborted = false; + + pqsignal(SIGTERM, die); + + BackgroundWorkerUnblockSignals(); + + init_ps_display(pgstat_get_backend_desc(B_CHECKSUMHELPER_WORKER), "", "", ""); + + ereport(DEBUG1, + (errmsg("checksum worker starting for database oid %d", dboid))); + + BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid, BGWORKER_BYPASS_ALLOWCONN); + + /* + * Get a list of all temp tables present as we start in this database. + * We need to wait until they are all gone until we are done, since + * we cannot access those files and modify them. + */ + InitialTempTableList = BuildTempTableList(); + + /* + * Enable vacuum cost delay, if any. 
+ */ + VacuumCostDelay = ChecksumHelperShmem->cost_delay; + VacuumCostLimit = ChecksumHelperShmem->cost_limit; + VacuumCostActive = (VacuumCostDelay > 0); + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + + /* + * Create and set the vacuum strategy as our buffer strategy. + */ + strategy = GetAccessStrategy(BAS_VACUUM); + + RelationList = BuildRelationList(ChecksumHelperShmem->process_shared_catalogs); + foreach(lc, RelationList) + { + ChecksumHelperRelation *rel = (ChecksumHelperRelation *) lfirst(lc); + + if (!ProcessSingleRelationByOid(rel->reloid, strategy)) + { + aborted = true; + break; + } + } + list_free_deep(RelationList); + + if (aborted) + { + ChecksumHelperShmem->success = ABORTED; + ereport(DEBUG1, + (errmsg("checksum worker aborted in database oid %d", dboid))); + return; + } + + /* + * Wait for all temp tables that existed when we started to go away. This + * is necessary since we cannot "reach" them to enable checksums. + * Any temp tables created after we started will already have checksums + * in them (due to the inprogress state), so those are safe. 
+ */ + while (true) + { + List *CurrentTempTables; + ListCell *lc; + int numleft; + char activity[64]; + + CurrentTempTables = BuildTempTableList(); + numleft = 0; + foreach(lc, InitialTempTableList) + { + if (list_member_oid(CurrentTempTables, lfirst_oid(lc))) + numleft++; + } + list_free(CurrentTempTables); + + if (numleft == 0) + break; + + /* At least one temp table left to wait for */ + snprintf(activity, sizeof(activity), "Waiting for %d temp tables to be removed", numleft); + pgstat_report_activity(STATE_RUNNING, activity); + + /* Retry every 5 seconds */ + ResetLatch(MyLatch); + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT, + 5000, + WAIT_EVENT_PG_SLEEP); + } + + list_free(InitialTempTableList); + + ChecksumHelperShmem->success = SUCCESSFUL; + ereport(DEBUG1, + (errmsg("checksum worker completed in database oid %d", dboid))); +} diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 819381a2ae..b2aadc1a40 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -4310,6 +4310,12 @@ pgstat_get_backend_desc(BackendType backendType) case B_WAL_WRITER: backendDesc = "walwriter"; break; + case B_CHECKSUMHELPER_LAUNCHER: + backendDesc = "checksumhelper launcher"; + break; + case B_CHECKSUMHELPER_WORKER: + backendDesc = "checksumhelper worker"; + break; } return backendDesc; diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index d0f210de8c..5e04c9bbe8 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -1398,7 +1398,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf _tarWriteHeader(tarfilename, NULL, statbuf, false); - if (!noverify_checksums && DataChecksumsEnabled()) + if (!noverify_checksums && DataChecksumsNeedVerify()) { char *filename; diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index c53e7e2279..2f71e1d1f9 100644 --- 
a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -199,6 +199,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) case XLOG_FPW_CHANGE: case XLOG_FPI_FOR_HINT: case XLOG_FPI: + case XLOG_CHECKSUMS: break; default: elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 885370698f..7f299f8e50 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -27,6 +27,7 @@ #include "postmaster/autovacuum.h" #include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" +#include "postmaster/checksumhelper.h" #include "postmaster/postmaster.h" #include "replication/logicallauncher.h" #include "replication/slot.h" @@ -255,6 +256,7 @@ CreateSharedMemoryAndSemaphores(void) WalSndShmemInit(); WalRcvShmemInit(); ApplyLauncherShmemInit(); + ChecksumHelperShmemInit(); /* * Set up other modules that need some shared memory space diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index db47843229..d50b4b13e1 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -49,3 +49,4 @@ MultiXactTruncationLock 41 OldSnapshotTimeMapLock 42 LogicalRepWorkerLock 43 CLogTruncationLock 44 +ChecksumHelperLock 45 diff --git a/src/backend/storage/page/README b/src/backend/storage/page/README index 5127d98da3..f873fb0eea 100644 --- a/src/backend/storage/page/README +++ b/src/backend/storage/page/README @@ -9,7 +9,8 @@ have a very low measured incidence according to research on large server farms, http://www.cs.toronto.edu/~bianca/papers/sigmetrics09.pdf, discussed 2010/12/22 on -hackers list. -Current implementation requires this be enabled system-wide at initdb time. +Checksums can be enabled at initdb time, but can also be turned on and off +using pg_enable_data_checksums()/pg_disable_data_checksums() at runtime. 
The checksum is not valid at all times on a data page!! The checksum is valid when the page leaves the shared pool and is checked diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 6b49810e37..6e3bfa045a 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -94,7 +94,7 @@ PageIsVerified(Page page, BlockNumber blkno) */ if (!PageIsNew(page)) { - if (DataChecksumsEnabled()) + if (DataChecksumsNeedVerify()) { checksum = pg_checksum_page((char *) page, blkno); @@ -1171,7 +1171,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) static char *pageCopy = NULL; /* If we don't need a checksum, just return the passed-in data */ - if (PageIsNew(page) || !DataChecksumsEnabled()) + if (PageIsNew(page) || !DataChecksumsNeedWrite()) return (char *) page; /* @@ -1198,7 +1198,7 @@ void PageSetChecksumInplace(Page page, BlockNumber blkno) { /* If we don't need a checksum, just return */ - if (PageIsNew(page) || !DataChecksumsEnabled()) + if (PageIsNew(page) || !DataChecksumsNeedWrite()) return; ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno); diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 05240bfd14..61e856deac 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1527,7 +1527,7 @@ pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS) int64 result; PgStat_StatDBEntry *dbentry; - if (!DataChecksumsEnabled()) + if (!DataChecksumsNeedWrite()) PG_RETURN_NULL(); if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) @@ -1545,7 +1545,7 @@ pg_stat_get_db_checksum_last_failure(PG_FUNCTION_ARGS) TimestampTz result; PgStat_StatDBEntry *dbentry; - if (!DataChecksumsEnabled()) + if (!DataChecksumsNeedWrite()) PG_RETURN_NULL(); if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2178e1cf5e..dc4402189d 100644 --- 
a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -33,6 +33,7 @@ #include "access/transam.h" #include "access/twophase.h" #include "access/xact.h" +#include "access/xlog.h" #include "access/xlog_internal.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" @@ -71,6 +72,7 @@ #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" +#include "storage/checksum.h" #include "storage/dsm_impl.h" #include "storage/standby.h" #include "storage/fd.h" @@ -471,6 +473,16 @@ static struct config_enum_entry shared_memory_options[] = { }; /* + * Options for data_checksums enum. + */ +static const struct config_enum_entry data_checksum_options[] = { + {"on", DATA_CHECKSUMS_ON, true}, + {"off", DATA_CHECKSUMS_OFF, true}, + {"inprogress", DATA_CHECKSUMS_INPROGRESS, true}, + {NULL, 0, false} +}; + +/* * Options for enum values stored in other modules */ extern const struct config_enum_entry wal_level_options[]; @@ -572,7 +584,7 @@ static int max_identifier_length; static int block_size; static int segment_size; static int wal_block_size; -static bool data_checksums; +static int data_checksums_tmp; static bool integer_datetimes; static bool assert_enabled; static char *recovery_target_timeline_string; @@ -1824,17 +1836,6 @@ static struct config_bool ConfigureNamesBool[] = }, { - {"data_checksums", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("Shows whether data checksums are turned on for this cluster."), - NULL, - GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE - }, - &data_checksums, - false, - NULL, NULL, NULL - }, - - { {"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."), NULL @@ -4537,6 +4538,17 @@ static struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"data_checksums", PGC_INTERNAL, PRESET_OPTIONS, + gettext_noop("Shows whether data checksums are turned on for this cluster."), + NULL, + GUC_NOT_IN_SAMPLE 
| GUC_DISALLOW_IN_FILE + }, + &data_checksums_tmp, + DATA_CHECKSUMS_OFF, data_checksum_options, + NULL, NULL, show_data_checksums + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index 38236415be..9e196cc2dd 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -658,6 +658,15 @@ check_control_data(ControlData *oldctrl, */ /* + * If checksums have been turned on in the old cluster, but the + * checksumhelper has yet to finish, then disallow upgrading. The user + * should either let the process finish, or turn off checksums, before + * retrying. + */ + if (oldctrl->data_checksum_version == 2) + pg_fatal("checksum enabling in old cluster is in progress\n"); + + /* * We might eventually allow upgrades from checksum to no-checksum * clusters. */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index f724ecf9ca..66758bbd7f 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -220,7 +220,7 @@ typedef struct uint32 large_object; bool date_is_int; bool float8_pass_by_value; - bool data_checksum_version; + uint32 data_checksum_version; } ControlData; /* diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d519252aad..ae02c09c84 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -189,7 +189,7 @@ extern PGDLLIMPORT int wal_level; * of the bits make it to disk, but the checksum wouldn't match. Also WAL-log * them if forced by wal_log_hints=on. */ -#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints) +#define XLogHintBitIsNeeded() (DataChecksumsNeedWrite() || wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication?
*/ #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) @@ -293,7 +293,13 @@ extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno); extern void UpdateControlFile(void); extern uint64 GetSystemIdentifier(void); extern char *GetMockAuthenticationNonce(void); -extern bool DataChecksumsEnabled(void); +extern bool DataChecksumsNeedWrite(void); +extern bool DataChecksumsNeedVerify(void); +extern bool DataChecksumsInProgress(void); +extern void SetDataChecksumsInProgress(void); +extern void SetDataChecksumsOn(void); +extern void SetDataChecksumsOff(void); +extern const char *show_data_checksums(void); extern XLogRecPtr GetFakeLSNForUnloggedRel(void); extern Size XLOGShmemSize(void); extern void XLOGShmemInit(void); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 3f0de6625d..d4e3a3eab2 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -25,6 +25,7 @@ #include "lib/stringinfo.h" #include "pgtime.h" #include "storage/block.h" +#include "storage/checksum.h" #include "storage/relfilenode.h" @@ -241,6 +242,12 @@ typedef struct xl_restore_point char rp_name[MAXFNAMELEN]; } xl_restore_point; +/* Information logged when checksum level is changed */ +typedef struct xl_checksum_state +{ + ChecksumType new_checksumtype; +} xl_checksum_state; + /* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */ typedef struct xl_end_of_recovery { diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index ff98d9e91a..8177414854 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -76,6 +76,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 +#define XLOG_CHECKSUMS 0xC0 /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 58ea5b982b..4fc08d19d6 100644 --- a/src/include/catalog/pg_proc.dat +++ 
b/src/include/catalog/pg_proc.dat @@ -10665,6 +10665,22 @@ proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}', prosrc => 'pg_control_init' }, +{ oid => '4142', + descr => 'disable data checksums', + proname => 'pg_disable_data_checksums', provolatile => 'v', prorettype => 'bool', + proparallel => 'r', + proargtypes => '', + prosrc => 'disable_data_checksums' }, + +{ oid => '4035', + descr => 'enable data checksums', + proname => 'pg_enable_data_checksums', provolatile => 'v', prorettype => 'void', + proparallel => 'r', + proargtypes => 'int4 int4', proallargtypes => '{int4,int4}', + proargmodes => '{i,i}', + proargnames => '{cost_delay,cost_limit}', + prosrc => 'enable_data_checksums' }, + # collation management functions { oid => '3445', descr => 'import collations from operating system', proname => 'pg_import_system_collations', procost => '100', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c997add881..346de83de9 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -727,7 +727,9 @@ typedef enum BackendType B_STARTUP, B_WAL_RECEIVER, B_WAL_SENDER, - B_WAL_WRITER + B_WAL_WRITER, + B_CHECKSUMHELPER_LAUNCHER, + B_CHECKSUMHELPER_WORKER } BackendType; diff --git a/src/include/postmaster/checksumhelper.h b/src/include/postmaster/checksumhelper.h new file mode 100644 index 0000000000..556f801668 --- /dev/null +++ b/src/include/postmaster/checksumhelper.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * checksumhelper.h + * header file for checksum helper background worker + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/postmaster/checksumhelper.h + * + 
*------------------------------------------------------------------------- + */ +#ifndef CHECKSUMHELPER_H +#define CHECKSUMHELPER_H + +/* Shared memory */ +extern Size ChecksumHelperShmemSize(void); +extern void ChecksumHelperShmemInit(void); + +/* Start the background processes for enabling checksums */ +void StartChecksumHelperLauncher(int cost_delay, int cost_limit); + +/* Shutdown the background processes, if any */ +void ShutdownChecksumHelperIfRunning(void); + +/* Background worker entrypoints */ +void ChecksumHelperLauncherMain(Datum arg); +void ChecksumHelperWorkerMain(Datum arg); + +#endif /* CHECKSUMHELPER_H */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 4ef6d8ddd4..cf31f24b01 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -198,6 +198,7 @@ typedef PageHeaderData *PageHeader; */ #define PG_PAGE_LAYOUT_VERSION 4 #define PG_DATA_CHECKSUM_VERSION 1 +#define PG_DATA_CHECKSUM_INPROGRESS_VERSION 2 /* ---------------------------------------------------------------- * page support macros diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h index 7ef32a3baa..2c414aa1e7 100644 --- a/src/include/storage/checksum.h +++ b/src/include/storage/checksum.h @@ -15,6 +15,13 @@ #include "storage/block.h" +typedef enum ChecksumType +{ + DATA_CHECKSUMS_OFF = 0, + DATA_CHECKSUMS_ON, + DATA_CHECKSUMS_INPROGRESS +} ChecksumType; + /* * Compute the checksum for a Postgres page. The page must be aligned on a * 4-byte boundary. diff --git a/src/test/Makefile b/src/test/Makefile index efb206aa75..6469ac94a4 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,8 @@ subdir = src/test top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global -SUBDIRS = perl regress isolation modules authentication recovery subscription +SUBDIRS = perl regress isolation modules authentication recovery subscription \ + checksum # Test suites that are not safe by default but can be run if selected # by the user via the whitespace-separated list in variable diff --git a/src/test/checksum/.gitignore b/src/test/checksum/.gitignore new file mode 100644 index 0000000000..871e943d50 --- /dev/null +++ b/src/test/checksum/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/checksum/Makefile b/src/test/checksum/Makefile new file mode 100644 index 0000000000..22a3b64dd8 --- /dev/null +++ b/src/test/checksum/Makefile @@ -0,0 +1,24 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/checksum +# +# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/checksum/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/checksum +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check + diff --git a/src/test/checksum/README b/src/test/checksum/README new file mode 100644 index 0000000000..e3fbd2bdb5 --- /dev/null +++ b/src/test/checksum/README @@ -0,0 +1,22 @@ +src/test/checksum/README + +Regression tests for data checksums +=================================== + +This directory contains a test suite for enabling data checksums +in a running cluster with streaming replication. + +Running the tests +================= + + make check + +or + + make installcheck + +NOTE: This creates a temporary installation (in the case of "check"), +with multiple nodes, be they master or standby(s) for the purpose of +the tests. 
+ +NOTE: This requires the --enable-tap-tests argument to configure. diff --git a/src/test/checksum/t/001_standby_checksum.pl b/src/test/checksum/t/001_standby_checksum.pl new file mode 100644 index 0000000000..891743fa6c --- /dev/null +++ b/src/test/checksum/t/001_standby_checksum.pl @@ -0,0 +1,104 @@ +# Test suite for testing enabling data checksums with streaming replication +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 10; + +my $MAX_TRIES = 30; + +# Initialize master node +my $node_master = get_new_node('master'); +$node_master->init(allows_streaming => 1); +$node_master->start; +my $backup_name = 'my_backup'; + +# Take backup +$node_master->backup($backup_name); + +# Create streaming standby linking to master +my $node_standby_1 = get_new_node('standby_1'); +$node_standby_1->init_from_backup($node_master, $backup_name, + has_streaming => 1); +$node_standby_1->start; + +# Create some content on master to have un-checksummed data in the cluster +$node_master->safe_psql('postgres', + "CREATE TABLE t AS SELECT generate_series(1,10000) AS a;"); + +# Wait for standbys to catch up +$node_master->wait_for_catchup($node_standby_1, 'replay', + $node_master->lsn('insert')); + +# Check that checksums are turned off +my $result = $node_master->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +is($result, "off", 'ensure checksums are turned off on master'); + +$result = $node_standby_1->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +is($result, "off", 'ensure checksums are turned off on standby_1'); + +# Enable checksums for the cluster +$node_master->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); + +# Ensure that the master has switched to inprogress immediately +$result = $node_master->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +is($result, 
"inprogress", 'ensure checksums are in progress on master'); + +# Wait for checksum enable to be replayed +$node_master->wait_for_catchup($node_standby_1, 'replay'); + +# Ensure that the standby has switched to inprogress or on +# Normally it would be "inprogress", but it is theoretically possible for the master +# to complete the checksum enabling *and* have the standby replay that record before +# we reach the check below. +$result = $node_standby_1->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +cmp_ok($result, '~~', ["inprogress", "on"], 'ensure checksums are on or in progress on standby_1'); + +# Insert some more data which should be checksummed on INSERT +$node_master->safe_psql('postgres', + "INSERT INTO t VALUES (generate_series(1,10000));"); + +# Wait for checksums enabled on the master +for (my $i = 0; $i < $MAX_TRIES; $i++) +{ + $result = $node_master->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); + last if ($result eq 'on'); + sleep(1); +} +is ($result, "on", 'ensure checksums are enabled on master'); + +# Wait for checksums enabled on the standby +for (my $i = 0; $i < $MAX_TRIES; $i++) +{ + $result = $node_standby_1->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); + last if ($result eq 'on'); + sleep(1); +} +is ($result, "on", 'ensure checksums are enabled on standby'); + +$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t"); +is ($result, "20000", 'ensure we can safely read all data with checksums'); + +# Disable checksums and ensure it's propagated to standby and that we can +# still read all data +$node_master->safe_psql('postgres', "SELECT pg_disable_data_checksums();"); +$result = $node_master->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +is($result, "off", 'ensure checksums are off on master'); + +# Wait for 
checksum disable to be replayed +$node_master->wait_for_catchup($node_standby_1, 'replay'); + +# Ensure that the standby has switched to off +$result = $node_standby_1->safe_psql('postgres', + "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); +is($result, "off", 'ensure checksums are off on standby_1'); + +$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t"); +is ($result, "20000", 'ensure we can safely read all data without checksums'); -- 2.11.0