Hi all, I’m new here, so maybe someone else already has this in the works?
Anyway, proposed change/patch: add a new parameter, synchronous_standalone_master = on | off, to control whether a master configured with synchronous_commit = on is allowed to stop waiting for standby WAL sync when all synchronous standby WAL senders are disconnected. The current behavior is that the master waits indefinitely until a synchronous standby becomes available or until synchronous_commit is disabled manually. This would still be the default, so synchronous_standalone_master defaults to off. Previously discussed here: http://archives.postgresql.org/pgsql-hackers/2010-10/msg01009.php I’m attaching a working patch against master/HEAD, and I hope the spirit of Christmas will make you look kindly on my attempt :) or something ... It works fine, and I added some extra logging so that it is easier to follow from an admin’s point of view. It looks like this when starting the primary server with synchronous_standalone_master = on: $ ./postgres LOG: database system was shut down at 2011-12-25 20:27:13 CET <-- No standby is connected at startup LOG: not waiting for standby synchronization LOG: autovacuum launcher started LOG: database system is ready to accept connections <-- First sync standby connects here, so switch to sync mode LOG: standby "tx0113" is now the synchronous standby with priority 1 LOG: waiting for standby synchronization <-- WAL receiver on the standby is killed (SIGKILL) LOG: unexpected EOF on standby connection LOG: not waiting for standby synchronization <-- restart standby so that it connects again LOG: standby "tx0113" is now the synchronous standby with priority 1 LOG: waiting for standby synchronization <-- WAL receiver on the standby is first stopped (SIGSTOP) to make sure we have outstanding waits in the primary, then killed (SIGKILL) LOG: could not receive data from client: Connection reset by peer LOG: unexpected EOF on standby connection LOG: not waiting for standby synchronization <-- client now finally receives the commit 
ACK that was hanging due to the SIGSTOP’ed WAL receiver on the standby node. And so on ... Any comments are welcome :) Thanks and cheers, /A
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0cc3296..6367dcc 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2182,6 +2182,24 @@ SET ENABLE_SEQSCAN TO OFF; </listitem> </varlistentry> + <varlistentry id="guc-synchronous-standalone-master" xreflabel="synchronous-standalone-master"> + <term><varname>synchronous_standalone_master</varname> (<type>boolean</type>)</term> + <indexterm> + <primary><varname>synchronous_standalone_master</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Specifies how the master behaves when <xref linkend="guc-synchronous-commit"> + is set to <literal>on</> and <xref linkend="guc-synchronous-standby-names"> is configured but no + appropriate standby servers are currently connected. If enabled, the master will + continue processing transactions alone. If disabled, all the transactions on the + master are blocked until a synchronous standby has appeared. + + The default is disabled. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index e9ae1e8..706af88 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -353,6 +353,8 @@ CheckpointerMain(void) /* Do this once before starting the loop, then just at SIGHUP time. 
*/ SyncRepUpdateSyncStandbysDefined(); + SyncRepUpdateSyncStandaloneAllowed(); + SyncRepCheckIfStandaloneMaster(); /* * Loop forever @@ -382,6 +384,7 @@ CheckpointerMain(void) ProcessConfigFile(PGC_SIGHUP); /* update global shmem state for sync rep */ SyncRepUpdateSyncStandbysDefined(); + SyncRepUpdateSyncStandaloneAllowed(); } if (checkpoint_requested) { @@ -658,6 +661,7 @@ CheckpointWriteDelay(int flags, double progress) ProcessConfigFile(PGC_SIGHUP); /* update global shmem state for sync rep */ SyncRepUpdateSyncStandbysDefined(); + SyncRepUpdateSyncStandaloneAllowed(); } AbsorbFsyncRequests(); diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 95de6c7..fd3e782 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -59,6 +59,8 @@ /* User-settable parameters for sync rep */ char *SyncRepStandbyNames; +bool SyncRepStandaloneMasterAllowed; + #define SyncStandbysDefined() \ (SyncRepStandbyNames != NULL && SyncRepStandbyNames[0] != '\0') @@ -126,6 +128,20 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN) return; } + + /* + * Fast exit also if no synchronous standby servers are presently connected + * and if the primary server has been configured to continue on without them. + */ + if ( SyncRepStandaloneMasterAllowed ) + { + if ( ! SyncRepCheckIfStandaloneMaster() ) + { + LWLockRelease(SyncRepLock); + return; + } + } + /* * Set our waitLSN so WALSender will know when to wake us, and add * ourselves to the queue. @@ -326,6 +344,63 @@ SyncRepCleanupAtProcExit(void) } /* + * Check if the master should switch to standalone mode and stop trying + * to wait for standby synchronization because there are no standby servers currently + * connected. If there are servers connected, then switch back and start waiting for them. + * Must hold SyncRepLock. 
+ */ +bool SyncRepCheckIfStandaloneMaster() +{ + bool standby_connected = false; + int i = 0; + + if (!SyncRepRequested() || !SyncStandbysDefined()) + return false; + + if ( ! WalSndCtl->sync_standalone_allowed ) + return false; + + for (i = 0; i < max_wal_senders && ! standby_connected; i++) + { + volatile WalSnd *walsnd = &WalSndCtl->walsnds[i]; + if ( walsnd->pid != 0 && walsnd->sync_standby_priority ) + { + standby_connected = true; + + if ( WalSndCtl->sync_standalone_master ) + { + ereport(LOG, + (errmsg("waiting for standby synchronization"), + errhidestmt(true))); + + WalSndCtl->sync_standalone_master = false; + } + } + } + + if ( ! standby_connected ) + { + if ( ! WalSndCtl->sync_standalone_master ) + { + ereport(LOG, + (errmsg("not waiting for standby synchronization"), + errhidestmt(true))); + + WalSndCtl->sync_standalone_master = true; + + /* + * We just switched mode and do not want to wait for standby sync anymore. + * Wake others who may be waiting at this point + */ + SyncRepWakeQueue(true); + } + } + + return standby_connected; + +} + +/* * =========================================================== * Synchronous Replication functions for wal sender processes * =========================================================== @@ -603,6 +678,25 @@ SyncRepUpdateSyncStandbysDefined(void) } } + +void +SyncRepUpdateSyncStandaloneAllowed(void) +{ + bool value = SyncRepStandaloneMasterAllowed; + + if ( SyncRepStandaloneMasterAllowed != WalSndCtl->sync_standalone_allowed ) + { + LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); + + if ( SyncRepStandaloneMasterAllowed ) + SyncRepWakeQueue(true); + + WalSndCtl->sync_standalone_allowed = SyncRepStandaloneMasterAllowed; + + LWLockRelease(SyncRepLock); + } +} + #ifdef USE_ASSERT_CHECKING static bool SyncRepQueueIsOrderedByLSN(void) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ea86520..ddfaa09 100644 --- a/src/backend/replication/walsender.c +++ 
b/src/backend/replication/walsender.c @@ -932,11 +932,23 @@ InitWalSnd(void) } } if (MyWalSnd == NULL) + { ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("number of requested standby connections " "exceeds max_wal_senders (currently %d)", max_wal_senders))); + } + else + { + /* + * A standby just connected, check if the master should + * switch from standalone to synchronous mode. + */ + LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); + SyncRepCheckIfStandaloneMaster(); + LWLockRelease(SyncRepLock); + } /* Arrange to clean up at walsender exit */ on_shmem_exit(WalSndKill, 0); @@ -955,6 +967,13 @@ WalSndKill(int code, Datum arg) MyWalSnd->pid = 0; DisownLatch(&MyWalSnd->latch); + /* + * Check if this was the last standby + */ + LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); + SyncRepCheckIfStandaloneMaster(); + LWLockRelease(SyncRepLock); + /* WalSnd struct isn't mine anymore */ MyWalSnd = NULL; } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index da7b6d4..f26ee7a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1375,6 +1375,16 @@ static struct config_bool ConfigureNamesBool[] = }, { + {"synchronous_standalone_master", PGC_SIGHUP, REPLICATION_MASTER, + gettext_noop("Specifies whether we allow the master to process transactions alone when there is no connected standby."), + NULL + }, + &SyncRepStandaloneMasterAllowed, + false, + NULL, NULL, NULL + }, + + { {"hot_standby", PGC_POSTMASTER, REPLICATION_STANDBY, gettext_noop("Allows connections and queries during recovery."), NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 315db46..812bdf0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -215,6 +215,11 @@ # from standby(s); '*' = all #vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed +#synchronous_standalone_master = off # Whether the master can 
continue processing + # commits when no sync standbys are connected + # or if it has to wait until one connects. + + # - Standby Servers - # These settings are ignored on a master server diff --git a/src/include/replication/syncrep.h b/src/include/replication/syncrep.h index 65b725f..0699e73 100644 --- a/src/include/replication/syncrep.h +++ b/src/include/replication/syncrep.h @@ -23,6 +23,8 @@ /* user-settable parameters for synchronous replication */ extern char *SyncRepStandbyNames; +extern bool SyncRepStandaloneMasterAllowed; + /* called by user backend */ extern void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN); @@ -35,6 +37,9 @@ extern void SyncRepReleaseWaiters(void); /* called by wal writer */ extern void SyncRepUpdateSyncStandbysDefined(void); +extern void SyncRepUpdateSyncStandaloneAllowed(void); + +extern bool SyncRepCheckIfStandaloneMaster(void); /* called by various procs */ extern int SyncRepWakeQueue(bool all); diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index be7a341..d1dc606 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -85,6 +85,18 @@ typedef struct */ bool sync_standbys_defined; + /* + * Whether the synchronous master is allowed to switch to standalone mode + * when there are not standby servers connected. + */ + bool sync_standalone_allowed; + + /* + * Whether the synchronous master is currently running in standalone mode + * because there are no WAL senders connected. + */ + bool sync_standalone_master; + WalSnd walsnds[1]; /* VARIABLE LENGTH ARRAY */ } WalSndCtlData;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers