On Wed, Dec 8, 2021 at 1:05 PM Kyotaro Horiguchi
<horikyota....@gmail.com> wrote:
>
> At Wed, 8 Dec 2021 11:47:30 +0530, Bharath Rupireddy 
> <bharath.rupireddyforpostg...@gmail.com> wrote in
> > On Wed, Dec 8, 2021 at 10:59 AM Bossart, Nathan <bossa...@amazon.com> wrote:
> > > >> Another option we might want to consider is to just skip updating the
> > > >> state entirely for end-of-recovery checkpoints.  The state would
> > > >> instead go straight from DB_IN_CRASH_RECOVERY to DB_IN_PRODUCTION.  I
> > > >> don't know if it's crucial to have a dedicated control file state for
> > > >> end-of-recovery checkpoints.
>
> FWIW I find it simple but sufficient since I regarded the
> end-of-recovery checkpoint as a part of recovery.  In that case what
> is strange here is only that the state transition passes the
> DB_SHUTDOWN(ING/ED) states.
>
> On the other hand, when a server is going to shutdown, the state stays
> at DB_IN_PRODUCTION if there are clinging clients even if the shutdown
> procedure has been already started and no new clients can connect to
> the server. There's no reason we need to be so particular about states
> for recovery-end.
>
> I see it a bit too complex for the advantage.  When end-of-recovery
> checkpoint takes so long, that state is shown in server log, which
> operators would look into before the control file.

Thanks for your thoughts. I'm fine either way, hence attaching two
patches here with and I will leave it for the committer 's choice.
1) v1-0001-Add-DB_IN_END_OF_RECOVERY_CHECKPOINT-state-for-co.patch --
adds new db state DB_IN_END_OF_RECOVERY_CHECKPOINT for control file.
2) v1-0001-Skip-control-file-db-state-updation-during-end-of.patch --
just skips setting db state to DB_SHUTDOWNING and DB_SHUTDOWNED in
case of end-of-recovery checkpoint so that the state will be
DB_IN_CRASH_RECOVERY which then changes to DB_IN_PRODUCTION.

Regards,
Bharath Rupireddy.
From 8cc1dfec87a53d3b423712380c934243e93009ac Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Tue, 7 Dec 2021 07:53:53 +0000
Subject: [PATCH v1] Add DB_IN_END_OF_RECOVERY_CHECKPOINT state for control
 file

While the database is performing end-of-recovery checkpoint, the
control file gets updated with db state as "shutting down" in
CreateCheckPoint and at the end it sets it back to "shut down" for
a brief moment and then finally to "in production". If the
end-of-recovery checkpoint takes a lot of time or the db goes down
during the end-of-recovery checkpoint for whatever reasons, the
control file ends up having the wrong db state.

This patch adds a new db state to control file, called
DB_IN_END_OF_RECOVERY_CHECKPOINT or "in end-of-recovery checkpoint"
to represent the correct state of the database server.
---
 src/backend/access/transam/xlog.c       | 13 +++++++++++--
 src/bin/pg_controldata/pg_controldata.c |  2 ++
 src/include/catalog/pg_control.h        |  3 ++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index d894af310a..a31a8dda35 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6747,6 +6747,12 @@ StartupXLOG(void)
 							 " and you might need to choose an earlier recovery target.")));
 			break;
 
+		case DB_IN_END_OF_RECOVERY_CHECKPOINT:
+			ereport(LOG,
+					(errmsg("database system was interrupted while in end-of-recovery checkpoint at %s",
+							str_time(ControlFile->time))));
+			break;
+
 		case DB_IN_PRODUCTION:
 			ereport(LOG,
 					(errmsg("database system was interrupted; last known up at %s",
@@ -9140,7 +9146,10 @@ CreateCheckPoint(int flags)
 	if (shutdown)
 	{
 		LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-		ControlFile->state = DB_SHUTDOWNING;
+		if (flags & CHECKPOINT_END_OF_RECOVERY)
+			ControlFile->state = DB_IN_END_OF_RECOVERY_CHECKPOINT;
+		else
+			ControlFile->state = DB_SHUTDOWNING;
 		UpdateControlFile();
 		LWLockRelease(ControlFileLock);
 	}
@@ -9406,7 +9415,7 @@ CreateCheckPoint(int flags)
 	 * Update the control file.
 	 */
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	if (shutdown)
+	if (flags & CHECKPOINT_IS_SHUTDOWN)
 		ControlFile->state = DB_SHUTDOWNED;
 	ControlFile->checkPoint = ProcLastRecPtr;
 	ControlFile->checkPointCopy = checkPoint;
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index f911f98d94..e47b90143e 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -63,6 +63,8 @@ dbState(DBState state)
 			return _("in crash recovery");
 		case DB_IN_ARCHIVE_RECOVERY:
 			return _("in archive recovery");
+		case DB_IN_END_OF_RECOVERY_CHECKPOINT:
+			return _("in end-of-recovery checkpoint");
 		case DB_IN_PRODUCTION:
 			return _("in production");
 	}
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 749bce0cc6..7ea0fdb273 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -22,7 +22,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION	1300
+#define PG_CONTROL_VERSION	1500
 
 /* Nonce key length, see below */
 #define MOCK_AUTH_NONCE_LEN		32
@@ -92,6 +92,7 @@ typedef enum DBState
 	DB_SHUTDOWNING,
 	DB_IN_CRASH_RECOVERY,
 	DB_IN_ARCHIVE_RECOVERY,
+	DB_IN_END_OF_RECOVERY_CHECKPOINT,
 	DB_IN_PRODUCTION
 } DBState;
 
-- 
2.25.1

From 87597fd8bac041897626a9806000b042ccc7b68a Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 8 Dec 2021 09:09:56 +0000
Subject: [PATCH v1] Skip control file db state updation during end-of-recovery
 checkpoint

While the database is performing end-of-recovery checkpoint,
the control file gets updated with db state as "shutting down"
in CreateCheckPoint and at the end it sets it back to "shut down"
for a brief moment and then finally to "in production".

Intead, let the db state be in DB_IN_CRASH_RECOVERY which then
changes to DB_IN_PRODUCTION.
---
 src/backend/access/transam/xlog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index d894af310a..3f83b5e3f2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9137,7 +9137,7 @@ CreateCheckPoint(int flags)
 	 */
 	START_CRIT_SECTION();
 
-	if (shutdown)
+	if (flags & CHECKPOINT_IS_SHUTDOWN)
 	{
 		LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
 		ControlFile->state = DB_SHUTDOWNING;
@@ -9406,7 +9406,7 @@ CreateCheckPoint(int flags)
 	 * Update the control file.
 	 */
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	if (shutdown)
+	if (flags & CHECKPOINT_IS_SHUTDOWN)
 		ControlFile->state = DB_SHUTDOWNED;
 	ControlFile->checkPoint = ProcLastRecPtr;
 	ControlFile->checkPointCopy = checkPoint;
-- 
2.25.1

Reply via email to