From 6b682ec934728a97b6458709b4b161772d5cfa65 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 14 Dec 2022 15:24:21 +0300
Subject: [PATCH v51 3/3] =?UTF-8?q?Make=20pg=5Fupgrade=20from=2032-bit=20t?=
 =?UTF-8?q?o=2064=E2=80=93bit=20SLRU?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Author: Alexander Korotkov <aekorotkov@gmail.com>
Author: Teodor Sigaev <teodor@sigaev.ru>
Author: Nikita Glukhov <n.gluhov@postgrespro.ru>
Author: Maxim Orlov <orlovmg@gmail.com>
Author: Pavel Borisov <pashkin.elfe@gmail.com>
Author: Yura Sokolov <y.sokolov@postgrespro.ru> <funny.falcon@gmail.com>
Author: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
---
 src/bin/pg_upgrade/pg_upgrade.c | 292 +++++++++++++++++++++++++++++++-
 src/bin/pg_upgrade/pg_upgrade.h |   5 +
 2 files changed, 293 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 115faa222e..0cedc05416 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -44,6 +44,7 @@
 #include <langinfo.h>
 #endif
 
+#include "access/transam.h"
 #include "catalog/pg_class_d.h"
 #include "common/file_perm.h"
 #include "common/logging.h"
@@ -566,6 +567,277 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
 	check_ok();
 }
 
+/******************************************************************************/
+#define SLRU_PAGES_PER_SEGMENT		32 /* Should be equal to value from slru.h */
+
+#define CLOG_BITS_PER_XACT		2	/* flag bits kept per xid */
+#define CLOG_XACTS_PER_BYTE		4	/* 8 bits / CLOG_BITS_PER_XACT */
+#define CLOG_XACTS_PER_PAGE		(BLCKSZ * CLOG_XACTS_PER_BYTE)	/* xids per page */
+/*
+ * Position of a page-by-page pass over the segment files of one SLRU dir
+ */
+typedef struct SLRUSegmentState
+{
+	const char	   *dir;		/* directory holding the segment files */
+	FILE		   *file;		/* currently open segment file, or NULL */
+	int64			segno;		/* current segment number */
+	int64			pageno;		/* page within segment, wraps at SLRU_PAGES_PER_SEGMENT */
+	bool			is_empty_segment;	/* maintained by read_old_segment_page() */
+} SLRUSegmentState;
+
+static char *
+slru_filename_old(const char *path, int64 segno)
+{
+	Assert(segno <= PG_INT32_MAX);	/* segno must survive the (int) cast */
+	return psprintf("%s/%04X", path, (int) segno);	/* >= 4 hex digits */
+}
+
+static char *
+slru_filename_new(const char *path, int64 segno)
+{
+	return psprintf("%s/%012llX", path, (long long) segno);	/* >= 12 hex digits */
+}
+
+/* fopen() the segment state points at; on failure pg_fatal(fatal_msg, name) */
+static inline FILE *
+open_file(SLRUSegmentState *state,
+		  char * (filename_fn)(const char *path, int64 segno),
+		  const char *mode, const char *fatal_msg)
+{
+	char	*filename = filename_fn(state->dir, state->segno);
+	FILE	*fp = fopen(filename, mode);
+
+	if (fp == NULL)
+		pg_fatal(fatal_msg, filename);	/* filename is its only argument */
+
+	pfree(filename);
+	return fp;
+}
+
+/* Close state->file if one is open; filename_fn only names it on failure */
+static void
+close_file(SLRUSegmentState *state,
+		   char * (filename_fn)(const char *path, int64 segno))
+{
+	if (state->file == NULL)
+		return;
+	if (fclose(state->file) != 0)
+		pg_fatal("could not close file \"%s\": %m",
+				 filename_fn(state->dir, state->segno));
+	state->file = NULL;
+}
+
+/* Read up to BLCKSZ bytes from state->file; at EOF close it and return 0 */
+static inline int
+read_file(SLRUSegmentState *state, void *buf)
+{
+	size_t		nread = fread(buf, sizeof(char), BLCKSZ, state->file);
+
+	if (nread == 0)
+	{
+		if (ferror(state->file))
+			pg_fatal("could not read file \"%s\": %m",
+					 slru_filename_old(state->dir, state->segno));
+		if (!feof(state->file))
+			pg_fatal("unknown file read state \"%s\": %m",
+					 slru_filename_old(state->dir, state->segno));
+
+		close_file(state, slru_filename_old);
+	}
+
+	return nread;
+}
+
+/*
+ * Fetch the next old-SLRU page into buf (BLCKSZ bytes, unread part zeroed)
+ * and advance state by one page.  Returns the bytes read, or BLCKSZ/0 when
+ * no file is open; *is_empty is set when nothing was read from disk.
+ */
+static int
+read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty)
+{
+	int		n;
+
+	/* Open next segment file, if needed */
+	if (!state->file)
+	{
+		state->file = open_file(state, slru_filename_old, "rb",
+								"could not open source file \"%s\": %m");
+
+		/* Set position to the needed page */
+		if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET))
+			close_file(state, slru_filename_old);
+
+		/*
+		 * NOTE(review): open_file() is fatal on a missing file, so only a
+		 * failed fseek() leaves state->file unset here.
+		 */
+		if (state->segno != 0)
+			state->is_empty_segment = true;
+	}
+
+	if (state->file)
+	{
+		/* Segment file does exist, read page from it */
+		state->is_empty_segment = false;
+
+		/* Try to read BLCKSZ bytes */
+		n = read_file(state, buf);
+		*is_empty = (n == 0);
+
+		/* Zero the unread tail; at EOF that is the whole (stale) buffer */
+		memset((char *) buf + n, 0, BLCKSZ - n);
+	}
+	else
+	{
+		n = state->is_empty_segment ?
+				BLCKSZ :	/* Skip empty block at the end of segment */
+				0;			/* We reached the last segment */
+		*is_empty = true;
+		memset(buf, 0, BLCKSZ);	/* never hand back a stale page */
+	}
+
+	/* Advance; close before bumping segno so errors name the right file */
+	if (++state->pageno >= SLRU_PAGES_PER_SEGMENT)
+	{
+		close_file(state, slru_filename_old);
+		state->segno++;
+		state->pageno = 0;
+	}
+
+	return n;
+}
+
+/*
+ * Emit one page to the new SLRU and advance state to the following page.
+ * The segment file is created lazily, see below; while none is open, pages
+ * are only counted, not written.
+ */
+static void
+write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty)
+{
+	/*
+	 * Create a new segment file if we still didn't.  Creation is postponed
+	 * until the first non-empty page is found.  This helps not to create
+	 * completely empty segments.
+	 */
+	if (!state->file && !is_empty)
+	{
+		char	zerobuf[BLCKSZ] = {0};
+
+		state->file = open_file(state, slru_filename_new, "wb",
+								"could not open target file \"%s\": %m");
+
+		/* Write zeroes to the previously skipped prefix */
+		for (int64 i = 0; i < state->pageno; i++)
+		{
+			if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+				pg_fatal("could not write file \"%s\": %m",
+						 slru_filename_new(state->dir, state->segno));
+		}
+	}
+
+	/* Write page to the new segment (if it was created) */
+	if (state->file)
+	{
+		if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+			pg_fatal("could not write file \"%s\": %m",
+					 slru_filename_new(state->dir, state->segno));
+	}
+
+	/*
+	 * Did we reach the maximum page number? Then close segment file and
+	 * create a new one on the next iteration.  Close before bumping segno so
+	 * that a close failure is reported against the right file name.
+	 */
+	if (++state->pageno >= SLRU_PAGES_PER_SEGMENT)
+	{
+		close_file(state, slru_filename_new);
+		state->segno++;
+		state->pageno = 0;
+	}
+}
+
+/*
+ * Copy the pg_xact pages for xids [oldest, next) of the old cluster from
+ * directory old_subdir (old file names) to new_subdir (new file names).
+ */
+static void
+convert_pg_xact_segments(const char *old_subdir, const char *new_subdir)
+{
+	SLRUSegmentState	oldseg = {0};
+	SLRUSegmentState	newseg = {0};
+	char				buf[BLCKSZ] = {0};
+	FullTransactionId	oldestxid;
+	FullTransactionId	nxtxid;
+	uint32				epoch;
+	int64				pageno;
+	uint64				xid;
+
+	oldseg.dir = old_subdir;
+	newseg.dir = new_subdir;
+
+	/* wraparound without epoch is not possible */
+	if (old_cluster.controldata.chkpnt_nxtepoch == 0 &&
+		old_cluster.controldata.chkpnt_oldstxid > old_cluster.controldata.chkpnt_nxtxid)
+		pg_fatal("inconsistent pg_xact of directory \"%s\"",
+				 old_cluster.pgdata);
+
+	/* get full transactions bounds from old cluster */
+	epoch = old_cluster.controldata.chkpnt_nxtepoch;
+	nxtxid = FullTransactionIdFromEpochAndXid(epoch,
+											  old_cluster.controldata.chkpnt_nxtxid);
+	if (old_cluster.controldata.chkpnt_oldstxid > XidFromFullTransactionId(nxtxid))
+		--epoch;
+	oldestxid = FullTransactionIdFromEpochAndXid(epoch,
+												 old_cluster.controldata.chkpnt_oldstxid);
+
+	/*
+	 * Old files are numbered by the 32-bit xid alone; using the epoch here
+	 * would make the wraparound check below restart at segment 0.
+	 */
+	pageno = XidFromFullTransactionId(oldestxid) / CLOG_XACTS_PER_PAGE;
+	oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT;
+
+	/* New segment files are positioned by the full 64-bit value */
+	pageno = oldestxid.value / CLOG_XACTS_PER_PAGE;
+	newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT;
+
+	/* Copy xid flags reading only needed segment pages */
+	for (xid = oldestxid.value & ~(CLOG_XACTS_PER_PAGE - 1);
+		 xid <= ((nxtxid.value - 1) & ~(CLOG_XACTS_PER_PAGE - 1));
+		 xid += CLOG_XACTS_PER_PAGE)
+	{
+		bool	is_empty;
+		int		len;
+
+		/* Handle possible segment wraparound */
+		if (oldseg.segno > MaxTransactionId / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT)
+		{
+			Assert(!oldseg.pageno);
+			Assert(!oldseg.file);
+			Assert(!newseg.pageno);
+			Assert(!newseg.file);
+			oldseg.segno = 0;
+		}
+
+		len = read_old_segment_page(&oldseg, buf, &is_empty);
+
+		/* A page that yielded no bytes counts as empty */
+		is_empty |= len <= 0;
+		Assert(len >= 0);
+		Assert(is_empty == false);
+
+		write_new_segment_page(&newseg, buf, is_empty);
+	}
+
+	/* Release resources */
+	close_file(&oldseg, slru_filename_old);
+	close_file(&newseg, slru_filename_new);
+}
+
 static void
 copy_xact_xlog_xid(void)
 {
@@ -573,10 +845,22 @@ copy_xact_xlog_xid(void)
 	 * Copy old commit logs to new data dir. pg_clog has been renamed to
 	 * pg_xact in post-10 clusters.
 	 */
-	copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
-					  "pg_clog" : "pg_xact",
-					  GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
-					  "pg_clog" : "pg_xact");
+	if (old_cluster.controldata.cat_ver < SLRU_FORMAT_CHANGE_CAT_VER)
+	{
+#define GetClogDirName(cluster) \
+	(GET_MAJOR_VERSION((cluster).major_version) <= 906 ? "pg_clog" : "pg_xact")	/* renamed in v10 */
+		char *old_path = psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster));
+		char *new_path = psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster));
+
+		convert_pg_xact_segments(old_path, new_path);
+		pfree(old_path);
+		pfree(new_path);
+	}
+	else
+		copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
+						  "pg_clog" : "pg_xact",
+						  GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
+						  "pg_clog" : "pg_xact");
 
 	prep_status("Setting oldest XID for new cluster");
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 31589b0fdc..0cfafc8810 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -126,6 +126,11 @@ extern char *output_files[];
  */
 #define JSONB_FORMAT_CHANGE_CAT_VER 201409291
 
+/*
+ * change of SLRU segment numbering (and segment file names) to 64-bit
+ */
+#define SLRU_FORMAT_CHANGE_CAT_VER 202212092
+
 
 /*
  * Each relation is represented by a relinfo structure.
-- 
2.38.1

