On Monday, September 18, 2023 11:54:42 PM CEST Nathan Bossart wrote:
> On Mon, Sep 18, 2023 at 02:52:47PM -0700, Nathan Bossart wrote:
> > On Thu, Jul 27, 2023 at 10:51:11AM +0200, Pierre Ducroquet wrote:
> >> I ended up writing several patches that shaved some time for pg_restore
> >> -l,
> >> and reduced the toc.dat size.
> > 
> > I've only just started taking a look at these patches, and I intend to do
> > a
> > more thorough review in the hopefully-not-too-distant future.
> 
> Since cfbot is failing on some pg_upgrade and pg_dump tests, I've set this
> to waiting-on-author.

Attached updated patches fix this regression, I'm sorry I missed that.

>From bafc4a3775d732895aa919de21e6f512cc726f49 Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 21:31:33 +0200
Subject: [PATCH 1/4] drop has oids field instead of having static values

---
 src/bin/pg_dump/pg_backup_archiver.c | 3 +--
 src/bin/pg_dump/pg_backup_archiver.h | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 4d83381d84..f4c782d63d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2510,7 +2510,6 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->tablespace);
 		WriteStr(AH, te->tableam);
 		WriteStr(AH, te->owner);
-		WriteStr(AH, "false");
 
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
@@ -2618,7 +2617,7 @@ ReadToc(ArchiveHandle *AH)
 		is_supported = true;
 		if (AH->version < K_VERS_1_9)
 			is_supported = false;
-		else
+		else if (AH->version < K_VERS_1_16)
 		{
 			tmp = ReadStr(AH);
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index b07673933d..b78e326f73 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -68,10 +68,11 @@
 #define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0)	/* add
 													 * compression_algorithm
 													 * in header */
+#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format */
 
 /* Current archive version number (the format we can output) */
 #define K_VERS_MAJOR 1
-#define K_VERS_MINOR 15
+#define K_VERS_MINOR 16
 #define K_VERS_REV 0
 #define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)
 
-- 
2.42.0

>From e8dbb51713daefa1596ce63ea6990d2fd1c4e27f Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 22:23:28 +0200
Subject: [PATCH 2/4] convert sscanf to strtoul

---
 src/bin/pg_dump/pg_backup_archiver.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index f4c782d63d..f9efb2badf 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2556,13 +2556,13 @@ ReadToc(ArchiveHandle *AH)
 		if (AH->version >= K_VERS_1_8)
 		{
 			tmp = ReadStr(AH);
-			sscanf(tmp, "%u", &te->catalogId.tableoid);
+			te->catalogId.tableoid = strtoul(tmp, NULL, 10);
 			free(tmp);
 		}
 		else
 			te->catalogId.tableoid = InvalidOid;
 		tmp = ReadStr(AH);
-		sscanf(tmp, "%u", &te->catalogId.oid);
+		te->catalogId.oid = strtoul(tmp, NULL, 10);
 		free(tmp);
 
 		te->tag = ReadStr(AH);
@@ -2646,7 +2646,7 @@ ReadToc(ArchiveHandle *AH)
 					depSize *= 2;
 					deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
 				}
-				sscanf(tmp, "%d", &deps[depIdx]);
+				deps[depIdx] = strtoul(tmp, NULL, 10);
 				free(tmp);
 				depIdx++;
 			}
-- 
2.42.0

>From eb02760df97ab8e95287662a88ac3439fbc0a4fa Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Thu, 27 Jul 2023 10:04:51 +0200
Subject: [PATCH 3/4] store oids as integer instead of string

---
 src/bin/pg_dump/pg_backup_archiver.c | 63 ++++++++++++++++++----------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index f9efb2badf..feacc22701 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2470,7 +2470,6 @@ void
 WriteToc(ArchiveHandle *AH)
 {
 	TocEntry   *te;
-	char		workbuf[32];
 	int			tocCount;
 	int			i;
 
@@ -2494,11 +2493,8 @@ WriteToc(ArchiveHandle *AH)
 		WriteInt(AH, te->dumpId);
 		WriteInt(AH, te->dataDumper ? 1 : 0);
 
-		/* OID is recorded as a string for historical reasons */
-		sprintf(workbuf, "%u", te->catalogId.tableoid);
-		WriteStr(AH, workbuf);
-		sprintf(workbuf, "%u", te->catalogId.oid);
-		WriteStr(AH, workbuf);
+		WriteInt(AH, te->catalogId.tableoid);
+		WriteInt(AH, te->catalogId.oid);
 
 		WriteStr(AH, te->tag);
 		WriteStr(AH, te->desc);
@@ -2514,10 +2510,9 @@ WriteToc(ArchiveHandle *AH)
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
 		{
-			sprintf(workbuf, "%d", te->dependencies[i]);
-			WriteStr(AH, workbuf);
+			WriteInt(AH, te->dependencies[i]);
 		}
-		WriteStr(AH, NULL);		/* Terminate List */
+		WriteInt(AH, -1);		/* Terminate List */
 
 		if (AH->WriteExtraTocPtr)
 			AH->WriteExtraTocPtr(AH, te);
@@ -2529,6 +2524,7 @@ ReadToc(ArchiveHandle *AH)
 {
 	int			i;
 	char	   *tmp;
+	int			depId;
 	DumpId	   *deps;
 	int			depIdx;
 	int			depSize;
@@ -2553,7 +2549,11 @@ ReadToc(ArchiveHandle *AH)
 
 		te->hadDumper = ReadInt(AH);
 
-		if (AH->version >= K_VERS_1_8)
+		if (AH->version >= K_VERS_1_16)
+		{
+			te->catalogId.tableoid = ReadInt(AH);
+		}
+		else if (AH->version >= K_VERS_1_8)
 		{
 			tmp = ReadStr(AH);
 			te->catalogId.tableoid = strtoul(tmp, NULL, 10);
@@ -2561,9 +2561,15 @@ ReadToc(ArchiveHandle *AH)
 		}
 		else
 			te->catalogId.tableoid = InvalidOid;
-		tmp = ReadStr(AH);
-		te->catalogId.oid = strtoul(tmp, NULL, 10);
-		free(tmp);
+
+		if (AH->version >= K_VERS_1_16)
+			te->catalogId.oid = ReadInt(AH);
+		else
+		{
+			tmp = ReadStr(AH);
+			te->catalogId.oid = strtoul(tmp, NULL, 10);
+			free(tmp);
+		}
 
 		te->tag = ReadStr(AH);
 		te->desc = ReadStr(AH);
@@ -2638,16 +2644,31 @@ ReadToc(ArchiveHandle *AH)
 			depIdx = 0;
 			for (;;)
 			{
-				tmp = ReadStr(AH);
-				if (!tmp)
-					break;		/* end of list */
-				if (depIdx >= depSize)
+				if (AH->version >= K_VERS_1_16)
 				{
-					depSize *= 2;
-					deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					depId = ReadInt(AH);
+					if (depId == -1)
+						break;		/* end of list */
+					if (depIdx >= depSize)
+					{
+						depSize *= 2;
+						deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					}
+					deps[depIdx] = depId;
+				}
+				else
+				{
+					tmp = ReadStr(AH);
+					if (!tmp)
+						break;		/* end of list */
+					if (depIdx >= depSize)
+					{
+						depSize *= 2;
+						deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					}
+					deps[depIdx] = strtoul(tmp, NULL, 10);
+					free(tmp);
 				}
-				deps[depIdx] = strtoul(tmp, NULL, 10);
-				free(tmp);
 				depIdx++;
 			}
 
-- 
2.42.0

>From 46e4c941fa960be4feffcdd1fa17cca6d6ecd09d Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 22:18:54 +0200
Subject: [PATCH 4/4] move static strings to arrays at beginning

Since namespace, access method, tablespace and owners are unlikely to
be different for each toc entry, it is more efficient to store them as
references to an array at the beginning of the toc.dat file.
---
 src/bin/pg_dump/pg_backup_archiver.c | 291 ++++++++++++++++++++++++++-
 src/bin/pg_dump/pg_backup_archiver.h |   6 +-
 2 files changed, 285 insertions(+), 12 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index feacc22701..caa39dcffd 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2472,16 +2472,156 @@ WriteToc(ArchiveHandle *AH)
 	TocEntry   *te;
 	int			tocCount;
 	int			i;
-
-	/* count entries that will actually be dumped */
+	int			tableam_count;
+	char	  **tableams;
+	int			namespace_count;
+	char	  **namespaces;
+	int			owner_count;
+	char	  **owners;
+	int			tablespace_count;
+	char	  **tablespaces;
+	
+	/* prepare dynamic arrays */
+	tableams = palloc(sizeof(char*) * 1);
+	tableams[0] = NULL;
+	tableam_count = 0;
+	namespaces = palloc(sizeof(char*) * 1);
+	namespaces[0] = NULL;
+	namespace_count = 0;
+	owners = palloc(sizeof(char*) * 1);
+	owners[0] = NULL;
+	owner_count = 0;
+	tablespaces = palloc(sizeof(char*) * 1);
+	tablespaces[0] = NULL;
+	tablespace_count = 0;
+
+	/* count entries that will actually be dumped
+	 * and build the dictionnary */
 	tocCount = 0;
 	for (te = AH->toc->next; te != AH->toc; te = te->next)
 	{
 		if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) != 0)
 			tocCount++;
+		te->tableam_idx = -1;
+		if (te->tableam)
+		{
+			for (i = 0 ; i < tableam_count ; i++)
+			{
+				if (strcmp(tableams[i], te->tableam) == 0)
+				{
+					te->tableam_idx = i;
+					break;
+				}
+			}
+			if (te->tableam_idx == -1)
+			{
+				/* append tableam to array */
+				tableams[tableam_count] = te->tableam;
+				te->tableam_idx = tableam_count;
+				tableam_count++;
+				tableams = pg_realloc(tableams, sizeof(char*) * (tableam_count + 1));
+			}
+		}
+
+		te->namespace_idx = -1;
+		if (te->namespace)
+		{
+			/* reverse iterate for increased perfs, ToC entries can be ordered by namespace */
+			for (i = namespace_count - 1 ; i >= 0 ; i--)
+			{
+				if (strcmp(namespaces[i], te->namespace) == 0)
+				{
+					te->namespace_idx = i;
+					break;
+				}
+			}
+			if (te->namespace_idx == -1)
+			{
+				namespaces[namespace_count] = te->namespace;
+				te->namespace_idx = namespace_count;
+				namespace_count++;
+				namespaces = pg_realloc(namespaces, sizeof(char*) * (namespace_count + 1));
+			}
+		}
+
+		te->owner_idx = -1;
+		if (te->owner)
+		{
+			for (i = 0 ; i < owner_count ; i++)
+			{
+				if (strcmp(owners[i], te->owner) == 0)
+				{
+					te->owner_idx = i;
+					break;
+				}
+			}
+			if (te->owner_idx == -1)
+			{
+				owners[owner_count] = te->owner;
+				te->owner_idx = owner_count;
+				owner_count++;
+				owners = pg_realloc(owners, sizeof(char*) * (owner_count + 1));
+			}
+		}
+
+		te->tablespace_idx = -1;
+		if (te->tablespace)
+		{
+			for (i = 0 ; i < tablespace_count ; i++)
+			{
+				if (strcmp(tablespaces[i], te->tablespace) == 0)
+				{
+					te->tablespace_idx = i;
+					break;
+				}
+			}
+			if (te->tablespace_idx == -1)
+			{
+				tablespaces[tablespace_count] = te->tablespace;
+				te->tablespace_idx = tablespace_count;
+				tablespace_count++;
+				tablespaces = pg_realloc(tablespaces, sizeof(char*) * (tablespace_count + 1));
+			}
+		}
+	}
+	
+	/* write the list of am */
+	printf("%d tableams to save\n", tableam_count);
+	WriteInt(AH, tableam_count);
+	for (i = 0 ; i < tableam_count ; i++)
+	{
+		printf("%d is %s\n", i, tableams[i]);
+		WriteStr(AH, tableams[i]);
+	}
+
+	/* write the list of namespaces */
+	printf("%d namespaces to save\n", namespace_count);
+	WriteInt(AH, namespace_count);
+	for (i = 0 ; i < namespace_count ; i++)
+	{
+		printf("%d is %s\n", i, namespaces[i]);
+		WriteStr(AH, namespaces[i]);
+	}
+
+	/* write the list of owners */
+	printf("%d owners to save\n", owner_count);
+	WriteInt(AH, owner_count);
+	for (i = 0 ; i < owner_count ; i++)
+	{
+		printf("%d is %s\n", i, owners[i]);
+		WriteStr(AH, owners[i]);
+	}
+
+	/* write the list of tablespaces */
+	printf("%d tablespaces to save\n", tablespace_count);
+	WriteInt(AH, tablespace_count);
+	for (i = 0 ; i < tablespace_count ; i++)
+	{
+		printf("%d is %s\n", i, tablespaces[i]);
+		WriteStr(AH, tablespaces[i]);
 	}
 
-	/* printf("%d TOC Entries to save\n", tocCount); */
+	printf("%d TOC Entries to save\n", tocCount);
 
 	WriteInt(AH, tocCount);
 
@@ -2502,10 +2642,22 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->defn);
 		WriteStr(AH, te->dropStmt);
 		WriteStr(AH, te->copyStmt);
-		WriteStr(AH, te->namespace);
-		WriteStr(AH, te->tablespace);
-		WriteStr(AH, te->tableam);
-		WriteStr(AH, te->owner);
+		if (namespace_count < 128)
+			AH->WriteBytePtr(AH, te->namespace_idx);
+		else
+			WriteInt(AH, te->namespace_idx);
+		if (tablespace_count < 128)
+			AH->WriteBytePtr(AH, te->tablespace_idx);
+		else
+			WriteInt(AH, te->tablespace_idx);
+		if (tableam_count < 128)
+			AH->WriteBytePtr(AH, te->tableam_idx);
+		else
+			WriteInt(AH, te->tableam_idx);
+		if (owner_count < 128)
+			AH->WriteBytePtr(AH, te->owner_idx);
+		else
+			WriteInt(AH, te->owner_idx);
 
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
@@ -2530,6 +2682,53 @@ ReadToc(ArchiveHandle *AH)
 	int			depSize;
 	TocEntry   *te;
 	bool		is_supported;
+	int			tableam_count;
+	char	  **tableams;
+	int			namespace_count;
+	char	  **namespaces;
+	int			owner_count;
+	char	  **owners;
+	int			tablespace_count;
+	char	  **tablespaces;
+	int			invalid_entry;
+
+	if (AH->version < K_VERS_1_16)
+	{
+		tableam_count = 0;
+		tableams = NULL;
+		namespace_count = 0;
+		namespaces = NULL;
+		owner_count = 0;
+		owners = NULL;
+		tablespace_count = 0;
+		tablespaces = NULL;
+	}
+	else
+	{
+		tableam_count = ReadInt(AH);
+		tableams = pg_malloc0(sizeof(char*) * tableam_count);
+		
+		for (i = 0 ; i < tableam_count ; i++)
+			tableams[i] = ReadStr(AH);
+
+		namespace_count = ReadInt(AH);
+		namespaces = pg_malloc0(sizeof(char*) * namespace_count);
+		
+		for (i = 0 ; i < namespace_count ; i++)
+			namespaces[i] = ReadStr(AH);
+
+		owner_count = ReadInt(AH);
+		owners = pg_malloc0(sizeof(char*) * owner_count);
+
+		for (i = 0 ; i < owner_count ; i++)
+			owners[i] = ReadStr(AH);
+
+		tablespace_count = ReadInt(AH);
+		tablespaces = pg_malloc0(sizeof(char*) * tablespace_count);
+
+		for (i = 0 ; i < tablespace_count ; i++)
+			tablespaces[i] = ReadStr(AH);
+	}
 
 	AH->tocCount = ReadInt(AH);
 	AH->maxDumpId = 0;
@@ -2610,16 +2809,86 @@ ReadToc(ArchiveHandle *AH)
 		if (AH->version >= K_VERS_1_3)
 			te->copyStmt = ReadStr(AH);
 
-		if (AH->version >= K_VERS_1_6)
+		if (AH->version >= K_VERS_1_6 && AH->version < K_VERS_1_16)
 			te->namespace = ReadStr(AH);
+		else
+		{
+			if (namespace_count < 128)
+			{
+				te->namespace_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->namespace_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->namespace_idx == invalid_entry)
+				te->namespace = "";
+			else
+				te->namespace = namespaces[te->namespace_idx];
+		}
 
-		if (AH->version >= K_VERS_1_10)
+		if (AH->version >= K_VERS_1_10 && AH->version < K_VERS_1_16)
 			te->tablespace = ReadStr(AH);
+		else
+		{
+			if (tablespace_count < 128)
+			{
+				te->tablespace_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->tablespace_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->tablespace_idx == invalid_entry)
+				te->tablespace = NULL;
+			else
+				te->tablespace = tablespaces[te->tablespace_idx];
+		}
 
-		if (AH->version >= K_VERS_1_14)
+		if (AH->version >= K_VERS_1_14 && AH->version < K_VERS_1_16)
 			te->tableam = ReadStr(AH);
+		else if (AH->version >= K_VERS_1_16)
+		{
+			if (tableam_count < 128)
+			{
+				te->tableam_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->tableam_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->tableam_idx == invalid_entry)
+				te->tableam = NULL;
+			else
+				te->tableam = tableams[te->tableam_idx];
+		}
+
+		if (AH->version < K_VERS_1_16)
+			te->owner = ReadStr(AH);
+		else
+		{
+			if (owner_count < 128)
+			{
+				te->owner_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->owner_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->owner_idx == invalid_entry)
+				te->owner = NULL;
+			else
+				te->owner = owners[te->owner_idx];
+		}
 
-		te->owner = ReadStr(AH);
 		is_supported = true;
 		if (AH->version < K_VERS_1_9)
 			is_supported = false;
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index b78e326f73..8b8e163729 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -68,7 +68,7 @@
 #define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0)	/* add
 													 * compression_algorithm
 													 * in header */
-#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format */
+#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format, add dictionnaries for common strings */
 
 /* Current archive version number (the format we can output) */
 #define K_VERS_MAJOR 1
@@ -346,10 +346,14 @@ struct _tocEntry
 								 * in restore) */
 	char	   *tag;			/* index tag */
 	char	   *namespace;		/* null or empty string if not in a schema */
+	int			namespace_idx;	/* schema idx in dictionnary */
 	char	   *tablespace;		/* null if not in a tablespace; empty string
 								 * means use database default */
+	int			tablespace_idx;	/* tablespace idx in dictionnary */
 	char	   *tableam;		/* table access method, only for TABLE tags */
+	int			tableam_idx;	/* table access method idx in dictionnary, only for TABLE tags */
 	char	   *owner;
+	int			owner_idx;
 	char	   *desc;
 	char	   *defn;
 	char	   *dropStmt;
-- 
2.42.0

Reply via email to