Hi

Following the thread "Inefficiency in parallel pg_restore with many tables", I 
started digging into why the toc.dat files are that big and where time is spent 
when parsing them.

I ended up writing several patches that shaved some time for pg_restore -l, 
and reduced the toc.dat size.

First patch is "finishing" the job of removing has oids support. When this 
support was removed, instead of dropping the field from the dumps and 
increasing the dump versions, the field was kept as is. This field stores a 
boolean as a string, "true" or "false". This is not free, and requires 10 
bytes per toc entry.

The second patch removes calls to sscanf and replaces them with strtoul. This 
was the biggest speedup for pg_restore -l.

The third patch changes the dump format further to remove these strtoul calls 
and store the integers as is instead.

The fourth patch is dirtier and does more changes to the dump format. Instead 
of storing the owner, tablespace, table access method and schema of each 
object as a string, pg_dump builds an array of these, stores them at the 
beginning of the file and replaces the strings with integer fields in the dump. 
This reduces the file size further, and removes a lot of calls to ReadStr, thus 
saving quite some time.

Toc has 453999 entries.

Patch   Toc size        Dump -s duration        pg_restore -l duration
HEAD    214M    23.1s   1.27s
#1 (has oid)    210M    22.9s   1.26s
#2 (scanf)      210M    22.9s   1.07s
#3 (no strtoul) 202M    22.8s   0.94s
#4 (string list)        181M    23.1s   0.87s

Patch four is likely to require more changes. I don't know PostgreSQL code 
enough to do better than calling pgmalloc/pgrealloc and maintaining a char** 
manually, I guess there are structs and functions that do that in a better 
way. And the location of string tables in the file and in the structures is 
probably not acceptable, I suppose these should go to the toc header instead.

I still submit these for comments and first review.

Best regards

 Pierre Ducroquet
>From cf5afd22a6e754ccfab0cd4477e378e32baf425a Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 21:31:33 +0200
Subject: [PATCH 1/4] drop has oids field instead of having static values

---
 src/bin/pg_dump/pg_backup_archiver.c | 3 +--
 src/bin/pg_dump/pg_backup_archiver.h | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..2888baa168 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2506,7 +2506,6 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->tablespace);
 		WriteStr(AH, te->tableam);
 		WriteStr(AH, te->owner);
-		WriteStr(AH, "false");
 
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
@@ -2614,7 +2613,7 @@ ReadToc(ArchiveHandle *AH)
 		is_supported = true;
 		if (AH->version < K_VERS_1_9)
 			is_supported = false;
-		else
+		else if (AH->version < K_VERS_1_16)
 		{
 			tmp = ReadStr(AH);
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..930141a506 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -68,10 +68,11 @@
 #define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0)	/* add
 													 * compression_algorithm
 													 * in header */
+#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format */
 
 /* Current archive version number (the format we can output) */
 #define K_VERS_MAJOR 1
-#define K_VERS_MINOR 15
+#define K_VERS_MINOR 16
 #define K_VERS_REV 0
 #define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)
 
-- 
2.41.0

>From 62c6bb9bca491586314760edbf8330308e8528c9 Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 22:23:28 +0200
Subject: [PATCH 2/4] convert sscanf to strtoul

---
 src/bin/pg_dump/pg_backup_archiver.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 2888baa168..0fd6510c10 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2552,13 +2552,13 @@ ReadToc(ArchiveHandle *AH)
 		if (AH->version >= K_VERS_1_8)
 		{
 			tmp = ReadStr(AH);
-			sscanf(tmp, "%u", &te->catalogId.tableoid);
+			te->catalogId.tableoid = strtoul(tmp, NULL, 10);
 			free(tmp);
 		}
 		else
 			te->catalogId.tableoid = InvalidOid;
 		tmp = ReadStr(AH);
-		sscanf(tmp, "%u", &te->catalogId.oid);
+		te->catalogId.oid = strtoul(tmp, NULL, 10);
 		free(tmp);
 
 		te->tag = ReadStr(AH);
@@ -2642,7 +2642,7 @@ ReadToc(ArchiveHandle *AH)
 					depSize *= 2;
 					deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
 				}
-				sscanf(tmp, "%d", &deps[depIdx]);
+				deps[depIdx] = strtoul(tmp, NULL, 10);
 				free(tmp);
 				depIdx++;
 			}
-- 
2.41.0

>From 83d01e473ba951c5a9681126c95fc4604e3c3fad Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Thu, 27 Jul 2023 10:04:51 +0200
Subject: [PATCH 3/4] store oids as integer instead of string

---
 src/bin/pg_dump/pg_backup_archiver.c | 63 ++++++++++++++++++----------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 0fd6510c10..b33cf60546 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2466,7 +2466,6 @@ void
 WriteToc(ArchiveHandle *AH)
 {
 	TocEntry   *te;
-	char		workbuf[32];
 	int			tocCount;
 	int			i;
 
@@ -2490,11 +2489,8 @@ WriteToc(ArchiveHandle *AH)
 		WriteInt(AH, te->dumpId);
 		WriteInt(AH, te->dataDumper ? 1 : 0);
 
-		/* OID is recorded as a string for historical reasons */
-		sprintf(workbuf, "%u", te->catalogId.tableoid);
-		WriteStr(AH, workbuf);
-		sprintf(workbuf, "%u", te->catalogId.oid);
-		WriteStr(AH, workbuf);
+		WriteInt(AH, te->catalogId.tableoid);
+		WriteInt(AH, te->catalogId.oid);
 
 		WriteStr(AH, te->tag);
 		WriteStr(AH, te->desc);
@@ -2510,10 +2506,9 @@ WriteToc(ArchiveHandle *AH)
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
 		{
-			sprintf(workbuf, "%d", te->dependencies[i]);
-			WriteStr(AH, workbuf);
+			WriteInt(AH, te->dependencies[i]);
 		}
-		WriteStr(AH, NULL);		/* Terminate List */
+		WriteInt(AH, -1);		/* Terminate List */
 
 		if (AH->WriteExtraTocPtr)
 			AH->WriteExtraTocPtr(AH, te);
@@ -2525,6 +2520,7 @@ ReadToc(ArchiveHandle *AH)
 {
 	int			i;
 	char	   *tmp;
+	int			depId;
 	DumpId	   *deps;
 	int			depIdx;
 	int			depSize;
@@ -2549,7 +2545,11 @@ ReadToc(ArchiveHandle *AH)
 
 		te->hadDumper = ReadInt(AH);
 
-		if (AH->version >= K_VERS_1_8)
+		if (AH->version >= K_VERS_1_16)
+		{
+			te->catalogId.tableoid = ReadInt(AH);
+		}
+		else if (AH->version >= K_VERS_1_8)
 		{
 			tmp = ReadStr(AH);
 			te->catalogId.tableoid = strtoul(tmp, NULL, 10);
@@ -2557,9 +2557,15 @@ ReadToc(ArchiveHandle *AH)
 		}
 		else
 			te->catalogId.tableoid = InvalidOid;
-		tmp = ReadStr(AH);
-		te->catalogId.oid = strtoul(tmp, NULL, 10);
-		free(tmp);
+
+		if (AH->version >= K_VERS_1_16)
+			te->catalogId.oid = ReadInt(AH);
+		else
+		{
+			tmp = ReadStr(AH);
+			te->catalogId.oid = strtoul(tmp, NULL, 10);
+			free(tmp);
+		}
 
 		te->tag = ReadStr(AH);
 		te->desc = ReadStr(AH);
@@ -2634,16 +2640,31 @@ ReadToc(ArchiveHandle *AH)
 			depIdx = 0;
 			for (;;)
 			{
-				tmp = ReadStr(AH);
-				if (!tmp)
-					break;		/* end of list */
-				if (depIdx >= depSize)
+				if (AH->version >= K_VERS_1_16)
 				{
-					depSize *= 2;
-					deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					depId = ReadInt(AH);
+					if (depId == -1)
+						break;		/* end of list */
+					if (depIdx >= depSize)
+					{
+						depSize *= 2;
+						deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					}
+					deps[depIdx] = depId;
+				}
+				else
+				{
+					tmp = ReadStr(AH);
+					if (!tmp)
+						break;		/* end of list */
+					if (depIdx >= depSize)
+					{
+						depSize *= 2;
+						deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+					}
+					deps[depIdx] = strtoul(tmp, NULL, 10);
+					free(tmp);
 				}
-				deps[depIdx] = strtoul(tmp, NULL, 10);
-				free(tmp);
 				depIdx++;
 			}
 
-- 
2.41.0

>From 38c45385cc9da5ec7e7ba15f579757e5fb464877 Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <p.p...@pinaraf.info>
Date: Wed, 26 Jul 2023 22:18:54 +0200
Subject: [PATCH 4/4] move static strings to arrays at beginning

Since namespace, access method, tablespace and owners are unlikely to
be different for each toc entry, it is more efficient to store them as
references to an array at the beginning of the toc.dat file.
---
 src/bin/pg_dump/pg_backup_archiver.c | 291 ++++++++++++++++++++++++++-
 src/bin/pg_dump/pg_backup_archiver.h |   6 +-
 2 files changed, 285 insertions(+), 12 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index b33cf60546..e03603011d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2468,16 +2468,156 @@ WriteToc(ArchiveHandle *AH)
 	TocEntry   *te;
 	int			tocCount;
 	int			i;
-
-	/* count entries that will actually be dumped */
+	int			tableam_count;
+	char	  **tableams;
+	int			namespace_count;
+	char	  **namespaces;
+	int			owner_count;
+	char	  **owners;
+	int			tablespace_count;
+	char	  **tablespaces;
+	
+	/* prepare dynamic arrays */
+	tableams = palloc(sizeof(char*) * 1);
+	tableams[0] = NULL;
+	tableam_count = 0;
+	namespaces = palloc(sizeof(char*) * 1);
+	namespaces[0] = NULL;
+	namespace_count = 0;
+	owners = palloc(sizeof(char*) * 1);
+	owners[0] = NULL;
+	owner_count = 0;
+	tablespaces = palloc(sizeof(char*) * 1);
+	tablespaces[0] = NULL;
+	tablespace_count = 0;
+
+	/* count entries that will actually be dumped
+	 * and build the dictionnary */
 	tocCount = 0;
 	for (te = AH->toc->next; te != AH->toc; te = te->next)
 	{
 		if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) != 0)
 			tocCount++;
+		te->tableam_idx = -1;
+		if (te->tableam)
+		{
+			for (i = 0 ; i < tableam_count ; i++)
+			{
+				if (strcmp(tableams[i], te->tableam) == 0)
+				{
+					te->tableam_idx = i;
+					break;
+				}
+			}
+			if (te->tableam_idx == -1)
+			{
+				/* append tableam to array */
+				tableams[tableam_count] = te->tableam;
+				te->tableam_idx = tableam_count;
+				tableam_count++;
+				tableams = pg_realloc(tableams, sizeof(char*) * (tableam_count + 1));
+			}
+		}
+
+		te->namespace_idx = -1;
+		if (te->namespace)
+		{
+			/* reverse iterate for increased perfs, ToC entries can be ordered by namespace */
+			for (i = namespace_count - 1 ; i >= 0 ; i--)
+			{
+				if (strcmp(namespaces[i], te->namespace) == 0)
+				{
+					te->namespace_idx = i;
+					break;
+				}
+			}
+			if (te->namespace_idx == -1)
+			{
+				namespaces[namespace_count] = te->namespace;
+				te->namespace_idx = namespace_count;
+				namespace_count++;
+				namespaces = pg_realloc(namespaces, sizeof(char*) * (namespace_count + 1));
+			}
+		}
+
+		te->owner_idx = -1;
+		if (te->owner)
+		{
+			for (i = 0 ; i < owner_count ; i++)
+			{
+				if (strcmp(owners[i], te->owner) == 0)
+				{
+					te->owner_idx = i;
+					break;
+				}
+			}
+			if (te->owner_idx == -1)
+			{
+				owners[owner_count] = te->owner;
+				te->owner_idx = owner_count;
+				owner_count++;
+				owners = pg_realloc(owners, sizeof(char*) * (owner_count + 1));
+			}
+		}
+
+		te->tablespace_idx = -1;
+		if (te->tablespace)
+		{
+			for (i = 0 ; i < tablespace_count ; i++)
+			{
+				if (strcmp(tablespaces[i], te->tablespace) == 0)
+				{
+					te->tablespace_idx = i;
+					break;
+				}
+			}
+			if (te->tablespace_idx == -1)
+			{
+				tablespaces[tablespace_count] = te->tablespace;
+				te->tablespace_idx = tablespace_count;
+				tablespace_count++;
+				tablespaces = pg_realloc(tablespaces, sizeof(char*) * (tablespace_count + 1));
+			}
+		}
+	}
+	
+	/* write the list of am */
+	printf("%d tableams to save\n", tableam_count);
+	WriteInt(AH, tableam_count);
+	for (i = 0 ; i < tableam_count ; i++)
+	{
+		printf("%d is %s\n", i, tableams[i]);
+		WriteStr(AH, tableams[i]);
+	}
+
+	/* write the list of namespaces */
+	printf("%d namespaces to save\n", namespace_count);
+	WriteInt(AH, namespace_count);
+	for (i = 0 ; i < namespace_count ; i++)
+	{
+		printf("%d is %s\n", i, namespaces[i]);
+		WriteStr(AH, namespaces[i]);
+	}
+
+	/* write the list of owners */
+	printf("%d owners to save\n", owner_count);
+	WriteInt(AH, owner_count);
+	for (i = 0 ; i < owner_count ; i++)
+	{
+		printf("%d is %s\n", i, owners[i]);
+		WriteStr(AH, owners[i]);
+	}
+
+	/* write the list of tablespaces */
+	printf("%d tablespaces to save\n", tablespace_count);
+	WriteInt(AH, tablespace_count);
+	for (i = 0 ; i < tablespace_count ; i++)
+	{
+		printf("%d is %s\n", i, tablespaces[i]);
+		WriteStr(AH, tablespaces[i]);
 	}
 
-	/* printf("%d TOC Entries to save\n", tocCount); */
+	printf("%d TOC Entries to save\n", tocCount);
 
 	WriteInt(AH, tocCount);
 
@@ -2498,10 +2638,22 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->defn);
 		WriteStr(AH, te->dropStmt);
 		WriteStr(AH, te->copyStmt);
-		WriteStr(AH, te->namespace);
-		WriteStr(AH, te->tablespace);
-		WriteStr(AH, te->tableam);
-		WriteStr(AH, te->owner);
+		if (namespace_count < 128)
+			AH->WriteBytePtr(AH, te->namespace_idx);
+		else
+			WriteInt(AH, te->namespace_idx);
+		if (tablespace_count < 128)
+			AH->WriteBytePtr(AH, te->tablespace_idx);
+		else
+			WriteInt(AH, te->tablespace_idx);
+		if (tableam_count < 128)
+			AH->WriteBytePtr(AH, te->tableam_idx);
+		else
+			WriteInt(AH, te->tableam_idx);
+		if (owner_count < 128)
+			AH->WriteBytePtr(AH, te->owner_idx);
+		else
+			WriteInt(AH, te->owner_idx);
 
 		/* Dump list of dependencies */
 		for (i = 0; i < te->nDeps; i++)
@@ -2526,6 +2678,53 @@ ReadToc(ArchiveHandle *AH)
 	int			depSize;
 	TocEntry   *te;
 	bool		is_supported;
+	int			tableam_count;
+	char	  **tableams;
+	int			namespace_count;
+	char	  **namespaces;
+	int			owner_count;
+	char	  **owners;
+	int			tablespace_count;
+	char	  **tablespaces;
+	int			invalid_entry;
+
+	if (AH->version < K_VERS_1_16)
+	{
+		tableam_count = 0;
+		tableams = NULL;
+		namespace_count = 0;
+		namespaces = NULL;
+		owner_count = 0;
+		owners = NULL;
+		tablespace_count = 0;
+		tablespaces = NULL;
+	}
+	else
+	{
+		tableam_count = ReadInt(AH);
+		tableams = pg_malloc0(sizeof(char*) * tableam_count);
+		
+		for (i = 0 ; i < tableam_count ; i++)
+			tableams[i] = ReadStr(AH);
+
+		namespace_count = ReadInt(AH);
+		namespaces = pg_malloc0(sizeof(char*) * namespace_count);
+		
+		for (i = 0 ; i < namespace_count ; i++)
+			namespaces[i] = ReadStr(AH);
+
+		owner_count = ReadInt(AH);
+		owners = pg_malloc0(sizeof(char*) * owner_count);
+
+		for (i = 0 ; i < owner_count ; i++)
+			owners[i] = ReadStr(AH);
+
+		tablespace_count = ReadInt(AH);
+		tablespaces = pg_malloc0(sizeof(char*) * tablespace_count);
+
+		for (i = 0 ; i < tablespace_count ; i++)
+			tablespaces[i] = ReadStr(AH);
+	}
 
 	AH->tocCount = ReadInt(AH);
 	AH->maxDumpId = 0;
@@ -2606,16 +2805,86 @@ ReadToc(ArchiveHandle *AH)
 		if (AH->version >= K_VERS_1_3)
 			te->copyStmt = ReadStr(AH);
 
-		if (AH->version >= K_VERS_1_6)
+		if (AH->version >= K_VERS_1_6 && AH->version < K_VERS_1_16)
 			te->namespace = ReadStr(AH);
+		else
+		{
+			if (namespace_count < 128)
+			{
+				te->namespace_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->namespace_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->namespace_idx == invalid_entry)
+				te->namespace = "";
+			else
+				te->namespace = namespaces[te->namespace_idx];
+		}
 
-		if (AH->version >= K_VERS_1_10)
+		if (AH->version >= K_VERS_1_10 && AH->version < K_VERS_1_16)
 			te->tablespace = ReadStr(AH);
+		else
+		{
+			if (tablespace_count < 128)
+			{
+				te->tablespace_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->tablespace_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->tablespace_idx == invalid_entry)
+				te->tablespace = NULL;
+			else
+				te->tablespace = tablespaces[te->tablespace_idx];
+		}
 
-		if (AH->version >= K_VERS_1_14)
+		if (AH->version >= K_VERS_1_14 && AH->version < K_VERS_1_16)
 			te->tableam = ReadStr(AH);
+		else if (AH->version >= K_VERS_1_16)
+		{
+			if (tableam_count < 128)
+			{
+				te->tableam_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->tableam_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->tableam_idx == invalid_entry)
+				te->tableam = "";
+			else
+				te->tableam = tableams[te->tableam_idx];
+		}
+
+		if (AH->version < K_VERS_1_16)
+			te->owner = ReadStr(AH);
+		else
+		{
+			if (owner_count < 128)
+			{
+				te->owner_idx = AH->ReadBytePtr(AH);
+				invalid_entry = 255;
+			}
+			else
+			{
+				te->owner_idx = ReadInt(AH);
+				invalid_entry = -1;
+			}
+			if (te->owner_idx == invalid_entry)
+				te->owner = "";
+			else
+				te->owner = owners[te->owner_idx];
+		}
 
-		te->owner = ReadStr(AH);
 		is_supported = true;
 		if (AH->version < K_VERS_1_9)
 			is_supported = false;
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 930141a506..0eb170b709 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -68,7 +68,7 @@
 #define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0)	/* add
 													 * compression_algorithm
 													 * in header */
-#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format */
+#define K_VERS_1_16 MAKE_ARCHIVE_VERSION(1, 16, 0)	/* optimize dump format, add dictionnaries for common strings */
 
 /* Current archive version number (the format we can output) */
 #define K_VERS_MAJOR 1
@@ -345,10 +345,14 @@ struct _tocEntry
 								 * in restore) */
 	char	   *tag;			/* index tag */
 	char	   *namespace;		/* null or empty string if not in a schema */
+	int			namespace_idx;	/* schema idx in dictionnary */
 	char	   *tablespace;		/* null if not in a tablespace; empty string
 								 * means use database default */
+	int			tablespace_idx;	/* tablespace idx in dictionnary */
 	char	   *tableam;		/* table access method, only for TABLE tags */
+	int			tableam_idx;	/* table access method idx in dictionnary, only for TABLE tags */
 	char	   *owner;
+	int			owner_idx;
 	char	   *desc;
 	char	   *defn;
 	char	   *dropStmt;
-- 
2.41.0

Reply via email to