Hi Alvaro,

Am Montag, den 05.03.2018, 13:56 -0300 schrieb Alvaro Herrera:
> I made a few amendments (here's v5) and was ready to push, when I
> noticed that _StartBlobs() does not seem to be doing the right thing.
> Did you test this with a few large objects?

I did test it on the regression database, which leaves one or two large
objects behind:


mba@fock:~$ psql -h /tmp -p 65432 -d regression -c '\dl'
                Large objects
  ID   | Owner |         Description          
-------+-------+------------------------------
  3001 | mba   | testing comments
 36867 | mba   | I Wandered Lonely as a Cloud
 47229 | mba   | 
(3 rows)

What exactly is wrong with _StartBlobs()? It calls setFilePath() which
makes sure /dev/null is opened and not something else.

> The reason I noticed is I wondered about the amount of open() calls
> (plus zlib function calls) we would save by keeping an FD open to
> /dev/null, rather than opening it over and over for each object -- ie.,
> maybe not touch setFilePath() at all, if possible.  That looks perhaps
> too invasive, so maybe not.  But do audit other callsites that may open
> files.

I see your point; I've hacked together the attached additional PoC
patch, which keeps the dataFH open. The parallel case was tricky, I had
to add an additional flag to lclContext so that DEVNULL is only opened
once for data files cause I could not find a place where to set it for
parallel workers and it crashed otherwise.

This cuts down the number of /dev/null opens from 352 to 6 for a -j 2
dump of the regression database to /dev/null.

In my opinion, I think this is too evolved and possibly error-prone for
being just an optimization, so I'd drop that for v11 for now and maybe
revisit it later.


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 3b77023d77..4ffa6aaef2 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -55,6 +55,7 @@ typedef struct
 	cfp		   *blobsTocFH;		/* file handle for blobs.toc */
 	ParallelState *pstate;		/* for parallel backup / restore */
 	bool		discard;		/* target is DEVNULL */
+	bool		devnull_open;		/* whether DEVNULL has been opened as dataFH */
 } lclContext;
 
 typedef struct
@@ -340,10 +341,14 @@ _StartData(ArchiveHandle *AH, TocEntry *te)
 
 	setFilePath(AH, fname, tctx->filename);
 
-	ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
-	if (ctx->dataFH == NULL)
-		exit_horribly(modulename, "could not open output file \"%s\": %s\n",
-					  fname, strerror(errno));
+	/* Open the file, unless we are discarding output to the null device */
+	if (!ctx->discard)
+	{
+		ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
+		if (ctx->dataFH == NULL)
+			exit_horribly(modulename, "could not open output file \"%s\": %s\n",
+						  fname, strerror(errno));
+	}
 }
 
 /*
@@ -379,10 +384,13 @@ _EndData(ArchiveHandle *AH, TocEntry *te)
 {
 	lclContext *ctx = (lclContext *) AH->formatData;
 
-	/* Close the file */
-	cfclose(ctx->dataFH);
+	/* Close the file, unless we are discarding output to the null device */
+	if (!ctx->discard)
+	{
+		cfclose(ctx->dataFH);
 
-	ctx->dataFH = NULL;
+		ctx->dataFH = NULL;
+	}
 }
 
 /*
@@ -604,6 +612,14 @@ _CloseArchive(ArchiveHandle *AH)
 		if (cfclose(tocFH) != 0)
 			exit_horribly(modulename, "could not close TOC file: %s\n",
 						  strerror(errno));
+
+		/*
+		 * Open the DEVNULL special file for writing, we only need to open it
+		 * once.
+		 */
+		if (ctx->discard)
+			ctx->dataFH = cfopen_write(DEVNULL, PG_BINARY_W, 0);
+
 		WriteDataChunks(AH, ctx->pstate);
 
 		ParallelBackupEnd(AH, ctx->pstate);
@@ -657,6 +673,20 @@ _StartBlobs(ArchiveHandle *AH, TocEntry *te)
 	if (ctx->blobsTocFH == NULL)
 		exit_horribly(modulename, "could not open output file \"%s\": %s\n",
 					  fname, strerror(errno));
+
+	/*
+	 * If we are discarding output to the null device, open the dataFH here so
+	 * we do not have to open and close it for each blob.
+	 */
+	if (ctx->discard)
+	{
+		snprintf(fname, MAXPGPATH, DEVNULL);
+		ctx->dataFH = cfopen_write(fname, PG_BINARY_W, 0);
+
+		if (ctx->dataFH == NULL)
+			exit_horribly(modulename, "could not open output file \"%s\": %s\n",
+						  fname, strerror(errno));
+	}
 }
 
 /*
@@ -670,20 +700,16 @@ _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
 	lclContext *ctx = (lclContext *) AH->formatData;
 	char		fname[MAXPGPATH];
 
-	/*
-	 * If we are discarding output to the null device, just use that as
-	 * fname.
-	 */
-	if (ctx->discard)
-		snprintf(fname, MAXPGPATH, DEVNULL);
-	else
+	if (!ctx->discard)
+	{
 		snprintf(fname, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid);
 
-	ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
+		ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression);
 
-	if (ctx->dataFH == NULL)
-		exit_horribly(modulename, "could not open output file \"%s\": %s\n",
-					  fname, strerror(errno));
+		if (ctx->dataFH == NULL)
+			exit_horribly(modulename, "could not open output file \"%s\": %s\n",
+						  fname, strerror(errno));
+	}
 }
 
 /*
@@ -698,9 +724,12 @@ _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
 	char		buf[50];
 	int			len;
 
-	/* Close the BLOB data file itself */
-	cfclose(ctx->dataFH);
-	ctx->dataFH = NULL;
+	if (!ctx->discard)
+	{
+		/* Close the BLOB data file itself */
+		cfclose(ctx->dataFH);
+		ctx->dataFH = NULL;
+	}
 
 	/* register the blob in blobs.toc */
 	len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid);
@@ -720,6 +749,12 @@ _EndBlobs(ArchiveHandle *AH, TocEntry *te)
 
 	cfclose(ctx->blobsTocFH);
 	ctx->blobsTocFH = NULL;
+
+	if (ctx->discard)
+	{
+		cfclose(ctx->dataFH);
+		ctx->dataFH = NULL;
+	}
 }
 
 /*
@@ -793,6 +828,14 @@ _DeClone(ArchiveHandle *AH)
 static int
 _WorkerJobDumpDirectory(ArchiveHandle *AH, TocEntry *te)
 {
+	lclContext *ctx = (lclContext *) AH->formatData;
+	if (ctx->discard)
+		if (!ctx->devnull_open)
+		{
+			ctx->dataFH = cfopen_write(DEVNULL, PG_BINARY_W, 0);
+			ctx->devnull_open = true;
+		}
+
 	/*
 	 * This function returns void. We either fail and die horribly or
 	 * succeed... A failure will be detected by the parent when the child dies

Reply via email to