On 8.1.2013 22:30, Alvaro Herrera wrote:
> Tomas Vondra wrote:
>> On 8.1.2013 03:47, Shigeru Hanada wrote:
> 
>>>>> * +1 for Alvaro's suggestion about avoiding palloc traffic by starting
>>>>> with 8 elements or so.
>>>>
>>>> Done.
>>>
>>> Not yet.  Initial size of srels array is still 1 element.
>>
>> I've checked the patch and I see there 'maxrels = 8' - or do you mean
>> something else?
> 
> Well, you need to ensure that the initial palloc is an array of that
> size.

Oh, right - I forgot to modify the palloc() call. Thanks for spotting
this. Attached is a patch with increased threshold and fixed palloc call.

Tomas
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 8dc622a..b1790eb 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -312,6 +312,11 @@ smgrDoPendingDeletes(bool isCommit)
 	PendingRelDelete *pending;
 	PendingRelDelete *prev;
 	PendingRelDelete *next;
+    
+	int			nrels = 0,
+				i = 0,
+				maxrels = 8;
+	SMgrRelation   *srels = palloc(maxrels * sizeof(SMgrRelation));
 
 	prev = NULL;
 	for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -335,14 +340,32 @@ smgrDoPendingDeletes(bool isCommit)
 				SMgrRelation srel;
 
 				srel = smgropen(pending->relnode, pending->backend);
-				smgrdounlink(srel, false);
-				smgrclose(srel);
+
+				/* extend the array if needed (double the size) */
+				if (maxrels <= nrels)
+				{
+					maxrels *= 2;
+					srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
+				}
+
+				srels[nrels++] = srel;
 			}
 			/* must explicitly free the list entry */
 			pfree(pending);
 			/* prev does not change */
 		}
 	}
+
+	if (nrels > 0)
+	{
+		smgrdounlinkall(srels, nrels, false);
+
+		for (i = 0; i < nrels; i++)
+			smgrclose(srels[i]);
+	}
+
+	pfree(srels);
+
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index dddb6c0..2330fda 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -62,6 +62,7 @@
 #define BUF_WRITTEN				0x01
 #define BUF_REUSABLE			0x02
 
+#define DROP_RELS_BSEARCH_THRESHOLD		20
 
 /* GUC variables */
 bool		zero_damaged_pages = false;
@@ -108,6 +109,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
 static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);
 
+static int rnode_comparator(const void * p1, const void * p2);
 
 /*
  * PrefetchBuffer -- initiate asynchronous read of a block of a relation
@@ -2094,35 +2096,87 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeAllBuffers(RelFileNodeBackend rnode)
+DropRelFileNodeAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 {
-	int			i;
+	int         i, j, n = 0;
+	RelFileNode *nodes;
+
+	nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
 
 	/* If it's a local relation, it's localbuf.c's problem. */
-	if (RelFileNodeBackendIsTemp(rnode))
+	for (i = 0; i < nnodes; i++)
 	{
-		if (rnode.backend == MyBackendId)
-			DropRelFileNodeAllLocalBuffers(rnode.node);
+		if (RelFileNodeBackendIsTemp(rnodes[i]))
+		{
+			if (rnodes[i].backend == MyBackendId)
+				DropRelFileNodeAllLocalBuffers(rnodes[i].node);
+		}
+		else
+		{
+			nodes[n++] = rnodes[i].node;
+		}
+	}
+
+	/*
+	 * If there are no non-local relations, then we're done. Release the memory
+	 * and return.
+	 */
+	if (n == 0)
+	{
+		pfree(nodes);
 		return;
 	}
 
+	/* sort the list of rnodes (but only if we're going to use the bsearch) */
+	if (n > DROP_RELS_BSEARCH_THRESHOLD)
+		pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
+
 	for (i = 0; i < NBuffers; i++)
 	{
+		RelFileNode *rnode = NULL;
 		volatile BufferDesc *bufHdr = &BufferDescriptors[i];
-
+ 
 		/*
 		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
 		 * and saves some cycles.
 		 */
-		if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+
+		/*
+		 * For low number of relations to drop just use a simple walk through,
+		 * to save the bsearch overhead. The BSEARCH_LIMIT is rather a guess
+		 * than a exactly determined value, as it depends on many factors (CPU
+		 * and RAM speeds, amount of shared buffers etc.).
+		 */
+		if (n < DROP_RELS_BSEARCH_THRESHOLD)
+		{
+			for (j = 0; j < n; j++)
+			{
+				if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
+				{
+					rnode = &nodes[j];
+					break;
+				}
+			}
+		}
+		else
+		{
+			rnode = bsearch((const void *) &(bufHdr->tag.rnode),
+											nodes, n, sizeof(RelFileNode),
+											rnode_comparator);
+		}
+
+		/* buffer does not belong to any of the relations */
+		if (rnode == NULL)
 			continue;
 
 		LockBufHdr(bufHdr);
-		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+		if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
 			InvalidateBuffer(bufHdr);	/* releases spinlock */
 		else
 			UnlockBufHdr(bufHdr);
 	}
+
+	pfree(nodes);
 }
 
 /* ---------------------------------------------------------------------
@@ -2953,3 +3007,31 @@ local_buffer_write_error_callback(void *arg)
 		pfree(path);
 	}
 }
+
+/*
+ * Used to sort relfilenode array (ordered by [relnode, dbnode, spcnode]), so
+ * that it's suitable for bsearch.
+ */
+static int
+rnode_comparator(const void *p1, const void *p2)
+{
+	RelFileNode n1 = *(RelFileNode *) p1;
+	RelFileNode n2 = *(RelFileNode *) p2;
+
+	if (n1.relNode < n2.relNode)
+		return -1;
+	else if (n1.relNode > n2.relNode)
+		return 1;
+
+	if (n1.dbNode < n2.dbNode)
+		return -1;
+	else if (n1.dbNode > n2.dbNode)
+		return 1;
+
+	if (n1.spcNode < n2.spcNode)
+		return -1;
+	else if (n1.spcNode > n2.spcNode)
+		return 1;
+	else
+		return 0;
+}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 5dff8b3..f98ba27 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -390,7 +390,7 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
 	 * Get rid of any remaining buffers for the relation.  bufmgr will just
 	 * drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeAllBuffers(rnode);
+	DropRelFileNodeAllBuffers(&rnode, 1);
 
 	/*
 	 * It'd be nice to tell the stats collector to forget it immediately, too.
@@ -419,6 +419,69 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
 	(*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
 }
 
+void
+smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
+{
+	int i = 0;
+	RelFileNodeBackend *rnodes;
+	ForkNumber  forknum;
+
+	/* initialize an array which contains all relations to be dropped */
+	rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
+	for (i = 0; i < nrels; i++)
+	{
+		RelFileNodeBackend rnode = rels[i]->smgr_rnode;
+		int			which = rels[i]->smgr_which;
+
+		rnodes[i] = rnode;
+
+		/* Close the forks at smgr level */
+		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+			(*(smgrsw[which].smgr_close)) (rels[i], forknum);
+	}
+
+	/*
+	 * Get rid of any remaining buffers for the relation.  bufmgr will just
+	 * drop them without bothering to write the contents.
+	 */
+	DropRelFileNodeAllBuffers(rnodes, nrels);
+
+	/*
+	 * It'd be nice to tell the stats collector to forget it immediately, too.
+	 * But we can't because we don't know the OID (and in cases involving
+	 * relfilenode swaps, it's not always clear which table OID to forget,
+	 * anyway).
+	 */
+
+	/*
+	 * Send a shared-inval message to force other backends to close any
+	 * dangling smgr references they may have for this rel.  We should do this
+	 * before starting the actual unlinking, in case we fail partway through
+	 * that step.  Note that the sinval message will eventually come back to
+	 * this backend, too, and thereby provide a backstop that we closed our
+	 * own smgr rel.
+	 */
+	for (i = 0; i < nrels; i++)
+		CacheInvalidateSmgr(rnodes[i]);
+
+	/*
+	 * Delete the physical file(s).
+	 *
+	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+	 * ERROR, because we've already decided to commit or abort the current
+	 * xact.
+	 */
+
+	for (i = 0; i < nrels; i++)
+	{
+		int	which = rels[i]->smgr_which;
+		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+			(*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo);
+	}
+
+	pfree(rnodes);
+}
+
 /*
  *	smgrdounlinkfork() -- Immediately unlink one fork of a relation.
  *
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 51eb77b..1deee42 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -188,7 +188,7 @@ extern void FlushRelationBuffers(Relation rel);
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
 					   ForkNumber forkNum, BlockNumber firstDelBlock);
-extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
+extern void DropRelFileNodeAllBuffers(RelFileNodeBackend * rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 9eccf76..272476b 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -85,6 +85,7 @@ extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNodeBackend rnode);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdounlink(SMgrRelation reln, bool isRedo);
+extern void smgrdounlinkall(SMgrRelation * rels, int nrels, bool isRedo);
 extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
 		   BlockNumber blocknum, char *buffer, bool skipFsync);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to