On 17.11.2014 19:46, Andres Freund wrote:
> On 2014-11-17 19:42:25 +0100, Tomas Vondra wrote:
>> On 17.11.2014 18:04, Andres Freund wrote:
>>> Hi,
>>>
>>> On 2014-11-16 23:31:51 -0800, Jeff Davis wrote:
>>>> *** a/src/include/nodes/memnodes.h
>>>> --- b/src/include/nodes/memnodes.h
>>>> ***************
>>>> *** 60,65 **** typedef struct MemoryContextData
>>>> --- 60,66 ----
>>>>    MemoryContext nextchild;        /* next child of same parent */
>>>>    char       *name;                       /* context name (just for 
>>>> debugging) */
>>>>    bool            isReset;                /* T = no space alloced since 
>>>> last reset */
>>>> +  uint64          mem_allocated;  /* track memory allocated for this 
>>>> context */
>>>>   #ifdef USE_ASSERT_CHECKING
>>>>    bool            allowInCritSection;     /* allow palloc in critical 
>>>> section */
>>>>   #endif
>>>
>>> That's quite possibly one culprit for the slowdown. Right now one 
>>> AllocSetContext struct fits precisely into three cachelines. After
>>> your change it doesn't anymore.
>>
>> I'm no PPC64 expert, but I thought the cache lines are 128 B on that
>> platform, since at least Power6?
> 
> Hm, might be true.
> 
>> Also, if I'm counting right, the MemoryContextData structure is 56B
>> without the 'mem_allocated' field (and without the allowInCritSection),
>> and 64B with it (at that particular place). sizeof() seems to confirm
>> that. (But I'm on x86, so maybe the alignment on PPC64 is different?).
> 
> The MemoryContextData struct is embedded into AllocSetContext.

Oh, right. That makes is slightly more complicated, though, because
AllocSetContext adds 6 x 8B fields plus an in-line array of freelist
pointers. Which is 11x8 bytes. So in total 56+56+88=200B, without the
additional field. There might be some difference because of alignment,
but I still don't see how that one additional field might impact cachelines?

But if we separated the freelist, that might actually make it faster, at
least for calls that don't touch the freelist at all, no? Because most
of the palloc() calls will be handled from the current block.

I tried this on the patch I sent on 23/8 (with the MemoryAccounting
hierarchy parallel to the contexts), and I got this (running 10x the
reindex, without restarts or such):

without the patch
-----------------
  ... 1723933 KB used: CPU 1.35s/4.33u sec elapsed 10.38 sec
  ... 1723933 KB used: CPU 1.39s/4.19u sec elapsed 9.89 sec
  ... 1723933 KB used: CPU 1.39s/4.26u sec elapsed 9.90 sec
  ... 1723933 KB used: CPU 1.45s/4.22u sec elapsed 10.15 sec
  ... 1723933 KB used: CPU 1.36s/4.20u sec elapsed 9.84 sec
  ... 1723933 KB used: CPU 1.32s/4.20u sec elapsed 9.96 sec
  ... 1723933 KB used: CPU 1.50s/4.23u sec elapsed 9.91 sec
  ... 1723933 KB used: CPU 1.47s/4.24u sec elapsed 9.90 sec
  ... 1723933 KB used: CPU 1.38s/4.23u sec elapsed 10.09 sec
  ... 1723933 KB used: CPU 1.37s/4.19u sec elapsed 9.84 sec

with the patch (no tracking enabled)
------------------------------------
  ... 1723933 KB used: CPU 1.40s/4.28u sec elapsed 10.79 sec
  ... 1723933 KB used: CPU 1.39s/4.32u sec elapsed 10.14 sec
  ... 1723933 KB used: CPU 1.40s/4.16u sec elapsed 9.99 sec
  ... 1723933 KB used: CPU 1.32s/4.21u sec elapsed 10.16 sec
  ... 1723933 KB used: CPU 1.37s/4.34u sec elapsed 10.03 sec
  ... 1723933 KB used: CPU 1.35s/4.22u sec elapsed 9.97 sec
  ... 1723933 KB used: CPU 1.43s/4.20u sec elapsed 9.99 sec
  ... 1723933 KB used: CPU 1.39s/4.17u sec elapsed 9.71 sec
  ... 1723933 KB used: CPU 1.44s/4.16u sec elapsed 9.91 sec
  ... 1723933 KB used: CPU 1.40s/4.25u sec elapsed 9.99 sec

So it's about 9,986 vs 10,068 for averages, and 9,905 vs 9,99 for a
median. I.e. ~0.8% difference. That's on x86, though. I wonder what'd be
the effect on the PPC machine.

Patch attached - it's not perfect, though. For example it should free
the freelists at some point, but I feel a bit lazy today.

Tomas
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 743455e..d2ecb5b 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -175,7 +175,7 @@ typedef struct AllocSetContext
 	MemoryContextData header;	/* Standard memory-context fields */
 	/* Info about storage allocated in this context: */
 	AllocBlock	blocks;			/* head of list of blocks in this set */
-	AllocChunk	freelist[ALLOCSET_NUM_FREELISTS];		/* free chunk lists */
+	AllocChunk *freelist;		/* free chunk lists */
 	/* Allocation parameters for this context: */
 	Size		initBlockSize;	/* initial block size */
 	Size		maxBlockSize;	/* maximum block size */
@@ -242,6 +242,8 @@ typedef struct AllocChunkData
 #define AllocChunkGetPointer(chk)	\
 					((AllocPointer)(((char *)(chk)) + ALLOC_CHUNKHDRSZ))
 
+static void update_allocation(MemoryContext context, int64 size);
+
 /*
  * These functions implement the MemoryContext API for AllocSet contexts.
  */
@@ -250,7 +252,7 @@ static void AllocSetFree(MemoryContext context, void *pointer);
 static void *AllocSetRealloc(MemoryContext context, void *pointer, Size size);
 static void AllocSetInit(MemoryContext context);
 static void AllocSetReset(MemoryContext context);
-static void AllocSetDelete(MemoryContext context);
+static void AllocSetDelete(MemoryContext context, MemoryContext parent);
 static Size AllocSetGetChunkSpace(MemoryContext context, void *pointer);
 static bool AllocSetIsEmpty(MemoryContext context);
 static void AllocSetStats(MemoryContext context, int level);
@@ -430,6 +432,9 @@ randomize_mem(char *ptr, size_t size)
  * minContextSize: minimum context size
  * initBlockSize: initial allocation block size
  * maxBlockSize: maximum allocation block size
+ *
+ * The flag determining whether this context tracks memory usage is inherited
+ * from the parent context.
  */
 MemoryContext
 AllocSetContextCreate(MemoryContext parent,
@@ -438,6 +443,26 @@ AllocSetContextCreate(MemoryContext parent,
 					  Size initBlockSize,
 					  Size maxBlockSize)
 {
+	return AllocSetContextCreateTracked(
+		parent, name, minContextSize, initBlockSize, maxBlockSize,
+		false);
+}
+
+/*
+ * AllocSetContextCreateTracked
+ *		Create a new AllocSet context.
+ *
+ * Implementation for AllocSetContextCreate, but also allows the caller to
+ * specify whether memory usage should be tracked or not.
+ */
+MemoryContext
+AllocSetContextCreateTracked(MemoryContext parent,
+							 const char *name,
+							 Size minContextSize,
+							 Size initBlockSize,
+							 Size maxBlockSize,
+							 bool track_mem)
+{
 	AllocSet	context;
 
 	/* Do the type-independent part of context creation */
@@ -445,8 +470,23 @@ AllocSetContextCreate(MemoryContext parent,
 											 sizeof(AllocSetContext),
 											 &AllocSetMethods,
 											 parent,
-											 name);
-
+											 name,
+											 track_mem);
+
+    if (parent != NULL)
+    {
+        /* Normal case: allocate the node in TopMemoryContext */
+        context->freelist = MemoryContextAllocZero(parent,
+                            ALLOCSET_NUM_FREELISTS * sizeof(AllocChunk));
+    }
+    else
+    {
+        /* Special case for startup: use good ol' malloc */
+        context->freelist = (AllocChunk*)malloc(ALLOCSET_NUM_FREELISTS * sizeof(AllocChunk));
+        memset(context->freelist, 0, ALLOCSET_NUM_FREELISTS * sizeof(AllocChunk));
+    }
+
+    
 	/*
 	 * Make sure alloc parameters are reasonable, and save them.
 	 *
@@ -500,6 +540,9 @@ AllocSetContextCreate(MemoryContext parent,
 					 errdetail("Failed while creating memory context \"%s\".",
 							   name)));
 		}
+
+		update_allocation((MemoryContext) context, blksize);
+
 		block->aset = context;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
 		block->endptr = ((char *) block) + blksize;
@@ -562,7 +605,7 @@ AllocSetReset(MemoryContext context)
 #endif
 
 	/* Clear chunk freelists */
-	MemSetAligned(set->freelist, 0, sizeof(set->freelist));
+	MemSet(set->freelist, 0, ALLOCSET_NUM_FREELISTS * sizeof(AllocChunk));
 
 	block = set->blocks;
 
@@ -590,6 +633,7 @@ AllocSetReset(MemoryContext context)
 		else
 		{
 			/* Normal case, release the block */
+			update_allocation(context, -(block->endptr - ((char*) block)));
 #ifdef CLOBBER_FREED_MEMORY
 			wipe_mem(block, block->freeptr - ((char *) block));
 #endif
@@ -611,11 +655,13 @@ AllocSetReset(MemoryContext context)
  * But note we are not responsible for deleting the context node itself.
  */
 static void
-AllocSetDelete(MemoryContext context)
+AllocSetDelete(MemoryContext context, MemoryContext parent)
 {
 	AllocSet	set = (AllocSet) context;
 	AllocBlock	block = set->blocks;
 
+	MemoryAccounting accounting;
+
 	AssertArg(AllocSetIsValid(set));
 
 #ifdef MEMORY_CONTEXT_CHECKING
@@ -623,8 +669,20 @@ AllocSetDelete(MemoryContext context)
 	AllocSetCheck(context);
 #endif
 
+	if (context->accounting != NULL) {
+
+		accounting = context->accounting->parent;
+
+		while (accounting != NULL)
+		{
+			accounting->total_allocated -= context->accounting->total_allocated;
+			accounting = accounting->parent;
+		}
+	}
+
 	/* Make it look empty, just in case... */
-	MemSetAligned(set->freelist, 0, sizeof(set->freelist));
+	MemSet(set->freelist, 0, ALLOCSET_NUM_FREELISTS * sizeof(AllocChunk));
+
 	set->blocks = NULL;
 	set->keeper = NULL;
 
@@ -678,6 +736,9 @@ AllocSetAlloc(MemoryContext context, Size size)
 					 errmsg("out of memory"),
 					 errdetail("Failed on request of size %zu.", size)));
 		}
+
+		update_allocation(context, blksize);
+
 		block->aset = set;
 		block->freeptr = block->endptr = ((char *) block) + blksize;
 
@@ -873,6 +934,8 @@ AllocSetAlloc(MemoryContext context, Size size)
 					 errdetail("Failed on request of size %zu.", size)));
 		}
 
+		update_allocation(context, blksize);
+
 		block->aset = set;
 		block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
 		block->endptr = ((char *) block) + blksize;
@@ -976,6 +1039,7 @@ AllocSetFree(MemoryContext context, void *pointer)
 			set->blocks = block->next;
 		else
 			prevblock->next = block->next;
+		update_allocation(context, -(block->endptr - ((char*) block)));
 #ifdef CLOBBER_FREED_MEMORY
 		wipe_mem(block, block->freeptr - ((char *) block));
 #endif
@@ -1088,6 +1152,7 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 		AllocBlock	prevblock = NULL;
 		Size		chksize;
 		Size		blksize;
+		Size		oldblksize;
 
 		while (block != NULL)
 		{
@@ -1105,6 +1170,8 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 		/* Do the realloc */
 		chksize = MAXALIGN(size);
 		blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
+		oldblksize = block->endptr - ((char *)block);
+
 		block = (AllocBlock) realloc(block, blksize);
 		if (block == NULL)
 		{
@@ -1114,6 +1181,7 @@ AllocSetRealloc(MemoryContext context, void *pointer, Size size)
 					 errmsg("out of memory"),
 					 errdetail("Failed on request of size %zu.", size)));
 		}
+		update_allocation(context, blksize - oldblksize);
 		block->freeptr = block->endptr = ((char *) block) + blksize;
 
 		/* Update pointers since block has likely been moved */
@@ -1277,6 +1345,36 @@ AllocSetStats(MemoryContext context, int level)
 }
 
 
+/*
+ * update_allocation
+ *
+ * Track newly-allocated or newly-freed memory (freed memory should be
+ * negative).
+ */
+static void
+update_allocation(MemoryContext context, int64 size)
+{
+	MemoryContext parent;
+	MemoryAccounting accounting = context->accounting;
+
+	if (accounting == NULL)
+		return;
+
+	accounting->self_allocated += size;
+
+	while (accounting != NULL) {
+
+		accounting->total_allocated += size;
+
+		Assert(accounting->self_allocated >= 0);
+		Assert(accounting->total_allocated >= accounting->self_allocated);
+
+		accounting = accounting->parent;
+
+	}
+
+}
+
 #ifdef MEMORY_CONTEXT_CHECKING
 
 /*
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 4185a03..2e2697f 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -187,6 +187,8 @@ MemoryContextResetChildren(MemoryContext context)
 void
 MemoryContextDelete(MemoryContext context)
 {
+	MemoryContext parent = context->parent;
+
 	AssertArg(MemoryContextIsValid(context));
 	/* We had better not be deleting TopMemoryContext ... */
 	Assert(context != TopMemoryContext);
@@ -202,7 +204,12 @@ MemoryContextDelete(MemoryContext context)
 	 */
 	MemoryContextSetParent(context, NULL);
 
-	(*context->methods->delete_context) (context);
+	/* pass the parent in case it's needed, however */
+	(*context->methods->delete_context) (context, parent);
+
+	if (context->track_mem)
+		pfree(context->accounting);
+	
 	VALGRIND_DESTROY_MEMPOOL(context);
 	pfree(context);
 }
@@ -324,6 +331,26 @@ MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
 }
 
 /*
+ * MemoryContextGetAllocated
+ *
+ * Return memory allocated by the system to this context. If total is true,
+ * include child contexts. Context must have track_mem set.
+ */
+Size
+MemoryContextGetAllocated(MemoryContext context, bool total)
+{
+	Assert(context->track_mem);
+
+	if (! context->track_mem)
+		return 0;
+
+	if (total)
+		return context->accounting->total_allocated;
+	else
+		return context->accounting->self_allocated;
+}
+
+/*
  * GetMemoryChunkSpace
  *		Given a currently-allocated chunk, determine the total space
  *		it occupies (including all memory-allocation overhead).
@@ -546,7 +573,8 @@ MemoryContext
 MemoryContextCreate(NodeTag tag, Size size,
 					MemoryContextMethods *methods,
 					MemoryContext parent,
-					const char *name)
+					const char *name,
+					bool track_mem)
 {
 	MemoryContext node;
 	Size		needed = size + strlen(name) + 1;
@@ -576,6 +604,8 @@ MemoryContextCreate(NodeTag tag, Size size,
 	node->firstchild = NULL;
 	node->nextchild = NULL;
 	node->isReset = true;
+	node->track_mem = track_mem;
+	node->accounting = NULL;
 	node->name = ((char *) node) + size;
 	strcpy(node->name, name);
 
@@ -596,6 +626,18 @@ MemoryContextCreate(NodeTag tag, Size size,
 #endif
 	}
 
+	/* if we want to do tracking, just allocate MemoryAccountingData */
+	if (track_mem)
+	{
+		node->accounting = (MemoryAccounting)MemoryContextAlloc(TopMemoryContext,
+												sizeof(MemoryAccountingData));
+		if (parent)
+			node->accounting->parent = parent->accounting;
+	} else if (parent) {
+		
+		node->accounting = parent->accounting;
+	}
+
 	VALGRIND_CREATE_MEMPOOL(node, 0, false);
 
 	/* Return to type-specific creation routine to finish up */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index ad77509..7ba59f6 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -41,7 +41,8 @@ typedef struct MemoryContextMethods
 	void	   *(*realloc) (MemoryContext context, void *pointer, Size size);
 	void		(*init) (MemoryContext context);
 	void		(*reset) (MemoryContext context);
-	void		(*delete_context) (MemoryContext context);
+	void		(*delete_context) (MemoryContext context,
+								   MemoryContext parent);
 	Size		(*get_chunk_space) (MemoryContext context, void *pointer);
 	bool		(*is_empty) (MemoryContext context);
 	void		(*stats) (MemoryContext context, int level);
@@ -50,6 +51,18 @@ typedef struct MemoryContextMethods
 #endif
 } MemoryContextMethods;
 
+typedef struct MemoryAccountingData {
+
+	Size	total_allocated; /* including child contexts */
+	Size	self_allocated;  /* not including child contexts */
+
+	/* parent accounting (not parent context) */
+	struct MemoryAccountingData * parent;
+
+} MemoryAccountingData;
+
+typedef MemoryAccountingData * MemoryAccounting;
+
 
 typedef struct MemoryContextData
 {
@@ -60,6 +73,8 @@ typedef struct MemoryContextData
 	MemoryContext nextchild;	/* next child of same parent */
 	char	   *name;			/* context name (just for debugging) */
 	bool		isReset;		/* T = no space alloced since last reset */
+	bool		track_mem;		/* whether to track memory usage */
+	MemoryAccounting	accounting;
 #ifdef USE_ASSERT_CHECKING
 	bool		allowInCritSection;	/* allow palloc in critical section */
 #endif
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 2fede86..b82f923 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -96,6 +96,7 @@ extern void MemoryContextDeleteChildren(MemoryContext context);
 extern void MemoryContextResetAndDeleteChildren(MemoryContext context);
 extern void MemoryContextSetParent(MemoryContext context,
 					   MemoryContext new_parent);
+extern Size MemoryContextGetAllocated(MemoryContext context, bool total);
 extern Size GetMemoryChunkSpace(void *pointer);
 extern MemoryContext GetMemoryChunkContext(void *pointer);
 extern MemoryContext MemoryContextGetParent(MemoryContext context);
@@ -117,7 +118,8 @@ extern bool MemoryContextContains(MemoryContext context, void *pointer);
 extern MemoryContext MemoryContextCreate(NodeTag tag, Size size,
 					MemoryContextMethods *methods,
 					MemoryContext parent,
-					const char *name);
+					const char *name,
+					bool track_mem);
 
 
 /*
@@ -130,6 +132,12 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent,
 					  Size minContextSize,
 					  Size initBlockSize,
 					  Size maxBlockSize);
+extern MemoryContext AllocSetContextCreateTracked(MemoryContext parent,
+					  const char *name,
+					  Size minContextSize,
+					  Size initBlockSize,
+					  Size maxBlockSize,
+					  bool track_mem);
 
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to