Hi,

Attached is a rebased version of the Generational context, originally submitted with SlabContext (which was already committed into Pg 10).

The main change is that I've abandoned the pattern of defining a Data structure and then a pointer typedef, i.e.

    typedef struct GenerationContextData { ... } GenerationContextData;
    typedef struct GenerationContextData *GenerationContext;

Now it's just

    typedef struct GenerationContext { ... } GenerationContext;

mostly because SlabContext was committed like that, and because Andres was complaining about this code pattern ;-)

Otherwise the design is the same as repeatedly discussed before.

To show that this is still a valuable change (even after SlabContext and the addition of a doubly-linked list to AllocSet), I've repeated the test done by Andres in [1], using the test case described in [2], that is

  -- generate data
  SELECT COUNT(*) FROM (SELECT test1()
                          FROM generate_series(1, 50000)) foo;

  -- benchmark (measure time and VmPeak)
  SELECT COUNT(*) FROM (SELECT *
                          FROM pg_logical_slot_get_changes('test', NULL,
                                        NULL, 'include-xids', '0')) foo;

with different values passed to the first step (instead of the 50000). The VmPeak numbers look like this:

         N           master        patched
    --------------------------------------
    100000       1155220 kB      361604 kB
    200000       2020668 kB      434060 kB
    300000       2890236 kB      502452 kB
    400000       3751592 kB      570816 kB
    500000       4621124 kB      639168 kB

and the timing (on assert-enabled build):

         N           master        patched
    --------------------------------------
    100000      1103.182 ms     412.734 ms
    200000      2216.711 ms     820.438 ms
    300000      3320.095 ms    1223.576 ms
    400000      4584.919 ms    1621.261 ms
    500000      5590.444 ms    2113.820 ms

So it seems it's still a significant improvement, both in terms of memory usage and timing. Admittedly, this is a single test, so ideas of other useful test cases are welcome.

regards


[1] https://www.postgresql.org/message-id/20170227111732.vrx5v72ighehwpkf%40alap3.anarazel.de

[2] https://www.postgresql.org/message-id/20160706185502.1426.28143%40wrigleys.postgresql.org

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 1c46d25ffa9bb104c415cba7c7b3a013958b6ab5 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 14 Aug 2017 01:52:50 +0200
Subject: [PATCH] Generational memory allocator

This memory context is based on the assumption that the allocated chunks
have similar lifespans, i.e. that chunks allocated close to each other
in time will also be freed in close proximity, and mostly in the same
order. This is typical for various queue-like use cases, i.e. when
tuples are constructed, processed and then thrown away.

The memory context uses a very simple approach to free space management.
Instead of a complex global freelist, each block tracks a number
of allocated and freed chunks. The space released by freed chunks is not
reused, and once all chunks are freed (i.e. when nallocated == nfreed),
the whole block is thrown away. When the allocated chunks have similar
lifespan, this works very well and is extremely cheap.
---
 src/backend/replication/logical/reorderbuffer.c |  74 +--
 src/backend/utils/mmgr/Makefile                 |   2 +-
 src/backend/utils/mmgr/generation.c             | 768 ++++++++++++++++++++++++
 src/include/nodes/memnodes.h                    |   4 +-
 src/include/nodes/nodes.h                       |   1 +
 src/include/replication/reorderbuffer.h         |  15 +-
 src/include/utils/memutils.h                    |   5 +
 7 files changed, 790 insertions(+), 79 deletions(-)
 create mode 100644 src/backend/utils/mmgr/generation.c

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 5567bee..5309170 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -150,15 +150,6 @@ typedef struct ReorderBufferDiskChange
  */
 static const Size max_changes_in_memory = 4096;
 
-/*
- * We use a very simple form of a slab allocator for frequently allocated
- * objects, simply keeping a fixed number in a linked list when unused,
- * instead pfree()ing them. Without that in many workloads aset.c becomes a
- * major bottleneck, especially when spilling to disk while decoding batch
- * workloads.
- */
-static const Size max_cached_tuplebufs = 4096 * 2;	/* ~8MB */
-
 /* ---------------------------------------
  * primary reorderbuffer support routines
  * ---------------------------------------
@@ -248,6 +239,10 @@ ReorderBufferAllocate(void)
 											SLAB_DEFAULT_BLOCK_SIZE,
 											sizeof(ReorderBufferTXN));
 
+	buffer->tup_context = GenerationContextCreate(new_ctx,
+										   "Tuples",
+										   SLAB_LARGE_BLOCK_SIZE);
+
 	hash_ctl.keysize = sizeof(TransactionId);
 	hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
 	hash_ctl.hcxt = buffer->context;
@@ -258,15 +253,12 @@ ReorderBufferAllocate(void)
 	buffer->by_txn_last_xid = InvalidTransactionId;
 	buffer->by_txn_last_txn = NULL;
 
-	buffer->nr_cached_tuplebufs = 0;
-
 	buffer->outbuf = NULL;
 	buffer->outbufsize = 0;
 
 	buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
 	dlist_init(&buffer->toplevel_by_lsn);
-	slist_init(&buffer->cached_tuplebufs);
 
 	return buffer;
 }
@@ -419,42 +411,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
 	alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-	/*
-	 * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for
-	 * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples
-	 * generated for oldtuples can be bigger, as they don't have out-of-line
-	 * toast columns.
-	 */
-	if (alloc_len < MaxHeapTupleSize)
-		alloc_len = MaxHeapTupleSize;
-
-
-	/* if small enough, check the slab cache */
-	if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs)
-	{
-		rb->nr_cached_tuplebufs--;
-		tuple = slist_container(ReorderBufferTupleBuf, node,
-								slist_pop_head_node(&rb->cached_tuplebufs));
-		Assert(tuple->alloc_tuple_size == MaxHeapTupleSize);
-#ifdef USE_ASSERT_CHECKING
-		memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
-		VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData));
-#endif
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-#ifdef USE_ASSERT_CHECKING
-		memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-#endif
-	}
-	else
-	{
-		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->context,
-							   sizeof(ReorderBufferTupleBuf) +
-							   MAXIMUM_ALIGNOF + alloc_len);
-		tuple->alloc_tuple_size = alloc_len;
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-	}
+	tuple = (ReorderBufferTupleBuf *)
+		MemoryContextAlloc(rb->tup_context,
+						   sizeof(ReorderBufferTupleBuf) +
+						   MAXIMUM_ALIGNOF + alloc_len);
+	tuple->alloc_tuple_size = alloc_len;
+	tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
 	return tuple;
 }
@@ -468,21 +430,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
-	/* check whether to put into the slab cache, oversized tuples never are */
-	if (tuple->alloc_tuple_size == MaxHeapTupleSize &&
-		rb->nr_cached_tuplebufs < max_cached_tuplebufs)
-	{
-		rb->nr_cached_tuplebufs++;
-		slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
-		VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
-		VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
-	}
-	else
-	{
-		pfree(tuple);
-	}
+	pfree(tuple);
 }
 
 /*
diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index cd0e803..f644c40 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
+OBJS = aset.o dsa.o freepage.o generation.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
new file mode 100644
index 0000000..a35155c
--- /dev/null
+++ b/src/backend/utils/mmgr/generation.c
@@ -0,0 +1,768 @@
+/*-------------------------------------------------------------------------
+ *
+ * generation.c
+ *	  Generational allocator definitions.
+ *
+ * Generation is a custom MemoryContext implementation designed for cases of
+ * chunks with similar lifespan.
+ *
+ * Portions Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/generation.c
+ *
+ *
+ *	This memory context is based on the assumption that the allocated chunks
+ *	have similar lifespans, i.e. that chunks allocated close to each other
+ *	in time will also be freed in close proximity, and mostly in the same
+ *	order. This is typical for various queue-like use cases, i.e. when tuples
+ *	are constructed, processed and then thrown away.
+ *
+ *	The memory context uses a very simple approach to free space management.
+ *	Instead of a complex global freelist, each block tracks a number
+ *	of allocated and freed chunks. The space released by freed chunks is not
+ *	reused, and once all chunks are freed (i.e. when nallocated == nfreed),
+ *	the whole block is thrown away. When the allocated chunks have similar
+ *	lifespan, this works very well and is extremely cheap.
+ *
+ *	The current implementation only uses a fixed block size - maybe it should
+ *	adapt a min/max block size range, and grow the blocks automatically.
+ *	It already uses dedicated blocks for oversized chunks.
+ *
+ *	XXX It might be possible to improve this by keeping a small freelist for
+ *	only a small number of recent blocks, but it's not clear it's worth the
+ *	additional complexity.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define Generation_BLOCKHDRSZ	MAXALIGN(sizeof(GenerationBlock))
+#define Generation_CHUNKHDRSZ	sizeof(GenerationChunk)
+
+/* Portion of Generation_CHUNKHDRSZ examined outside generation.c. */
+#define Generation_CHUNK_PUBLIC	\
+	(offsetof(GenerationChunk, size) + sizeof(Size))
+
+/* Portion of Generation_CHUNKHDRSZ excluding trailing padding. */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunk, requested_size) + sizeof(Size))
+#else
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunk, size) + sizeof(Size))
+#endif
+
+typedef struct GenerationBlock GenerationBlock;	/* forward reference */
+typedef struct GenerationChunk GenerationChunk;
+
+typedef void *GenerationPointer;
+
+/*
+ * GenerationContext is a simple memory context not reusing allocated chunks, and
+ * freeing blocks once all chunks are freed.
+ */
+typedef struct GenerationContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+
+	/* Generational context parameters */
+	Size		blockSize;		/* block size */
+
+	GenerationBlock	*block;		/* current (most recently allocated) block */
+	dlist_head	blocks;			/* list of blocks */
+
+}	GenerationContext;
+
+/*
+ * GenerationBlock
+ *		A GenerationBlock is the unit of memory that is obtained by generation.c
+ *		from malloc().  It contains one or more GenerationChunks, which are
+ *		the units requested by palloc() and freed by pfree().  GenerationChunks
+ *		cannot be returned to malloc() individually, instead pfree()
+ *		updates a free counter on a block and when all chunks on a block
+ *		are freed the whole block is returned to malloc().
+ *
+ *		GenerationBlock is the header data for a block --- the usable space
+ *		within the block begins at the next alignment boundary.
+ */
+typedef struct GenerationBlock
+{
+	dlist_node	node;			/* doubly-linked list */
+	int			nchunks;		/* number of chunks in the block */
+	int			nfree;			/* number of free chunks */
+	char	   *freeptr;		/* start of free space in this block */
+	char	   *endptr;			/* end of space in this block */
+}	GenerationBlock;
+
+/*
+ * GenerationChunk
+ *		The prefix of each piece of memory in a GenerationBlock
+ */
+typedef struct GenerationChunk
+{
+	/* block owning this chunk */
+	void	   *block;
+
+	/* size is always the size of the usable space in the chunk */
+	Size		size;
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* when debugging memory usage, also store actual requested size */
+	/* this is zero in a free chunk */
+	Size		requested_size;
+#endif   /* MEMORY_CONTEXT_CHECKING */
+
+	GenerationContext *context;		/* owning context */
+	/* there must not be any padding to reach a MAXALIGN boundary here! */
+}	GenerationChunk;
+
+
+/*
+ * GenerationIsValid
+ *		True iff set is valid allocation set.
+ */
+#define GenerationIsValid(set) PointerIsValid(set)
+
+/* Convert between a user pointer and the chunk header that precedes it. */
+#define GenerationPointerGetChunk(ptr) \
+	((GenerationChunk *)(((char *)(ptr)) - Generation_CHUNKHDRSZ))
+#define GenerationChunkGetPointer(chk) \
+	((GenerationPointer *)(((char *)(chk)) + Generation_CHUNKHDRSZ))
+
+/*
+ * These functions implement the MemoryContext API for Generation contexts.
+ */
+static void *GenerationAlloc(MemoryContext context, Size size);
+static void GenerationFree(MemoryContext context, void *pointer);
+static void *GenerationRealloc(MemoryContext context, void *pointer, Size size);
+static void GenerationInit(MemoryContext context);
+static void GenerationReset(MemoryContext context);
+static void GenerationDelete(MemoryContext context);
+static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
+static bool GenerationIsEmpty(MemoryContext context);
+static void GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void GenerationCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Generation contexts.
+ * The entry order must match the MemoryContextMethods struct declaration.
+ */
+static MemoryContextMethods GenerationMethods = {
+	GenerationAlloc,
+	GenerationFree,
+	GenerationRealloc,
+	GenerationInit,
+	GenerationReset,
+	GenerationDelete,
+	GenerationGetChunkSpace,
+	GenerationIsEmpty,
+	GenerationStats
+#ifdef MEMORY_CONTEXT_CHECKING
+	,GenerationCheck
+#endif
+};
+
/* ----------
 * Debug macros
 * ----------
 */
#ifdef HAVE_ALLOCINFO
/*
 * chunk->size is a Size (size_t), so it must be printed with %zu; "%lu"
 * has the wrong width on platforms where long is narrower than size_t
 * (e.g. 64-bit Windows).
 */
#define GenerationFreeInfo(_cxt, _chunk) \
			fprintf(stderr, "GenerationFree: %s: %p, %zu\n", \
				(_cxt)->name, (_chunk), (_chunk)->size)
#define GenerationAllocInfo(_cxt, _chunk) \
			fprintf(stderr, "GenerationAlloc: %s: %p, %zu\n", \
				(_cxt)->name, (_chunk), (_chunk)->size)
#else
#define GenerationFreeInfo(_cxt, _chunk)
#define GenerationAllocInfo(_cxt, _chunk)
#endif
+
+
+/*
+ * Public routines
+ */
+
+
+/*
+ * GenerationContextCreate
+ *		Create a new Generation context.
+ *
+ * parent: parent context, or NULL for a top-level context
+ * name: context name (for debugging output)
+ * blockSize: size of the regular blocks; must be MAXALIGN'ed and >= 1kB
+ */
+MemoryContext
+GenerationContextCreate(MemoryContext parent,
+				 const char *name,
+				 Size blockSize)
+{
+	GenerationContext  *set;
+
+	/*
+	 * The pointer-recovery macros assume the context field is the last
+	 * MAXALIGN'ed member of the chunk header; fail the build otherwise.
+	 */
+	StaticAssertStmt(offsetof(GenerationChunk, context) + sizeof(MemoryContext) ==
+					 MAXALIGN(sizeof(GenerationChunk)),
+					 "padding calculation in GenerationChunk is wrong");
+
+	/*
+	 * First, validate allocation parameters.  (If we're going to throw an
+	 * error, we should do so before the context is created, not after.)  We
+	 * somewhat arbitrarily enforce a minimum 1K block size, mostly because
+	 * that's what AllocSet does.
+	 */
+	if (blockSize != MAXALIGN(blockSize) ||
+		blockSize < 1024 ||
+		!AllocHugeSizeIsValid(blockSize))
+		elog(ERROR, "invalid blockSize for memory context: %zu",
+			 blockSize);
+
+	/* Do the type-independent part of context creation */
+	set = (GenerationContext *) MemoryContextCreate(T_GenerationContext,
+									sizeof(GenerationContext),
+									&GenerationMethods,
+									parent,
+									name);
+
+	set->blockSize = blockSize;
+	set->block = NULL;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * GenerationInit
+ *		Context-type-specific initialization routine.
+ *
+ * Only the block list needs initializing here; set->block was already set
+ * to NULL by GenerationContextCreate.
+ */
+static void
+GenerationInit(MemoryContext context)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+
+	dlist_init(&set->blocks);
+}
+
+/*
+ * GenerationReset
+ *		Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+GenerationReset(MemoryContext context)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+	dlist_mutable_iter miter;
+
+	AssertArg(GenerationIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Check for corruption and leaks before freeing */
+	GenerationCheck(context);
+#endif
+
+	dlist_foreach_modify(miter, &set->blocks)
+	{
+		GenerationBlock *block = dlist_container(GenerationBlock, node, miter.cur);
+
+		dlist_delete(miter.cur);
+
+		/* Normal case, release the block */
+#ifdef CLOBBER_FREED_MEMORY
+		wipe_mem(block, set->blockSize);
+#endif
+
+		free(block);
+	}
+
+	set->block = NULL;
+
+	Assert(dlist_is_empty(&set->blocks));
+}
+
+/*
+ * GenerationDelete
+ *		Frees all memory which is allocated in the given set, in preparation
+ *		for deletion of the set. We simply call GenerationReset() which does all the
+ *		dirty work.  The context header itself is released by mcxt.c.
+ */
+static void
+GenerationDelete(MemoryContext context)
+{
+	/* just reset (although not really necessary) */
+	GenerationReset(context);
+}
+
+/*
+ * GenerationAlloc
+ *		Returns pointer to allocated memory of given size or NULL if
+ *		request could not be completed; memory is added to the set.
+ *
+ * No request may exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - Generation_BLOCKHDRSZ - Generation_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ */
+static void *
+GenerationAlloc(MemoryContext context, Size size)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+	GenerationBlock	   *block;
+	GenerationChunk	   *chunk;
+
+	Size		chunk_size = MAXALIGN(size);
+
+	/* is it an over-sized chunk? if yes, allocate special block */
+	if (chunk_size > set->blockSize / 8)
+	{
+		Size		blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ;
+
+		block = (GenerationBlock *) malloc(blksize);
+		if (block == NULL)
+			return NULL;
+
+		/* block with a single (used) chunk */
+		block->nchunks = 1;
+		block->nfree = 0;
+
+		/* the block is completely full */
+		block->freeptr = block->endptr = ((char *) block) + blksize;
+
+		chunk = (GenerationChunk *) (((char *) block) + Generation_BLOCKHDRSZ);
+		chunk->context = set;
+		chunk->size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+		/* Valgrind: Will be made NOACCESS below. */
+		chunk->requested_size = size;
+		/* set mark to catch clobber of "unused" space */
+		if (size < chunk_size)
+			set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* fill the allocated space with junk */
+		randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+		/* add the block to the list of allocated blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		GenerationAllocInfo(set, chunk);
+
+		/*
+		 * Chunk header public fields remain DEFINED.  The requested
+		 * allocation itself can be NOACCESS or UNDEFINED; our caller will
+		 * soon make it UNDEFINED.  Make extra space at the end of the chunk,
+		 * if any, NOACCESS.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS((char *) chunk + Generation_CHUNK_PUBLIC,
+							 chunk_size + Generation_CHUNKHDRSZ - Generation_CHUNK_PUBLIC);
+
+		return GenerationChunkGetPointer(chunk);
+	}
+
+	/*
+	 * Not an over-sized chunk. Is there enough space on the current block? If
+	 * not, allocate a new "regular" block.
+	 */
+	block = set->block;
+
+	if ((block == NULL) ||
+		(block->endptr - block->freeptr) < Generation_CHUNKHDRSZ + chunk_size)
+	{
+		Size		blksize = set->blockSize;
+
+		block = (GenerationBlock *) malloc(blksize);
+
+		if (block == NULL)
+			return NULL;
+
+		block->nchunks = 0;
+		block->nfree = 0;
+
+		block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ;
+		block->endptr = ((char *) block) + blksize;
+
+		/* Mark unallocated space NOACCESS. */
+		VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
+								   blksize - Generation_BLOCKHDRSZ);
+
+		/* add it to the doubly-linked list of blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		/* and also use it as the current allocation block */
+		set->block = block;
+	}
+
+	/* we're supposed to have a block with enough free space now */
+	Assert(block != NULL);
+	Assert((block->endptr - block->freeptr) >= Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk = (GenerationChunk *) block->freeptr;
+
+	block->nchunks += 1;
+	block->freeptr += (Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk->block = block;
+
+	chunk->context = set;
+	chunk->size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Valgrind: Free list requested_size should be DEFINED. */
+	chunk->requested_size = size;
+	VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
+							   sizeof(chunk->requested_size));
+	/* set mark to catch clobber of "unused" space */
+	if (size < chunk->size)
+		set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+	GenerationAllocInfo(set, chunk);
+	return GenerationChunkGetPointer(chunk);
+}
+
+/*
+ * GenerationFree
+ *		Update number of chunks on the block, and if all chunks on the block
+ *		are freed then discard the block.
+ */
+static void
+GenerationFree(MemoryContext context, void *pointer)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+	GenerationChunk	   *chunk = GenerationPointerGetChunk(pointer);
+	/*
+	 * NOTE(review): the over-sized path in GenerationAlloc never assigns
+	 * chunk->block, so for over-sized chunks this reads uninitialized
+	 * memory — that path needs to set the field too.
+	 */
+	GenerationBlock	   *block = chunk->block;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
+							  sizeof(chunk->requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->requested_size < chunk->size)
+		if (!sentinel_ok(pointer, chunk->requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 ((MemoryContext)set)->name, chunk);
+#endif
+
+#ifdef CLOBBER_FREED_MEMORY
+	wipe_mem(pointer, chunk->size);
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Reset requested_size to 0; GenerationCheck treats that as "freed" */
+	chunk->requested_size = 0;
+#endif
+
+	block->nfree += 1;
+
+	Assert(block->nchunks > 0);
+	Assert(block->nfree <= block->nchunks);
+
+	/* If there are still allocated chunks on the block, we're done. */
+	if (block->nfree < block->nchunks)
+		return;
+
+	/*
+	 * The block is empty, so let's get rid of it. First remove it from the
+	 * list of blocks, then return it to malloc().
+	 */
+	dlist_delete(&block->node);
+
+	/* Also make sure the block is not marked as the current block. */
+	if (set->block == block)
+		set->block = NULL;
+
+	free(block);
+}
+
+/*
+ * GenerationRealloc
+ *		When handling repalloc, we simply allocate a new chunk, copy the data
+ *		and discard the old one. The only exception is when the new size fits
+ *		into the old chunk - in that case we just update chunk header.
+ */
+static void *
+GenerationRealloc(MemoryContext context, void *pointer, Size size)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+	GenerationChunk	   *chunk = GenerationPointerGetChunk(pointer);
+	GenerationPointer	newPointer;
+	Size		oldsize = chunk->size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
+							  sizeof(chunk->requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->requested_size < oldsize)
+		if (!sentinel_ok(pointer, chunk->requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 ((MemoryContext)set)->name, chunk);
+#endif
+
+	/*
+	 * Maybe the allocated area already is >= the new size.  (In particular,
+	 * we always fall out here if the requested size is a decrease.)
+	 *
+	 * This memory context does not use power-of-2 chunk sizing and instead
+	 * carves the chunks to be as small as possible, so most repalloc() calls
+	 * will end up in the palloc/memcpy/pfree branch.
+	 *
+	 * NOTE(review): on an in-place shrink, chunk->size keeps its old value
+	 * while requested_size is lowered — verify GenerationCheck tolerates
+	 * that combination.
+	 *
+	 * XXX Perhaps we should annotate this condition with unlikely()?
+	 */
+	if (oldsize >= size)
+	{
+#ifdef MEMORY_CONTEXT_CHECKING
+		Size		oldrequest = chunk->requested_size;
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* We can only fill the extra space if we know the prior request */
+		if (size > oldrequest)
+			randomize_mem((char *) pointer + oldrequest,
+						  size - oldrequest);
+#endif
+
+		chunk->requested_size = size;
+		VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
+								   sizeof(chunk->requested_size));
+
+		/*
+		 * If this is an increase, mark any newly-available part UNDEFINED.
+		 * Otherwise, mark the obsolete part NOACCESS.
+		 */
+		if (size > oldrequest)
+			VALGRIND_MAKE_MEM_UNDEFINED((char *) pointer + oldrequest,
+										size - oldrequest);
+		else
+			VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size,
+									   oldsize - size);
+
+		/* set mark to catch clobber of "unused" space */
+		if (size < oldsize)
+			set_sentinel(pointer, size);
+#else							/* !MEMORY_CONTEXT_CHECKING */
+
+		/*
+		 * We don't have the information to determine whether we're growing
+		 * the old request or shrinking it, so we conservatively mark the
+		 * entire new allocation DEFINED.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS(pointer, oldsize);
+		VALGRIND_MAKE_MEM_DEFINED(pointer, size);
+#endif
+
+		return pointer;
+	}
+
+	/* allocate new chunk */
+	newPointer = GenerationAlloc((MemoryContext) set, size);
+
+	/* leave immediately if request was not completed */
+	if (newPointer == NULL)
+		return NULL;
+
+	/*
+	 * GenerationAlloc() just made the region NOACCESS.  Change it to UNDEFINED
+	 * for the moment; memcpy() will then transfer definedness from the old
+	 * allocation to the new.  If we know the old allocation, copy just that
+	 * much.  Otherwise, make the entire old chunk defined to avoid errors as
+	 * we copy the currently-NOACCESS trailing bytes.
+	 */
+	VALGRIND_MAKE_MEM_UNDEFINED(newPointer, size);
+#ifdef MEMORY_CONTEXT_CHECKING
+	oldsize = chunk->requested_size;
+#else
+	VALGRIND_MAKE_MEM_DEFINED(pointer, oldsize);
+#endif
+
+	/* transfer existing data (certain to fit) */
+	memcpy(newPointer, pointer, oldsize);
+
+	/* free old chunk */
+	GenerationFree((MemoryContext) set, pointer);
+
+	return newPointer;
+}
+
+/*
+ * GenerationGetChunkSpace
+ *		Given a currently-allocated chunk, determine the total space
+ *		it occupies (including all memory-allocation overhead).
+ *
+ * Only the per-chunk header is counted; the share of block-header overhead
+ * is not attributed to individual chunks.
+ */
+static Size
+GenerationGetChunkSpace(MemoryContext context, void *pointer)
+{
+	GenerationChunk *chunk = GenerationPointerGetChunk(pointer);
+
+	return chunk->size + Generation_CHUNKHDRSZ;
+}
+
+/*
+ * GenerationIsEmpty
+ *		Is a Generation context empty of any allocated space?
+ *
+ * An empty block list is a faithful signal: GenerationFree() releases a
+ * block as soon as its last chunk is freed, so any block still on the list
+ * holds at least one live chunk.
+ */
+static bool
+GenerationIsEmpty(MemoryContext context)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+
+	return dlist_is_empty(&set->blocks);
+}
+
+/*
+ * GenerationStats
+ *		Compute stats about memory consumption of an Generation.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Generation into *totals.
+ *
+ * XXX freespace only accounts for empty space at the end of the block, not
+ * space of freed chunks (which is unknown).
+ */
+static void
+GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals)
+{
+	GenerationContext  *set = (GenerationContext *) context;
+
+	Size		nblocks = 0;
+	Size		nchunks = 0;
+	Size		nfreechunks = 0;
+	Size		totalspace = 0;
+	Size		freespace = 0;
+
+	dlist_iter	iter;
+
+	dlist_foreach(iter, &set->blocks)
+	{
+		GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);
+
+		nblocks++;
+		nchunks += block->nchunks;
+		nfreechunks += block->nfree;
+		totalspace += set->blockSize;
+		freespace += (block->endptr - block->freeptr);
+	}
+
+	if (print)
+	{
+		int			i;
+
+		for (i = 0; i < level; i++)
+			fprintf(stderr, "  ");
+		fprintf(stderr,
+			"Generation: %s: %zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
+				((MemoryContext)set)->name, totalspace, nblocks, nchunks, freespace,
+				nfreechunks, totalspace - freespace);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += nblocks;
+		totals->freechunks += nfreechunks;
+		totals->totalspace += totalspace;
+		totals->freespace += freespace;
+	}
+}
+
+
#ifdef MEMORY_CONTEXT_CHECKING

/*
 * GenerationCheck
 *		Walk through chunks and check consistency of memory.
 *
 * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
 * find yourself in an infinite loop when trouble occurs, because this
 * routine will be entered again when elog cleanup tries to release memory!
 */
static void
GenerationCheck(MemoryContext context)
{
	GenerationContext  *gen = (GenerationContext *) context;
	char	   *name = context->name;
	dlist_iter	iter;

	/* walk all blocks in this context */
	dlist_foreach(iter, &gen->blocks)
	{
		int			nfree,
					nchunks;
		char	   *ptr;
		GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);

		/*
		 * We can't have freed more chunks than we allocated, so complain
		 * only when nfree actually exceeds nchunks.  (The previous "<="
		 * test was inverted and fired on every healthy block.)
		 */
		if (block->nfree > block->nchunks)
			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p exceeds %d allocated",
				 name, block->nfree, block, block->nchunks);

		/* Now walk through the chunks and count them. */
		nfree = 0;
		nchunks = 0;
		ptr = ((char *) block) + Generation_BLOCKHDRSZ;

		while (ptr < block->freeptr)
		{
			GenerationChunk *chunk = (GenerationChunk *) ptr;

			/* move to the next chunk */
			ptr += (chunk->size + Generation_CHUNKHDRSZ);

			/* chunks have both block and context pointers, so check both */
			if (chunk->block != block)
				elog(WARNING, "problem in Generation %s: bogus block link in block %p, chunk %p",
					 name, block, chunk);

			if (chunk->context != gen)
				elog(WARNING, "problem in Generation %s: bogus context link in block %p, chunk %p",
					 name, block, chunk);

			nchunks += 1;

			/* if requested_size==0, the chunk was freed */
			if (chunk->requested_size > 0)
			{
				/* if the chunk was not freed, we can trigger valgrind checks */
				VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
									   sizeof(chunk->requested_size));

				/* we're in a no-freelist branch */
				VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
									   sizeof(chunk->requested_size));

				/*
				 * Make sure the chunk size is correct.
				 *
				 * NOTE(review): an in-place shrinking repalloc keeps
				 * chunk->size while lowering requested_size (see
				 * GenerationRealloc), which would trip this warning —
				 * confirm whether that case needs special handling.
				 */
				if (chunk->size != MAXALIGN(chunk->requested_size))
					elog(WARNING, "problem in Generation %s: bogus chunk size in block %p, chunk %p",
						 name, block, chunk);

				/* there might be sentinel (thanks to alignment) */
				if (chunk->requested_size < chunk->size &&
					!sentinel_ok(chunk, Generation_CHUNKHDRSZ + chunk->requested_size))
					elog(WARNING, "problem in Generation %s: detected write past chunk end in block %p, chunk %p",
						 name, block, chunk);
			}
			else
				nfree += 1;
		}

		/*
		 * Make sure we got the expected number of allocated and free chunks
		 * (as tracked in the block header).
		 */
		if (nchunks != block->nchunks)
			elog(WARNING, "problem in Generation %s: number of allocated chunks %d in block %p does not match header %d",
				 name, nchunks, block, block->nchunks);

		if (nfree != block->nfree)
			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p does not match header %d",
				 name, nfree, block, block->nfree);
	}
}

#endif   /* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index 7a0c676..e22d9fb 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -96,6 +96,8 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
 	((context) != NULL && \
-	 (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
+	 (IsA((context), AllocSetContext) || \
+	  IsA((context), SlabContext) || \
+	  IsA((context), GenerationContext)))
 
 #endif							/* MEMNODES_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 27bd4f3..202ecb3 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -274,6 +274,7 @@ typedef enum NodeTag
 	T_MemoryContext,
 	T_AllocSetContext,
 	T_SlabContext,
+	T_GenerationContext,
 
 	/*
 	 * TAGS FOR VALUE NODES (value.h)
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index 86effe1..b18ce5a 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -344,20 +344,7 @@ struct ReorderBuffer
 	 */
 	MemoryContext change_context;
 	MemoryContext txn_context;
-
-	/*
-	 * Data structure slab cache.
-	 *
-	 * We allocate/deallocate some structures very frequently, to avoid bigger
-	 * overhead we cache some unused ones here.
-	 *
-	 * The maximum number of cached entries is controlled by const variables
-	 * on top of reorderbuffer.c
-	 */
-
-	/* cached ReorderBufferTupleBufs */
-	slist_head	cached_tuplebufs;
-	Size		nr_cached_tuplebufs;
+	MemoryContext tup_context;
 
 	XLogRecPtr	current_restart_decoding_lsn;
 
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index c553349..42b5246 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -155,6 +155,11 @@ extern MemoryContext SlabContextCreate(MemoryContext parent,
 				  Size blockSize,
 				  Size chunkSize);
 
+/* generation.c */
+extern MemoryContext GenerationContextCreate(MemoryContext parent,
+				 const char *name,
+				 Size blockSize);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
-- 
2.9.4

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to