From 014b5e85761550df78897b18e52493fb7cc07a7d Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 24 Aug 2019 14:09:00 +1200
Subject: [PATCH] Avoid unnecessary copying in tqueue.c.

When receiving a tuple, don't make a new copy.  Instead, reuse
the same HeapTupleData object and have it point directly to the
data returned by shm_mq_receive_bytes(), which in turn is valid
until the next call.  Client code that wants the tuple to live
longer that the next call to TupleQueueReaderNext() should make
its own copy.

Gather can emit each tuples it reads directly.  GatherMerge
needs to make an extra copy because it reads ahead, invalidating
pointers it has buffered.

Discussion: https://postgr.es/m/CA%2BhUKG%2B8T_ggoUTAE-U%3DA%2BOcPc4%3DB0nPPHcSfffuQhvXXjML6w%40mail.gmail.com
---
 src/backend/executor/nodeGather.c      |  2 +-
 src/backend/executor/nodeGatherMerge.c |  6 +++++-
 src/backend/executor/tqueue.c          | 22 +++++++++++-----------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index 69d5a1f239..41a7d28e1c 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -280,7 +280,7 @@ gather_getnext(GatherState *gatherstate)
 			{
 				ExecStoreHeapTuple(tup, /* tuple to store */
 								   fslot,	/* slot to store the tuple */
-								   true);	/* pfree tuple when done with it */
+								   false);	/* don't pfree tuple when done */
 				return fslot;
 			}
 		}
diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c
index 6ef128e2ab..77f09d0a8a 100644
--- a/src/backend/executor/nodeGatherMerge.c
+++ b/src/backend/executor/nodeGatherMerge.c
@@ -732,7 +732,11 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
 	reader = gm_state->reader[nreader - 1];
 	tup = TupleQueueReaderNext(reader, nowait, done);
 
-	return tup;
+	/*
+	 * Since we'll be holding onto these, we need make a copy.  The tuple we
+	 * just read is only valid until the next read.
+	 */
+	return heap_copytuple(tup);
 }
 
 /*
diff --git a/src/backend/executor/tqueue.c b/src/backend/executor/tqueue.c
index 2b27b41850..adf82e2141 100644
--- a/src/backend/executor/tqueue.c
+++ b/src/backend/executor/tqueue.c
@@ -43,6 +43,7 @@ typedef struct TQueueDestReceiver
 struct TupleQueueReader
 {
 	shm_mq_handle *queue;		/* shm_mq to receive from */
+	HeapTupleData htup;			/* used for returned tuples */
 };
 
 /*
@@ -140,6 +141,8 @@ CreateTupleQueueReader(shm_mq_handle *handle)
 {
 	TupleQueueReader *reader = palloc0(sizeof(TupleQueueReader));
 
+	ItemPointerSetInvalid(&reader->htup.t_self);
+	reader->htup.t_tableOid = InvalidOid;
 	reader->queue = handle;
 
 	return reader;
@@ -164,9 +167,9 @@ DestroyTupleQueueReader(TupleQueueReader *reader)
  * nowait = true and no tuple is ready to return.  *done, if not NULL,
  * is set to true when there are no remaining tuples and otherwise to false.
  *
- * The returned tuple, if any, is allocated in CurrentMemoryContext.
- * Note that this routine must not leak memory!  (We used to allow that,
- * but not any more.)
+ * The returned tuple, if any, is either in shared memory or a private buffer
+ * and remains valid until the next call.  It should not be freed by the
+ * caller.
  *
  * Even when shm_mq_receive() returns SHM_MQ_WOULD_BLOCK, this can still
  * accumulate bytes from a partially-read message, so it's useful to call
@@ -175,7 +178,6 @@ DestroyTupleQueueReader(TupleQueueReader *reader)
 HeapTuple
 TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done)
 {
-	HeapTupleData htup;
 	shm_mq_result result;
 	Size		nbytes;
 	void	   *data;
@@ -200,13 +202,11 @@ TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done)
 	Assert(result == SHM_MQ_SUCCESS);
 
 	/*
-	 * Set up a dummy HeapTupleData pointing to the data from the shm_mq
-	 * (which had better be sufficiently aligned).
+	 * Point our HeapTupleData to the data from the shm_mq (which had better
+	 * be sufficiently aligned).
 	 */
-	ItemPointerSetInvalid(&htup.t_self);
-	htup.t_tableOid = InvalidOid;
-	htup.t_len = nbytes;
-	htup.t_data = data;
+	reader->htup.t_len = nbytes;
+	reader->htup.t_data = data;
 
-	return heap_copytuple(&htup);
+	return &reader->htup;
 }
-- 
2.22.1

