diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 5aa6f02..72bacd5 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -32,6 +32,7 @@
 #include "nodes/nodeFuncs.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
+#include "storage/dsa.h"
 #include "storage/spin.h"
 #include "tcop/tcopprot.h"
 #include "utils/memutils.h"
@@ -47,6 +48,7 @@
 #define PARALLEL_KEY_BUFFER_USAGE		UINT64CONST(0xE000000000000003)
 #define PARALLEL_KEY_TUPLE_QUEUE		UINT64CONST(0xE000000000000004)
 #define PARALLEL_KEY_INSTRUMENTATION	UINT64CONST(0xE000000000000005)
+#define PARALLEL_KEY_AREA_HANDLE		UINT64CONST(0xE000000000000006)
 
 #define PARALLEL_TUPLE_QUEUE_SIZE		65536
 
@@ -345,6 +347,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
 	int			param_len;
 	int			instrumentation_len = 0;
 	int			instrument_offset = 0;
+	dsa_handle *area_handle;
 
 	/* Allocate object for return value. */
 	pei = palloc0(sizeof(ParallelExecutorInfo));
@@ -354,6 +357,16 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
 	/* Fix up and serialize plan to be sent to workers. */
 	pstmt_data = ExecSerializePlan(planstate->plan, estate);
 
+	/* Create a DSA area that can be used by the leader and all workers. */
+	pei->area = dsa_create_dynamic(LWTRANCHE_PARALLEL_EXEC_AREA,
+								   "parallel query memory area");
+
+	/*
+	 * Make the area available to executor nodes running in the leader.  See
+	 * also ParallelQueryMain, which makes it available to workers.
+	 */
+	estate->es_query_area = pei->area;
+
 	/* Create a parallel context. */
 	pcxt = CreateParallelContext(ParallelQueryMain, nworkers);
 	pei->pcxt = pcxt;
@@ -413,6 +426,10 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
 		shm_toc_estimate_keys(&pcxt->estimator, 1);
 	}
 
+	/* Estimate space for DSA area handle. */
+	shm_toc_estimate_chunk(&pcxt->estimator, sizeof(dsa_handle));
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
 	/* Everyone's had a chance to ask for space, so now create the DSM. */
 	InitializeParallelDSM(pcxt);
 
@@ -483,6 +500,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
 	if (e.nnodes != d.nnodes)
 		elog(ERROR, "inconsistent count of PlanState nodes");
 
+	/* Store the DSA area handle so that worker backends can attach. */
+	area_handle = shm_toc_allocate(pcxt->toc, sizeof(dsa_handle));
+	*area_handle = dsa_get_handle(pei->area);
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_AREA_HANDLE, area_handle);
+
 	/* OK, we're ready to rock and roll. */
 	return pei;
 }
@@ -571,6 +593,11 @@ ExecParallelFinish(ParallelExecutorInfo *pei)
 void
 ExecParallelCleanup(ParallelExecutorInfo *pei)
 {
+	if (pei->area != NULL)
+	{
+		dsa_detach(pei->area);
+		pei->area = NULL;
+	}
 	if (pei->pcxt != NULL)
 	{
 		DestroyParallelContext(pei->pcxt);
@@ -728,6 +755,8 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	QueryDesc  *queryDesc;
 	SharedExecutorInstrumentation *instrumentation;
 	int			instrument_options = 0;
+	dsa_handle *area_handle;
+	dsa_area *area;
 
 	/* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
 	receiver = ExecParallelGetReceiver(seg, toc);
@@ -739,8 +768,15 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	/* Prepare to track buffer usage during query execution. */
 	InstrStartParallelQuery();
 
+	/* Attach to the dynamic shared memory area. */
+	area_handle = shm_toc_lookup(toc, PARALLEL_KEY_AREA_HANDLE);
+	Assert(area_handle != NULL);
+	area = dsa_attach_dynamic(*area_handle);
+
 	/* Start up the executor, have it run the plan, and then shut it down. */
 	ExecutorStart(queryDesc, 0);
+	/* Make the area available to executor nodes running in this worker. */
+	queryDesc->planstate->state->es_query_area = area;
 	ExecParallelInitializeWorker(queryDesc->planstate, toc);
 	ExecutorRun(queryDesc, ForwardScanDirection, 0L);
 	ExecutorFinish(queryDesc);
@@ -758,6 +793,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	ExecutorEnd(queryDesc);
 
 	/* Cleanup. */
+	dsa_detach(area);
 	FreeQueryDesc(queryDesc);
 	(*receiver->rDestroy) (receiver);
 }
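
With the leader and every worker attached to the same area, a parallel-aware
node can allocate variable-sized shared data structures at execution time
instead of reserving space in the fixed-size DSM segment up front.  A minimal
sketch of the intended pattern, assuming the dsa_allocate()/dsa_get_address()
interface from the dsa.h this patch builds on (SharedFooState and both helper
functions are hypothetical):

    /* Hypothetical per-node state shared through the per-query DSA area. */
    typedef struct SharedFooState
    {
        int         nbuckets;
        dsa_pointer buckets;    /* relative pointer into the area */
    } SharedFooState;

    /* Leader: carve the node's shared state out of the per-query area. */
    static dsa_pointer
    foo_create_shared_state(EState *estate, int nbuckets)
    {
        dsa_area   *area = estate->es_query_area;
        dsa_pointer sp = dsa_allocate(area, sizeof(SharedFooState));
        SharedFooState *state = dsa_get_address(area, sp);

        state->nbuckets = nbuckets;
        state->buckets = dsa_allocate(area, nbuckets * sizeof(dsa_pointer));
        return sp;              /* ship this through the shm_toc to workers */
    }

    /*
     * Worker: resolve the same state via the area attached in
     * ParallelQueryMain.
     */
    static SharedFooState *
    foo_attach_shared_state(EState *estate, dsa_pointer sp)
    {
        return dsa_get_address(estate->es_query_area, sp);
    }

Because dsa_pointer values are position-independent, only the area handle has
to travel through the shm_toc; everything else can be linked with relative
pointers and resolved per-backend with dsa_get_address().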
diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h
index f4c6d37..2afd3d1 100644
--- a/src/include/executor/execParallel.h
+++ b/src/include/executor/execParallel.h
@@ -17,6 +17,7 @@
 #include "nodes/execnodes.h"
 #include "nodes/parsenodes.h"
 #include "nodes/plannodes.h"
+#include "storage/dsa.h"
 
 typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation;
 
@@ -27,6 +28,7 @@ typedef struct ParallelExecutorInfo
 	BufferUsage *buffer_usage;
 	SharedExecutorInstrumentation *instrumentation;
 	shm_mq_handle **tqueue;
+	dsa_area *area;
 	bool		finished;
 } ParallelExecutorInfo;
 
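
For orientation, the new area field follows the lifecycle of the rest of
ParallelExecutorInfo.  A rough sketch of the leader-side call sequence (the
Gather-style caller is illustrative, not part of this patch):

    ParallelExecutorInfo *pei;

    pei = ExecInitParallelPlan(outerPlanState(node), estate, nworkers);
    LaunchParallelWorkers(pei->pcxt);
    /* ... exchange tuples with workers via pei->tqueue ... */
    ExecParallelFinish(pei);    /* wait for workers, collect statistics */
    ExecParallelCleanup(pei);   /* now also detaches pei->area */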
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 4fa3661..bb1f56a 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -20,6 +20,7 @@
 #include "lib/pairingheap.h"
 #include "nodes/params.h"
 #include "nodes/plannodes.h"
+#include "storage/dsa.h"
 #include "utils/hsearch.h"
 #include "utils/reltrigger.h"
 #include "utils/sortsupport.h"
@@ -422,6 +423,9 @@ typedef struct EState
 	HeapTuple  *es_epqTuple;	/* array of EPQ substitute tuples */
 	bool	   *es_epqTupleSet; /* true if EPQ tuple is provided */
 	bool	   *es_epqScanDone; /* true if EPQ tuple has been fetched */
+
+	/* The per-query shared memory area to use for parallel execution. */
+	dsa_area *es_query_area;
 } EState;
 
 
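Because EState is created with makeNode() and therefore zero-initialized,
es_query_area is NULL until parallel execution sets it, which gives executor
nodes a cheap runtime test for choosing between shared and backend-local
allocation.  A hedged sketch, again assuming dsa_allocate() and
dsa_get_address() from dsa.h (workspace and size are hypothetical):

    /* Allocate node workspace, shared if a parallel query area exists. */
    if (estate->es_query_area != NULL)
    {
        dsa_pointer dp = dsa_allocate(estate->es_query_area, size);

        workspace = dsa_get_address(estate->es_query_area, dp);
    }
    else
        workspace = palloc(size);
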
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 9a2d869..951e421 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -235,6 +235,7 @@ typedef enum BuiltinTrancheIds
 	LWTRANCHE_BUFFER_MAPPING,
 	LWTRANCHE_LOCK_MANAGER,
 	LWTRANCHE_PREDICATE_LOCK_MANAGER,
+	LWTRANCHE_PARALLEL_EXEC_AREA,
 	LWTRANCHE_FIRST_USER_DEFINED
 }	BuiltinTrancheIds;
 

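LWTRANCHE_PARALLEL_EXEC_AREA identifies the LWLocks that the DSA machinery
creates inside the per-query area.  Code that wants an area of its own can do
the same with a runtime-assigned tranche id; a hedged sketch, assuming the
dsa_create_dynamic() signature used in execParallel.c above:

    #include "storage/dsa.h"
    #include "storage/lwlock.h"

    /* Create a standalone DSA area with its own lock tranche. */
    static dsa_area *
    create_my_area(void)
    {
        int         tranche_id = LWLockNewTrancheId();

        return dsa_create_dynamic(tranche_id, "my private area");
    }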