On 2018/04/13 14:48, Amit Langote wrote:
> On 2018/04/13 14:38, Amit Langote wrote:
>> About the specific relation_open(.., NoLock) under question, I think there
>> might be a way to address this by opening the tables with the appropriate
>> lock mode in partitioned_rels list in ExecLockNonleafAppendTables
>
> That may have sounded a bit confusing:
>
> I meant to say: "by opening the tables in partitioned_rels list with the
> appropriate lock mode in ExecLockNonleafAppendTables"
>
>> Attached a PoC patch.
>
> There were a couple of unnecessary hunks, which removed in the attached.
Sorry, still a couple more were unnecessary.
Thanks,
Amit
diff --git a/src/backend/executor/execPartition.c
b/src/backend/executor/execPartition.c
index 11139f743d..e3b3911945 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1244,7 +1244,8 @@ adjust_partition_tlist(List *tlist, TupleConversionMap
*map)
* PartitionPruneInfo.
*/
PartitionPruneState *
-ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
+ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo,
+ Relation
*partitioned_rels)
{
PartitionPruningData *prunedata;
PartitionPruneState *prunestate;
@@ -1303,10 +1304,11 @@ ExecSetupPartitionPruneState(PlanState *planstate, List
*partitionpruneinfo)
pprune->subpart_map = pinfo->subpart_map;
/*
- * Grab some info from the table's relcache; lock was already
obtained
- * by ExecLockNonLeafAppendTables.
+ * ExecLockNonLeafAppendTables already opened the relation for
us.
*/
- rel = relation_open(pinfo->reloid, NoLock);
+ Assert(partitioned_rels[i] != NULL);
+ rel = partitioned_rels[i];
+ Assert(RelationGetRelid(rel) == pinfo->reloid);
partkey = RelationGetPartitionKey(rel);
partdesc = RelationGetPartitionDesc(rel);
@@ -1336,8 +1338,6 @@ ExecSetupPartitionPruneState(PlanState *planstate, List
*partitionpruneinfo)
prunestate->execparams = bms_add_members(prunestate->execparams,
pinfo->execparams);
- relation_close(rel, NoLock);
-
i++;
}
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index b963cae730..58a0961eb1 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -858,22 +858,49 @@ ShutdownExprContext(ExprContext *econtext, bool isCommit)
/*
* ExecLockNonLeafAppendTables
*
- * Locks, if necessary, the tables indicated by the RT indexes contained in
- * the partitioned_rels list. These are the non-leaf tables in the partition
- * tree controlled by a given Append or MergeAppend node.
+ * Opens and/or locks, if necessary, the tables indicated by the RT indexes
+ * contained in the partitioned_rels list. These are the non-leaf tables in
+ * the partition tree controlled by a given Append or MergeAppend node.
*/
void
-ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
+ExecLockNonLeafAppendTables(PlanState *planstate,
+ EState *estate,
+ List *partitioned_rels)
{
PlannedStmt *stmt = estate->es_plannedstmt;
ListCell *lc;
+ int i;
+ /*
+ * If we're going to be performing pruning, allocate space for Relation
+ * pointers to be used later when setting up partition pruning state in
+ * ExecSetupPartitionPruneState.
+ */
+ if (IsA(planstate, AppendState))
+ {
+ AppendState *appendstate = (AppendState *) planstate;
+ Append *node = (Append *) planstate->plan;
+
+ if (node->part_prune_infos != NIL)
+ {
+ Assert(list_length(node->part_prune_infos) ==
+ list_length(partitioned_rels));
+ appendstate->partitioned_rels = (Relation *)
+
palloc(sizeof(Relation) *
+
list_length(node->part_prune_infos));
+ appendstate->num_partitioned_rels =
+
list_length(node->part_prune_infos);
+ }
+ }
+
+ i = 0;
foreach(lc, partitioned_rels)
{
ListCell *l;
Index rti = lfirst_int(lc);
bool is_result_rel = false;
Oid relid = getrelid(rti,
estate->es_range_table);
+ int lockmode;
/* If this is a result relation, already locked in InitPlan */
foreach(l, stmt->nonleafResultRelations)
@@ -903,9 +930,37 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState
*estate)
}
if (rc && RowMarkRequiresRowShareLock(rc->markType))
- LockRelationOid(relid, RowShareLock);
+ lockmode = RowShareLock;
else
- LockRelationOid(relid, AccessShareLock);
+ lockmode = AccessShareLock;
+ switch (nodeTag(planstate))
+ {
+ /*
+ * For Append, we may need to store the
Relation pointers to
+ * be used later when setting up partition
pruning state.
+ */
+ case T_AppendState:
+ {
+ AppendState *appendstate =
(AppendState *) planstate;
+
+ if
(appendstate->partitioned_rels)
+
appendstate->partitioned_rels[i] =
+
heap_open(relid, lockmode);
+ else
+ LockRelationOid(relid,
lockmode);
+ i++;
+ }
+
+ /* Just lock here; there is no pruning support.
*/
+ case T_MergeAppendState:
+ LockRelationOid(relid, lockmode);
+ break;
+
+ default:
+ elog(ERROR, "invalid PlanState node:
%d",
+ nodeTag(planstate));
+ break;
+ }
}
}
}
diff --git a/src/backend/executor/nodeAppend.c
b/src/backend/executor/nodeAppend.c
index d062cfddac..0799bbc919 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -112,18 +112,19 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
Assert(!(eflags & EXEC_FLAG_MARK));
/*
- * Lock the non-leaf tables in the partition tree controlled by this
node.
- * It's a no-op for non-partitioned parent tables.
- */
- ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
-
- /*
* create new AppendState for our append node
*/
appendstate->ps.plan = (Plan *) node;
appendstate->ps.state = estate;
appendstate->ps.ExecProcNode = ExecAppend;
+ /*
+ * Lock the non-leaf tables in the partition tree controlled by this
node.
+ * It's a no-op for non-partitioned parent tables.
+ */
+ ExecLockNonLeafAppendTables((PlanState *) appendstate, estate,
+
node->partitioned_rels);
+
/* Let choose_next_subplan_* function handle setting the first subplan
*/
appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
@@ -134,8 +135,11 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
ExecAssignExprContext(estate, &appendstate->ps);
+ /* ExecLockNonLeafAppendTables must have set this up. */
+ Assert(appendstate->partitioned_rels != NULL);
prunestate = ExecSetupPartitionPruneState(&appendstate->ps,
-
node->part_prune_infos);
+
node->part_prune_infos,
+
appendstate->partitioned_rels);
/*
* When there are external params matching the partition key we
may be
@@ -309,6 +313,7 @@ ExecEndAppend(AppendState *node)
PlanState **appendplans;
int nplans;
int i;
+ int num_partitioned_rels;
/*
* get information from the node
@@ -317,6 +322,15 @@ ExecEndAppend(AppendState *node)
nplans = node->as_nplans;
/*
+ * Close partitioned rels that we may have opened for partition
+ * pruning.
+ */
+ num_partitioned_rels = node->num_partitioned_rels;
+ Assert(node->partitioned_rels != NULL || num_partitioned_rels == 0);
+ for (i = 0; i < num_partitioned_rels; i++)
+ heap_close(node->partitioned_rels[i], NoLock);
+
+ /*
* shut down each of the subscans
*/
for (i = 0; i < nplans; i++)
diff --git a/src/backend/executor/nodeMergeAppend.c
b/src/backend/executor/nodeMergeAppend.c
index 118f4ef07d..6f28002ce2 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -76,7 +76,8 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int
eflags)
* Lock the non-leaf tables in the partition tree controlled by this
node.
* It's a no-op for non-partitioned parent tables.
*/
- ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
+ ExecLockNonLeafAppendTables((PlanState *) mergestate, estate,
+
node->partitioned_rels);
/*
* Set up empty vector of subplan states
diff --git a/src/include/executor/execPartition.h
b/src/include/executor/execPartition.h
index 0c36c8be30..9a78f59347 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -205,7 +205,8 @@ extern HeapTuple
ConvertPartitionTupleSlot(TupleConversionMap *map,
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
PartitionTupleRouting *proute);
extern PartitionPruneState *ExecSetupPartitionPruneState(PlanState *planstate,
- List *partitionpruneinfo);
+ List *partitionpruneinfo,
+ Relation *partitioned_rels);
extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState
*prunestate,
int nsubnodes);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 45a077a949..6300397099 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -526,7 +526,9 @@ extern void UnregisterExprContextCallback(ExprContext
*econtext,
ExprContextCallbackFunction function,
Datum arg);
-extern void ExecLockNonLeafAppendTables(List *partitioned_rels, EState
*estate);
+extern void ExecLockNonLeafAppendTables(PlanState *planstate,
+ EState *estate,
+ List *partitioned_rels);
extern Datum GetAttributeByName(HeapTupleHeader tuple, const char *attname,
bool *isNull);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 9fe0b79095..b6188f8458 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1089,6 +1089,8 @@ struct AppendState
int as_whichplan;
ParallelAppendState *as_pstate; /* parallel coordination info */
Size pstate_len; /* size of parallel
coordination info */
+ Relation *partitioned_rels;
+ int num_partitioned_rels; /* number of entries in
above array */
struct PartitionPruneState *as_prune_state;
Bitmapset *as_valid_subplans;
bool (*choose_next_subplan) (AppendState *);