On 2017/09/14 16:13, Amit Langote wrote:
> Hi.
> 
> It seems to me that some of the code in partition.c is better placed
> somewhere under the executor directory.  There was even a suggestion
> recently [1] to introduce an execPartition.c to house some code around
> tuple-routing.
> 
> IMO, catalog/partition.c should present an interface for handling
> operations on a *single* partitioned table and avoid pretending to support
> any operations on the whole partition tree.  For example, the
> PartitionDispatch structure embeds some knowledge about the partition tree
> it's part of, which is useful when used for tuple-routing, because of the
> way it works now (lock and form ResultRelInfos of *all* leaf partitions
> before the first input row is processed).
> 
> So, let's move that structure, along with the code that creates and
> manipulates the same, out of partition.c/h and to execPartition.c/h.
> Attached patch attempts to do that.
> 
> While doing the same, I didn't move *all* of get_partition_for_tuple() out
> to execPartition.c, instead modified its signature as shown below:
> 
> -extern int get_partition_for_tuple(PartitionDispatch *pd,
> -                        TupleTableSlot *slot,
> -                        EState *estate,
> -                        PartitionDispatchData **failed_at,
> -                        TupleTableSlot **failed_slot);
> +extern int get_partition_for_tuple(Relation relation, Datum *values,
> +                            bool *isnull);
> 
> That way, we keep the core partition bound comparison logic inside
> partition.c and move the rest of the stuff to its caller ExecFindPartition(),
> which includes navigating the enveloping PartitionDispatch's.
> 
> Thoughts?
> 
> PS: 0001 of the attached is the patch from [2], which is included here to be
> applied on HEAD before applying the main patch (0002) itself.

Since that 0001 patch was committed [1], here is the rebased patch.  Will
add this to the November commit-fest.

Thanks,
Amit

[1] https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=77b6b5e9c
From 08b337436b5862b0ee593314864d6ba98f95e6c0 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Fri, 8 Sep 2017 19:07:38 +0900
Subject: [PATCH] Move certain partitioning code to the executor

---
 src/backend/catalog/partition.c        | 438 +++++---------------------
 src/backend/commands/copy.c            |   1 +
 src/backend/executor/Makefile          |   2 +-
 src/backend/executor/execMain.c        | 265 +---------------
 src/backend/executor/execPartition.c   | 559 +++++++++++++++++++++++++++++++++
 src/backend/executor/nodeModifyTable.c |   1 +
 src/include/catalog/partition.h        |  48 +--
 src/include/executor/execPartition.h   |  65 ++++
 src/include/executor/executor.h        |  14 +-
 9 files changed, 708 insertions(+), 685 deletions(-)
 create mode 100644 src/backend/executor/execPartition.c
 create mode 100644 src/include/executor/execPartition.h

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index 1ab6dba7ae..903c8c4def 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -147,8 +147,6 @@ static int32 partition_bound_cmp(PartitionKey key,
 static int partition_bound_bsearch(PartitionKey key,
                                                PartitionBoundInfo boundinfo,
                                                void *probe, bool 
probe_is_bound, bool *is_equal);
-static void get_partition_dispatch_recurse(Relation rel, Relation parent,
-                                                          List **pds, List 
**leaf_part_oids);
 
 /*
  * RelationBuildPartitionDesc
@@ -1193,148 +1191,6 @@ get_partition_qual_relid(Oid relid)
        return result;
 }
 
-/*
- * RelationGetPartitionDispatchInfo
- *             Returns information necessary to route tuples down a partition 
tree
- *
- * The number of elements in the returned array (that is, the number of
- * PartitionDispatch objects for the partitioned tables in the partition tree)
- * is returned in *num_parted and a list of the OIDs of all the leaf
- * partitions of rel is returned in *leaf_part_oids.
- *
- * All the relations in the partition tree (including 'rel') must have been
- * locked (using at least the AccessShareLock) by the caller.
- */
-PartitionDispatch *
-RelationGetPartitionDispatchInfo(Relation rel,
-                                                                int 
*num_parted, List **leaf_part_oids)
-{
-       List       *pdlist = NIL;
-       PartitionDispatchData **pd;
-       ListCell   *lc;
-       int                     i;
-
-       Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
-
-       *num_parted = 0;
-       *leaf_part_oids = NIL;
-
-       get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
-       *num_parted = list_length(pdlist);
-       pd = (PartitionDispatchData **) palloc(*num_parted *
-                                                                               
   sizeof(PartitionDispatchData *));
-       i = 0;
-       foreach(lc, pdlist)
-       {
-               pd[i++] = lfirst(lc);
-       }
-
-       return pd;
-}
-
-/*
- * get_partition_dispatch_recurse
- *             Recursively expand partition tree rooted at rel
- *
- * As the partition tree is expanded in a depth-first manner, we mantain two
- * global lists: of PartitionDispatch objects corresponding to partitioned
- * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
- *
- * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
- * the order in which the planner's expand_partitioned_rtentry() processes
- * them.  It's not necessarily the case that the offsets match up exactly,
- * because constraint exclusion might prune away some partitions on the
- * planner side, whereas we'll always have the complete list; but unpruned
- * partitions will appear in the same order in the plan as they are returned
- * here.
- */
-static void
-get_partition_dispatch_recurse(Relation rel, Relation parent,
-                                                          List **pds, List 
**leaf_part_oids)
-{
-       TupleDesc       tupdesc = RelationGetDescr(rel);
-       PartitionDesc partdesc = RelationGetPartitionDesc(rel);
-       PartitionKey partkey = RelationGetPartitionKey(rel);
-       PartitionDispatch pd;
-       int                     i;
-
-       check_stack_depth();
-
-       /* Build a PartitionDispatch for this table and add it to *pds. */
-       pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
-       *pds = lappend(*pds, pd);
-       pd->reldesc = rel;
-       pd->key = partkey;
-       pd->keystate = NIL;
-       pd->partdesc = partdesc;
-       if (parent != NULL)
-       {
-               /*
-                * For every partitioned table other than the root, we must 
store a
-                * tuple table slot initialized with its tuple descriptor and a 
tuple
-                * conversion map to convert a tuple from its parent's rowtype 
to its
-                * own. That is to make sure that we are looking at the correct 
row
-                * using the correct tuple descriptor when computing its 
partition key
-                * for tuple routing.
-                */
-               pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
-               pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
-                                                                               
        tupdesc,
-                                                                               
        gettext_noop("could not convert row type"));
-       }
-       else
-       {
-               /* Not required for the root partitioned table */
-               pd->tupslot = NULL;
-               pd->tupmap = NULL;
-       }
-
-       /*
-        * Go look at each partition of this table.  If it's a leaf partition,
-        * simply add its OID to *leaf_part_oids.  If it's a partitioned table,
-        * recursively call get_partition_dispatch_recurse(), so that its
-        * partitions are processed as well and a corresponding 
PartitionDispatch
-        * object gets added to *pds.
-        *
-        * About the values in pd->indexes: for a leaf partition, it contains 
the
-        * leaf partition's position in the global list *leaf_part_oids minus 1,
-        * whereas for a partitioned table partition, it contains the 
partition's
-        * position in the global list *pds multiplied by -1.  The latter is
-        * multiplied by -1 to distinguish partitioned tables from leaf 
partitions
-        * when going through the values in pd->indexes.  So, for example, when
-        * using it during tuple-routing, encountering a value >= 0 means we 
found
-        * a leaf partition.  It is immediately returned as the index in the 
array
-        * of ResultRelInfos of all the leaf partitions, using which we insert 
the
-        * tuple into that leaf partition.  A negative value means we found a
-        * partitioned table.  The value multiplied by -1 is returned as the 
index
-        * in the array of PartitionDispatch objects of all partitioned tables 
in
-        * the tree.  This value is used to continue the search in the next 
level
-        * of the partition tree.
-        */
-       pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
-       for (i = 0; i < partdesc->nparts; i++)
-       {
-               Oid                     partrelid = partdesc->oids[i];
-
-               if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
-               {
-                       *leaf_part_oids = lappend_oid(*leaf_part_oids, 
partrelid);
-                       pd->indexes[i] = list_length(*leaf_part_oids) - 1;
-               }
-               else
-               {
-                       /*
-                        * We assume all tables in the partition tree were 
already locked
-                        * by the caller.
-                        */
-                       Relation        partrel = heap_open(partrelid, NoLock);
-
-                       pd->indexes[i] = -list_length(*pds);
-                       get_partition_dispatch_recurse(partrel, rel, pds, 
leaf_part_oids);
-               }
-       }
-}
-
 /* Module-local functions */
 
 /*
@@ -2194,248 +2050,98 @@ generate_partition_qual(Relation rel)
        return result;
 }
 
-/* ----------------
- *             FormPartitionKeyDatum
- *                     Construct values[] and isnull[] arrays for the 
partition key
- *                     of a tuple.
- *
- *     pd                              Partition dispatch object of the 
partitioned table
- *     slot                    Heap tuple from which to extract partition key
- *     estate                  executor state for evaluating any partition key
- *                                     expressions (must be non-NULL)
- *     values                  Array of partition key Datums (output area)
- *     isnull                  Array of is-null indicators (output area)
- *
- * the ecxt_scantuple slot of estate's per-tuple expr context must point to
- * the heap tuple passed in.
- * ----------------
- */
-void
-FormPartitionKeyDatum(PartitionDispatch pd,
-                                         TupleTableSlot *slot,
-                                         EState *estate,
-                                         Datum *values,
-                                         bool *isnull)
-{
-       ListCell   *partexpr_item;
-       int                     i;
-
-       if (pd->key->partexprs != NIL && pd->keystate == NIL)
-       {
-               /* Check caller has set up context correctly */
-               Assert(estate != NULL &&
-                          GetPerTupleExprContext(estate)->ecxt_scantuple == 
slot);
-
-               /* First time through, set up expression evaluation state */
-               pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
-       }
-
-       partexpr_item = list_head(pd->keystate);
-       for (i = 0; i < pd->key->partnatts; i++)
-       {
-               AttrNumber      keycol = pd->key->partattrs[i];
-               Datum           datum;
-               bool            isNull;
-
-               if (keycol != 0)
-               {
-                       /* Plain column; get the value directly from the heap 
tuple */
-                       datum = slot_getattr(slot, keycol, &isNull);
-               }
-               else
-               {
-                       /* Expression; need to evaluate it */
-                       if (partexpr_item == NULL)
-                               elog(ERROR, "wrong number of partition key 
expressions");
-                       datum = ExecEvalExprSwitchContext((ExprState *) 
lfirst(partexpr_item),
-                                                                               
          GetPerTupleExprContext(estate),
-                                                                               
          &isNull);
-                       partexpr_item = lnext(partexpr_item);
-               }
-               values[i] = datum;
-               isnull[i] = isNull;
-       }
-
-       if (partexpr_item != NULL)
-               elog(ERROR, "wrong number of partition key expressions");
-}
-
 /*
  * get_partition_for_tuple
- *             Finds a leaf partition for tuple contained in *slot
+ *             Finds partition of relation which accepts the partition key 
specified
+ *             in values and isnull
  *
- * Returned value is the sequence number of the leaf partition thus found,
- * or -1 if no leaf partition is found for the tuple.  *failed_at is set
- * to the OID of the partitioned table whose partition was not found in
- * the latter case.
+ * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
+ * is found, or -1 if none is found.
  */
 int
-get_partition_for_tuple(PartitionDispatch *pd,
-                                               TupleTableSlot *slot,
-                                               EState *estate,
-                                               PartitionDispatchData 
**failed_at,
-                                               TupleTableSlot **failed_slot)
+get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
 {
-       PartitionDispatch parent;
-       Datum           values[PARTITION_MAX_KEYS];
-       bool            isnull[PARTITION_MAX_KEYS];
-       int                     result;
-       ExprContext *ecxt = GetPerTupleExprContext(estate);
-       TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
-
-       /* start with the root partitioned table */
-       parent = pd[0];
-       while (true)
-       {
-               PartitionKey key = parent->key;
-               PartitionDesc partdesc = parent->partdesc;
-               TupleTableSlot *myslot = parent->tupslot;
-               TupleConversionMap *map = parent->tupmap;
-               int                     cur_index = -1;
+       int             bound_offset;
+       int             part_index = -1;
+       PartitionKey  key = RelationGetPartitionKey(relation);
+       PartitionDesc partdesc = RelationGetPartitionDesc(relation);
 
-               if (myslot != NULL && map != NULL)
-               {
-                       HeapTuple       tuple = ExecFetchSlotTuple(slot);
-
-                       ExecClearTuple(myslot);
-                       tuple = do_convert_tuple(tuple, map);
-                       ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
-                       slot = myslot;
-               }
-
-               /* Quick exit */
-               if (partdesc->nparts == 0)
-               {
-                       *failed_at = parent;
-                       *failed_slot = slot;
-                       result = -1;
-                       goto error_exit;
-               }
-
-               /*
-                * Extract partition key from tuple. Expression evaluation 
machinery
-                * that FormPartitionKeyDatum() invokes expects ecxt_scantuple 
to
-                * point to the correct tuple slot.  The slot might have 
changed from
-                * what was used for the parent table if the table of the 
current
-                * partitioning level has different tuple descriptor from the 
parent.
-                * So update ecxt_scantuple accordingly.
-                */
-               ecxt->ecxt_scantuple = slot;
-               FormPartitionKeyDatum(parent, slot, estate, values, isnull);
-
-               /* Route as appropriate based on partitioning strategy. */
-               switch (key->strategy)
-               {
-                       case PARTITION_STRATEGY_LIST:
+       /* Route as appropriate based on partitioning strategy. */
+       switch (key->strategy)
+       {
+               case PARTITION_STRATEGY_LIST:
+                       if (isnull[0])
+                       {
+                               if 
(partition_bound_accepts_nulls(partdesc->boundinfo))
+                                       part_index = 
partdesc->boundinfo->null_index;
+                       }
+                       else
+                       {
+                               bool            equal = false;
+
+                               bound_offset = partition_bound_bsearch(key,
+                                                                               
                           partdesc->boundinfo,
+                                                                               
                           values,
+                                                                               
                           false,
+                                                                               
                           &equal);
+                               if (bound_offset >= 0 && equal)
+                                       part_index = 
partdesc->boundinfo->indexes[bound_offset];
+                       }
+                       break;
 
-                               if (isnull[0])
-                               {
-                                       if 
(partition_bound_accepts_nulls(partdesc->boundinfo))
-                                               cur_index = 
partdesc->boundinfo->null_index;
-                               }
-                               else
-                               {
-                                       bool            equal = false;
-                                       int                     cur_offset;
-
-                                       cur_offset = 
partition_bound_bsearch(key,
-                                                                               
                                 partdesc->boundinfo,
-                                                                               
                                 values,
-                                                                               
                                 false,
-                                                                               
                                 &equal);
-                                       if (cur_offset >= 0 && equal)
-                                               cur_index = 
partdesc->boundinfo->indexes[cur_offset];
-                               }
-                               break;
+               case PARTITION_STRATEGY_RANGE:
+                       {
+                               bool            equal = false,
+                                                       range_partkey_has_null 
= false;
+                               int                     i;
 
-                       case PARTITION_STRATEGY_RANGE:
+                               /*
+                                * No range includes NULL, so this will be 
accepted by the
+                                * default partition if there is one, and 
otherwise
+                                * rejected.
+                                */
+                               for (i = 0; i < key->partnatts; i++)
                                {
-                                       bool            equal = false,
-                                                               
range_partkey_has_null = false;
-                                       int                     cur_offset;
-                                       int                     i;
-
-                                       /*
-                                        * No range includes NULL, so this will 
be accepted by the
-                                        * default partition if there is one, 
and otherwise
-                                        * rejected.
-                                        */
-                                       for (i = 0; i < key->partnatts; i++)
+                                       if (isnull[i] &&
+                                               
partition_bound_has_default(partdesc->boundinfo))
                                        {
-                                               if (isnull[i] &&
-                                                       
partition_bound_has_default(partdesc->boundinfo))
-                                               {
-                                                       range_partkey_has_null 
= true;
-                                                       break;
-                                               }
-                                               else if (isnull[i])
-                                               {
-                                                       *failed_at = parent;
-                                                       *failed_slot = slot;
-                                                       result = -1;
-                                                       goto error_exit;
-                                               }
+                                               range_partkey_has_null = true;
+                                               part_index = 
partdesc->boundinfo->default_index;
                                        }
+                               }
 
-                                       /*
-                                        * No need to search for partition, as 
the null key will
-                                        * be routed to the default partition.
-                                        */
-                                       if (range_partkey_has_null)
-                                               break;
-
-                                       cur_offset = 
partition_bound_bsearch(key,
-                                                                               
                                 partdesc->boundinfo,
-                                                                               
                                 values,
-                                                                               
                                 false,
-                                                                               
                                 &equal);
+                               if (!range_partkey_has_null)
+                               {
+                                       bound_offset = 
partition_bound_bsearch(key,
+                                                                               
                           partdesc->boundinfo,
+                                                                               
                                   values,
+                                                                               
                                   false,
+                                                                               
                                   &equal);
 
                                        /*
-                                        * The offset returned is such that the 
bound at
-                                        * cur_offset is less than or equal to 
the tuple value, so
-                                        * the bound at offset+1 is the upper 
bound.
+                                        * The bound at bound_offset is less 
than or equal to the
+                                        * tuple value, so the bound at 
offset+1 is the upper
+                                        * bound of the partition we're looking 
for, if there
+                                        * actually exists one.
                                         */
-                                       cur_index = 
partdesc->boundinfo->indexes[cur_offset + 1];
+                                       part_index = 
partdesc->boundinfo->indexes[bound_offset + 1];
                                }
-                               break;
-
-                       default:
-                               elog(ERROR, "unexpected partition strategy: %d",
-                                        (int) key->strategy);
-               }
-
-               /*
-                * cur_index < 0 means we failed to find a partition of this 
parent.
-                * Use the default partition, if there is one.
-                */
-               if (cur_index < 0)
-                       cur_index = partdesc->boundinfo->default_index;
-
-               /*
-                * If cur_index is still less than 0 at this point, there's no
-                * partition for this tuple.  Otherwise, we either found the 
leaf
-                * partition, or a child partitioned table through which we 
have to
-                * route the tuple.
-                */
-               if (cur_index < 0)
-               {
-                       result = -1;
-                       *failed_at = parent;
-                       *failed_slot = slot;
-                       break;
-               }
-               else if (parent->indexes[cur_index] >= 0)
-               {
-                       result = parent->indexes[cur_index];
+                       }
                        break;
-               }
-               else
-                       parent = pd[-parent->indexes[cur_index]];
+
+               default:
+                       elog(ERROR, "unexpected partition strategy: %d",
+                                (int) key->strategy);
        }
 
-error_exit:
-       ecxt->ecxt_scantuple = ecxt_scantuple_old;
-       return result;
+       /*
+        * part_index < 0 means we failed to find a partition of this parent.
+        * Use the default partition, if there is one.
+        */
+       if (part_index < 0)
+               part_index = partdesc->boundinfo->default_index;
+
+       return part_index;
 }
 
 /*
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 7c004ffad8..ce4b44cca8 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -29,6 +29,7 @@
 #include "commands/copy.h"
 #include "commands/defrem.h"
 #include "commands/trigger.h"
+#include "executor/execPartition.h"
 #include "executor/executor.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index 083b20f3fe..cc09895fa5 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = execAmi.o execCurrent.o execExpr.o execExprInterp.o \
        execGrouping.o execIndexing.o execJunk.o \
-       execMain.o execParallel.o execProcnode.o \
+       execMain.o execParallel.o execPartition.o execProcnode.o \
        execReplication.o execScan.o execSRF.o execTuples.o \
        execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \
        nodeBitmapAnd.o nodeBitmapOr.o \
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 62fb05efac..1bef75e36f 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -43,7 +43,6 @@
 #include "access/xact.h"
 #include "catalog/namespace.h"
 #include "catalog/partition.h"
-#include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_publication.h"
 #include "commands/matview.h"
 #include "commands/trigger.h"
@@ -98,14 +97,8 @@ static char *ExecBuildSlotValueDescription(Oid reloid,
                                                          TupleDesc tupdesc,
                                                          Bitmapset 
*modifiedCols,
                                                          int maxfieldlen);
-static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
-                                                                        Datum 
*values,
-                                                                        bool 
*isnull,
-                                                                        int 
maxfieldlen);
 static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
                                  Plan *planTree);
-static void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
-                                  TupleTableSlot *slot, EState *estate);
 
 /*
  * Note that GetUpdatedColumns() also exists in commands/trigger.c.  There does
@@ -1850,8 +1843,10 @@ ExecRelCheck(ResultRelInfo *resultRelInfo,
 
 /*
  * ExecPartitionCheck --- check that tuple meets the partition constraint.
+ *
+ * Exported in executor.h for outside use.
  */
-static void
+void
 ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
                                   EState *estate)
 {
@@ -3238,257 +3233,3 @@ EvalPlanQualEnd(EPQState *epqstate)
        epqstate->planstate = NULL;
        epqstate->origslot = NULL;
 }
-
-/*
- * ExecSetupPartitionTupleRouting - set up information needed during
- * tuple routing for partitioned tables
- *
- * Output arguments:
- * 'pd' receives an array of PartitionDispatch objects with one entry for
- *             every partitioned table in the partition tree
- * 'partitions' receives an array of ResultRelInfo objects with one entry for
- *             every leaf partition in the partition tree
- * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
- *             entry for every leaf partition (required to convert input tuple 
based
- *             on the root table's rowtype to a leaf partition's rowtype after 
tuple
- *             routing is done)
- * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
- *             to manipulate any given leaf partition's rowtype after that 
partition
- *             is chosen by tuple-routing.
- * 'num_parted' receives the number of partitioned tables in the partition
- *             tree (= the number of entries in the 'pd' output array)
- * 'num_partitions' receives the number of leaf partitions in the partition
- *             tree (= the number of entries in the 'partitions' and 
'tup_conv_maps'
- *             output arrays
- *
- * Note that all the relations in the partition tree are locked using the
- * RowExclusiveLock mode upon return from this function.
- */
-void
-ExecSetupPartitionTupleRouting(Relation rel,
-                                                          Index resultRTindex,
-                                                          EState *estate,
-                                                          PartitionDispatch 
**pd,
-                                                          ResultRelInfo 
**partitions,
-                                                          TupleConversionMap 
***tup_conv_maps,
-                                                          TupleTableSlot 
**partition_tuple_slot,
-                                                          int *num_parted, int 
*num_partitions)
-{
-       TupleDesc       tupDesc = RelationGetDescr(rel);
-       List       *leaf_parts;
-       ListCell   *cell;
-       int                     i;
-       ResultRelInfo *leaf_part_rri;
-
-       /*
-        * Get the information about the partition tree after locking all the
-        * partitions.
-        */
-       (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, 
NULL);
-       *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
-       *num_partitions = list_length(leaf_parts);
-       *partitions = (ResultRelInfo *) palloc(*num_partitions *
-                                                                               
   sizeof(ResultRelInfo));
-       *tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
-                                                                               
                         sizeof(TupleConversionMap *));
-
-       /*
-        * Initialize an empty slot that will be used to manipulate tuples of 
any
-        * given partition's rowtype.  It is attached to the caller-specified 
node
-        * (such as ModifyTableState) and released when the node finishes
-        * processing.
-        */
-       *partition_tuple_slot = MakeTupleTableSlot();
-
-       leaf_part_rri = *partitions;
-       i = 0;
-       foreach(cell, leaf_parts)
-       {
-               Relation        partrel;
-               TupleDesc       part_tupdesc;
-
-               /*
-                * We locked all the partitions above including the leaf 
partitions.
-                * Note that each of the relations in *partitions are eventually
-                * closed by the caller.
-                */
-               partrel = heap_open(lfirst_oid(cell), NoLock);
-               part_tupdesc = RelationGetDescr(partrel);
-
-               /*
-                * Save a tuple conversion map to convert a tuple routed to this
-                * partition from the parent's type to the partition's.
-                */
-               (*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, 
part_tupdesc,
-                                                                               
                         gettext_noop("could not convert row type"));
-
-               InitResultRelInfo(leaf_part_rri,
-                                                 partrel,
-                                                 resultRTindex,
-                                                 rel,
-                                                 estate->es_instrument);
-
-               /*
-                * Verify result relation is a valid target for INSERT.
-                */
-               CheckValidResultRel(leaf_part_rri, CMD_INSERT);
-
-               /*
-                * Open partition indices (remember we do not support ON 
CONFLICT in
-                * case of partitioned tables, so we do not need support 
information
-                * for speculative insertion)
-                */
-               if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
-                       leaf_part_rri->ri_IndexRelationDescs == NULL)
-                       ExecOpenIndices(leaf_part_rri, false);
-
-               estate->es_leaf_result_relations =
-                       lappend(estate->es_leaf_result_relations, 
leaf_part_rri);
-
-               leaf_part_rri++;
-               i++;
-       }
-}
-
-/*
- * ExecFindPartition -- Find a leaf partition in the partition tree rooted
- * at parent, for the heap tuple contained in *slot
- *
- * estate must be non-NULL; we'll need it to compute any expressions in the
- * partition key(s)
- *
- * If no leaf partition is found, this routine errors out with the appropriate
- * error message, else it returns the leaf partition sequence number returned
- * by get_partition_for_tuple() unchanged.
- */
-int
-ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
-                                 TupleTableSlot *slot, EState *estate)
-{
-       int                     result;
-       PartitionDispatchData *failed_at;
-       TupleTableSlot *failed_slot;
-
-       /*
-        * First check the root table's partition constraint, if any.  No point 
in
-        * routing the tuple if it doesn't belong in the root table itself.
-        */
-       if (resultRelInfo->ri_PartitionCheck)
-               ExecPartitionCheck(resultRelInfo, slot, estate);
-
-       result = get_partition_for_tuple(pd, slot, estate,
-                                                                        
&failed_at, &failed_slot);
-       if (result < 0)
-       {
-               Relation        failed_rel;
-               Datum           key_values[PARTITION_MAX_KEYS];
-               bool            key_isnull[PARTITION_MAX_KEYS];
-               char       *val_desc;
-               ExprContext *ecxt = GetPerTupleExprContext(estate);
-
-               failed_rel = failed_at->reldesc;
-               ecxt->ecxt_scantuple = failed_slot;
-               FormPartitionKeyDatum(failed_at, failed_slot, estate,
-                                                         key_values, 
key_isnull);
-               val_desc = ExecBuildSlotPartitionKeyDescription(failed_rel,
-                                                                               
                                key_values,
-                                                                               
                                key_isnull,
-                                                                               
                                64);
-               Assert(OidIsValid(RelationGetRelid(failed_rel)));
-               ereport(ERROR,
-                               (errcode(ERRCODE_CHECK_VIOLATION),
-                                errmsg("no partition of relation \"%s\" found 
for row",
-                                               
RelationGetRelationName(failed_rel)),
-                                val_desc ? errdetail("Partition key of the 
failing row contains %s.", val_desc) : 0));
-       }
-
-       return result;
-}
-
-/*
- * BuildSlotPartitionKeyDescription
- *
- * This works very much like BuildIndexValueDescription() and is currently
- * used for building error messages when ExecFindPartition() fails to find
- * partition for a row.
- */
-static char *
-ExecBuildSlotPartitionKeyDescription(Relation rel,
-                                                                        Datum 
*values,
-                                                                        bool 
*isnull,
-                                                                        int 
maxfieldlen)
-{
-       StringInfoData buf;
-       PartitionKey key = RelationGetPartitionKey(rel);
-       int                     partnatts = get_partition_natts(key);
-       int                     i;
-       Oid                     relid = RelationGetRelid(rel);
-       AclResult       aclresult;
-
-       if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
-               return NULL;
-
-       /* If the user has table-level access, just go build the description. */
-       aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
-       if (aclresult != ACLCHECK_OK)
-       {
-               /*
-                * Step through the columns of the partition key and make sure 
the
-                * user has SELECT rights on all of them.
-                */
-               for (i = 0; i < partnatts; i++)
-               {
-                       AttrNumber      attnum = get_partition_col_attnum(key, 
i);
-
-                       /*
-                        * If this partition key column is an expression, we 
return no
-                        * detail rather than try to figure out what column(s) 
the
-                        * expression includes and if the user has SELECT 
rights on them.
-                        */
-                       if (attnum == InvalidAttrNumber ||
-                               pg_attribute_aclcheck(relid, attnum, 
GetUserId(),
-                                                                         
ACL_SELECT) != ACLCHECK_OK)
-                               return NULL;
-               }
-       }
-
-       initStringInfo(&buf);
-       appendStringInfo(&buf, "(%s) = (",
-                                        pg_get_partkeydef_columns(relid, 
true));
-
-       for (i = 0; i < partnatts; i++)
-       {
-               char       *val;
-               int                     vallen;
-
-               if (isnull[i])
-                       val = "null";
-               else
-               {
-                       Oid                     foutoid;
-                       bool            typisvarlena;
-
-                       getTypeOutputInfo(get_partition_col_typid(key, i),
-                                                         &foutoid, 
&typisvarlena);
-                       val = OidOutputFunctionCall(foutoid, values[i]);
-               }
-
-               if (i > 0)
-                       appendStringInfoString(&buf, ", ");
-
-               /* truncate if needed */
-               vallen = strlen(val);
-               if (vallen <= maxfieldlen)
-                       appendStringInfoString(&buf, val);
-               else
-               {
-                       vallen = pg_mbcliplen(val, vallen, maxfieldlen);
-                       appendBinaryStringInfo(&buf, val, vallen);
-                       appendStringInfoString(&buf, "...");
-               }
-       }
-
-       appendStringInfoChar(&buf, ')');
-
-       return buf.data;
-}
diff --git a/src/backend/executor/execPartition.c 
b/src/backend/executor/execPartition.c
new file mode 100644
index 0000000000..2cac7a8f48
--- /dev/null
+++ b/src/backend/executor/execPartition.c
@@ -0,0 +1,559 @@
+/*-------------------------------------------------------------------------
+ *
+ * execPartition.c
+ *       Support routines for partitioning.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/executor/execPartition.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_inherits_fn.h"
+#include "executor/execPartition.h"
+#include "executor/executor.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/lsyscache.h"
+#include "utils/rls.h"
+#include "utils/ruleutils.h"
+
+static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
+                                                                int 
*num_parted, List **leaf_part_oids);
+static void get_partition_dispatch_recurse(Relation rel, Relation parent,
+                                                          List **pds, List 
**leaf_part_oids);
+static void FormPartitionKeyDatum(PartitionDispatch pd,
+                                         TupleTableSlot *slot,
+                                         EState *estate,
+                                         Datum *values,
+                                         bool *isnull);
+static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
+                                                                        Datum 
*values,
+                                                                        bool 
*isnull,
+                                                                        int 
maxfieldlen);
+
+/*
+ * ExecSetupPartitionTupleRouting - set up information needed during
+ * tuple routing for partitioned tables
+ *
+ * Output arguments:
+ * 'pd' receives an array of PartitionDispatch objects with one entry for
+ *             every partitioned table in the partition tree
+ * 'partitions' receives an array of ResultRelInfo objects with one entry for
+ *             every leaf partition in the partition tree
+ * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
+ *             entry for every leaf partition (required to convert input tuple 
based
+ *             on the root table's rowtype to a leaf partition's rowtype after 
tuple
+ *             routing is done)
+ * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
+ *             to manipulate any given leaf partition's rowtype after that 
partition
+ *             is chosen by tuple-routing.
+ * 'num_parted' receives the number of partitioned tables in the partition
+ *             tree (= the number of entries in the 'pd' output array)
+ * 'num_partitions' receives the number of leaf partitions in the partition
+ *             tree (= the number of entries in the 'partitions' and 
'tup_conv_maps'
+ *             output arrays)
+ *
+ * Note that all the relations in the partition tree are locked using the
+ * RowExclusiveLock mode upon return from this function.
+ */
+void
+ExecSetupPartitionTupleRouting(Relation rel,
+                                                          Index resultRTindex,
+                                                          EState *estate,
+                                                          PartitionDispatch 
**pd,
+                                                          ResultRelInfo 
**partitions,
+                                                          TupleConversionMap 
***tup_conv_maps,
+                                                          TupleTableSlot 
**partition_tuple_slot,
+                                                          int *num_parted, int 
*num_partitions)
+{
+       TupleDesc       tupDesc = RelationGetDescr(rel);
+       List       *leaf_parts;
+       ListCell   *cell;
+       int                     i;
+       ResultRelInfo *leaf_part_rri;
+
+       /*
+        * Get the information about the partition tree after locking all the
+        * partitions.
+        */
+       (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, 
NULL);
+       *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
+       *num_partitions = list_length(leaf_parts);
+       *partitions = (ResultRelInfo *) palloc(*num_partitions *
+                                                                               
   sizeof(ResultRelInfo));
+       *tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
+                                                                               
                         sizeof(TupleConversionMap *));
+
+       /*
+        * Initialize an empty slot that will be used to manipulate tuples of 
any
+        * given partition's rowtype.  It is attached to the caller-specified 
node
+        * (such as ModifyTableState) and released when the node finishes
+        * processing.
+        */
+       *partition_tuple_slot = MakeTupleTableSlot();
+
+       leaf_part_rri = *partitions;
+       i = 0;
+       foreach(cell, leaf_parts)
+       {
+               Relation        partrel;
+               TupleDesc       part_tupdesc;
+
+               /*
+                * We locked all the partitions above including the leaf 
partitions.
+                * Note that each of the relations in *partitions are eventually
+                * closed by the caller.
+                */
+               partrel = heap_open(lfirst_oid(cell), NoLock);
+               part_tupdesc = RelationGetDescr(partrel);
+
+               /*
+                * Save a tuple conversion map to convert a tuple routed to this
+                * partition from the parent's type to the partition's.
+                */
+               (*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, 
part_tupdesc,
+                                                                               
                         gettext_noop("could not convert row type"));
+
+               InitResultRelInfo(leaf_part_rri,
+                                                 partrel,
+                                                 resultRTindex,
+                                                 rel,
+                                                 estate->es_instrument);
+
+               /*
+                * Verify result relation is a valid target for INSERT.
+                */
+               CheckValidResultRel(leaf_part_rri, CMD_INSERT);
+
+               /*
+                * Open partition indices (remember we do not support ON 
CONFLICT in
+                * case of partitioned tables, so we do not need support 
information
+                * for speculative insertion)
+                */
+               if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
+                       leaf_part_rri->ri_IndexRelationDescs == NULL)
+                       ExecOpenIndices(leaf_part_rri, false);
+
+               estate->es_leaf_result_relations =
+                       lappend(estate->es_leaf_result_relations, 
leaf_part_rri);
+
+               leaf_part_rri++;
+               i++;
+       }
+}
+
+/*
+ * ExecFindPartition -- Find a leaf partition in the partition tree rooted
+ * at parent, for the heap tuple contained in *slot
+ *
+ * estate must be non-NULL; we'll need it to compute any expressions in the
+ * partition key(s)
+ *
+ * If no leaf partition is found, this routine errors out with the appropriate
+ * error message, else it returns the leaf partition sequence number to be
+ * used as an index into the array of (ResultRelInfos of) all leaf partitions
+ * in the partition tree.
+ */
+int
+ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
+                                 TupleTableSlot *slot, EState *estate)
+{
+       int                     result;
+       Datum           values[PARTITION_MAX_KEYS];
+       bool            isnull[PARTITION_MAX_KEYS];
+       Relation        rel;
+       PartitionDispatch parent;
+       ExprContext *ecxt = GetPerTupleExprContext(estate);
+       TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
+
+       /*
+        * First check the root table's partition constraint, if any.  No point in
+        * routing the tuple if it doesn't belong in the root table itself.
+        */
+       if (resultRelInfo->ri_PartitionCheck)
+               ExecPartitionCheck(resultRelInfo, slot, estate);
+
+       /* start with the root partitioned table */
+       parent = pd[0];
+       while (true)
+       {
+               PartitionDesc   partdesc;
+               TupleTableSlot *myslot = parent->tupslot;
+               TupleConversionMap *map = parent->tupmap;
+               int             cur_index = -1;
+
+               rel = parent->reldesc;
+               partdesc = RelationGetPartitionDesc(rel);
+
+               /*
+                * Convert the tuple to this parent's layout so that we can do certain
+                * things we do below.
+                */
+               if (myslot != NULL && map != NULL)
+               {
+                       HeapTuple       tuple = ExecFetchSlotTuple(slot);
+
+                       ExecClearTuple(myslot);
+                       tuple = do_convert_tuple(tuple, map);
+                       ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
+                       slot = myslot;
+               }
+
+               /*
+                * Extract the partition key.  FormPartitionKeyDatum() expects
+                * ecxt_scantuple to point to the slot being examined, which may have
+                * changed above if this level's tuple descriptor differs from its
+                * parent's.  This must precede every exit from the loop, because
+                * the error report below reads values[] and isnull[].
+                */
+               ecxt->ecxt_scantuple = slot;
+               FormPartitionKeyDatum(parent, slot, estate, values, isnull);
+
+               /* Nothing for get_partition_for_tuple() to do without partitions */
+               if (partdesc->nparts == 0)
+               {
+                       result = -1;
+                       break;
+               }
+
+               cur_index = get_partition_for_tuple(rel, values, isnull);
+
+               /*
+                * cur_index < 0 means we failed to find a partition of this parent.
+                * cur_index >= 0 means we either found the leaf partition, or the
+                * next parent to find a partition of.
+                */
+               if (cur_index < 0)
+               {
+                       result = -1;
+                       break;
+               }
+               else if (parent->indexes[cur_index] >= 0)
+               {
+                       result = parent->indexes[cur_index];
+                       break;
+               }
+               else
+                       parent = pd[-parent->indexes[cur_index]];
+       }
+
+       /* A partition was not found. */
+       if (result < 0)
+       {
+               char       *val_desc;
+
+               val_desc = ExecBuildSlotPartitionKeyDescription(rel,
+                                                                                               values, isnull, 64);
+               Assert(OidIsValid(RelationGetRelid(rel)));
+               ereport(ERROR,
+                               (errcode(ERRCODE_CHECK_VIOLATION),
+                                errmsg("no partition of relation \"%s\" found for row",
+                                               RelationGetRelationName(rel)),
+                                val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
+       }
+
+       ecxt->ecxt_scantuple = ecxt_scantuple_old;
+       return result;
+}
+
+/*
+ * RelationGetPartitionDispatchInfo
+ *             Returns information necessary to route tuples down a partition 
tree
+ *
+ * The number of elements in the returned array (that is, the number of
+ * PartitionDispatch objects for the partitioned tables in the partition tree)
+ * is returned in *num_parted and a list of the OIDs of all the leaf
+ * partitions of rel is returned in *leaf_part_oids.
+ *
+ * All the relations in the partition tree (including 'rel') must have been
+ * locked (using at least the AccessShareLock) by the caller.
+ */
+static PartitionDispatch *
+RelationGetPartitionDispatchInfo(Relation rel,
+                                                                int 
*num_parted, List **leaf_part_oids)
+{
+       List       *pdlist = NIL;
+       PartitionDispatchData **pd;
+       ListCell   *lc;
+       int                     i;
+
+       Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+       *num_parted = 0;
+       *leaf_part_oids = NIL;
+
+       get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
+       *num_parted = list_length(pdlist);
+       pd = (PartitionDispatchData **) palloc(*num_parted *
+                                                                               
   sizeof(PartitionDispatchData *));
+       i = 0;
+       foreach(lc, pdlist)
+       {
+               pd[i++] = lfirst(lc);
+       }
+
+       return pd;
+}
+
+/*
+ * get_partition_dispatch_recurse
+ *             Recursively expand partition tree rooted at rel
+ *
+ * As the partition tree is expanded in a depth-first manner, we maintain two
+ * global lists: of PartitionDispatch objects corresponding to partitioned
+ * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
+ *
+ * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
+ * the order in which the planner's expand_partitioned_rtentry() processes
+ * them.  It's not necessarily the case that the offsets match up exactly,
+ * because constraint exclusion might prune away some partitions on the
+ * planner side, whereas we'll always have the complete list; but unpruned
+ * partitions will appear in the same order in the plan as they are returned
+ * here.
+ */
+static void
+get_partition_dispatch_recurse(Relation rel, Relation parent,
+                                                          List **pds, List 
**leaf_part_oids)
+{
+       TupleDesc       tupdesc = RelationGetDescr(rel);
+       PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+       PartitionKey partkey = RelationGetPartitionKey(rel);
+       PartitionDispatch pd;
+       int                     i;
+
+       check_stack_depth();
+
+       /* Build a PartitionDispatch for this table and add it to *pds. */
+       pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
+       *pds = lappend(*pds, pd);
+       pd->reldesc = rel;
+       pd->key = partkey;
+       pd->keystate = NIL;
+       pd->partdesc = partdesc;
+       if (parent != NULL)
+       {
+               /*
+                * For every partitioned table other than the root, we must 
store a
+                * tuple table slot initialized with its tuple descriptor and a 
tuple
+                * conversion map to convert a tuple from its parent's rowtype 
to its
+                * own. That is to make sure that we are looking at the correct 
row
+                * using the correct tuple descriptor when computing its 
partition key
+                * for tuple routing.
+                */
+               pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
+               pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
+                                                                               
        tupdesc,
+                                                                               
        gettext_noop("could not convert row type"));
+       }
+       else
+       {
+               /* Not required for the root partitioned table */
+               pd->tupslot = NULL;
+               pd->tupmap = NULL;
+       }
+
+       /*
+        * Go look at each partition of this table.  If it's a leaf partition,
+        * simply add its OID to *leaf_part_oids.  If it's a partitioned table,
+        * recursively call get_partition_dispatch_recurse(), so that its
+        * partitions are processed as well and a corresponding 
PartitionDispatch
+        * object gets added to *pds.
+        *
+        * About the values in pd->indexes: for a leaf partition, it contains 
the
+        * leaf partition's position in the global list *leaf_part_oids minus 1,
+        * whereas for a partitioned table partition, it contains the 
partition's
+        * position in the global list *pds multiplied by -1.  The latter is
+        * multiplied by -1 to distinguish partitioned tables from leaf 
partitions
+        * when going through the values in pd->indexes.  So, for example, when
+        * using it during tuple-routing, encountering a value >= 0 means we 
found
+        * a leaf partition.  It is immediately returned as the index in the 
array
+        * of ResultRelInfos of all the leaf partitions, using which we insert 
the
+        * tuple into that leaf partition.  A negative value means we found a
+        * partitioned table.  The value multiplied by -1 is returned as the 
index
+        * in the array of PartitionDispatch objects of all partitioned tables 
in
+        * the tree.  This value is used to continue the search in the next 
level
+        * of the partition tree.
+        */
+       pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
+       for (i = 0; i < partdesc->nparts; i++)
+       {
+               Oid                     partrelid = partdesc->oids[i];
+
+               if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
+               {
+                       *leaf_part_oids = lappend_oid(*leaf_part_oids, 
partrelid);
+                       pd->indexes[i] = list_length(*leaf_part_oids) - 1;
+               }
+               else
+               {
+                       /*
+                        * We assume all tables in the partition tree were 
already locked
+                        * by the caller.
+                        */
+                       Relation        partrel = heap_open(partrelid, NoLock);
+
+                       pd->indexes[i] = -list_length(*pds);
+                       get_partition_dispatch_recurse(partrel, rel, pds, 
leaf_part_oids);
+               }
+       }
+}
+
+/* ----------------
+ *             FormPartitionKeyDatum
+ *                     Construct values[] and isnull[] arrays for the 
partition key
+ *                     of a tuple.
+ *
+ *     pd                              Partition dispatch object of the 
partitioned table
+ *     slot                    Heap tuple from which to extract partition key
+ *     estate                  executor state for evaluating any partition key
+ *                                     expressions (must be non-NULL)
+ *     values                  Array of partition key Datums (output area)
+ *     isnull                  Array of is-null indicators (output area)
+ *
+ * the ecxt_scantuple slot of estate's per-tuple expr context must point to
+ * the heap tuple passed in.
+ * ----------------
+ */
+static void
+FormPartitionKeyDatum(PartitionDispatch pd,
+                                         TupleTableSlot *slot,
+                                         EState *estate,
+                                         Datum *values,
+                                         bool *isnull)
+{
+       ListCell   *partexpr_item;
+       int                     i;
+
+       if (pd->key->partexprs != NIL && pd->keystate == NIL)
+       {
+               /* Check caller has set up context correctly */
+               Assert(estate != NULL &&
+                          GetPerTupleExprContext(estate)->ecxt_scantuple == 
slot);
+
+               /* First time through, set up expression evaluation state */
+               pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
+       }
+
+       partexpr_item = list_head(pd->keystate);
+       for (i = 0; i < pd->key->partnatts; i++)
+       {
+               AttrNumber      keycol = pd->key->partattrs[i];
+               Datum           datum;
+               bool            isNull;
+
+               if (keycol != 0)
+               {
+                       /* Plain column; get the value directly from the heap tuple */
+                       datum = slot_getattr(slot, keycol, &isNull);
+               }
+               else
+               {
+                       /* Expression; need to evaluate it */
+                       if (partexpr_item == NULL)
+                               elog(ERROR, "wrong number of partition key expressions");
+                       datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
+                                                                                         GetPerTupleExprContext(estate),
+                                                                                         &isNull);
+                       partexpr_item = lnext(partexpr_item);
+               }
+               values[i] = datum;
+               isnull[i] = isNull;
+       }
+
+       if (partexpr_item != NULL)
+               elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * ExecBuildSlotPartitionKeyDescription
+ *
+ * This works very much like BuildIndexValueDescription() and is currently
+ * used for building error messages when ExecFindPartition() fails to find
+ * a partition for a row.
+ */
+static char *
+ExecBuildSlotPartitionKeyDescription(Relation rel,
+                                                                        Datum *values,
+                                                                        bool *isnull,
+                                                                        int maxfieldlen)
+{
+       StringInfoData buf;
+       PartitionKey key = RelationGetPartitionKey(rel);
+       int                     partnatts = get_partition_natts(key);
+       int                     i;
+       Oid                     relid = RelationGetRelid(rel);
+       AclResult       aclresult;
+
+       if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
+               return NULL;
+
+       /* If the user has table-level access, just go build the description. */
+       aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
+       if (aclresult != ACLCHECK_OK)
+       {
+               /*
+                * Step through the columns of the partition key and make sure the
+                * user has SELECT rights on all of them.
+                */
+               for (i = 0; i < partnatts; i++)
+               {
+                       AttrNumber      attnum = get_partition_col_attnum(key, i);
+
+                       /*
+                        * If this partition key column is an expression, we return no
+                        * detail rather than try to figure out what column(s) the
+                        * expression includes and if the user has SELECT rights on them.
+                        */
+                       if (attnum == InvalidAttrNumber ||
+                               pg_attribute_aclcheck(relid, attnum, GetUserId(),
+                                                                         ACL_SELECT) != ACLCHECK_OK)
+                               return NULL;
+               }
+       }
+
+       initStringInfo(&buf);
+       appendStringInfo(&buf, "(%s) = (",
+                                        pg_get_partkeydef_columns(relid, true));
+
+       for (i = 0; i < partnatts; i++)
+       {
+               char       *val;
+               int                     vallen;
+
+               if (isnull[i])
+                       val = "null";
+               else
+               {
+                       Oid                     foutoid;
+                       bool            typisvarlena;
+
+                       getTypeOutputInfo(get_partition_col_typid(key, i),
+                                                         &foutoid, &typisvarlena);
+                       val = OidOutputFunctionCall(foutoid, values[i]);
+               }
+
+               if (i > 0)
+                       appendStringInfoString(&buf, ", ");
+
+               /* truncate if needed */
+               vallen = strlen(val);
+               if (vallen <= maxfieldlen)
+                       appendStringInfoString(&buf, val);
+               else
+               {
+                       vallen = pg_mbcliplen(val, vallen, maxfieldlen);
+                       appendBinaryStringInfo(&buf, val, vallen);
+                       appendStringInfoString(&buf, "...");
+               }
+       }
+
+       appendStringInfoChar(&buf, ')');
+
+       return buf.data;
+}
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 845c409540..c93ea8d598 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -40,6 +40,7 @@
 #include "access/htup_details.h"
 #include "access/xact.h"
 #include "commands/trigger.h"
+#include "executor/execPartition.h"
 #include "executor/executor.h"
 #include "executor/nodeModifyTable.h"
 #include "foreign/fdwapi.h"
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index 454a940a23..154df89e54 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -39,37 +39,6 @@ typedef struct PartitionDescData
 
 typedef struct PartitionDescData *PartitionDesc;
 
-/*-----------------------
- * PartitionDispatch - information about one partitioned table in a partition
- * hierarchy required to route a tuple to one of its partitions
- *
- *     reldesc         Relation descriptor of the table
- *     key                     Partition key information of the table
- *     keystate        Execution state required for expressions in the 
partition key
- *     partdesc        Partition descriptor of the table
- *     tupslot         A standalone TupleTableSlot initialized with this 
table's tuple
- *                             descriptor
- *     tupmap          TupleConversionMap to convert from the parent's rowtype 
to
- *                             this table's rowtype (when extracting the 
partition key of a
- *                             tuple just before routing it through this table)
- *     indexes         Array with partdesc->nparts members (for details on what
- *                             individual members represent, see how they are 
set in
- *                             RelationGetPartitionDispatchInfo())
- *-----------------------
- */
-typedef struct PartitionDispatchData
-{
-       Relation        reldesc;
-       PartitionKey key;
-       List       *keystate;           /* list of ExprState */
-       PartitionDesc partdesc;
-       TupleTableSlot *tupslot;
-       TupleConversionMap *tupmap;
-       int                *indexes;
-} PartitionDispatchData;
-
-typedef struct PartitionDispatchData *PartitionDispatch;
-
 extern void RelationBuildPartitionDesc(Relation relation);
 extern bool partition_bounds_equal(int partnatts, int16 *parttyplen,
                                           bool *parttypbyval, 
PartitionBoundInfo b1,
@@ -86,19 +55,6 @@ extern List *map_partition_varattnos(List *expr, int 
target_varno,
 extern List *RelationGetPartitionQual(Relation rel);
 extern Expr *get_partition_qual_relid(Oid relid);
 
-/* For tuple routing */
-extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
-                                                                int 
*num_parted, List **leaf_part_oids);
-extern void FormPartitionKeyDatum(PartitionDispatch pd,
-                                         TupleTableSlot *slot,
-                                         EState *estate,
-                                         Datum *values,
-                                         bool *isnull);
-extern int get_partition_for_tuple(PartitionDispatch *pd,
-                                               TupleTableSlot *slot,
-                                               EState *estate,
-                                               PartitionDispatchData 
**failed_at,
-                                               TupleTableSlot **failed_slot);
 extern Oid     get_default_oid_from_partdesc(PartitionDesc partdesc);
 extern Oid     get_default_partition_oid(Oid parentId);
 extern void update_default_partition_oid(Oid parentId, Oid defaultPartId);
@@ -106,4 +62,8 @@ extern void check_default_allows_bound(Relation parent, 
Relation defaultRel,
                                                   PartitionBoundSpec 
*new_spec);
 extern List *get_proposed_default_constraint(List *new_part_constaints);
 
+/* For tuple routing */
+extern int get_partition_for_tuple(Relation relation, Datum *values,
+                                                       bool *isnull);
+
 #endif                                                 /* PARTITION_H */
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h
new file mode 100644
index 0000000000..8c9ab2f1a9
--- /dev/null
+++ b/src/include/executor/execPartition.h
@@ -0,0 +1,65 @@
+/*--------------------------------------------------------------------
+ * execPartition.h
+ *             POSTGRES partitioning executor interface
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *             src/include/executor/execPartition.h
+ *--------------------------------------------------------------------
+ */
+
+#ifndef EXECPARTITION_H
+#define EXECPARTITION_H
+
+#include "catalog/partition.h"
+#include "nodes/execnodes.h"
+#include "nodes/parsenodes.h"
+#include "nodes/plannodes.h"
+
+/*-----------------------
+ * PartitionDispatch - information about one partitioned table in a partition
+ * hierarchy required to route a tuple to one of its partitions
+ *
+ *     reldesc         Relation descriptor of the table
+ *     key                     Partition key information of the table
+ *     keystate        Execution state required for expressions in the partition key
+ *     partdesc        Partition descriptor of the table
+ *     tupslot         A standalone TupleTableSlot initialized with this table's tuple
+ *                             descriptor
+ *     tupmap          TupleConversionMap to convert from the parent's rowtype to
+ *                             this table's rowtype (when extracting the partition key of a
+ *                             tuple just before routing it through this table)
+ *     indexes         Array with partdesc->nparts members (for details on what
+ *                             individual members represent, see how they are set in
+ *                             get_partition_dispatch_recurse())
+ *-----------------------
+ */
+typedef struct PartitionDispatchData
+{
+       Relation                reldesc;
+       PartitionKey    key;
+       List               *keystate;           /* list of ExprState */
+       PartitionDesc   partdesc;
+       TupleTableSlot *tupslot;
+       TupleConversionMap *tupmap;
+       int                        *indexes;
+} PartitionDispatchData;
+
+typedef struct PartitionDispatchData *PartitionDispatch;
+
+extern void ExecSetupPartitionTupleRouting(Relation rel,
+                                                          Index resultRTindex,
+                                                          EState *estate,
+                                                          PartitionDispatch **pd,
+                                                          ResultRelInfo **partitions,
+                                                          TupleConversionMap ***tup_conv_maps,
+                                                          TupleTableSlot **partition_tuple_slot,
+                                                          int *num_parted, int *num_partitions);
+extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
+                                 PartitionDispatch *pd,
+                                 TupleTableSlot *slot,
+                                 EState *estate);
+
+#endif                                                 /* EXECPARTITION_H */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 770881849c..ee2ea156d1 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -188,6 +188,8 @@ extern void ExecCleanUpTriggerState(EState *estate);
 extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
 extern void ExecConstraints(ResultRelInfo *resultRelInfo,
                                TupleTableSlot *slot, EState *estate);
+extern void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
+                                  TupleTableSlot *slot, EState *estate);
 extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
                                         TupleTableSlot *slot, EState *estate);
 extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo 
*relinfo);
@@ -206,18 +208,6 @@ extern void EvalPlanQualSetPlan(EPQState *epqstate,
 extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
                                         HeapTuple tuple);
 extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
-extern void ExecSetupPartitionTupleRouting(Relation rel,
-                                                          Index resultRTindex,
-                                                          EState *estate,
-                                                          PartitionDispatch 
**pd,
-                                                          ResultRelInfo 
**partitions,
-                                                          TupleConversionMap 
***tup_conv_maps,
-                                                          TupleTableSlot 
**partition_tuple_slot,
-                                                          int *num_parted, int 
*num_partitions);
-extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
-                                 PartitionDispatch *pd,
-                                 TupleTableSlot *slot,
-                                 EState *estate);
 
 #define EvalPlanQualSetSlot(epqstate, slot)  ((epqstate)->origslot = (slot))
 extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
-- 
2.11.0

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to