Hi,

On 2016/12/13 2:45, Dmitry Ivanov wrote:
> Huh, this code is broken as well. We have to ignore partitions that don't
> have any subpartitions. Patch is attached below (v2).

Good catch and thanks a lot for the patch!  I have revised it a bit and
added some explanatory comments to that function.

Attaching the above patch, along with some other patches posted earlier,
and one more patch fixing another bug I found.  Patch descriptions follow:

0001-Miscallaneous-code-and-comment-improvements.patch

Fixes some obsolete comments while improving others.  Also, implements
some of Tomas Vondra's review comments.

0002-Miscallaneous-documentation-improvements.patch

Fixes inconsistencies and improves some examples in the documentation.
Also, mentions the limitation regarding row movement.

0003-Invalidate-the-parent-s-relcache-after-partition-cre.patch

Fixes a bug reported by Tomas, whereby a parent's relcache was not
invalidated after creation of a new partition using CREATE TABLE PARTITION
OF.  This resulted in tuple-routing code not being to able to find a
partition that was created by the last command in a given transaction.

0004-Fix-a-bug-of-insertion-into-an-internal-partition.patch

Fixes a bug I found this morning, whereby an internal partition's
constraint would not be enforced if it is targeted directly.  See example
below:

create table p (a int, b char) partition by range (a);
create table p1 partition of p for values from (1) to (10) partition by
list (b);
create table p1a partition of p1 for values in ('a');
insert into p1 values (0, 'a');  -- should fail, but doesn't

0005-Fix-a-tuple-routing-bug-in-multi-level-partitioned-t.patch

Fixes a bug discovered by Dmitry Ivanov, whereby wrong indexes were
assigned to the partitions of lower levels (level > 1), causing spurious
"partition not found" error as demonstrated in his email [1].


Sorry about sending some of these patches repeatedly though.

Thanks,
Amit

[1]
https://www.postgresql.org/message-id/e6c56fe9-4b87-4f64-ac6f-bc99675f3f9e%40postgrespro.ru
>From f089201eab5cb8989aad3403ad9a7b43d42c8fa7 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 13 Dec 2016 15:06:08 +0900
Subject: [PATCH 1/5] Miscallaneous code and comment improvements.

---
 src/backend/catalog/heap.c                 |  4 ++++
 src/backend/catalog/partition.c            |  2 +-
 src/backend/commands/copy.c                | 27 ++++++++++++---------------
 src/backend/commands/tablecmds.c           | 20 ++++++++++----------
 src/backend/executor/nodeModifyTable.c     | 22 +++++++++++-----------
 src/include/catalog/pg_partitioned_table.h |  4 ++--
 6 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 5ffea74855..c09c9f28a7 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1887,6 +1887,10 @@ heap_drop_with_catalog(Oid relid)
 
 	if (parent)
 	{
+		/*
+		 * Invalidate the parent's relcache so that the partition is no longer
+		 * included in its partition descriptor.
+		 */
 		CacheInvalidateRelcache(parent);
 		heap_close(parent, NoLock);		/* keep the lock */
 	}
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index 219d380cde..cc09fb3e55 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -1492,7 +1492,7 @@ generate_partition_qual(Relation rel, bool recurse)
  *			Construct values[] and isnull[] arrays for the partition key
  *			of a tuple.
  *
- *	pkinfo			partition key execution info
+ *	pd				Partition dispatch object of the partitioned table
  *	slot			Heap tuple from which to extract partition key
  *	estate			executor state for evaluating any partition key
  *					expressions (must be non-NULL)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 270be0af18..e9177491e2 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -1403,31 +1403,28 @@ BeginCopy(ParseState *pstate,
 					 errmsg("table \"%s\" does not have OIDs",
 							RelationGetRelationName(cstate->rel))));
 
-		/*
-		 * Initialize state for CopyFrom tuple routing.  Watch out for
-		 * any foreign partitions.
-		 */
+		/* Initialize state for CopyFrom tuple routing. */
 		if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 		{
-			PartitionDispatch *pd;
 			List		   *leaf_parts;
 			ListCell	   *cell;
 			int				i,
-							num_parted,
-							num_leaf_parts;
+							num_parted;
 			ResultRelInfo  *leaf_part_rri;
 
 			/* Get the tuple-routing information and lock partitions */
-			pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
-												  &num_parted, &leaf_parts);
-			num_leaf_parts = list_length(leaf_parts);
-			cstate->partition_dispatch_info = pd;
+			cstate->partition_dispatch_info =
+					RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+													 &num_parted,
+													 &leaf_parts);
 			cstate->num_dispatch = num_parted;
-			cstate->num_partitions = num_leaf_parts;
-			cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts *
-														sizeof(ResultRelInfo));
+			cstate->num_partitions = list_length(leaf_parts);
+			cstate->partitions = (ResultRelInfo *)
+									palloc(cstate->num_partitions *
+													sizeof(ResultRelInfo));
 			cstate->partition_tupconv_maps = (TupleConversionMap **)
-						palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+									palloc0(cstate->num_partitions *
+											sizeof(TupleConversionMap *));
 
 			leaf_part_rri = cstate->partitions;
 			i = 0;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 5856e72918..a9650114d4 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1614,8 +1614,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
 
 	/*
 	 * In case of a partition, there are no new column definitions, only
-	 * dummy ColumnDefs created for column constraints.  We merge these
-	 * constraints inherited from the parent.
+	 * dummy ColumnDefs created for column constraints.  We merge them
+	 * with the constraints inherited from the parent.
 	 */
 	if (is_partition)
 	{
@@ -2030,8 +2030,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
 							newcollid;
 
 				/*
-				 * Partitions have only one parent, so conflict should never
-				 * occur
+				 * Partitions have only one parent and have no column
+				 * definitions of their own, so conflict should never occur.
 				 */
 				Assert(!is_partition);
 
@@ -2118,8 +2118,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
 
 	/*
 	 * Now that we have the column definition list for a partition, we can
-	 * check whether the columns referenced in column option specifications
-	 * actually exist.  Also, we merge the options into the corresponding
+	 * check whether the columns referenced in the column constraint specs
+	 * actually exist.  Also, we merge the constraints into the corresponding
 	 * column definitions.
 	 */
 	if (is_partition && list_length(saved_schema) > 0)
@@ -13351,8 +13351,8 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd)
 	}
 
 	/*
-	 * Invalidate the relcache so that the new partition is now included
-	 * in rel's partition descriptor.
+	 * Invalidate the parent's relcache so that the new partition is now
+	 * included its partition descriptor.
 	 */
 	CacheInvalidateRelcache(rel);
 
@@ -13414,8 +13414,8 @@ ATExecDetachPartition(Relation rel, RangeVar *name)
 	heap_close(classRel, RowExclusiveLock);
 
 	/*
-	 * Invalidate the relcache so that the partition is no longer included
-	 * in our partition descriptor.
+	 * Invalidate the parent's relcache so that the partition is no longer
+	 * included in its partition descriptor.
 	 */
 	CacheInvalidateRelcache(rel);
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index c0b58d1841..ec440b353d 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1718,26 +1718,26 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	if (operation == CMD_INSERT &&
 		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 	{
-		PartitionDispatch  *pd;
 		int					i,
 							j,
-							num_parted,
-							num_leaf_parts;
+							num_parted;
 		List			   *leaf_parts;
 		ListCell		   *cell;
 		ResultRelInfo	   *leaf_part_rri;
 
-		/* Form the partition node tree and lock partitions */
-		pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
-											  &num_parted, &leaf_parts);
-		mtstate->mt_partition_dispatch_info = pd;
+		/* Get the tuple-routing information and lock partitions */
+		mtstate->mt_partition_dispatch_info =
+					RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+													 &num_parted,
+													 &leaf_parts);
 		mtstate->mt_num_dispatch = num_parted;
-		num_leaf_parts = list_length(leaf_parts);
-		mtstate->mt_num_partitions = num_leaf_parts;
+		mtstate->mt_num_partitions = list_length(leaf_parts);
 		mtstate->mt_partitions = (ResultRelInfo *)
-						palloc0(num_leaf_parts * sizeof(ResultRelInfo));
+						palloc0(mtstate->mt_num_partitions *
+												sizeof(ResultRelInfo));
 		mtstate->mt_partition_tupconv_maps = (TupleConversionMap **)
-					palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+						palloc0(mtstate->mt_num_partitions *
+												sizeof(TupleConversionMap *));
 
 		leaf_part_rri = mtstate->mt_partitions;
 		i = j = 0;
diff --git a/src/include/catalog/pg_partitioned_table.h b/src/include/catalog/pg_partitioned_table.h
index cec54ae62e..2c6b6cf3a0 100644
--- a/src/include/catalog/pg_partitioned_table.h
+++ b/src/include/catalog/pg_partitioned_table.h
@@ -7,7 +7,7 @@
  *
  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_partitioned_table.h $
+ * src/include/catalog/pg_partitioned_table.h
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -47,7 +47,7 @@ CATALOG(pg_partitioned_table,3350) BKI_WITHOUT_OIDS
 #ifdef CATALOG_VARLEN
 	oidvector		partclass;		/* operator class to compare keys */
 	oidvector		partcollation;	/* user-specified collation for keys */
-	pg_node_tree	partexprs;		/* list of expressions in the partitioning
+	pg_node_tree	partexprs;		/* list of expressions in the partition
 									 * key; one item for each zero entry in
 									 * partattrs[] */
 #endif
-- 
2.11.0

>From cd0856998198f59636f9cf4f34c25d01e089aea9 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 13 Dec 2016 15:06:39 +0900
Subject: [PATCH 2/5] Miscallaneous documentation improvements.

---
 doc/src/sgml/ref/alter_table.sgml  |  4 ++--
 doc/src/sgml/ref/create_table.sgml | 25 +++++++++++++------------
 doc/src/sgml/ref/insert.sgml       | 11 +++++++++++
 doc/src/sgml/ref/update.sgml       |  7 +++++++
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index a6a43c4b30..333b01db36 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -715,7 +715,7 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
    </varlistentry>
 
    <varlistentry>
-    <term><literal>ATTACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable> <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+    <term><literal>ATTACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
     <listitem>
      <para>
       This form attaches an existing table (which might itself be partitioned)
@@ -1332,7 +1332,7 @@ ALTER TABLE measurement
    Attach a partition to list partitioned table:
 <programlisting>
 ALTER TABLE cities
-    ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+    ATTACH PARTITION cities_ab FOR VALUES IN ('a', 'b');
 </programlisting></para>
 
   <para>
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 8bf8af302b..58f8bf6d6a 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -248,7 +248,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
    </varlistentry>
 
    <varlistentry>
-    <term><literal>PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable></literal></term>
+    <term><literal>PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable></literal> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
     <listitem>
      <para>
       Creates the table as <firstterm>partition</firstterm> of the specified
@@ -275,7 +275,8 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
      <para>
       Rows inserted into a partitioned table will be automatically routed to
       the correct partition.  If no suitable partition exists, an error will
-      occur.
+      occur.  Also, if updating a row in a given partition causes it to move
+      to another partition due to the new partition key, an error will occur.
      </para>
 
      <para>
@@ -1477,7 +1478,6 @@ CREATE TABLE employees OF employee_type (
    Create a range partitioned table:
 <programlisting>
 CREATE TABLE measurement (
-    city_id         int not null,
     logdate         date not null,
     peaktemp        int,
     unitsales       int
@@ -1488,9 +1488,10 @@ CREATE TABLE measurement (
    Create a list partitioned table:
 <programlisting>
 CREATE TABLE cities (
+    city_id      bigserial not null,
     name         text not null,
-    population   int,
-) PARTITION BY LIST (initcap(name));
+    population   bigint,
+) PARTITION BY LIST (left(lower(name), 1));
 </programlisting></para>
 
   <para>
@@ -1498,30 +1499,30 @@ CREATE TABLE cities (
 <programlisting>
 CREATE TABLE measurement_y2016m07
     PARTITION OF measurement (
-    unitsales WITH OPTIONS DEFAULT 0
+    unitsales DEFAULT 0
 ) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
 </programlisting></para>
 
   <para>
    Create partition of a list partitioned table:
 <programlisting>
-CREATE TABLE cities_west
+CREATE TABLE cities_ab
     PARTITION OF cities (
     CONSTRAINT city_id_nonzero CHECK (city_id != 0)
-) FOR VALUES IN ('Los Angeles', 'San Francisco');
+) FOR VALUES IN ('a', 'b');
 </programlisting></para>
 
   <para>
    Create partition of a list partitioned table that is itself further
    partitioned and then add a partition to it:
 <programlisting>
-CREATE TABLE cities_west
+CREATE TABLE cities_ab
     PARTITION OF cities (
     CONSTRAINT city_id_nonzero CHECK (city_id != 0)
-) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+) FOR VALUES IN ('a', 'b') PARTITION BY RANGE (population);
 
-CREATE TABLE cities_west_10000_to_100000
-    PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
+CREATE TABLE cities_ab_10000_to_100000
+    PARTITION OF cities_ab FOR VALUES FROM (10000) TO (100000);
 </programlisting></para>
  </refsect1>
 
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index 06f416039b..00c984d8d5 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -526,6 +526,17 @@ INSERT <replaceable>oid</replaceable> <replaceable class="parameter">count</repl
    updated by the command.
   </para>
  </refsect1>
+ 
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   If the specified table is a partitioned table, each row is routed to
+   the appropriate partition and inserted into it.  If the specified table
+   is a partition, an error will occur if one of the input rows violates
+   the partition constraint.
+  </para>
+ </refsect1>
 
  <refsect1>
   <title>Examples</title>
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 2de0f4aad1..e86993b9cf 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -279,6 +279,13 @@ UPDATE <replaceable class="parameter">count</replaceable>
    sub-selects is safer, though often harder to read and slower than
    using a join.
   </para>
+
+  <para>
+   In case of partitioned tables, updating a row might cause it to move
+   to a new partition due to the new partition key.  An error will occur
+   in this case.  Also, if the specified table is a partition, an error
+   will occur if the new row violates the partition constraint.
+  </para>
  </refsect1>
 
  <refsect1>
-- 
2.11.0

>From 967c270b858ce52d45b7e0a8cfe7e0bf24cb1dbd Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 13 Dec 2016 15:07:06 +0900
Subject: [PATCH 3/5] Invalidate the parent's relcache after partition
 creation.

---
 src/backend/commands/tablecmds.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index a9650114d4..f94cab60a3 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -777,12 +777,19 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 		 * it does not return on error.
 		 */
 		check_new_partition_bound(relname, parent, bound);
-		heap_close(parent, NoLock);
 
 		/* Update the pg_class entry. */
 		StorePartitionBound(rel, bound);
 
 		/*
+		 * We must invalidate the parent's relcache so that the next
+		 * CommandCounterIncrement() will cause the same to be rebuilt with
+		 * the new partition's info included in its partition descriptor.
+		 */
+		CacheInvalidateRelcache(parent);
+		heap_close(parent, NoLock);
+
+		/*
 		 * The code that follows may also update the pg_class tuple to update
 		 * relnumchecks, so bump up the command counter to avoid the "already
 		 * updated by self" error.
-- 
2.11.0

>From 2127f8ad891e3967f00bb821331ae16ad29a342e Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 13 Dec 2016 15:07:41 +0900
Subject: [PATCH 4/5] Fix a bug of insertion into an internal partition.

Since implicit partition constraints are not inherited, an internal
partition's constraint was not being enforced when targeted directly.
So, include such constraint when setting up leaf partition result
relations for tuple-routing.
---
 src/backend/commands/copy.c            | 19 +++++++++++++++++--
 src/backend/commands/tablecmds.c       |  2 +-
 src/backend/executor/execMain.c        | 22 +++++++++++++++-------
 src/backend/executor/nodeModifyTable.c | 12 +++++++++++-
 src/include/executor/executor.h        |  3 +--
 5 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index e9177491e2..b05055808f 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -1411,6 +1411,7 @@ BeginCopy(ParseState *pstate,
 			int				i,
 							num_parted;
 			ResultRelInfo  *leaf_part_rri;
+			List		   *partcheck = NIL;
 
 			/* Get the tuple-routing information and lock partitions */
 			cstate->partition_dispatch_info =
@@ -1426,6 +1427,15 @@ BeginCopy(ParseState *pstate,
 									palloc0(cstate->num_partitions *
 											sizeof(TupleConversionMap *));
 
+			/*
+			 * If the main target rel is a partition, ExecConstraints() as
+			 * applied to each leaf partition must consider its partition
+			 * constraint, because unlike explicit constraints, an implicit
+			 * partition constraint is not inherited.
+			 */
+			if (rel->rd_rel->relispartition)
+				partcheck = RelationGetPartitionQual(rel, true);
+
 			leaf_part_rri = cstate->partitions;
 			i = 0;
 			foreach(cell, leaf_parts)
@@ -1449,7 +1459,7 @@ BeginCopy(ParseState *pstate,
 				InitResultRelInfo(leaf_part_rri,
 								  partrel,
 								  1,	 /* dummy */
-								  false, /* no partition constraint check */
+								  partcheck,
 								  0);
 
 				/* Open partition indices */
@@ -2335,6 +2345,7 @@ CopyFrom(CopyState cstate)
 	uint64		processed = 0;
 	bool		useHeapMultiInsert;
 	int			nBufferedTuples = 0;
+	List	   *partcheck = NIL;
 
 #define MAX_BUFFERED_TUPLES 1000
 	HeapTuple  *bufferedTuples = NULL;	/* initialize to silence warning */
@@ -2451,6 +2462,10 @@ CopyFrom(CopyState cstate)
 		hi_options |= HEAP_INSERT_FROZEN;
 	}
 
+	/* Don't forget the partition constraints */
+	if (cstate->rel->rd_rel->relispartition)
+		partcheck = RelationGetPartitionQual(cstate->rel, true);
+
 	/*
 	 * We need a ResultRelInfo so we can use the regular executor's
 	 * index-entry-making machinery.  (There used to be a huge amount of code
@@ -2460,7 +2475,7 @@ CopyFrom(CopyState cstate)
 	InitResultRelInfo(resultRelInfo,
 					  cstate->rel,
 					  1,		/* dummy rangetable index */
-					  true,		/* do load partition check expression */
+					  partcheck,
 					  0);
 
 	ExecOpenIndices(resultRelInfo, false);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index f94cab60a3..48adeedbe0 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1329,7 +1329,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 		InitResultRelInfo(resultRelInfo,
 						  rel,
 						  0,	/* dummy rangetable index */
-						  false,
+						  NIL,	/* No need for partition constraints */
 						  0);
 		resultRelInfo++;
 	}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d43a204808..963cd2ae43 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -820,13 +820,19 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 			Index		resultRelationIndex = lfirst_int(l);
 			Oid			resultRelationOid;
 			Relation	resultRelation;
+			List	   *partcheck = NIL;
 
 			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
 			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
+
+			/* Don't forget the partition constraint */
+			if (resultRelation->rd_rel->relispartition)
+				partcheck = RelationGetPartitionQual(resultRelation, true);
+
 			InitResultRelInfo(resultRelInfo,
 							  resultRelation,
 							  resultRelationIndex,
-							  true,
+							  partcheck,
 							  estate->es_instrument);
 			resultRelInfo++;
 		}
@@ -1216,7 +1222,7 @@ void
 InitResultRelInfo(ResultRelInfo *resultRelInfo,
 				  Relation resultRelationDesc,
 				  Index resultRelationIndex,
-				  bool load_partition_check,
+				  List *partition_check,
 				  int instrument_options)
 {
 	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
@@ -1254,10 +1260,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 	resultRelInfo->ri_ConstraintExprs = NULL;
 	resultRelInfo->ri_junkFilter = NULL;
 	resultRelInfo->ri_projectReturning = NULL;
-	if (load_partition_check)
-		resultRelInfo->ri_PartitionCheck =
-							RelationGetPartitionQual(resultRelationDesc,
-													 true);
+	resultRelInfo->ri_PartitionCheck = partition_check;
 }
 
 /*
@@ -1284,6 +1287,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
 	ListCell   *l;
 	Relation	rel;
 	MemoryContext oldcontext;
+	List	   *partcheck = NIL;
 
 	/* First, search through the query result relations */
 	rInfo = estate->es_result_relations;
@@ -1312,6 +1316,10 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
 	 */
 	rel = heap_open(relid, NoLock);
 
+	/* Don't forget the partition constraint */
+	if (rel->rd_rel->relispartition)
+		partcheck = RelationGetPartitionQual(rel, true);
+
 	/*
 	 * Make the new entry in the right context.
 	 */
@@ -1320,7 +1328,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
 	InitResultRelInfo(rInfo,
 					  rel,
 					  0,		/* dummy rangetable index */
-					  true,
+					  partcheck,
 					  estate->es_instrument);
 	estate->es_trig_target_relations =
 		lappend(estate->es_trig_target_relations, rInfo);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index ec440b353d..e236c0e0a7 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1724,6 +1724,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		List			   *leaf_parts;
 		ListCell		   *cell;
 		ResultRelInfo	   *leaf_part_rri;
+		List			   *partcheck = NIL;
 
 		/* Get the tuple-routing information and lock partitions */
 		mtstate->mt_partition_dispatch_info =
@@ -1739,6 +1740,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 						palloc0(mtstate->mt_num_partitions *
 												sizeof(TupleConversionMap *));
 
+		/*
+		 * If the main target rel is a partition, ExecConstraints() as
+		 * applied to each leaf partition must consider its partition
+		 * constraint, because unlike explicit constraints, an implicit
+		 * partition constraint is not inherited.
+		 */
+		if (rel->rd_rel->relispartition)
+			partcheck = RelationGetPartitionQual(rel, true);
+
 		leaf_part_rri = mtstate->mt_partitions;
 		i = j = 0;
 		foreach(cell, leaf_parts)
@@ -1762,7 +1772,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 			InitResultRelInfo(leaf_part_rri,
 							  partrel,
 							  1,		/* dummy */
-							  false,	/* no partition constraint checks */
+							  partcheck,
 							  eflags);
 
 			/* Open partition indices (note: ON CONFLICT unsupported)*/
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index b4d09f9564..74213da078 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -73,7 +73,6 @@
 #define ExecEvalExpr(expr, econtext, isNull, isDone) \
 	((*(expr)->evalfunc) (expr, econtext, isNull, isDone))
 
-
 /* Hook for plugins to get control in ExecutorStart() */
 typedef void (*ExecutorStart_hook_type) (QueryDesc *queryDesc, int eflags);
 extern PGDLLIMPORT ExecutorStart_hook_type ExecutorStart_hook;
@@ -189,7 +188,7 @@ extern void CheckValidResultRel(Relation resultRel, CmdType operation);
 extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
 				  Relation resultRelationDesc,
 				  Index resultRelationIndex,
-				  bool load_partition_check,
+				  List *partition_check,
 				  int instrument_options);
 extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
 extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
-- 
2.11.0

>From e4b3652bbcda67c45d0699d23e7395a9d298f5d5 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 13 Dec 2016 15:12:33 +0900
Subject: [PATCH 5/5] Fix a tuple-routing bug in multi-level partitioned tables

Due to the bug, wrong index was assigned to the partitioned tables
below level 1.  Since we assign indexes in a breadth-first manner,
any partition of the next level should get assigned an index greater
than that of the last partition of the current level.
---
 src/backend/catalog/partition.c | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index cc09fb3e55..f4a9525d1d 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -950,7 +950,8 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 			   *parted_rels;
 	ListCell   *lc;
 	int			i,
-				k;
+				k,
+				offset;
 
 	/*
 	 * Lock partitions and make a list of the partitioned ones to prepare
@@ -990,11 +991,19 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 		 */
 	}
 
-	/* Generate PartitionDispatch objects for all partitioned tables */
+	/*
+	 * We want to create two arrays - one for leaf partitions and another for
+	 * partitioned tables (including the root table and internal partitions).
+	 * While we only create the latter here, leaf partition array of suitable
+	 * objects (such as, ResultRelInfo) is created by the caller using the
+	 * list of OIDs we return.  Indexes into these arrays get assigned in a
+	 * breadth-first manner, whereby partitions of any given level are placed
+	 * consecutively in the respective arrays.
+	 */
 	pd = (PartitionDispatchData **) palloc(*num_parted *
 										   sizeof(PartitionDispatchData *));
 	*leaf_part_oids = NIL;
-	i = k = 0;
+	i = k = offset = 0;
 	foreach(lc, parted_rels)
 	{
 		Relation	partrel = lfirst(lc);
@@ -1010,6 +1019,16 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 		pd[i]->partdesc = partdesc;
 		pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
 
+		/*
+		 * Indexes corresponding to the internal partitions are multiplied by
+		 * -1 to distinguish them from those of leaf partitions.  Encountering
+		 * an index >= 0 means we found a leaf partition, which is immediately
+		 * returned as the partition we are looking for.  A negative index
+		 * means we found a partitioned table, whose PartitionDispatch object
+		 * is located at the above index multiplied back by -1.  Using the
+		 * PartitionDispatch object, search is continued further down the
+		 * partition tree.
+		 */
 		m = 0;
 		for (j = 0; j < partdesc->nparts; j++)
 		{
@@ -1023,14 +1042,22 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
 			else
 			{
 				/*
-				 * We can assign indexes this way because of the way
-				 * parted_rels has been generated.
+				 * offset denotes the number of partitioned tables of upper
+				 * levels including those of the current level.  Any partition
+				 * of this table must belong to the next level and hence will
+				 * be placed after the last partitioned table of this level.
 				 */
-				pd[i]->indexes[j] = -(i + 1 + m);
+				pd[i]->indexes[j] = -(1 + offset + m);
 				m++;
 			}
 		}
 		i++;
+
+		/*
+		 * This counts the number of partitioned tables at upper levels
+		 * including those of the current level.
+		 */
+		offset += m;
 	}
 
 	return pd;
-- 
2.11.0

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to