On 24-02-2015 PM 05:13, Amit Langote wrote:
> On 21-01-2015 PM 07:26, Amit Langote wrote:
>>
>> Ok, I will limit myself to focusing on following things at the moment:
>>
>> * Provide syntax in CREATE TABLE to declare partition key
>> * Provide syntax in CREATE TABLE to declare a table as partition of a
>> partitioned table and values it contains
>> * Arrange to have partition key and values stored in appropriate
>> catalogs (existing or new)
>> * Arrange to cache partitioning info of partitioned tables in relcache
>>
>
> Here is an experimental patch that attempts to implement this.
I divided the patch into two for convenience:
1) 0001_partition_syntax_catalog - adds commands, catalog and
partitioned table relation descriptor related WIP code
2) 0002_tuple-routing-poc - an experimental patch to test how well
binary search approach works for tuple routing in ExecInsert().
Please take a look.
Thanks,
Amit
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index a403c64..bf02730 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -14,7 +14,7 @@ OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \
pg_constraint.o pg_conversion.o \
pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
- pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
+ pg_operator.o pg_partition.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
pg_type.o storage.o toasting.o
BKIFILES = postgres.bki postgres.description postgres.shdescription
@@ -41,6 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h \
pg_default_acl.h pg_seclabel.h pg_shseclabel.h pg_collation.h pg_range.h \
+ pg_partitioned_rel.h pg_partition.h\
toasting.h indexing.h \
)
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index bacb242..8bdb34b 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -43,6 +43,8 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_policy.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
@@ -157,7 +159,9 @@ static const Oid object_classes[MAX_OCLASS] = {
DefaultAclRelationId, /* OCLASS_DEFACL */
ExtensionRelationId, /* OCLASS_EXTENSION */
EventTriggerRelationId, /* OCLASS_EVENT_TRIGGER */
- PolicyRelationId /* OCLASS_POLICY */
+ PolicyRelationId, /* OCLASS_POLICY */
+ PartitionedRelRelationId, /* OCLASS_PARTITIONED_REL */
+ PartitionRelationId /* OCLASS_PARTITION */
};
@@ -1265,6 +1269,14 @@ doDeletion(const ObjectAddress *object, int flags)
RemovePolicyById(object->objectId);
break;
+ case OCLASS_PARTITIONED_REL:
+ RemovePartitionKeyByRelId(object->objectId);
+ break;
+
+ case OCLASS_PARTITION:
+ RemovePartitionDefByRelId(object->objectId);
+ break;
+
default:
elog(ERROR, "unrecognized object class: %u",
object->classId);
@@ -2373,6 +2385,12 @@ getObjectClass(const ObjectAddress *object)
case PolicyRelationId:
return OCLASS_POLICY;
+
+ case PartitionedRelRelationId:
+ return OCLASS_PARTITIONED_REL;
+
+ case PartitionRelationId:
+ return OCLASS_PARTITION;
}
/* shouldn't get here */
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 17f7266..0308c0b 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -47,6 +47,8 @@
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@@ -110,6 +112,7 @@ static Node *cookConstraint(ParseState *pstate,
Node *raw_constraint,
char *relname);
static List *insert_ordered_unique_oid(List *list, Oid datum);
+static void SetRelationIsPartitioned(Oid relationId, bool relispartitioned);
/* ----------------------------------------------------------------
@@ -2968,3 +2971,306 @@ insert_ordered_unique_oid(List *list, Oid datum)
lappend_cell_oid(list, prev, datum);
return list;
}
+
+/*
+ * StorePartitionKey
+ *
+ * Record the partition key of relation "rel" in the pg_partitioned_rel
+ * system catalog and mark the relation as partitioned in pg_class.
+ *
+ *	rel					relation the key is defined for
+ *	nattrs				number of columns in the partition key
+ *	partKeyAttrNumbers	attribute numbers of the key columns (nattrs entries)
+ *	partClassOids		operator class OIDs, one per key column
+ *	strategy			partitioning strategy code, stored in partstrategy
+ *
+ * Raises an error if the relation already has a pg_partitioned_rel entry.
+ * An auto dependency of the new entry on the relation is recorded so that
+ * the entry is dropped along with the relation.
+ */
+void
+StorePartitionKey(Relation rel, int nattrs,
+				  AttrNumber *partKeyAttrNumbers,
+				  Oid *partClassOids,
+				  char strategy)
+{
+	Oid			relid = RelationGetRelid(rel);
+	int2vector *partkey;
+	oidvector  *partclass;
+	Datum		values[Natts_pg_partitioned_rel];
+	bool		nulls[Natts_pg_partitioned_rel];
+	Relation	pg_partitioned_rel;
+	HeapTuple	tuple;
+	ObjectAddress myself;
+	ObjectAddress target;
+
+	/*
+	 * A relation can have only one partition key.  This check cannot fire at
+	 * the moment because a key can only be defined in CREATE TABLE.  Only
+	 * existence matters, so no syscache reference is held; ereport(ERROR)
+	 * does not return, hence there is nothing to release after it.
+	 */
+	if (SearchSysCacheExists1(PARTITIONEDRELID, ObjectIdGetDatum(relid)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("multiple partition keys for table \"%s\" are not allowed",
+						RelationGetRelationName(rel))));
+
+	/*
+	 * Copy the partition key, opclass info into arrays (should we
+	 * make the caller pass them like this to start with?)
+	 */
+	partkey = buildint2vector(partKeyAttrNumbers, nattrs);
+	partclass = buildoidvector(partClassOids, nattrs);
+
+	/* open the pg_partitioned_rel system catalog */
+	pg_partitioned_rel = heap_open(PartitionedRelRelationId, RowExclusiveLock);
+
+	/* build a pg_partitioned_rel tuple; no column is NULL */
+	MemSet(nulls, false, sizeof(nulls));
+
+	values[Anum_pg_partitioned_rel_partrelid - 1] = ObjectIdGetDatum(relid);
+	values[Anum_pg_partitioned_rel_partstrategy - 1] = CharGetDatum(strategy);
+	values[Anum_pg_partitioned_rel_partnatts - 1] = Int16GetDatum(nattrs);
+	values[Anum_pg_partitioned_rel_partkey - 1] = PointerGetDatum(partkey);
+	values[Anum_pg_partitioned_rel_partclass - 1] = PointerGetDatum(partclass);
+
+	tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_rel), values, nulls);
+
+	/* insert the tuple into the catalog and update its indexes */
+	simple_heap_insert(pg_partitioned_rel, tuple);
+	CatalogUpdateIndexes(pg_partitioned_rel, tuple);
+
+	/* Store a dependency - drop the key when the relation is dropped */
+	myself.classId = PartitionedRelRelationId;
+	myself.objectId = relid;
+	myself.objectSubId = 0;
+
+	target.classId = RelationRelationId;
+	target.objectId = relid;
+	target.objectSubId = 0;
+
+	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);
+
+	/* close the catalog and free the tuple */
+	heap_close(pg_partitioned_rel, RowExclusiveLock);
+	heap_freetuple(tuple);
+
+	/* finally, flag the relation itself as partitioned in pg_class */
+	SetRelationIsPartitioned(relid, true);
+}
+
+/*
+ * SetRelationIsPartitioned
+ *		Set the relispartitioned flag of the given relation's pg_class row.
+ *
+ * NOTE: caller must be holding an appropriate lock on the relation.
+ * ShareUpdateExclusiveLock is sufficient.
+ *
+ * NOTE: a relcache invalidation for the relation is wanted in every case.
+ * When the stored flag already has the requested value the pg_class row is
+ * left alone and the invalidation is requested explicitly instead.
+ */
+static void
+SetRelationIsPartitioned(Oid relationId, bool relispartitioned)
+{
+	Relation	pg_class_rel;
+	HeapTuple	classTup;
+	Form_pg_class classForm;
+
+	pg_class_rel = heap_open(RelationRelationId, RowExclusiveLock);
+
+	/* work on a private, modifiable copy of the pg_class row */
+	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
+	if (!HeapTupleIsValid(classTup))
+		elog(ERROR, "cache lookup failed for relation %u", relationId);
+	classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+	if (classForm->relispartitioned == relispartitioned)
+	{
+		/* nothing to change in the row, but force relcache rebuild anyway */
+		CacheInvalidateRelcacheByTuple(classTup);
+	}
+	else
+	{
+		classForm->relispartitioned = relispartitioned;
+		simple_heap_update(pg_class_rel, &classTup->t_self, classTup);
+
+		/* keep the catalog indexes up to date */
+		CatalogUpdateIndexes(pg_class_rel, classTup);
+	}
+
+	heap_freetuple(classTup);
+	heap_close(pg_class_rel, RowExclusiveLock);
+}
+
+/*
+ * RemovePartitionKeyByRelId
+ *		Delete the pg_partitioned_rel entry holding relid's partition key.
+ *
+ * Raises an error if no such entry is found.
+ */
+void
+RemovePartitionKeyByRelId(Oid relid)
+{
+	Relation	partitionedRelCatalog;
+	HeapTuple	keyTuple;
+
+	partitionedRelCatalog = heap_open(PartitionedRelRelationId,
+									  RowExclusiveLock);
+
+	/* DELETE FROM pg_partitioned_rel WHERE partrelid = :relid */
+	keyTuple = SearchSysCache(PARTITIONEDRELID,
+							  ObjectIdGetDatum(relid), 0, 0, 0);
+	if (!HeapTupleIsValid(keyTuple))
+		elog(ERROR, "cache lookup failed for partition key of relation %u", relid);
+
+	simple_heap_delete(partitionedRelCatalog, &keyTuple->t_self);
+
+	ReleaseSysCache(keyTuple);
+	heap_close(partitionedRelCatalog, RowExclusiveLock);
+}
+
+/*
+ * StorePartitionValues
+ *
+ * Record in the pg_partition system catalog the values that partition
+ * "childrel" of partitioned table "parentrel" holds.
+ *
+ *	childrel		the partition
+ *	parentrel		the partitioned table; its rd_partstrategy selects
+ *					which representation is stored
+ *	listnvalues		number of entries in datum[] for a list ('l')
+ *					partition; not used for a range ('r') partition
+ *	datum			for 'l', the allowed values of the single key column;
+ *					for 'r', parentrel->rd_partnatts entries, each stored
+ *					as an element of the partrangebounds array
+ *
+ * Exactly one of partlistvalues / partrangebounds is stored non-NULL.
+ * Raises an error if childrel already has a pg_partition entry or if the
+ * parent's partitioning strategy is not recognized.  An auto dependency of
+ * the new entry on childrel is recorded.
+ */
+void
+StorePartitionValues(Relation childrel,
+					 Relation parentrel,
+					 int listnvalues,
+					 Datum *datum)
+{
+	Oid			childrelid = RelationGetRelid(childrel);
+	Relation	pg_partition;
+	HeapTuple	tuple;
+	ArrayType  *listvalues = NULL;
+	ArrayType  *rangebounds = NULL;
+	ObjectAddress myself;
+	ObjectAddress target;
+	Datum		values[Natts_pg_partition];
+	bool		nulls[Natts_pg_partition];
+
+	/* list partitioning supports a single key column only */
+	Assert(parentrel->rd_partstrategy != 'l'
+		   || parentrel->rd_partnatts == 1);
+
+	/*
+	 * This check cannot fire at the moment because partition values can only
+	 * be defined in CREATE TABLE (that is, not in ALTER TABLE).  Only
+	 * existence matters, so no syscache reference is held; ereport(ERROR)
+	 * does not return, hence there is nothing to release after it.
+	 */
+	if (SearchSysCacheExists1(PARTITIONID, ObjectIdGetDatum(childrelid)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("redefining partition values for a partition "
+						"is not allowed")));
+
+	/*
+	 * Build the array to store; the column belonging to the other strategy
+	 * is stored as NULL.
+	 */
+	MemSet(nulls, false, sizeof(nulls));
+
+	switch (parentrel->rd_partstrategy)
+	{
+		case 'l':
+			{
+				AttrNumber	partattno = parentrel->rd_partattrs[0];
+				Oid			typid;
+				int16		typlen;
+				bool		typbyval;
+				char		typalign;
+
+				/* elements have the type of the (only) key column */
+				typid = parentrel->rd_att->attrs[partattno - 1]->atttypid;
+				get_typlenbyvalalign(typid, &typlen, &typbyval, &typalign);
+
+				listvalues = construct_array(datum,
+											 listnvalues,
+											 typid,
+											 typlen,
+											 typbyval,
+											 typalign);
+
+				nulls[Anum_pg_partition_partrangebounds - 1] = true;
+			}
+			break;
+
+		case 'r':
+			/* one element per key column */
+			rangebounds = construct_array(datum,
+										  parentrel->rd_partnatts,
+										  ANYARRAYOID,
+										  -1,
+										  false,
+										  'd');
+
+			nulls[Anum_pg_partition_partlistvalues - 1] = true;
+			break;
+
+		default:
+			/* otherwise both arrays would be stored as non-NULL NULL pointers */
+			elog(ERROR, "unrecognized partitioning strategy: %d",
+				 (int) parentrel->rd_partstrategy);
+			break;
+	}
+
+	/* open the pg_partition system catalog */
+	pg_partition = heap_open(PartitionRelationId, RowExclusiveLock);
+
+	/* build a pg_partition tuple */
+	values[Anum_pg_partition_partitionid - 1] = ObjectIdGetDatum(childrelid);
+	values[Anum_pg_partition_partparent - 1] = ObjectIdGetDatum(RelationGetRelid(parentrel));
+	values[Anum_pg_partition_partlistvalues - 1] = PointerGetDatum(listvalues);
+	values[Anum_pg_partition_partrangebounds - 1] = PointerGetDatum(rangebounds);
+
+	tuple = heap_form_tuple(RelationGetDescr(pg_partition), values, nulls);
+
+	/* insert the tuple into pg_partition and update its indexes */
+	simple_heap_insert(pg_partition, tuple);
+	CatalogUpdateIndexes(pg_partition, tuple);
+
+	/* Store a dependency - drop the entry when the partition is dropped */
+	myself.classId = PartitionRelationId;
+	myself.objectId = childrelid;
+	myself.objectSubId = 0;
+
+	target.classId = RelationRelationId;
+	target.objectId = childrelid;
+	target.objectSubId = 0;
+
+	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);
+
+	/* close the catalog and free the tuple */
+	heap_close(pg_partition, RowExclusiveLock);
+	heap_freetuple(tuple);
+}
+
+/*
+ * RemovePartitionDefByRelId
+ *		Delete the pg_partition entry describing partition relid.
+ *
+ * Raises an error if no such entry is found.
+ */
+void
+RemovePartitionDefByRelId(Oid relid)
+{
+	Relation	partitionCatalog;
+	HeapTuple	defTuple;
+
+	partitionCatalog = heap_open(PartitionRelationId, RowExclusiveLock);
+
+	/* DELETE FROM pg_partition WHERE partitionid = :relid */
+	defTuple = SearchSysCache(PARTITIONID,
+							  ObjectIdGetDatum(relid), 0, 0, 0);
+	if (!HeapTupleIsValid(defTuple))
+		elog(ERROR, "cache lookup failed for partition definition of relation %u", relid);
+
+	simple_heap_delete(partitionCatalog, &defTuple->t_self);
+
+	ReleaseSysCache(defTuple);
+	heap_close(partitionCatalog, RowExclusiveLock);
+}
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index d899dd7..b6e8d4e 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -41,6 +41,8 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_operator.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_policy.h"
#include "catalog/pg_rewrite.h"
diff --git a/src/backend/catalog/pg_partition.c b/src/backend/catalog/pg_partition.c
new file mode 100644
index 0000000..f7f8995
--- /dev/null
+++ b/src/backend/catalog/pg_partition.c
@@ -0,0 +1,135 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_partition.c
+ * routines to support manipulation of the pg_partition relation
+ *
+ * Note: currently, this module only contains inquiry functions; the actual
+ * creation and deletion of pg_partition entries is done in tablecmds.c.
+ * Perhaps someday that code should be moved here, but it'd have to be
+ * disentangled from other stuff such as pg_depend updates.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/pg_partition.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partition_fn.h"
+#include "catalog/pg_type.h"
+#include "parser/parse_type.h"
+#include "storage/lmgr.h"
+#include "utils/array.h"
+#include "utils/rangetypes.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+#include "utils/typcache.h"
+
+/*
+ * GetPartition
+ *
+ * Return the details of partition "partitionid" of "parentrel" as read from
+ * the pg_partition catalog.
+ *
+ * The result is a palloc'd, zero-initialized Partition.  For a list
+ * partition (partlistvalues is not NULL) listvalues/listnvalues are filled
+ * in.  Otherwise the entry is decoded as a range partition: rangenbounds is
+ * set and rangemins[i]/rangemaxs[i] receive the two bounds stored for key
+ * column i.
+ *
+ * The stored arrays are copied before being taken apart, so by-reference
+ * values in the result do not point into the syscache tuple, which is
+ * released before returning.
+ *
+ * Raises an error if the partition has no pg_partition entry or the entry
+ * has neither list values nor range bounds.
+ *
+ * NOTE(review): assumes Partition's rangemins/rangemaxs have room for
+ * rd_partnatts entries; the struct is declared elsewhere -- confirm.
+ */
+Partition *
+GetPartition(Relation parentrel, Oid partitionid)
+{
+	HeapTuple	tuple;
+	Datum		datum;
+	bool		isnull;
+	Datum	   *rangebounds;
+	Partition  *result;
+	int			i;
+
+	tuple = SearchSysCache1(PARTITIONID, ObjectIdGetDatum(partitionid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for partition %u",
+			 partitionid);
+
+	result = (Partition *) palloc0(sizeof(Partition));
+	result->oid = partitionid;
+
+	datum = SysCacheGetAttr(PARTITIONID, tuple,
+							Anum_pg_partition_partlistvalues, &isnull);
+	if (!isnull)
+	{
+		/* list partition: elements have the type of the only key column */
+		Form_pg_attribute att;
+
+		att = parentrel->rd_att->attrs[parentrel->rd_partattrs[0] - 1];
+
+		deconstruct_array(DatumGetArrayTypePCopy(datum),
+						  att->atttypid, att->attlen,
+						  att->attbyval, att->attalign,
+						  &result->listvalues, NULL, &result->listnvalues);
+
+		ReleaseSysCache(tuple);
+		return result;
+	}
+
+	/* getting here means we're looking at a range partition */
+	datum = SysCacheGetAttr(PARTITIONID, tuple,
+							Anum_pg_partition_partrangebounds, &isnull);
+	if (isnull)
+		elog(ERROR, "pg_partition entry for partition %u has neither list values nor range bounds",
+			 partitionid);
+
+	/* the outer array has one element (itself an array) per key column */
+	deconstruct_array(DatumGetArrayTypePCopy(datum),
+					  ANYARRAYOID, -1, false, 'd',
+					  &rangebounds, NULL, &result->rangenbounds);
+	/* paranoia */
+	Assert(result->rangenbounds == parentrel->rd_partnatts);
+
+	/* peep into each array to get both bounds of the matching key column */
+	for (i = 0; i < result->rangenbounds; i++)
+	{
+		ArrayType  *bounds = DatumGetArrayTypeP(rangebounds[i]);
+		Form_pg_attribute att;
+		Datum	   *minmax;
+		int			nbounds;
+
+		/* bounds for key column i were stored using that column's type */
+		att = parentrel->rd_att->attrs[parentrel->rd_partattrs[i] - 1];
+
+		deconstruct_array(bounds,
+						  att->atttypid, att->attlen,
+						  att->attbyval, att->attalign,
+						  &minmax, NULL, &nbounds);
+
+		Assert(nbounds == 2);
+
+		result->rangemins[i] = minmax[0];
+		result->rangemaxs[i] = minmax[1];
+	}
+
+	ReleaseSysCache(tuple);
+	return result;
+}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index e859669..cf39d4f 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -65,8 +65,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
char *accessMethodName, Oid accessMethodId,
bool amcanorder,
bool isconstraint);
-static Oid GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId);
static char *ChooseIndexName(const char *tabname, Oid namespaceId,
List *colnames, List *exclusionOpNames,
bool primary, bool isconstraint);
@@ -1207,9 +1205,9 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
/*
* Resolve possibly-defaulted operator class specification
*/
-static Oid
+Oid
GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId)
+ const char *accessMethodName, Oid accessMethodId)
{
char *schemaname;
char *opcname;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index f5d5b63..f44840c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -37,6 +37,8 @@
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
@@ -82,6 +84,7 @@
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
+#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
@@ -92,6 +95,7 @@
#include "utils/syscache.h"
#include "utils/tqual.h"
#include "utils/typcache.h"
+#include "utils/rangetypes.h"
/*
@@ -416,6 +420,9 @@ static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKM
static void ATExecGenericOptions(Relation rel, List *options);
static void ATExecEnableRowSecurity(Relation rel);
static void ATExecDisableRowSecurity(Relation rel);
+static void ATExecAttachPartition(Relation parentrel,
+ PartitionDef *partition,
+ LOCKMODE lockmode);
static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
ForkNumber forkNum, char relpersistence);
@@ -425,6 +432,18 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
Oid oldRelOid, void *arg);
static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
Oid oldrelid, void *arg);
+static void ComputePartitionAttrs(Oid relId, PartitionBy *partby,
+ AttrNumber *partKeyAttrNumbers,
+ Oid *partClassOids);
+static Datum *EvalPartitionValues(Relation parentrel,
+ PartitionDef *partition,
+ int *listnvalues);
+static Datum *evaluateListValues(List *values, NameData attname, Oid typid,
+ int32 typmod, int16 typlen, bool typbyval,
+ int nvalues);
+static Datum *evaluateRangeBounds(List *rangelbounds,
+ NameData *attname, Oid *typid, int32 *typmod,
+ int16 *typlen, bool *typbyval, int partnatts);
/* ----------------------------------------------------------------
@@ -465,6 +484,9 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId)
AttrNumber attnum;
static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
Oid ofTypeId;
+ int numPartitionAttrs;
+ AttrNumber partKeyAttrNumbers[PARTITION_MAX_KEYS];
+ Oid classObjectId[PARTITION_MAX_KEYS];
/*
* Truncate relname to appropriate length (probably a waste of time, as
@@ -690,6 +712,43 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId)
true, true, false);
/*
+ * Store the partition key into pg_partitioned_rel if one defined.
+ * It consists of table attributes constituting the key and opclass
+ * to use with it.
+ */
+	if (stmt->partitionby)
+	{
+		/*
+		 * Validate the number of columns in the partition key before
+		 * resolving and storing it.
+		 */
+		numPartitionAttrs = list_length(stmt->partitionby->partcols);
+		if (numPartitionAttrs < 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("must specify at least one column in partition key")));
+		/* report the limit that is actually enforced here */
+		if (numPartitionAttrs > PARTITION_MAX_KEYS)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("cannot use more than %d columns in partition key",
+							PARTITION_MAX_KEYS)));
+		/* only the range ('r') strategy accepts a multi-column key */
+		if (stmt->partitionby->strategy != 'r' && numPartitionAttrs > 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("cannot use more than 1 column in partition key when"
+							" using list partitioning strategy")));
+
+		/* resolve key column numbers and opclasses, then store the key */
+		ComputePartitionAttrs(relationId,
+							  stmt->partitionby,
+							  partKeyAttrNumbers,
+							  classObjectId);
+		StorePartitionKey(rel,
+						  numPartitionAttrs,
+						  partKeyAttrNumbers,
+						  classObjectId,
+						  stmt->partitionby->strategy);
+	}
+
+ /*
* Clean up. We keep lock on new relation (although it shouldn't be
* visible to anyone else anyway, until commit).
*/
@@ -2939,6 +2998,15 @@ AlterTableGetLockLevel(List *cmds)
break;
/*
+ * CREATE TABLE ... PARTITION OF <parent> FOR VALUES ...
+ *
+ * XXX: the target of this command is <parent>
+ */
+ case AT_AttachPartition:
+ cmd_lockmode = AccessExclusiveLock;
+ break;
+
+ /*
* These subcommands affect implicit row type conversion. They
* have affects similar to CREATE/DROP CAST on queries. don't
* provide for invalidating parse trees as a result of such
@@ -3305,6 +3373,10 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+ case AT_AttachPartition:
+ ATSimplePermissions(rel, ATT_TABLE);
+ pass = AT_PASS_MISC;
+ break;
case AT_GenericOptions:
ATSimplePermissions(rel, ATT_FOREIGN_TABLE);
/* No command-specific prep needed */
@@ -3597,6 +3669,9 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
case AT_DisableRowSecurity:
ATExecDisableRowSecurity(rel);
break;
+ case AT_AttachPartition:
+ ATExecAttachPartition(rel, (PartitionDef *) cmd->def, lockmode);
+ break;
case AT_GenericOptions:
ATExecGenericOptions(rel, (List *) cmd->def);
break;
@@ -10722,6 +10797,63 @@ ATExecDisableRowSecurity(Relation rel)
}
/*
+ * ATExecAttachPartition
+ *
+ * CREATE TABLE ... PARTITION OF <parent> FOR VALUES ...
+ */
+static void
+ATExecAttachPartition(Relation parentrel,
+					  PartitionDef *partition,
+					  LOCKMODE lockmode)
+{
+	Relation	childrel;
+	Relation	catalogRelation;	/* pg_inherits */
+	int			listnvalues = 0;	/* stays 0 unless list values are given */
+	Datum	   *values;
+
+	/*
+	 * parentrel is the target of the command; the partition to attach is
+	 * named by partition->name.  lockmode is currently not used here.
+	 */
+	childrel = heap_openrv(partition->name, AccessShareLock);
+
+	/* cannot attach a partition to a non-partitioned table */
+	if (!parentrel->rd_rel->relispartitioned)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot be a partition of non-partitioned relation \"%s\"",
+						RelationGetRelationName(parentrel))));
+
+	/* Permanent rels cannot be partitions of temporary ones */
+	if (parentrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		childrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("a permanent relation cannot be a partition of temporary relation \"%s\"",
+						RelationGetRelationName(parentrel))));
+
+	/* If parent rel is temp, it must belong to this session */
+	if (parentrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!parentrel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot be a partition of temporary relation of another session")));
+
+	/* Match up the columns and bump attinhcount as needed */
+	MergeAttributesIntoExisting(childrel, parentrel);
+
+	/* Now validate partition values and store in pg_partition */
+	values = EvalPartitionValues(parentrel, partition, &listnvalues);
+
+	StorePartitionValues(childrel, parentrel, listnvalues, values);
+
+	/* record the parent/child link in pg_inherits */
+	catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock);
+	StoreCatalogInheritance1(RelationGetRelid(childrel),
+							 RelationGetRelid(parentrel),
+							 1,
+							 catalogRelation);
+	heap_close(catalogRelation, RowExclusiveLock);
+
+	/*
+	 * Catalog entries describing the partition were modified above, so keep
+	 * our lock on it until commit rather than releasing it here.
+	 */
+	heap_close(childrel, NoLock);
+}
+
+/*
* ALTER FOREIGN TABLE <name> OPTIONS (...)
*/
static void
@@ -11609,3 +11741,337 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
ReleaseSysCache(tuple);
}
+
+/*
+ * ComputePartitionAttrs
+ *
+ * Compute per-partition-column information, viz. partition key attribute
+ * numbers and operator classes.
+ *
+ *	relId				OID of the relation the key columns belong to
+ *	partby				partition key specification; partby->partcols is
+ *						a list of PartitionElem
+ *	partKeyAttrNumbers	output: attribute number of each key column
+ *	partClassOids		output: operator class OID of each key column
+ *
+ * Both output arrays must have room for one entry per element of
+ * partby->partcols.  Raises an error if a key element has no column name,
+ * names a column that does not exist, or names a system column.
+ */
+static void
+ComputePartitionAttrs(Oid relId, PartitionBy *partby,
+					  AttrNumber *partKeyAttrNumbers,
+					  Oid *partClassOids)
+{
+	int			attn = 0;
+	ListCell   *lc;
+
+	/* Process partColumns list */
+	foreach(lc, partby->partcols)
+	{
+		PartitionElem *partcol = (PartitionElem *) lfirst(lc);
+		HeapTuple	atttuple;
+		Form_pg_attribute attform;
+		Oid			atttype;
+
+		/*
+		 * Only plain column references are handled.  Without a name there
+		 * is neither an attribute number nor a type to work with.
+		 */
+		if (partcol->name == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("partition key columns must be specified by name")));
+
+		atttuple = SearchSysCacheAttName(relId, partcol->name);
+		if (!HeapTupleIsValid(atttuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" named in partition key does not exist",
+							partcol->name)));
+		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+		/*
+		 * Key attribute numbers are later used to index the relation's
+		 * tuple descriptor (attno - 1), so only user columns are usable.
+		 */
+		if (attform->attnum <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("cannot use system column \"%s\" in partition key",
+							partcol->name)));
+
+		partKeyAttrNumbers[attn] = attform->attnum;
+		atttype = attform->atttypid;
+		ReleaseSysCache(atttuple);
+
+		/*
+		 * Identify the opclass to use. At the moment, we use "btree" indexable
+		 * operators for simpler cases of partitioning where we compare key column
+		 * values with partition bound values with just these set of operators.
+		 */
+		partClassOids[attn] = GetIndexOpClass(partcol->opclass,
+											  atttype,
+											  "btree",
+											  BTREE_AM_OID);
+		attn++;
+	}
+}
+
+/*
+ * EvalPartitionValues
+ *
+ * Validate and evaluate the partition values in a partition definition.
+ *
+ *	parentrel		the partitioned table; must be marked partitioned
+ *	partition		partition definition whose ->values are evaluated
+ *	listnvalues		output: number of list values for a list ('l')
+ *					partition, 0 otherwise
+ *
+ * Returns a palloc'd Datum array.  For 'l' it holds the evaluated list
+ * values of the single key column.  For 'r' it holds one entry per key
+ * column, each a two-element array {lower bound, upper bound} of that
+ * column's type.
+ *
+ * Raises an error if the specified values do not fit the parent's
+ * partition key or strategy, or if the strategy is not recognized.
+ */
+static Datum *
+EvalPartitionValues(Relation parentrel,
+					PartitionDef *partition,
+					int *listnvalues)
+{
+	PartitionValues *values;
+	Datum	   *result = NULL;
+	Datum	   *rangelbounds;
+	Datum	   *rangeubounds;
+	int			partnatts;
+	int			rngnlvalues,
+				rngnuvalues;
+	AttrNumber	partattno;
+	int			i;
+	Oid			typid[PARTITION_MAX_KEYS];
+	int32		typmod[PARTITION_MAX_KEYS];
+	int16		typlen[PARTITION_MAX_KEYS];
+	char		typalign[PARTITION_MAX_KEYS];
+	bool		typbyval[PARTITION_MAX_KEYS];
+	NameData	attname[PARTITION_MAX_KEYS];
+
+	Assert(parentrel->rd_rel->relispartitioned);
+
+	/* give the output a defined value on every path */
+	*listnvalues = 0;
+
+	partnatts = parentrel->rd_partnatts;
+
+	/*
+	 * collect type info for partition key attributes
+	 */
+	for (i = 0; i < partnatts; i++)
+	{
+		partattno = parentrel->rd_partattrs[i];
+		typid[i] = parentrel->rd_att->attrs[partattno - 1]->atttypid;
+		typmod[i] = parentrel->rd_att->attrs[partattno - 1]->atttypmod;
+		attname[i] = parentrel->rd_att->attrs[partattno - 1]->attname;
+		get_typlenbyvalalign(typid[i], &typlen[i], &typbyval[i], &typalign[i]);
+	}
+
+	/* now transform and evaluate value expressions for this partition */
+	values = partition->values;
+	switch (parentrel->rd_partstrategy)
+	{
+		case 'l':
+			Assert(!(partnatts > 1));
+
+			if (!values->listvalues)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("Specified values in FOR VALUES ... does not"
+								" match the partition key or strategy")));
+
+			*listnvalues = list_length(values->listvalues);
+
+			if (*listnvalues < 1)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("Must contain at least one value in"
+								" FOR VALUES IN (..)")));
+
+			/*
+			 * Currently only one column is supported here, so the result is
+			 * simply the list of allowed values for that key column.
+			 */
+			result = evaluateListValues(values->listvalues, attname[0],
+										typid[0], typmod[0], typlen[0], typbyval[0],
+										*listnvalues);
+			break;
+
+		case 'r':
+			if (!values->rangelbounds || !values->rangeubounds)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("Specified values in FOR VALUES does not"
+								" match the partition key or strategy")));
+
+			rngnlvalues = list_length(values->rangelbounds);
+			rngnuvalues = list_length(values->rangeubounds);
+
+			if ((rngnlvalues < 1) ||
+				(rngnuvalues < 1) ||
+				(rngnlvalues != partnatts) ||
+				(rngnuvalues != partnatts) ||
+				(rngnlvalues != rngnuvalues))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("Specified values in FOR VALUES BETWEEN does not"
+								" match the partition key")));
+
+			/*
+			 * both lists have the same number of values equal to
+			 * number of columns in the partition key.
+			 */
+			rangelbounds = evaluateRangeBounds(values->rangelbounds, attname,
+											   typid, typmod, typlen, typbyval, partnatts);
+			rangeubounds = evaluateRangeBounds(values->rangeubounds, attname,
+											   typid, typmod, typlen, typbyval, partnatts);
+
+			/* pair up the lower and upper bound of each key column */
+			result = (Datum *) palloc0(partnatts * sizeof(Datum));
+			for (i = 0; i < partnatts; i++)
+			{
+				Datum		bounds[2];
+				ArrayType  *rangebounds;
+
+				bounds[0] = rangelbounds[i];
+				bounds[1] = rangeubounds[i];
+
+				rangebounds = construct_array(bounds, 2, typid[i],
+											  typlen[i], typbyval[i], typalign[i]);
+
+				result[i] = PointerGetDatum(rangebounds);
+			}
+			pfree(rangelbounds);
+			pfree(rangeubounds);
+			break;
+
+		default:
+			/* without this the function would return an undefined pointer */
+			elog(ERROR, "unrecognized partitioning strategy: %d",
+				 (int) parentrel->rd_partstrategy);
+			break;
+	}
+
+	return result;
+}
+
+/*
+ * evaluateListValues
+ *
+ * Evaluate a list of expressions to build a datum array.
+ *
+ *	values		list of expression nodes; they are not transformed here
+ *	attname		name of the partition key column (for error messages)
+ *	typid, typmod, typlen, typbyval
+ *				type information of the partition key column
+ *	nvalues		number of expressions in values
+ *
+ * Each expression is coerced to the key column's type and evaluated.  The
+ * returned array and any by-reference values in it are allocated in the
+ * caller's memory context; the executor state used for evaluation is freed
+ * before returning.
+ *
+ * Raises an error if a value cannot be coerced to the column type or
+ * evaluates to NULL.
+ */
+static Datum *
+evaluateListValues(List *values, NameData attname,
+				   Oid typid, int32 typmod, int16 typlen, bool typbyval,
+				   int nvalues)
+{
+	ListCell   *cell;
+	EState	   *estate;
+	ExprContext *ecxt;
+	int			i;
+	Datum	   *datum;
+
+	Assert(list_length(values) == nvalues);
+
+	datum = (Datum *) palloc(nvalues * sizeof(Datum));
+	estate = CreateExecutorState();
+	ecxt = GetPerTupleExprContext(estate);
+
+	i = 0;
+	foreach(cell, values)
+	{
+		Node	   *value = (Node *) lfirst(cell);
+		bool		isnull;
+		ExprState  *expr;
+		MemoryContext oldcxt;
+		Oid			valuetype;
+
+		/* coerce and evaluate in memory that is reset after each value */
+		oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+
+		valuetype = exprType(value);
+		value = coerce_to_target_type(NULL,
+									  value, valuetype,
+									  typid, typmod,
+									  COERCION_ASSIGNMENT,
+									  COERCE_IMPLICIT_CAST,
+									  -1);
+		if (value == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("partition key column \"%s\" is of type %s"
+							" but specified value is of type %s",
+							NameStr(attname),
+							format_type_be(typid),
+							format_type_be(valuetype)),
+					 errhint("You will need to rewrite or cast the expression.")));
+
+		expr = ExecPrepareExpr((Expr *) value, estate);
+
+		datum[i] = ExecEvalExpr(expr, ecxt, &isnull, NULL);
+		if (isnull)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("a partition value must not be NULL")));
+
+		MemoryContextSwitchTo(oldcxt);
+
+		/* by-reference results must be copied out before the reset below */
+		if (!typbyval)
+		{
+			if (typlen == -1)
+				datum[i] = PointerGetDatum(PG_DETOAST_DATUM_COPY(datum[i]));
+			else
+				datum[i] = datumCopy(datum[i], false, typlen);
+		}
+
+		ResetPerTupleExprContext(estate);
+		i++;
+	}
+
+	/* everything to be returned now lives in the caller's context */
+	FreeExecutorState(estate);
+
+	return datum;
+}
+
+/*
+ * evaluateRangeBounds
+ *
+ * Evaluate a list of expressions to build a datum array for a range bound.
+ *
+ *	values		list of expression nodes, one per partition key column;
+ *				they are not transformed here
+ *	attname		names of the partition key columns (for error messages)
+ *	typid, typmod, typlen, typbyval
+ *				per-column type information, indexed like values
+ *	partnatts	number of partition key columns
+ *
+ * The i'th expression is coerced to the type of the i'th key column and
+ * evaluated.  The returned array and any by-reference values in it are
+ * allocated in the caller's memory context; the executor state used for
+ * evaluation is freed before returning.
+ *
+ * Raises an error if a value cannot be coerced to its column's type or
+ * evaluates to NULL.
+ */
+static Datum *
+evaluateRangeBounds(List *values, NameData *attname,
+					Oid *typid, int32 *typmod, int16 *typlen, bool *typbyval,
+					int partnatts)
+{
+	ListCell   *cell;
+	EState	   *estate;
+	ExprContext *ecxt;
+	int			i;
+	Datum	   *datum;
+
+	/* the per-column arrays are indexed by position in the list */
+	Assert(list_length(values) == partnatts);
+
+	datum = (Datum *) palloc(partnatts * sizeof(Datum));
+	estate = CreateExecutorState();
+	ecxt = GetPerTupleExprContext(estate);
+
+	i = 0;
+	foreach(cell, values)
+	{
+		Node	   *value = (Node *) lfirst(cell);
+		bool		isnull;
+		ExprState  *expr;
+		MemoryContext oldcxt;
+		Oid			valuetype;
+
+		/* coerce and evaluate in memory that is reset after each value */
+		oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+
+		valuetype = exprType(value);
+		value = coerce_to_target_type(NULL,
+									  value, valuetype,
+									  typid[i], typmod[i],
+									  COERCION_ASSIGNMENT,
+									  COERCE_IMPLICIT_CAST,
+									  -1);
+		if (value == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("partition key column \"%s\" is of type %s"
+							" but specified value is of type %s",
+							NameStr(attname[i]),
+							format_type_be(typid[i]),
+							format_type_be(valuetype)),
+					 errhint("You will need to rewrite or cast the expression.")));
+
+		expr = ExecPrepareExpr((Expr *) value, estate);
+
+		datum[i] = ExecEvalExpr(expr, ecxt, &isnull, NULL);
+		if (isnull)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("a partition value must not be NULL")));
+
+		MemoryContextSwitchTo(oldcxt);
+
+		/* by-reference results must be copied out before the reset below */
+		if (!typbyval[i])
+		{
+			if (typlen[i] == -1)
+				datum[i] = PointerGetDatum(PG_DETOAST_DATUM_COPY(datum[i]));
+			else
+				datum[i] = datumCopy(datum[i], false, typlen[i]);
+		}
+
+		ResetPerTupleExprContext(estate);
+		i++;
+	}
+
+	/* everything to be returned now lives in the caller's context */
+	FreeExecutorState(estate);
+
+	return datum;
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 5282a4f..51abd2c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2431,6 +2431,53 @@ _copyColumnDef(const ColumnDef *from)
return newnode;
}
+/*
+ * _copyPartitionBy
+ *
+ * Copy a PartitionBy node: the strategy is copied as a scalar and the
+ * partcols list via node copy.
+ */
+static PartitionBy *
+_copyPartitionBy(const PartitionBy *from)
+{
+
+ PartitionBy *newnode = makeNode(PartitionBy);
+
+ COPY_SCALAR_FIELD(strategy);
+ COPY_NODE_FIELD(partcols);
+
+ return newnode;
+}
+
+/*
+ * _copyPartitionElem
+ *
+ * Copy a PartitionElem node: the column name is copied as a string and the
+ * opclass via node copy.
+ */
+static PartitionElem *
+_copyPartitionElem(const PartitionElem *from)
+{
+ PartitionElem *newnode = makeNode(PartitionElem);
+
+ COPY_STRING_FIELD(name);
+ COPY_NODE_FIELD(opclass);
+
+ return newnode;
+}
+
+/*
+ * _copyPartitionValues
+ *
+ * Copy a PartitionValues node: the list values and both range bound lists
+ * are copied via node copy.
+ */
+static PartitionValues *
+_copyPartitionValues(const PartitionValues *from)
+{
+ PartitionValues *newnode = makeNode(PartitionValues);
+
+ COPY_NODE_FIELD(listvalues);
+ COPY_NODE_FIELD(rangelbounds);
+ COPY_NODE_FIELD(rangeubounds);
+
+ return newnode;
+}
+
+/*
+ * _copyPartitionDef
+ *
+ * Copy a PartitionDef node: name, parent and values are all copied via
+ * node copy.
+ */
+static PartitionDef *
+_copyPartitionDef(const PartitionDef *from)
+{
+ PartitionDef *newnode = makeNode(PartitionDef);
+
+ COPY_NODE_FIELD(name);
+ COPY_NODE_FIELD(parent);
+ COPY_NODE_FIELD(values);
+
+ return newnode;
+}
+
static Constraint *
_copyConstraint(const Constraint *from)
{
@@ -2806,8 +2853,11 @@ static void
CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode)
{
COPY_NODE_FIELD(relation);
+ COPY_NODE_FIELD(partitionOf);
COPY_NODE_FIELD(tableElts);
COPY_NODE_FIELD(inhRelations);
+ COPY_NODE_FIELD(partValues);
+ COPY_NODE_FIELD(partitionby);
COPY_NODE_FIELD(ofTypename);
COPY_NODE_FIELD(constraints);
COPY_NODE_FIELD(options);
@@ -4694,6 +4744,18 @@ copyObject(const void *from)
case T_ColumnDef:
retval = _copyColumnDef(from);
break;
+ case T_PartitionBy:
+ retval = _copyPartitionBy(from);
+ break;
+ case T_PartitionElem:
+ retval = _copyPartitionElem(from);
+ break;
+ case T_PartitionValues:
+ retval = _copyPartitionValues(from);
+ break;
+ case T_PartitionDef:
+ retval = _copyPartitionDef(from);
+ break;
case T_Constraint:
retval = _copyConstraint(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index fe509b0..66f04a0 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1102,8 +1102,11 @@ static bool
_equalCreateStmt(const CreateStmt *a, const CreateStmt *b)
{
COMPARE_NODE_FIELD(relation);
+ COMPARE_NODE_FIELD(partitionOf);
COMPARE_NODE_FIELD(tableElts);
COMPARE_NODE_FIELD(inhRelations);
+ COMPARE_NODE_FIELD(partValues);
+ COMPARE_NODE_FIELD(partitionby);
COMPARE_NODE_FIELD(ofTypename);
COMPARE_NODE_FIELD(constraints);
COMPARE_NODE_FIELD(options);
@@ -2268,6 +2271,44 @@ _equalColumnDef(const ColumnDef *a, const ColumnDef *b)
}
+/*
+ * Compare two PartitionBy nodes on strategy and partcols.  Reaching the
+ * final return means no field comparison bailed out early (the COMPARE_*
+ * macros are expected to return false on a mismatch).
+ */
static bool
+_equalPartitionBy(const PartitionBy *a, const PartitionBy *b)
+{
+ COMPARE_SCALAR_FIELD(strategy);
+ COMPARE_NODE_FIELD(partcols);
+
+ return true;
+}
+
+/* Compare two PartitionElem nodes on column name and opclass. */
+static bool
+_equalPartitionElem(const PartitionElem *a, const PartitionElem *b)
+{
+ COMPARE_STRING_FIELD(name);
+ COMPARE_NODE_FIELD(opclass);
+
+ return true;
+}
+
+/* Compare two PartitionValues nodes on list values and both range bound lists. */
+static bool
+_equalPartitionValues(const PartitionValues *a, const PartitionValues *b)
+{
+ COMPARE_NODE_FIELD(listvalues);
+ COMPARE_NODE_FIELD(rangelbounds);
+ COMPARE_NODE_FIELD(rangeubounds);
+
+ return true;
+}
+
+/* Compare two PartitionDef nodes on name, parent and values. */
+static bool
+_equalPartitionDef(const PartitionDef *a, const PartitionDef *b)
+{
+ COMPARE_NODE_FIELD(name);
+ COMPARE_NODE_FIELD(parent);
+ COMPARE_NODE_FIELD(values);
+
+ return true;
+}
+
+static bool
_equalConstraint(const Constraint *a, const Constraint *b)
{
COMPARE_SCALAR_FIELD(contype);
@@ -3120,6 +3161,18 @@ equal(const void *a, const void *b)
case T_ColumnDef:
retval = _equalColumnDef(a, b);
break;
+ case T_PartitionBy:
+ retval = _equalPartitionBy(a, b);
+ break;
+ case T_PartitionElem:
+ retval = _equalPartitionElem(a, b);
+ break;
+ case T_PartitionValues:
+ retval = _equalPartitionValues(a, b);
+ break;
+ case T_PartitionDef:
+ retval = _equalPartitionDef(a, b);
+ break;
case T_Constraint:
retval = _equalConstraint(a, b);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 2f417fe..3db5e8b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2017,8 +2017,11 @@ static void
_outCreateStmtInfo(StringInfo str, const CreateStmt *node)
{
WRITE_NODE_FIELD(relation);
+ WRITE_NODE_FIELD(partitionOf);
WRITE_NODE_FIELD(tableElts);
WRITE_NODE_FIELD(inhRelations);
+ WRITE_NODE_FIELD(partValues);
+ WRITE_NODE_FIELD(partitionby);
WRITE_NODE_FIELD(ofTypename);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(options);
@@ -2258,6 +2261,44 @@ _outIndexElem(StringInfo str, const IndexElem *node)
}
+/*
+ * Write a PartitionBy node to str under the label "PARTITIONBY": the
+ * strategy as a char field, partcols as a node field.
+ */
static void
+_outPartitionBy(StringInfo str, const PartitionBy *node)
+{
+ WRITE_NODE_TYPE("PARTITIONBY");
+
+ WRITE_CHAR_FIELD(strategy);
+ WRITE_NODE_FIELD(partcols);
+}
+
+/* Write a PartitionElem node to str under the label "PARTITIONELEM". */
+static void
+_outPartitionElem(StringInfo str, const PartitionElem *node)
+{
+ WRITE_NODE_TYPE("PARTITIONELEM");
+
+ WRITE_STRING_FIELD(name);
+ WRITE_NODE_FIELD(opclass);
+}
+
+/* Write a PartitionValues node to str under the label "PARTITIONVALUES". */
+static void
+_outPartitionValues(StringInfo str, const PartitionValues *node)
+{
+ WRITE_NODE_TYPE("PARTITIONVALUES");
+
+ WRITE_NODE_FIELD(listvalues);
+ WRITE_NODE_FIELD(rangelbounds);
+ WRITE_NODE_FIELD(rangeubounds);
+}
+
+/* Write a PartitionDef node to str under the label "PARTITIONDEF". */
+static void
+_outPartitionDef(StringInfo str, const PartitionDef *node)
+{
+ WRITE_NODE_TYPE("PARTITIONDEF");
+
+ WRITE_NODE_FIELD(name);
+ WRITE_NODE_FIELD(parent);
+ WRITE_NODE_FIELD(values);
+}
+
+static void
_outQuery(StringInfo str, const Query *node)
{
WRITE_NODE_TYPE("QUERY");
@@ -3240,6 +3281,18 @@ _outNode(StringInfo str, const void *obj)
case T_IndexElem:
_outIndexElem(str, obj);
break;
+ case T_PartitionBy:
+ _outPartitionBy(str, obj);
+ break;
+ case T_PartitionElem:
+ _outPartitionElem(str, obj);
+ break;
+ case T_PartitionValues:
+ _outPartitionValues(str, obj);
+ break;
+ case T_PartitionDef:
+ _outPartitionDef(str, obj);
+ break;
case T_Query:
_outQuery(str, obj);
break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6c21002..54b3fea 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -222,6 +222,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
struct ImportQual *importqual;
InsertStmt *istmt;
VariableSetStmt *vsetstmt;
+ PartitionElem *pelem;
+ PartitionValues *pvalues;
+ PartitionBy *partby;
}
%type <node> stmt schema_stmt
@@ -519,6 +522,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
opt_frame_clause frame_extent frame_bound
%type <str> opt_existing_window_name
%type <boolean> opt_if_not_exists
+%type <partby> OptPartitionBy PartitionBy
+%type <list> part_params
+%type <pelem> part_elem
+%type <pvalues> PartitionValues
+%type <list> ValuesList
/*
* Non-keyword token types. These are hard-wired into the "flex" lexer.
@@ -582,7 +590,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
KEY
LABEL LANGUAGE LARGE_P LAST_P LATERAL_P
- LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL
+ LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LIST LISTEN LOAD LOCAL
LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOCKED LOGGED
MAPPING MATCH MATERIALIZED MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
@@ -2756,22 +2764,23 @@ copy_generic_opt_arg_list_item:
*****************************************************************************/
CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
- OptInherit OptWith OnCommitOption OptTableSpace
+ OptInherit OptPartitionBy OptWith OnCommitOption OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$4->relpersistence = $2;
n->relation = $4;
n->tableElts = $6;
n->inhRelations = $8;
+ n->partitionby = $9;
n->constraints = NIL;
- n->options = $9;
- n->oncommit = $10;
- n->tablespacename = $11;
+ n->options = $10;
+ n->oncommit = $11;
+ n->tablespacename = $12;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '('
- OptTableElementList ')' OptInherit OptWith OnCommitOption
+ OptTableElementList ')' OptInherit OptPartitionBy OptWith OnCommitOption
OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
@@ -2779,15 +2788,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
n->relation = $7;
n->tableElts = $9;
n->inhRelations = $11;
+ n->partitionby = $12;
n->constraints = NIL;
- n->options = $12;
- n->oncommit = $13;
- n->tablespacename = $14;
+ n->options = $13;
+ n->oncommit = $14;
+ n->tablespacename = $15;
n->if_not_exists = true;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE qualified_name OF any_name
- OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+ OptTypedTableElementList OptPartitionBy OptWith OnCommitOption OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$4->relpersistence = $2;
@@ -2795,15 +2805,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
n->tableElts = $7;
n->ofTypename = makeTypeNameFromNameList($6);
n->ofTypename->location = @6;
+ n->partitionby = $8;
n->constraints = NIL;
- n->options = $8;
- n->oncommit = $9;
- n->tablespacename = $10;
+ n->options = $9;
+ n->oncommit = $10;
+ n->tablespacename = $11;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name
- OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+ OptTypedTableElementList OptPartitionBy OptWith OnCommitOption OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$7->relpersistence = $2;
@@ -2811,10 +2822,43 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
n->tableElts = $10;
n->ofTypename = makeTypeNameFromNameList($9);
n->ofTypename->location = @9;
+ n->partitionby = $11;
n->constraints = NIL;
- n->options = $11;
- n->oncommit = $12;
- n->tablespacename = $13;
+ n->options = $12;
+ n->oncommit = $13;
+ n->tablespacename = $14;
+ n->if_not_exists = true;
+ $$ = (Node *)n;
+ }
+ | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
+ PartitionValues OptPartitionBy OptWith OnCommitOption OptTableSpace
+ {
+ CreateStmt *n = makeNode(CreateStmt);
+ $4->relpersistence = $2;
+ n->relation = $4;
+ n->partitionOf = $7;
+ n->partValues = $8;
+ n->partitionby = $9;
+ n->constraints = NIL;
+ n->options = $10;
+ n->oncommit = $11;
+ n->tablespacename = $12;
+ n->if_not_exists = false;
+ $$ = (Node *)n;
+ }
+ | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF qualified_name
+ PartitionValues OptPartitionBy OptWith OnCommitOption OptTableSpace
+ {
+ CreateStmt *n = makeNode(CreateStmt);
+ $7->relpersistence = $2;
+ n->relation = $7;
+ n->partitionOf = $10;
+ n->partValues = $11;
+ n->partitionby = $12;
+ n->constraints = NIL;
+ n->options = $13;
+ n->oncommit = $14;
+ n->tablespacename = $15;
n->if_not_exists = true;
$$ = (Node *)n;
}
@@ -2896,6 +2940,7 @@ TypedTableElement:
| TableConstraint { $$ = $1; }
;
+
columnDef: ColId Typename create_generic_options ColQualList
{
ColumnDef *n = makeNode(ColumnDef);
@@ -3357,6 +3402,76 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; }
| /*EMPTY*/ { $$ = NIL; }
;
+/* Optional partition key (PARTITION BY ...) definition; NULL when omitted */
+OptPartitionBy: PartitionBy { $$ = $1; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
+/*
+ * Partition key clause.  Both alternatives build a PartitionBy node from
+ * the parenthesized column list ($6); only the strategy differs.
+ */
+PartitionBy: PARTITION BY RANGE ON '(' part_params ')'
+                {
+                    PartitionBy *pby = makeNode(PartitionBy);
+
+                    pby->partcols = $6;
+                    pby->strategy = PARTITION_STRAT_RANGE;
+
+                    $$ = pby;
+                }
+            | PARTITION BY LIST ON '(' part_params ')'
+                {
+                    PartitionBy *pby = makeNode(PartitionBy);
+
+                    pby->partcols = $6;
+                    pby->strategy = PARTITION_STRAT_LIST;
+
+                    $$ = pby;
+                }
+        ;
+
+/* Comma-separated list of partition key elements, built left to right */
+part_params: part_elem { $$ = list_make1($1); }
+ | part_params ',' part_elem { $$ = lappend($1, $3); }
+ ;
+
+/* One partition key element: a column name with an optional operator class */
+part_elem: ColId opt_class
+                {
+                    PartitionElem *elem = makeNode(PartitionElem);
+
+                    elem->opclass = $2;
+                    elem->name = $1;
+                    $$ = elem;
+                }
+        ;
+
+/*
+ * Values held by a partition.  Exactly one form is filled in: either the
+ * explicit value list, or a lower/upper bound pair; the unused members are
+ * set to NIL.
+ */
+PartitionValues:
+            FOR VALUES opt_in '(' ValuesList ')'
+                {
+                    PartitionValues *pv = makeNode(PartitionValues);
+
+                    pv->rangelbounds = NIL;
+                    pv->rangeubounds = NIL;
+                    pv->listvalues = $5;
+                    $$ = pv;
+                }
+            | FOR VALUES BETWEEN '(' ValuesList ')' AND '(' ValuesList ')'
+                {
+                    PartitionValues *pv = makeNode(PartitionValues);
+
+                    pv->rangelbounds = $5;
+                    pv->rangeubounds = $9;
+                    pv->listvalues = NIL;
+                    $$ = pv;
+                }
+        ;
+
+/* Optional noise word IN after FOR VALUES; produces no value */
+opt_in: IN_P {}
+ | /*EMPTY*/ {}
+ ;
+
+/* Comma-separated list of a_expr nodes, kept in the order written */
+ValuesList:
+ a_expr { $$ = list_make1($1); }
+ | ValuesList ',' a_expr { $$ = lappend($1, $3); }
+ ;
+
/* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */
OptWith:
WITH reloptions { $$ = $2; }
@@ -13289,6 +13404,7 @@ unreserved_keyword:
| LAST_P
| LEAKPROOF
| LEVEL
+ | LIST
| LISTEN
| LOAD
| LOCAL
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 7829bcb..4e77256 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2760,6 +2760,8 @@ ParseExprKindName(ParseExprKind exprKind)
return "EXECUTE";
case EXPR_KIND_TRIGGER_WHEN:
return "WHEN";
+ case EXPR_KIND_PARTITION_VALUES:
+ return "PARTITION FOR VALUES";
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index c29f106..acea185 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -83,6 +83,7 @@ typedef struct
List *alist; /* "after list" of things to do after creating
* the table */
IndexStmt *pkey; /* PRIMARY KEY index, if any */
+ List *partitionElts; /* CREATE TABLE ... PARTITION OF ... */
} CreateStmtContext;
/* State shared by transformCreateSchemaStmt and its subroutines */
@@ -123,6 +124,11 @@ static void transformConstraintAttrs(CreateStmtContext *cxt,
List *constraintList);
static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column);
static void setSchemaName(char *context_schema, char **stmt_schema_name);
+static void transformPartitionOf(CreateStmtContext *cxt,
+ RangeVar *parent,
+ PartitionValues *values);
+static PartitionValues* transformPartitionValues(CreateStmtContext *cxt,
+ PartitionValues *values);
/*
@@ -217,14 +223,37 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString)
cxt.blist = NIL;
cxt.alist = NIL;
cxt.pkey = NULL;
+ cxt.partitionElts = NIL;
cxt.hasoids = interpretOidsOption(stmt->options, true);
Assert(!stmt->ofTypename || !stmt->inhRelations); /* grammar enforces */
+ Assert(!stmt->ofTypename || !stmt->partitionOf); /* grammar enforces */
+ Assert(!stmt->inhRelations || !stmt->partitionOf); /* grammar enforces */
if (stmt->ofTypename)
transformOfType(&cxt, stmt->ofTypename);
/*
+ * Transform "PARTITION OF <parent> FOR VALUES ..." by:
+ *
+ * 1) replacing the table element list with
+ *    (LIKE <parent> INCLUDING ALL)
+ *
+ * 2) adding to cxt->alist an AlterTableCmd of subtype
+ *    AT_AttachPartition on the <parent>, which performs
+ *    ALTER TABLE <thisrel> INHERITS(<parent>) and stores the partition
+ *    values (FOR VALUES) into catalog pg_partition
+ */
+ if (stmt->partitionOf)
+ {
+ transformPartitionOf(&cxt, stmt->partitionOf, stmt->partValues);
+
+ /* override based on the result of transformPartitionOf */
+ stmt->tableElts = cxt.partitionElts;
+ stmt->relation->schemaname = cxt.relation->schemaname;
+ }
+
+ /*
* Run through each primary element in the table creation clause. Separate
* column defs from constraints, and do preliminary analysis.
*/
@@ -2831,3 +2860,104 @@ setSchemaName(char *context_schema, char **stmt_schema_name)
"different from the one being created (%s)",
*stmt_schema_name, context_schema)));
}
+
+/*
+ * transformPartitionOf
+ *
+ * Expand "PARTITION OF <parent> FOR VALUES ..." into two pieces:
+ *
+ *  - cxt->partitionElts is set to a single (LIKE <parent> INCLUDING ALL)
+ *    clause;
+ *  - an ALTER TABLE <parent> statement is appended to cxt->alist, carrying
+ *    one AT_AttachPartition command whose definition is a PartitionDef
+ *    naming the new relation, its parent and the transformed values.
+ *
+ * A relation given without a schema takes the parent's schema name.
+ */
+static void
+transformPartitionOf(CreateStmtContext *cxt,
+                     RangeVar *parent,
+                     PartitionValues *values)
+{
+    TableLikeClause *likeclause;
+    PartitionDef   *partdef;
+    AlterTableCmd  *cmd;
+    AlterTableStmt *alterstmt;
+
+    /* An unqualified partition name lands in the parent's schema. */
+    if (cxt->relation->schemaname == NULL)
+        cxt->relation->schemaname = parent->schemaname;
+
+    /* The column definitions come from the parent through a LIKE clause. */
+    likeclause = makeNode(TableLikeClause);
+    likeclause->options = CREATE_TABLE_LIKE_ALL;
+    likeclause->relation = parent;
+    cxt->partitionElts = list_make1(likeclause);
+
+    /* Describe the partition: who it is, whose it is, and what it holds. */
+    partdef = makeNode(PartitionDef);
+    partdef->name = cxt->relation;
+    partdef->parent = parent;
+    partdef->values = transformPartitionValues(cxt, values);
+
+    /* Wrap that in an attach-partition command against the parent ... */
+    cmd = makeNode(AlterTableCmd);
+    cmd->def = (Node *) partdef;
+    cmd->subtype = AT_AttachPartition;
+
+    alterstmt = makeNode(AlterTableStmt);
+    alterstmt->relkind = OBJECT_TABLE;
+    alterstmt->relation = parent;
+    alterstmt->cmds = list_make1(cmd);
+
+    /* ... and queue it among the statements in cxt->alist. */
+    cxt->alist = lappend(cxt->alist, alterstmt);
+}
+
+/*
+ * transformPartitionValues
+ *
+ * Run transformExpr() over every expression in the raw PartitionValues
+ * produced by the grammar and return a new PartitionValues node holding the
+ * transformed expressions in the same order.  If listvalues is non-empty
+ * only that list is processed; otherwise the lower and upper range bound
+ * lists are processed pairwise.
+ *
+ * Raises an error if the two range bound lists differ in length.  The
+ * grammar accepts e.g. BETWEEN (1, 2) AND (3), so this is user input that
+ * must be checked here rather than merely asserted.
+ */
+static PartitionValues*
+transformPartitionValues(CreateStmtContext *cxt, PartitionValues *values)
+{
+    ListCell   *cell1,
+               *cell2;
+    PartitionValues *result = (PartitionValues *) makeNode(PartitionValues);
+
+    if (values->listvalues)
+    {
+        foreach(cell1, values->listvalues)
+        {
+            Node       *value = (Node *) lfirst(cell1);
+
+            result->listvalues = lappend(result->listvalues,
+                                         transformExpr(cxt->pstate, value,
+                                                       EXPR_KIND_PARTITION_VALUES));
+        }
+    }
+    else
+    {
+        if (list_length(values->rangelbounds) !=
+            list_length(values->rangeubounds))
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                     errmsg("lower and upper range bounds must contain the same number of values")));
+
+        forboth(cell1, values->rangelbounds, cell2, values->rangeubounds)
+        {
+            Node       *value;
+
+            /* lower bound */
+            value = (Node *) lfirst(cell1);
+            result->rangelbounds = lappend(result->rangelbounds,
+                                           transformExpr(cxt->pstate, value,
+                                                         EXPR_KIND_PARTITION_VALUES));
+            /* upper bound */
+            value = (Node *) lfirst(cell2);
+            result->rangeubounds = lappend(result->rangeubounds,
+                                           transformExpr(cxt->pstate, value,
+                                                         EXPR_KIND_PARTITION_VALUES));
+        }
+    }
+
+    return result;
+}
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index c037b05..ded01aa 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -31,6 +31,9 @@
#include "postgres.h"
#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/pg_range.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "utils/builtins.h"
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 1db4ba8..c1958fd 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -30,6 +30,7 @@
#include <fcntl.h>
#include <unistd.h>
+#include "access/nbtree.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/reloptions.h"
@@ -46,9 +47,13 @@
#include "catalog/pg_authid.h"
#include "catalog/pg_auth_members.h"
#include "catalog/pg_constraint.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partition_fn.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_tablespace.h"
@@ -282,7 +287,9 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numSupport);
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
static void unlink_initfile(const char *initfilename);
-
+static void RelationBuildPartitionKey(Relation relation);
+static void RelationBuildPartitionInfo(Relation relation);
+static int partition_cmp(const void *a, const void *b, void *arg);
/*
* ScanPgRelation
@@ -1055,6 +1062,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
else
relation->rd_rsdesc = NULL;
+ if (relation->rd_rel->relispartitioned)
+ {
+ RelationBuildPartitionKey(relation);
+ RelationBuildPartitionInfo(relation);
+ }
+ else
+ {
+ relation->rd_partnatts = 0;
+ relation->rd_partstrategy = '\0';
+ relation->rd_partattrs = NULL;
+ relation->rd_partclass = NULL;
+ relation->rd_partitioninfo = NULL;
+ }
+
/*
* if it's an index, initialize index-related information
*/
@@ -2014,6 +2035,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
bms_free(relation->rd_keyattr);
bms_free(relation->rd_idattr);
FreeTriggerDesc(relation->trigdesc);
+ if(relation->rd_partattrs)
+ pfree(relation->rd_partattrs);
+ if(relation->rd_partclass)
+ pfree(relation->rd_partclass);
+ if(relation->rd_partattcmpfn)
+ pfree(relation->rd_partattcmpfn);
if (relation->rd_options)
pfree(relation->rd_options);
if (relation->rd_indextuple)
@@ -2026,6 +2053,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
MemoryContextDelete(relation->rd_rulescxt);
if (relation->rd_rsdesc)
MemoryContextDelete(relation->rd_rsdesc->rscxt);
+ if (relation->rd_partitioninfo)
+ MemoryContextDelete(relation->rd_partitioninfo->picxt);
if (relation->rd_fdwroutine)
pfree(relation->rd_fdwroutine);
pfree(relation);
@@ -4439,6 +4468,271 @@ RelationGetExclusionInfo(Relation indexRelation,
MemoryContextSwitchTo(oldcxt);
}
+/*
+ * RelationBuildPartitionKey
+ *
+ * Fill in the partition key fields of a relcache entry (rd_partnatts,
+ * rd_partstrategy, rd_partattrs, rd_partclass and rd_partattcmpfn) from the
+ * relation's pg_partitioned_rel row.  For each key column the btree
+ * ordering support function (BTORDER_PROC) of its operator class is looked
+ * up and prepared for later calls.
+ *
+ * If no pg_partitioned_rel row exists the fields are reset to "no partition
+ * key" and the function returns quietly.
+ */
+static void
+RelationBuildPartitionKey(Relation relation)
+{
+    HeapTuple   tuple;
+    Form_pg_partitioned_rel prelform;
+    int         partnatts;
+    oidvector  *partclass;
+    int         i;
+    Datum       datum;
+    bool        isnull;
+    MemoryContext oldcxt;
+
+    tuple = SearchSysCache1(PARTITIONEDRELID,
+                            ObjectIdGetDatum(RelationGetRelid(relation)));
+
+    /* if no tuple found, it means the entry was just dropped */
+    if (!HeapTupleIsValid(tuple))
+    {
+        /* leave the entry in the same state as for an unpartitioned table */
+        relation->rd_partnatts = 0;
+        relation->rd_partstrategy = '\0';
+        relation->rd_partattrs = NULL;
+        relation->rd_partclass = NULL;
+        relation->rd_partattcmpfn = NULL;
+        return;
+    }
+
+    prelform = (Form_pg_partitioned_rel) GETSTRUCT(tuple);
+    partnatts = prelform->partnatts;
+
+    /* Extract partclass from the pg_partitioned_rel tuple */
+    datum = SysCacheGetAttr(PARTITIONEDRELID, tuple,
+                            Anum_pg_partitioned_rel_partclass, &isnull);
+    Assert(!isnull);
+    partclass = (oidvector *) DatumGetPointer(datum);
+
+    /* The arrays hang off the relcache entry, so they must be long-lived */
+    oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+    relation->rd_partnatts = partnatts;
+    relation->rd_partstrategy = prelform->partstrategy;
+    relation->rd_partattrs = (AttrNumber *) palloc(partnatts * sizeof(AttrNumber));
+    relation->rd_partclass = (Oid *) palloc(partnatts * sizeof(Oid));
+    relation->rd_partattcmpfn = (FmgrInfo *) palloc(partnatts * sizeof(FmgrInfo));
+    MemoryContextSwitchTo(oldcxt);
+
+    for (i = 0; i < partnatts; i++)
+    {
+        HeapTuple   opctup;
+        Form_pg_opclass opcform;
+        AttrNumber  partattno;
+        Oid         opfamily;
+        Oid         opcintype;
+        Oid         typid;
+        Oid         cmptype;
+        Oid         cmpid;
+
+        partattno = (AttrNumber) prelform->partkey.values[i];
+        relation->rd_partattrs[i] = partattno;
+        relation->rd_partclass[i] = (Oid) partclass->values[i];
+
+        opctup = SearchSysCache1(CLAOID,
+                                 ObjectIdGetDatum(relation->rd_partclass[i]));
+        if (!HeapTupleIsValid(opctup))
+            elog(ERROR, "cache lookup failed for opclass %u",
+                 relation->rd_partclass[i]);
+        opcform = (Form_pg_opclass) GETSTRUCT(opctup);
+        opfamily = opcform->opcfamily;
+        opcintype = opcform->opcintype;
+        ReleaseSysCache(opctup);
+
+        /* Prefer the opclass's declared input type, else the column's type */
+        typid = relation->rd_att->attrs[partattno - 1]->atttypid;
+        cmptype = OidIsValid(opcintype) ? opcintype : typid;
+
+        cmpid = get_opfamily_proc(opfamily, cmptype, cmptype, BTORDER_PROC);
+        if (!OidIsValid(cmpid))
+            elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
+                 BTORDER_PROC, cmptype, cmptype, opfamily);
+
+        /*
+         * The FmgrInfo lives in the relcache entry, so any memory fmgr
+         * attaches to it must come from CacheMemoryContext rather than
+         * whatever context happens to be current.
+         */
+        fmgr_info_cxt(cmpid, &relation->rd_partattcmpfn[i],
+                      CacheMemoryContext);
+    }
+
+    ReleaseSysCache(tuple);
+}
+
+/*
+ * partition_cmp
+ *
+ * Three-way comparator over two Partition pointers, used with qsort_arg().
+ * The range upper bounds are compared column by column with the comparison
+ * functions passed in arg (an array of FmgrInfo, one per column); the first
+ * column that differs decides the result.
+ */
+static int
+partition_cmp(const void *a, const void *b, void *arg)
+{
+    const Partition *left = *(const Partition **) a;
+    const Partition *right = *(const Partition **) b;
+    FmgrInfo   *colcmp = (FmgrInfo *) arg;
+    int         col;
+
+    for (col = 0; col < left->rangenbounds; col++)
+    {
+        int         cmpval;
+
+        cmpval = DatumGetInt32(FunctionCall2Coll(&colcmp[col],
+                                                 DEFAULT_COLLATION_OID,
+                                                 left->rangemaxs[col],
+                                                 right->rangemaxs[col]));
+
+        /* a tie on this column defers to the next one (multicolumn keys) */
+        if (cmpval != 0)
+            return cmpval;
+    }
+
+    /* every column compared equal */
+    return 0;
+}
+
+/*
+ * RelationBuildPartitionInfo
+ *
+ * Initializes rd_partitioninfo.
+ *
+ * Scans pg_partition for rows whose partparent is this relation, loads each
+ * partition with GetPartition(), and packs the result into a PartitionInfo
+ * allocated in its own memory context (picxt, a child of
+ * CacheMemoryContext).  Range partitions are sorted on their upper bounds;
+ * list partitions are stored in scan order.
+ *
+ * rd_partitioninfo is left NULL when the relation has no partitions.
+ */
+static void
+RelationBuildPartitionInfo(Relation relation)
+{
+    MemoryContext picxt;
+    MemoryContext oldcxt = CurrentMemoryContext;
+
+    /* both are assigned inside PG_TRY and read after it or in PG_CATCH */
+    Partition **volatile partitions = NULL;
+    PartitionInfo *volatile pinfo = NULL;
+
+    /*
+     * Create a memory context to hold everything associated with this
+     * relation's partitions.  This makes it easy to clean up during a
+     * relcache flush.
+     */
+    picxt = AllocSetContextCreate(CacheMemoryContext,
+                                  "relation partition info",
+                                  ALLOCSET_SMALL_MINSIZE,
+                                  ALLOCSET_SMALL_INITSIZE,
+                                  ALLOCSET_SMALL_MAXSIZE);
+
+    /*
+     * Since picxt lives under CacheMemoryContext, it is long-lived.  Use
+     * a PG_TRY block to ensure it'll get freed if we fail partway through.
+     */
+    PG_TRY();
+    {
+        Relation    partitionRel;
+        Form_pg_partition form;
+        SysScanDesc scan;
+        ScanKeyData skey[1];
+        HeapTuple   tuple;
+        List       *partitionids = NIL;
+        ListCell   *cell;
+        int         partnatts = relation->rd_partnatts;
+        FmgrInfo   *partcmpfn = relation->rd_partattcmpfn;
+        int         numparts = 0;
+        int         i,
+                    j;
+
+        /* collect the OIDs of all partitions of this relation */
+        partitionRel = heap_open(PartitionRelationId, AccessShareLock);
+        ScanKeyInit(&skey[0],
+                    Anum_pg_partition_partparent,
+                    BTEqualStrategyNumber, F_OIDEQ,
+                    ObjectIdGetDatum(RelationGetRelid(relation)));
+
+        scan = systable_beginscan(partitionRel, PartitionParentIndexId, true,
+                                  NULL, 1, skey);
+
+        while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+        {
+            form = (Form_pg_partition) GETSTRUCT(tuple);
+
+            if (form->partparent == RelationGetRelid(relation))
+            {
+                partitionids = lappend_oid(partitionids, form->partitionid);
+                numparts++;
+            }
+        }
+        systable_endscan(scan);
+        heap_close(partitionRel, AccessShareLock);
+
+        /* partitions found */
+        if (numparts)
+        {
+            /* read info about all the partitions from the catalog */
+            partitions = (Partition **) palloc0(numparts * sizeof(Partition *));
+
+            i = 0;
+            foreach(cell, partitionids)
+            {
+                Oid         partitionid = lfirst_oid(cell);
+
+                partitions[i++] = GetPartition(relation, partitionid);
+            }
+            list_free(partitionids);
+
+            /*
+             * Build a PartitionInfo to put into the relation descriptor.
+             * oldcxt already holds the context to return to; it is not
+             * reassigned here so that PG_CATCH sees a well-defined value.
+             *
+             * NOTE(review): the Datums and value arrays copied below were
+             * produced by GetPartition() before the switch to picxt.  If it
+             * allocates pass-by-reference values in the then-current
+             * context they will not live as long as pinfo -- confirm
+             * against GetPartition().
+             */
+            (void) MemoryContextSwitchTo(picxt);
+            pinfo = MemoryContextAllocZero(picxt, sizeof(PartitionInfo));
+            pinfo->picxt = picxt;
+
+            pinfo->numpartitions = numparts;
+            pinfo->strategy = relation->rd_partstrategy;
+            pinfo->oids = (Oid *) palloc0(numparts * sizeof(Oid));
+
+            switch (relation->rd_partstrategy)
+            {
+                case 'r':
+
+                    /*
+                     * NOTE(review): this assumes rangemins/rangemaxs are
+                     * fixed-size arrays inside PartitionInfo; if they are
+                     * pointers they are still NULL here -- confirm against
+                     * the struct definition.
+                     */
+                    for (i = 0; i < partnatts; i++)
+                    {
+                        pinfo->rangemins[i] = (Datum *) palloc0(numparts * sizeof(Datum));
+                        pinfo->rangemaxs[i] = (Datum *) palloc0(numparts * sizeof(Datum));
+                    }
+
+                    /* sort on rangemax using comparator partition_cmp() */
+                    qsort_arg(partitions, numparts, sizeof(Partition *),
+                              partition_cmp, partcmpfn);
+                    for (i = 0; i < numparts; i++)
+                    {
+                        pinfo->oids[i] = partitions[i]->oid;
+
+                        for (j = 0; j < partnatts; j++)
+                        {
+                            pinfo->rangemins[j][i] = partitions[i]->rangemins[j];
+                            pinfo->rangemaxs[j][i] = partitions[i]->rangemaxs[j];
+                        }
+                    }
+                    break;
+
+                case 'l':
+                    /* simply copy list partitions; no optimizations devised yet */
+                    pinfo->listnvalues = (int *) palloc0(numparts * sizeof(int));
+                    pinfo->listvalues = (Datum **) palloc0(numparts * sizeof(Datum *));
+                    for (i = 0; i < numparts; i++)
+                    {
+                        pinfo->oids[i] = partitions[i]->oid;
+                        pinfo->listnvalues[i] = partitions[i]->listnvalues;
+                        pinfo->listvalues[i] = partitions[i]->listvalues;
+                    }
+                    break;
+
+                default:
+                    elog(ERROR, "unrecognized partitioning strategy: %d",
+                         (int) relation->rd_partstrategy);
+                    break;
+            }
+
+            MemoryContextSwitchTo(oldcxt);
+            pfree(partitions);
+            partitions = NULL;
+        }
+    }
+    PG_CATCH();
+    {
+        /* Delete picxt, first making sure it isn't active */
+        MemoryContextSwitchTo(oldcxt);
+        /* pfree() does not accept NULL, and we may have failed before palloc */
+        if (partitions != NULL)
+            pfree(partitions);
+        MemoryContextDelete(picxt);
+        PG_RE_THROW();
+    }
+    PG_END_TRY();
+
+    /*
+     * With no partitions nothing was stored in picxt and nothing points to
+     * it, so drop it rather than leak a context under CacheMemoryContext.
+     */
+    if (pinfo == NULL)
+        MemoryContextDelete(picxt);
+
+    /* Success --- attach the partition info (if any) to the relation descriptor */
+    relation->rd_partitioninfo = pinfo;
+}
/*
* Routines to support ereport() reports of relation-related errors
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index bd27168..50528b7 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -47,6 +47,8 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
+#include "catalog/pg_partition.h"
+#include "catalog/pg_partitioned_rel.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_range.h"
#include "catalog/pg_rewrite.h"
@@ -565,6 +567,28 @@ static const struct cachedesc cacheinfo[] = {
},
8
},
+ {PartitionRelationId, /* PARTITIONID */
+ PartitionIdIndexId,
+ 1,
+ {
+ Anum_pg_partition_partitionid,
+ 0,
+ 0,
+ 0
+ },
+ 64
+ },
+ {PartitionedRelRelationId, /* PARTITIONEDRELID */
+ PartitionedRelrelidIndexId,
+ 1,
+ {
+ Anum_pg_partitioned_rel_partrelid,
+ 0,
+ 0,
+ 0
+ },
+ 64
+ },
{ProcedureRelationId, /* PROCNAMEARGSNSP */
ProcedureNameArgsNspIndexId,
3,
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 6481ac8..d44c5f0 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -148,6 +148,8 @@ typedef enum ObjectClass
OCLASS_EXTENSION, /* pg_extension */
OCLASS_EVENT_TRIGGER, /* pg_event_trigger */
OCLASS_POLICY, /* pg_policy */
+ OCLASS_PARTITIONED_REL, /* pg_partitioned_rel */
+ OCLASS_PARTITION, /* pg_partition */
MAX_OCLASS /* MUST BE LAST */
} ObjectClass;
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index e5c204d..6f7c1a5 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -130,5 +130,14 @@ extern void CheckAttributeType(const char *attname,
Oid atttypid, Oid attcollation,
List *containing_rowtypes,
bool allow_system_table_mods);
-
+extern void StorePartitionKey(Relation rel, int nattrs,
+ AttrNumber *partKeyAttrNumbers,
+ Oid *partClassOids,
+ char strategy);
+extern void RemovePartitionKeyByRelId(Oid relid);
+extern void RemovePartitionDefByRelId(Oid relid);
+extern void StorePartitionValues(Relation childrel,
+ Relation parentrel,
+ int listnvalues,
+ Datum *datum);
#endif /* HEAP_H */
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index a680229..fc77502 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -305,6 +305,15 @@ DECLARE_UNIQUE_INDEX(pg_policy_oid_index, 3257, on pg_policy using btree(oid oid
DECLARE_UNIQUE_INDEX(pg_policy_polrelid_polname_index, 3258, on pg_policy using btree(polrelid oid_ops, polname name_ops));
#define PolicyPolrelidPolnameIndexId 3258
+DECLARE_UNIQUE_INDEX(pg_partitioned_rel_partrelid_index, 3278, on pg_partitioned_rel using btree(partrelid oid_ops));
+#define PartitionedRelrelidIndexId 3278
+
+DECLARE_UNIQUE_INDEX(pg_partition_partitionid_index, 3280, on pg_partition using btree(partitionid oid_ops));
+#define PartitionIdIndexId 3280
+
+DECLARE_INDEX(pg_partition_parent_index, 3281, on pg_partition using btree(partparent oid_ops));
+#define PartitionParentIndexId 3281
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 8b4c35c..7ae11a1 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -65,6 +65,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
bool relhasrules; /* has (or has had) any rules */
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) derived classes */
+ bool relispartitioned; /* has (or has had) partition key */
bool relrowsecurity; /* row security is enabled or not */
bool relispopulated; /* matview currently holds query results */
char relreplident; /* see REPLICA_IDENTITY_xxx constants */
@@ -95,7 +96,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 30
+#define Natts_pg_class 31
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -119,13 +120,14 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relhasrules 21
#define Anum_pg_class_relhastriggers 22
#define Anum_pg_class_relhassubclass 23
-#define Anum_pg_class_relrowsecurity 24
-#define Anum_pg_class_relispopulated 25
-#define Anum_pg_class_relreplident 26
-#define Anum_pg_class_relfrozenxid 27
-#define Anum_pg_class_relminmxid 28
-#define Anum_pg_class_relacl 29
-#define Anum_pg_class_reloptions 30
+#define Anum_pg_class_relispartitioned 24
+#define Anum_pg_class_relrowsecurity 25
+#define Anum_pg_class_relispopulated 26
+#define Anum_pg_class_relreplident 27
+#define Anum_pg_class_relfrozenxid 28
+#define Anum_pg_class_relminmxid 29
+#define Anum_pg_class_relacl 30
+#define Anum_pg_class_reloptions 31
/* ----------------
* initial contents of pg_class
@@ -140,13 +142,13 @@ typedef FormData_pg_class *Form_pg_class;
* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
* similarly, "1" in relminmxid stands for FirstMultiXactId
*/
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
diff --git a/src/include/catalog/pg_partition.h b/src/include/catalog/pg_partition.h
new file mode 100644
index 0000000..46a69c2
--- /dev/null
+++ b/src/include/catalog/pg_partition.h
@@ -0,0 +1,61 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_partition.h
+ * definition of the system "partition" relation (pg_partition)
+ * along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ *
+ * src/include/catalog/pg_partition.h
+ *
+ * NOTES
+ * the genbki.pl script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PARTITION_H
+#define PG_PARTITION_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_partition definition. cpp turns this into
+ * typedef struct FormData_pg_partition
+ * ----------------
+ */
+#define PartitionRelationId 3279
+
+CATALOG(pg_partition,3279) BKI_WITHOUT_OIDS
+{
+ Oid partitionid; /* partition oid */
+ Oid partparent; /* parent oid */
+
+#ifdef CATALOG_VARLEN /* variable-length fields start here */
+ anyarray partlistvalues; /* list of allowed values of the only
+ * partition column */
+ anyarray partrangebounds; /* list of bounds of ranges of
+ * allowed values per partition key
+ * column */
+#endif
+} FormData_pg_partition;
+
+/* ----------------
+ * Form_pg_partition corresponds to a pointer to a tuple with
+ * the format of pg_partition relation.
+ * ----------------
+ */
+typedef FormData_pg_partition *Form_pg_partition;
+
+/* ----------------
+ * compiler constants for pg_partition
+ * ----------------
+ */
+#define Natts_pg_partition 4
+#define Anum_pg_partition_partitionid 1
+#define Anum_pg_partition_partparent 2
+#define Anum_pg_partition_partlistvalues 3
+#define Anum_pg_partition_partrangebounds 4
+
+#endif /* PG_PARTITION_H */
diff --git a/src/include/catalog/pg_partition_fn.h b/src/include/catalog/pg_partition_fn.h
new file mode 100644
index 0000000..2734e42
--- /dev/null
+++ b/src/include/catalog/pg_partition_fn.h
@@ -0,0 +1,34 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_partition_fn.h
+ * prototypes for functions in catalog/pg_partition.c
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_partition_fn.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PARTITION_FN_H
+#define PG_PARTITION_FN_H
+
+#include "nodes/pg_list.h"
+#include "storage/lock.h"
+#include "utils/rel.h"
+
+/* Bound info of a single partition (guard must not clash with pg_inherits_fn.h) */
+typedef struct Partition
+{
+	Oid			oid;			/* presumably the partition's relation OID -- TODO confirm */
+	int			listnvalues;	/* presumably number of entries in listvalues -- TODO confirm */
+	int			rangenbounds;	/* presumably number of valid rangemins/rangemaxs entries -- TODO confirm */
+	Datum	   *listvalues;		/* allowed values (list strategy) */
+	Datum		rangemins[PARTITION_MAX_KEYS];	/* range minimum per key column */
+	Datum		rangemaxs[PARTITION_MAX_KEYS];	/* range maximum per key column */
+} Partition;
+
+extern Partition *GetPartition(Relation parentrel, Oid partitionid);
+
+#endif   /* PG_PARTITION_FN_H */
diff --git a/src/include/catalog/pg_partitioned_rel.h b/src/include/catalog/pg_partitioned_rel.h
new file mode 100644
index 0000000..5dd9d16
--- /dev/null
+++ b/src/include/catalog/pg_partitioned_rel.h
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_partitioned_rel.h
+ * definition of the system "partitioned" relation (pg_partitioned_rel)
+ * along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ *
+ * src/include/catalog/pg_partitioned_rel.h
+ *
+ * NOTES
+ * the genbki.pl script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PARTITIONED_REL_H
+#define PG_PARTITIONED_REL_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_partitioned_rel definition. cpp turns this into
+ * typedef struct FormData_pg_partitioned_rel
+ * ----------------
+ */
+#define PartitionedRelRelationId 3277
+
+CATALOG(pg_partitioned_rel,3277) BKI_WITHOUT_OIDS
+{
+ Oid partrelid; /* partitioned table oid */
+ char partstrategy; /* partitioning strategy */
+ int16 partnatts; /* number of partition columns */
+
+ /* variable-length fields start here, but we allow direct access to partkey */
+ int2vector partkey; /* column numbers of partition columns */
+
+#ifdef CATALOG_VARLEN
+ oidvector partclass; /* operator class to compare keys */
+#endif
+} FormData_pg_partitioned_rel;
+
+/* ----------------
+ * Form_pg_partitioned_rel corresponds to a pointer to a tuple with
+ * the format of pg_partitioned_rel relation.
+ * ----------------
+ */
+typedef FormData_pg_partitioned_rel *Form_pg_partitioned_rel;
+
+/* ----------------
+ * compiler constants for pg_partitioned_rel
+ * ----------------
+ */
+#define Natts_pg_partitioned_rel 5
+#define Anum_pg_partitioned_rel_partrelid 1
+#define Anum_pg_partitioned_rel_partstrategy 2
+#define Anum_pg_partitioned_rel_partnatts 3
+#define Anum_pg_partitioned_rel_partkey 4
+#define Anum_pg_partitioned_rel_partclass 5
+
+#endif /* PG_PARTITIONED_REL_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index cf586fe..dd94810 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -40,6 +40,8 @@ extern bool CheckIndexCompatible(Oid oldId,
List *attributeList,
List *exclusionOpNames);
extern Oid GetDefaultOpClass(Oid type_id, Oid am_id);
+extern Oid GetIndexOpClass(List *opclass, Oid attrType,
+ const char *accessMethodName, Oid accessMethodId);
/* commands/functioncmds.c */
extern Oid CreateFunction(CreateFunctionStmt *stmt, const char *queryString);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 40fde83..a795b0d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -369,5 +369,6 @@ extern void RegisterExprContextCallback(ExprContext *econtext,
extern void UnregisterExprContextCallback(ExprContext *econtext,
ExprContextCallbackFunction function,
Datum arg);
-
+extern Relation ExecFindPartition(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot);
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 97ef0fc..1eca560 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -413,6 +413,10 @@ typedef enum NodeTag
T_XmlSerialize,
T_WithClause,
T_CommonTableExpr,
+ T_PartitionBy,
+ T_PartitionElem,
+ T_PartitionValues,
+ T_PartitionDef,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index ac13302..a3abfcb 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -654,6 +654,62 @@ typedef struct XmlSerialize
int location; /* token location, or -1 if unknown */
} XmlSerialize;
+/*
+ * Partitioning related definitions
+ */
+
+/*
+ * PartitionElem - one partition key column (used in PARTITION BY ... ON
+ * of CREATE TABLE)
+ *
+ * 'name' is the name of the table column to use as part of the partition
+ * key of the table and 'opclass' is the operator class to be used with
+ * this column.
+ */
+typedef struct PartitionElem
+{
+ NodeTag type;
+ char *name; /* name of column to partition on, or NULL */
+ List *opclass; /* name of desired opclass; NIL = default */
+} PartitionElem;
+
+/*
+ * PartitionBy - partition key definition including the strategy
+ */
+#define PARTITION_STRAT_LIST 'l'
+#define PARTITION_STRAT_RANGE 'r'
+
+typedef struct PartitionBy
+{
+ NodeTag type;
+ char strategy; /* one of the PARTITION_STRAT_xxx codes above */
+ List *partcols; /* key columns; presumably a list of PartitionElem -- TODO confirm */
+} PartitionBy;
+
+/*
+ * PartitionValues - partition bounding values
+ *
+ * Currently, notions of list of values (for a single column)
+ * and range of values (for multiple columns) are supported
+ */
+typedef struct PartitionValues
+{
+ NodeTag type;
+ List *listvalues; /* list of values (single-column list case) */
+ List *rangelbounds; /* presumably range lower bounds, one per key column -- TODO confirm */
+ List *rangeubounds; /* presumably range upper bounds, one per key column -- TODO confirm */
+} PartitionValues;
+
+/*
+ * PartitionDef - a single partition definition
+ */
+typedef struct PartitionDef
+{
+ NodeTag type;
+ RangeVar *name; /* presumably the partition's own name -- TODO confirm */
+ RangeVar *parent; /* presumably the partitioned parent table -- TODO confirm */
+ PartitionValues *values; /* bounding values of the partition */
+} PartitionDef;
/****************************************************************************
* Nodes for a Query tree
@@ -1347,6 +1403,7 @@ typedef enum AlterTableType
AT_ReplicaIdentity, /* REPLICA IDENTITY */
AT_EnableRowSecurity, /* ENABLE ROW SECURITY */
AT_DisableRowSecurity, /* DISABLE ROW SECURITY */
+ AT_AttachPartition, /* PARTITION OF parent FOR VALUES */
AT_GenericOptions /* OPTIONS (...) */
} AlterTableType;
@@ -1574,9 +1631,13 @@ typedef struct CreateStmt
{
NodeTag type;
RangeVar *relation; /* relation to create */
+ RangeVar *partitionOf; /* parent named in PARTITION OF, if any */
List *tableElts; /* column definitions (list of ColumnDef) */
List *inhRelations; /* relations to inherit from (list of
* inhRelation) */
+ PartitionValues *partValues; /* bounding values of the partition
+ * being created, if any */
+ PartitionBy *partitionby; /* partition key definition */
TypeName *ofTypename; /* OF typename */
List *constraints; /* constraints (list of Constraint nodes) */
List *options; /* options from WITH clause */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 7c243ec..26764ed 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -223,6 +223,7 @@ PG_KEYWORD("left", LEFT, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("level", LEVEL, UNRESERVED_KEYWORD)
PG_KEYWORD("like", LIKE, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("limit", LIMIT, RESERVED_KEYWORD)
+PG_KEYWORD("list", LIST, UNRESERVED_KEYWORD)
PG_KEYWORD("listen", LISTEN, UNRESERVED_KEYWORD)
PG_KEYWORD("load", LOAD, UNRESERVED_KEYWORD)
PG_KEYWORD("local", LOCAL, UNRESERVED_KEYWORD)
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 3103b71..bb6e005 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -63,7 +63,8 @@ typedef enum ParseExprKind
EXPR_KIND_INDEX_PREDICATE, /* index predicate */
EXPR_KIND_ALTER_COL_TRANSFORM, /* transform expr in ALTER COLUMN TYPE */
EXPR_KIND_EXECUTE_PARAMETER, /* parameter value in EXECUTE */
- EXPR_KIND_TRIGGER_WHEN /* WHEN condition in CREATE TRIGGER */
+ EXPR_KIND_TRIGGER_WHEN, /* WHEN condition in CREATE TRIGGER */
+ EXPR_KIND_PARTITION_VALUES /* FOR VALUES in CREATE TABLE ... PARTITION OF */
} ParseExprKind;
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 5cfc0ae..802022d 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -46,6 +46,12 @@
#define INDEX_MAX_KEYS 32
/*
+ * Maximum number of columns in a partition key. This is entirely arbitrary
+ * at this point
+ */
+#define PARTITION_MAX_KEYS 16
+
+/*
* Set the upper and lower bounds of sequence values.
*/
#define SEQ_MAXVALUE INT64CONST(0x7FFFFFFFFFFFFFFF)
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 6bd786d..6fa11e6 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -45,7 +45,6 @@ typedef struct LockInfoData
typedef LockInfoData *LockInfo;
-
/*
* Cached lookup information for the frequently used index access method
* functions, defined by the pg_am row associated with an index relation.
@@ -63,6 +62,18 @@ typedef struct RelationAmInfo
FmgrInfo amcanreturn;
} RelationAmInfo;
+/* Details of partitions of a partitioned relation */
+typedef struct PartitionInfo
+{
+ MemoryContext picxt; /* presumably the context owning the arrays below -- TODO confirm */
+ int numpartitions; /* number of partitions; valid partition indexes are 0..numpartitions-1 */
+ char strategy; /* partitioning strategy */
+ Oid *oids; /* relation OID of each partition, by partition index */
+ int *listnvalues; /* presumably number of list values per partition -- TODO confirm */
+ Datum **listvalues; /* presumably list values per partition -- TODO confirm */
+ Datum *rangemins[PARTITION_MAX_KEYS]; /* [key column][partition] range lower bounds */
+ Datum *rangemaxs[PARTITION_MAX_KEYS]; /* [key column][partition] range upper bounds */
+} PartitionInfo;
/*
* Here are the contents of a relation cache entry.
@@ -118,6 +129,17 @@ typedef struct RelationData
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Bitmapset *rd_idattr; /* included in replica identity index */
+ /* Partition key info */
+ int rd_partnatts; /* number of partition key attributes */
+ char rd_partstrategy;/* list or range partitions */
+ AttrNumber *rd_partattrs; /* partition key attributes */
+ Oid *rd_partclass; /* OIDs of the opclass for each key attribute */
+ FmgrInfo *rd_partattcmpfn; /* compare functions for each key attribute */
+
+ /* Partition info */
+ PartitionInfo *rd_partitioninfo; /* Partitions making up this partitioned
+ * relation */
+
/*
* rd_options is set whenever rd_rel is loaded into the relcache entry.
* Note that you can NOT look into rd_rel for this data. NULL means "use
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index ba0b090..7377309 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -72,6 +72,8 @@ enum SysCacheIdentifier
OPEROID,
OPFAMILYAMNAMENSP,
OPFAMILYOID,
+ PARTITIONID,
+ PARTITIONEDRELID,
PROCNAMEARGSNSP,
PROCOID,
RANGETYPE,
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 33b172b..0cac472 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -42,6 +42,7 @@
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
+#include "catalog/pg_collation.h"
#include "commands/matview.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
@@ -2702,3 +2703,317 @@ EvalPlanQualEnd(EPQState *epqstate)
epqstate->planstate = NULL;
epqstate->origslot = NULL;
}
+
+/*
+ * ExecFindPartition - find the partition that should receive the tuple in
+ * 'slot'.  Returns the partition opened with RowExclusiveLock (caller must
+ * close it), or the parent relation itself when no partition matches or
+ * none exist.  XXX - only the range strategy (incl. multi-column) works.
+ */
+Relation
+ExecFindPartition(ResultRelInfo *resultRelInfo, TupleTableSlot *slot)
+{
+	Relation	rel = resultRelInfo->ri_RelationDesc;
+	Relation	partition;
+
+	/* partition key and bound info */
+	int			partnatts = rel->rd_partnatts;
+	AttrNumber *partattrs = rel->rd_partattrs;
+	FmgrInfo   *partattcmpfn = rel->rd_partattcmpfn;
+	PartitionInfo *pinfo = rel->rd_partitioninfo;
+
+	FunctionCallInfoData fcinfo;
+
+	/* input tuple; NOTE(review): assumes the slot is already deformed */
+	Datum	   *values = slot->tts_values;
+	bool	   *isnull = slot->tts_isnull;
+
+	AttrNumber	attno;
+	AttrNumber	finalattno;
+	int			attidx;
+	int			finalattidx;
+
+	int			least, highest;
+	int			lobound, hibound;
+	int			newlo, newhi;
+	int			probe;
+	int			cmpval;
+	bool		bsearch_aborted;
+	int			j;
+
+	/* NULLs are not allowed in partition key columns; reject them up front */
+	for (attidx = 0; attidx < partnatts; attidx++)
+	{
+		attno = partattrs[attidx];
+
+		if (isnull[attno - 1])
+			ereport(ERROR,
+					(errcode(ERRCODE_NOT_NULL_VIOLATION),
+					 errmsg("value in partition key column \"%s\" cannot be null",
+							NameStr(rel->rd_att->attrs[attno - 1]->attname))));
+	}
+
+	/* nothing to route to; also keeps us from indexing empty bound arrays */
+	if (pinfo == NULL || pinfo->numpartitions <= 0)
+		return rel;
+
+	/*
+	 * Let the binary search begin!
+	 * The invariant is kind of complicated because of incorporating
+	 * the logic for multi-column range strategy.
+	 */
+	attidx = 0;
+	finalattno = partattrs[0];
+	finalattidx = attidx;
+
+	least = lobound = 0;
+	highest = hibound = pinfo->numpartitions - 1;
+
+	newlo = -1;
+	newhi = pinfo->numpartitions;
+
+	bsearch_aborted = false;
+	while (lobound < hibound)
+	{
+		probe = (lobound + hibound) / 2;
+
+		attno = partattrs[attidx];
+		InitFunctionCallInfoData(fcinfo, &(partattcmpfn[attidx]), 2,
+								 DEFAULT_COLLATION_OID, NULL, NULL);
+		/* tuple(attno) < rangemax(attidx)? */
+		fcinfo.arg[0] = values[attno - 1];
+		fcinfo.arg[1] = pinfo->rangemaxs[attidx][probe];
+		cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+		if (!cmpval)
+		{
+			/*
+			 * tuple(attno) == rangemax(attidx).
+			 */
+
+			/*
+			 * The last attribute matching a rangemax means we are
+			 * pretty close to the partition we're looking for - namely
+			 * probe+1
+			 */
+			if (attidx == partnatts - 1)
+			{
+				/*
+				 * memorize to use after the binary search ends
+				 */
+				probe = probe + 1;
+
+				if (probe > highest)
+					probe = highest;
+
+				finalattidx = attidx;
+				finalattno = attno;
+				bsearch_aborted = true;
+				break;
+			}
+
+			/*
+			 * Before moving on to the next attribute, restrict the binary
+			 * search space to the sub-range where rangemax(attidx) is the
+			 * same. The sub-range consists of partitions newlo..newhi
+			 */
+
+			/* look left of probe */
+			j = probe;
+			do
+			{
+				--j;
+
+				if (j < 0)
+					break;
+
+				fcinfo.arg[0] = values[attno - 1];
+				fcinfo.arg[1] = pinfo->rangemaxs[attidx][j];
+				cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+			} while (!cmpval);
+
+			/* possibly new lobound */
+			newlo = ++j;
+
+			/* look right of probe */
+			j = probe;
+			do
+			{
+				++j;
+
+				if (j > pinfo->numpartitions - 1)
+					break;
+
+				fcinfo.arg[0] = values[attno - 1];
+				fcinfo.arg[1] = pinfo->rangemaxs[attidx][j];
+				cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+			} while (!cmpval);
+
+			/* possibly new hibound */
+			newhi = --j;
+
+			/*
+			 * Move on to the next attribute within newlo..newhi.  Do so even
+			 * when the sub-range is a single partition; otherwise nothing
+			 * changes and this loop would never terminate.
+			 */
+			attidx++;
+			attno = partattrs[attidx];
+			lobound = newlo;
+			hibound = newhi;
+		}
+		else
+		{
+			/*
+			 * tuple(attno) != rangemax(attidx).
+			 *
+			 * Move a level down in the current binary search
+			 */
+			if (cmpval < 0)
+				hibound = probe;
+			else
+				lobound = probe + 1;
+
+			/*
+			 * Remember newlo, newhi are the sub-range bounds covering the
+			 * range of values rangemax[attidx] for which rangemax[attidx-1]
+			 * is the same. As determined when attidx was set to its current
+			 * value.
+			 *
+			 * It is however possible that the desired partition may be the
+			 * one just after the last one of the partitions newlo..newhi,
+			 * or the one just before.
+			 */
+			if (lobound >= newhi)
+			{
+				/*
+				 * Before concluding so, check if this is not the one
+				 */
+				probe = (lobound + hibound) / 2;
+				InitFunctionCallInfoData(fcinfo, &(partattcmpfn[attidx]), 2,
+										 DEFAULT_COLLATION_OID, NULL, NULL);
+				fcinfo.arg[0] = values[attno - 1];
+				fcinfo.arg[1] = pinfo->rangemaxs[attidx][probe];
+				cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+				if (cmpval < 0)
+				{
+					InitFunctionCallInfoData(fcinfo,
+											 &(partattcmpfn[attidx]),
+											 2, DEFAULT_COLLATION_OID, NULL, NULL);
+
+					fcinfo.arg[0] = values[attno - 1];
+					fcinfo.arg[1] = pinfo->rangemins[attidx][probe];
+
+					cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+					if (cmpval >= 0)
+					{
+						/* Yes, this is it, return. */
+						partition = heap_open(pinfo->oids[probe], RowExclusiveLock);
+						return partition;
+					}
+				}
+
+				/*
+				 * We are about to abort the bsearch.
+				 * Reset attidx.
+				 */
+				attidx = 0;
+				attno = partattrs[attidx];
+				probe = newhi + 1;
+
+				if (probe > highest)
+					probe = highest;
+
+				/*
+				 * memorize to use after the binary search ends
+				 */
+				finalattidx = attidx;
+				finalattno = attno;
+				bsearch_aborted = true;
+				break;
+			}
+			else if (hibound <= newlo)
+			{
+				/*
+				 * Before concluding so, check if this is not the one
+				 */
+				probe = (lobound + hibound) / 2;
+				InitFunctionCallInfoData(fcinfo, &(partattcmpfn[attidx]),
+										 2, DEFAULT_COLLATION_OID, NULL, NULL);
+
+				fcinfo.arg[0] = values[attno - 1];
+				fcinfo.arg[1] = pinfo->rangemins[attidx][probe];
+
+				cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+				if (cmpval >= 0)
+				{
+					/* Yes, this is it, return. */
+					partition = heap_open(pinfo->oids[probe], RowExclusiveLock);
+					return partition;
+				}
+
+				/*
+				 * We're about to abort the bsearch.
+				 * Reset attidx.
+				 */
+				attidx = 0;
+				attno = partattrs[attidx];
+				probe = newlo - 1;
+
+				if (probe < least)
+					probe = least;
+
+				/*
+				 * memorize to use after the binary search ends
+				 */
+				finalattidx = attidx;
+				finalattno = attno;
+				bsearch_aborted = true;
+				break;
+			}
+		}
+
+		/*
+		 * memorize to use after the binary search ends
+		 */
+		finalattidx = attidx;
+		finalattno = attno;
+	}
+
+	if (!bsearch_aborted)
+		probe = (lobound + hibound) / 2;
+
+	/* tuple < rangemaxs(probe)? */
+	InitFunctionCallInfoData(fcinfo, &(partattcmpfn[finalattidx]), 2,
+							 DEFAULT_COLLATION_OID, NULL, NULL);
+
+	fcinfo.arg[0] = values[finalattno - 1];
+	fcinfo.arg[1] = pinfo->rangemaxs[finalattidx][probe];
+	cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+	/*
+	 * So, tuple < rangemaxs(probe).
+	 * Is tuple >= rangemins(probe)?
+	 */
+	if (cmpval < 0)
+	{
+		InitFunctionCallInfoData(fcinfo, &(partattcmpfn[finalattidx]), 2,
+								 DEFAULT_COLLATION_OID, NULL, NULL);
+
+		fcinfo.arg[0] = values[finalattno - 1];
+		fcinfo.arg[1] = pinfo->rangemins[finalattidx][probe];
+
+		cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo));
+
+		if (cmpval >= 0)
+		{
+			partition = heap_open(pinfo->oids[probe], RowExclusiveLock);
+			return partition;
+		}
+	}
+
+	return rel;
+}
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index f96fb24..5cd24f5 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -168,7 +168,9 @@ ExecInsert(TupleTableSlot *slot,
{
HeapTuple tuple;
ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo;
Relation resultRelationDesc;
+ Relation saved_resultRelationDesc;
Oid newId;
List *recheckIndexes = NIL;
@@ -259,6 +261,33 @@ ExecInsert(TupleTableSlot *slot,
ExecConstraints(resultRelInfo, slot, estate);
/*
+ * a hack/idea to route tuples to a valid partition
+ * if none found, resultRelationDesc remains unchanged
+ *
+ * XXX - should this be before ExecConstraints()?
+ */
+ saved_resultRelationDesc = resultRelationDesc;
+ saved_resultRelInfo = resultRelInfo;
+ if(resultRelationDesc->rd_rel->relispartitioned)
+ {
+ resultRelationDesc = ExecFindPartition(resultRelInfo, slot);
+
+ /* for ExecInsertIndexTuples() */
+ if(resultRelationDesc != saved_resultRelationDesc)
+ {
+ resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(resultRelInfo,
+ resultRelationDesc,
+ 1,
+ 0);
+
+ ExecOpenIndices(resultRelInfo);
+
+ estate->es_result_relation_info = resultRelInfo;
+ }
+ }
+
+ /*
* insert the tuple
*
* Note: heap_insert returns the tid (location) of the new tuple in
@@ -273,6 +302,17 @@ ExecInsert(TupleTableSlot *slot,
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
estate);
+
+		/* close the partition's indexes and heap, then restore estate */
+		if (resultRelationDesc != saved_resultRelationDesc)
+		{
+			ExecCloseIndices(resultRelInfo);	/* pairs with ExecOpenIndices above */
+			pfree(resultRelInfo);
+			heap_close(resultRelationDesc, RowExclusiveLock);
+			estate->es_result_relation_info = saved_resultRelInfo;
+			resultRelInfo = saved_resultRelInfo;
+		}
+		resultRelationDesc = saved_resultRelationDesc;
}
if (canSetTag)
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers