On 24-02-2015 PM 05:13, Amit Langote wrote: > On 21-01-2015 PM 07:26, Amit Langote wrote: >> >> Ok, I will limit myself to focusing on following things at the moment: >> >> * Provide syntax in CREATE TABLE to declare partition key >> * Provide syntax in CREATE TABLE to declare a table as partition of a >> partitioned table and values it contains >> * Arrange to have partition key and values stored in appropriate >> catalogs (existing or new) >> * Arrange to cache partitioning info of partitioned tables in relcache >> > > Here is an experimental patch that attempts to implement this.
I divided the patch into two for convenience: 1) 0001_partition_syntax_catalog - adds WIP code for the commands, the catalogs and the partitioned table relation descriptor 2) 0002_tuple-routing-poc - an experimental patch to test how well a binary search approach works for tuple routing in ExecInsert(). Please take a look. Thanks, Amit
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index a403c64..bf02730 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -14,7 +14,7 @@ OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \ objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \ pg_constraint.o pg_conversion.o \ pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \ - pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \ + pg_operator.o pg_partition.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \ pg_type.o storage.o toasting.o BKIFILES = postgres.bki postgres.description postgres.shdescription @@ -41,6 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \ pg_foreign_table.h pg_policy.h \ pg_default_acl.h pg_seclabel.h pg_shseclabel.h pg_collation.h pg_range.h \ + pg_partitioned_rel.h pg_partition.h\ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index bacb242..8bdb34b 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -43,6 +43,8 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_policy.h" #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" @@ -157,7 +159,9 @@ static const Oid object_classes[MAX_OCLASS] = { DefaultAclRelationId, /* OCLASS_DEFACL */ ExtensionRelationId, /* OCLASS_EXTENSION */ EventTriggerRelationId, /* OCLASS_EVENT_TRIGGER */ - PolicyRelationId /* OCLASS_POLICY */ + PolicyRelationId, /* OCLASS_POLICY */ + PartitionedRelRelationId, /* OCLASS_PARTITIONED_REL */ + PartitionRelationId /* OCLASS_PARTITION */ }; @@ -1265,6 +1269,14 @@ doDeletion(const ObjectAddress *object, int flags) RemovePolicyById(object->objectId); 
break; + case OCLASS_PARTITIONED_REL: + RemovePartitionKeyByRelId(object->objectId); + break; + + case OCLASS_PARTITION: + RemovePartitionDefByRelId(object->objectId); + break; + default: elog(ERROR, "unrecognized object class: %u", object->classId); @@ -2373,6 +2385,12 @@ getObjectClass(const ObjectAddress *object) case PolicyRelationId: return OCLASS_POLICY; + + case PartitionedRelRelationId: + return OCLASS_PARTITIONED_REL; + + case PartitionRelationId: + return OCLASS_PARTITION; } /* shouldn't get here */ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 17f7266..0308c0b 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -47,6 +47,8 @@ #include "catalog/pg_foreign_table.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_statistic.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -110,6 +112,7 @@ static Node *cookConstraint(ParseState *pstate, Node *raw_constraint, char *relname); static List *insert_ordered_unique_oid(List *list, Oid datum); +static void SetRelationIsPartitioned(Oid relationId, bool relispartitioned); /* ---------------------------------------------------------------- @@ -2968,3 +2971,306 @@ insert_ordered_unique_oid(List *list, Oid datum) lappend_cell_oid(list, prev, datum); return list; } + +/* + * StorePartitionKey + * + * Store the partition key of relation rel into system catalog + * pg_partitioned_rel + */ +void +StorePartitionKey(Relation rel, int nattrs, + AttrNumber *partKeyAttrNumbers, + Oid *partClassOids, + char strategy) +{ + int i; + int2vector *partkey; + oidvector *partclass; + Datum values[Natts_pg_partitioned_rel]; + bool nulls[Natts_pg_partitioned_rel]; + Relation pg_partitioned_rel; + HeapTuple tuple; + ObjectAddress myself; + ObjectAddress target; + + /* + * this check is currently unused code because we only allow defining + * a partition 
key in CREATE TABLE at the moment + */ + tuple = SearchSysCache(PARTITIONEDRELID, ObjectIdGetDatum(RelationGetRelid(rel)), 0, 0, 0); + if (HeapTupleIsValid(tuple)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("multiple partition keys for table \"%s\" are not allowed", + RelationGetRelationName(rel)))); + ReleaseSysCache(tuple); + } + + /* + * Copy the partition key, opclass info into arrays (should we + * make the caller pass them like this to start with?) + */ + partkey = buildint2vector(partKeyAttrNumbers, nattrs); + partclass = buildoidvector(partClassOids, nattrs); + + /* + * open the system catalog partitioned_rel relation + */ + pg_partitioned_rel = heap_open(PartitionedRelRelationId, RowExclusiveLock); + + /* + * Build a pg_partitioned_rel tuple + */ + MemSet(nulls, false, sizeof(nulls)); + + values[Anum_pg_partitioned_rel_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel)); + values[Anum_pg_partitioned_rel_partstrategy - 1] = CharGetDatum(strategy); + values[Anum_pg_partitioned_rel_partnatts - 1] = Int16GetDatum(nattrs); + values[Anum_pg_partitioned_rel_partkey - 1] = PointerGetDatum(partkey); + values[Anum_pg_partitioned_rel_partclass - 1] = PointerGetDatum(partclass); + + tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_rel), values, nulls); + + /* + * insert the tuple into the pg_partitioned_rel catalog + */ + simple_heap_insert(pg_partitioned_rel, tuple); + + /* update the indexes on pg_partitioned_rel */ + CatalogUpdateIndexes(pg_partitioned_rel, tuple); + + /* Store a dependency - drop the key when the relation is dropped */ + myself.classId = PartitionedRelRelationId; + myself.objectId = RelationGetRelid(rel); + myself.objectSubId = 0; + + target.classId = RelationRelationId; + target.objectId = RelationGetRelid(rel); + target.objectSubId = 0; + + recordDependencyOn(&myself, &target, DEPENDENCY_AUTO); + + /* + * close the relation and free the tuple + */ + heap_close(pg_partitioned_rel, RowExclusiveLock); + 
heap_freetuple(tuple); + + SetRelationIsPartitioned(RelationGetRelid(rel), true); +} + +/* + * SetRelationIsPartitioned + * Set the value of the relation's relispartitioned field in pg_class. + * + * NOTE: caller must be holding an appropriate lock on the relation. + * ShareUpdateExclusiveLock is sufficient. + * + * NOTE: an important side-effect of this operation is that an SI invalidation + * message is sent out to all backends --- including me --- causing plans + * referencing the relation to be rebuilt with the new list of children. + * This must happen even if we find that no change is needed in the pg_class + * row. + */ +static void +SetRelationIsPartitioned(Oid relationId, bool relispartitioned) +{ + Relation relationRelation; + HeapTuple tuple; + Form_pg_class classtuple; + + /* + * Fetch a modifiable copy of the tuple, modify it, update pg_class. + */ + relationRelation = heap_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relationId); + classtuple = (Form_pg_class) GETSTRUCT(tuple); + + if (classtuple->relispartitioned != relispartitioned) + { + classtuple->relispartitioned = relispartitioned; + simple_heap_update(relationRelation, &tuple->t_self, tuple); + + /* keep the catalog indexes up to date */ + CatalogUpdateIndexes(relationRelation, tuple); + } + else + { + /* no need to change tuple, but force relcache rebuild anyway */ + CacheInvalidateRelcacheByTuple(tuple); + } + + heap_freetuple(tuple); + heap_close(relationRelation, RowExclusiveLock); +} + +/* + * Remove a pg_partitioned_rel entry + */ +void +RemovePartitionKeyByRelId(Oid relid) +{ + Relation rel; + HeapTuple tuple; + + /* DELETE FROM pg_partitioned_rel WHERE partrelid = :relid */ + rel = heap_open(PartitionedRelRelationId, RowExclusiveLock); + tuple = SearchSysCache(PARTITIONEDRELID, + ObjectIdGetDatum(relid), 0, 0, 0); + + if 
(!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of relation %u", relid); + simple_heap_delete(rel, &tuple->t_self); + ReleaseSysCache(tuple); + heap_close(rel, RowExclusiveLock); +} + +/* + * StorePartitionValues + * + * Store partition values of relation rel into system catalog + * pg_partition + */ +void +StorePartitionValues(Relation childrel, + Relation parentrel, + int listnvalues, + Datum *datum) +{ + Relation pg_partition; + HeapTuple tuple; + ArrayType *listvalues = NULL; + ArrayType *rangebounds = NULL; + int i; + AttrNumber partattno; + + ObjectAddress myself; + ObjectAddress target; + + Datum values[Natts_pg_partition]; + bool nulls[Natts_pg_partition]; + + Oid typid[PARTITION_MAX_KEYS]; + int32 typmod[PARTITION_MAX_KEYS]; + int16 typlen[PARTITION_MAX_KEYS]; + bool typbyval[PARTITION_MAX_KEYS]; + char typalign[PARTITION_MAX_KEYS]; + + Assert(parentrel->rd_partstrategy != 'l' + || parentrel->rd_partnatts == 1); + + for(i = 0; i < parentrel->rd_partnatts; i++) + { + partattno = parentrel->rd_partattrs[i]; + typid[i] = parentrel->rd_att->attrs[partattno - 1]->atttypid; + typmod[i] = parentrel->rd_att->attrs[partattno - 1]->atttypmod; + get_typlenbyvalalign(typid[i], &typlen[i], &typbyval[i], &typalign[i]); + } + + /* + * this check is currently *unused* code because we only allow defining + * partition values in CREATE TABLE at the moment (that is, not allowed + * in ALTER TABLE) + */ + tuple = SearchSysCache(PARTITIONID, ObjectIdGetDatum(RelationGetRelid(childrel)), 0, 0, 0); + if (HeapTupleIsValid(tuple)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("redefining partition values for a partition " + "is not allowed"))); + ReleaseSysCache(tuple); + } + + /* + * open the system catalog partition relation + */ + pg_partition = heap_open(PartitionRelationId, RowExclusiveLock); + + /* + * Build a pg_partition tuple + */ + MemSet(nulls, false, sizeof(nulls)); + + switch(parentrel->rd_partstrategy) + { 
+ case 'l': + listvalues = construct_array(datum, + listnvalues, + typid[0], + typlen[0], + typbyval[0], + typalign[0]); + + nulls[Anum_pg_partition_partrangebounds - 1] = true; + break; + + case 'r': + rangebounds = construct_array(datum, + parentrel->rd_partnatts, + ANYARRAYOID, + -1, + false, + 'd'); + + nulls[Anum_pg_partition_partlistvalues - 1] = true; + break; + } + + values[Anum_pg_partition_partitionid - 1] = ObjectIdGetDatum(RelationGetRelid(childrel)); + values[Anum_pg_partition_partparent - 1] = ObjectIdGetDatum(RelationGetRelid(parentrel)); + values[Anum_pg_partition_partlistvalues - 1] = PointerGetDatum(listvalues); + values[Anum_pg_partition_partrangebounds - 1] = PointerGetDatum(rangebounds); + + tuple = heap_form_tuple(RelationGetDescr(pg_partition), values, nulls); + + /* + * insert the tuple into the pg_partitioned_rel catalog + */ + simple_heap_insert(pg_partition, tuple); + + /* update the indexes on pg_partitioned_rel */ + CatalogUpdateIndexes(pg_partition, tuple); + + /* Store a dependency - drop the key when the relation is dropped */ + myself.classId = PartitionRelationId; + myself.objectId = RelationGetRelid(childrel); + myself.objectSubId = 0; + + target.classId = RelationRelationId; + target.objectId = RelationGetRelid(childrel); + target.objectSubId = 0; + + recordDependencyOn(&myself, &target, DEPENDENCY_AUTO); + + /* + * close the relation and free the tuple + */ + heap_close(pg_partition, RowExclusiveLock); + heap_freetuple(tuple); +} + +/* + * Remove a pg_partition entry + */ +void +RemovePartitionDefByRelId(Oid relid) +{ + Relation rel; + HeapTuple tuple; + + /* DELETE FROM pg_partitioned_rel WHERE partrelid = :relid */ + rel = heap_open(PartitionRelationId, RowExclusiveLock); + tuple = SearchSysCache(PARTITIONID, + ObjectIdGetDatum(relid), 0, 0, 0); + + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition definition of relation %u", relid); + simple_heap_delete(rel, &tuple->t_self); + 
ReleaseSysCache(tuple); + heap_close(rel, RowExclusiveLock); +} diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index d899dd7..b6e8d4e 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -41,6 +41,8 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_operator.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_proc.h" #include "catalog/pg_policy.h" #include "catalog/pg_rewrite.h" diff --git a/src/backend/catalog/pg_partition.c b/src/backend/catalog/pg_partition.c new file mode 100644 index 0000000..f7f8995 --- /dev/null +++ b/src/backend/catalog/pg_partition.c @@ -0,0 +1,135 @@ +/*------------------------------------------------------------------------- + * + * pg_partition.c + * routines to support manipulation of the pg_partition relation + * + * Note: currently, this module only contains inquiry functions; the actual + * creation and deletion of pg_partition entries is done in tablecmds.c. + * Perhaps someday that code should be moved here, but it'd have to be + * disentangled from other stuff such as pg_depend updates. 
+ * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/catalog/pg_partition.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partition_fn.h" +#include "catalog/pg_type.h" +#include "parser/parse_type.h" +#include "storage/lmgr.h" +#include "utils/array.h" +#include "utils/rangetypes.h" +#include "utils/fmgroids.h" +#include "utils/syscache.h" +#include "utils/tqual.h" +#include "utils/typcache.h" + +/* + * GetPartition + * + * returns details of a partition of parentrel as read from + * pg_partition catalog + */ +Partition * +GetPartition(Relation parentrel, Oid partitionid) +{ + HeapTuple tuple; + Form_pg_partition form; + AttrNumber partattno; + int i; + int rangenbounds; + Datum *rangebounds; + Datum datum; + bool isnull; + Partition *result; + + tuple = SearchSysCache1(PARTITIONID, partitionid); + + /* if no tuple found, it means the entry was just dropped */ + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition %u", + partitionid); + + result = (Partition *) palloc0(sizeof(Partition)); + result->oid = partitionid; + + form = (Form_pg_partition) GETSTRUCT(tuple); + + datum = SysCacheGetAttr(PARTITIONID, tuple, + Anum_pg_partition_partlistvalues, &isnull); + if(!isnull) + { + Oid typid; + int16 typlen; + bool typbyval; + char typalign; + + partattno = parentrel->rd_partattrs[0]; + typid = parentrel->rd_att->attrs[partattno - 1]->atttypid; + typlen = parentrel->rd_att->attrs[partattno - 1]->attlen; + typbyval = parentrel->rd_att->attrs[partattno - 1]->attbyval; + typalign = parentrel->rd_att->attrs[partattno - 1]->attalign; + + 
deconstruct_array(DatumGetArrayTypeP(datum), + typid, typlen, typbyval, typalign, + &result->listvalues, NULL, &result->listnvalues); + + ReleaseSysCache(tuple); + return result; + } + + /* getting here means we're looking at a range partition */ + datum = SysCacheGetAttr(PARTITIONID, tuple, + Anum_pg_partition_partrangebounds, &isnull); + + /* now, this can't be NULL */ + Assert(!isnull); + + deconstruct_array(DatumGetArrayTypeP(datum), + ANYARRAYOID, -1, false, 'd', + &rangebounds, NULL, &result->rangenbounds); + /* paranoia */ + Assert(rangenbounds = parentrel->rd_partnatts); + + + /* peep into each array to get either of the bounds*/ + for(i = 0; i < result->rangenbounds; i++) + { + ArrayType *arr = DatumGetArrayTypeP(rangebounds[i]); + Datum *datum; + int dummy; + Oid typid; + int16 typlen; + bool typbyval; + char typalign; + + partattno = parentrel->rd_partattrs[0]; + typid = parentrel->rd_att->attrs[partattno - 1]->atttypid; + typlen = parentrel->rd_att->attrs[partattno - 1]->attlen; + typbyval = parentrel->rd_att->attrs[partattno - 1]->attbyval; + typalign = parentrel->rd_att->attrs[partattno - 1]->attalign; + + deconstruct_array(arr, typid, typlen, typbyval, typalign, + &datum, NULL, &dummy); + + Assert(dummy == 2); + + result->rangemins[i] = datum[0]; + result->rangemaxs[i] = datum[1]; + } + + ReleaseSysCache(tuple); + return result; +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index e859669..cf39d4f 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -65,8 +65,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, char *accessMethodName, Oid accessMethodId, bool amcanorder, bool isconstraint); -static Oid GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId); static char *ChooseIndexName(const char *tabname, Oid namespaceId, List *colnames, List *exclusionOpNames, bool primary, bool isconstraint); @@ -1207,9 +1205,9 @@ 
ComputeIndexAttrs(IndexInfo *indexInfo, /* * Resolve possibly-defaulted operator class specification */ -static Oid +Oid GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId) + const char *accessMethodName, Oid accessMethodId) { char *schemaname; char *opcname; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f5d5b63..f44840c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -37,6 +37,8 @@ #include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" @@ -82,6 +84,7 @@ #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -92,6 +95,7 @@ #include "utils/syscache.h" #include "utils/tqual.h" #include "utils/typcache.h" +#include "utils/rangetypes.h" /* @@ -416,6 +420,9 @@ static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKM static void ATExecGenericOptions(Relation rel, List *options); static void ATExecEnableRowSecurity(Relation rel); static void ATExecDisableRowSecurity(Relation rel); +static void ATExecAttachPartition(Relation parentrel, + PartitionDef *partition, + LOCKMODE lockmode); static void copy_relation_data(SMgrRelation rel, SMgrRelation dst, ForkNumber forkNum, char relpersistence); @@ -425,6 +432,18 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg); static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, void *arg); +static void ComputePartitionAttrs(Oid relId, PartitionBy *partby, + AttrNumber *partKeyAttrNumbers, + Oid *partClassOids); +static Datum *EvalPartitionValues(Relation 
parentrel, + PartitionDef *partition, + int *listnvalues); +static Datum *evaluateListValues(List *values, NameData attname, Oid typid, + int32 typmod, int16 typlen, bool typbyval, + int nvalues); +static Datum *evaluateRangeBounds(List *rangelbounds, + NameData *attname, Oid *typid, int32 *typmod, + int16 *typlen, bool *typbyval, int partnatts); /* ---------------------------------------------------------------- @@ -465,6 +484,9 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId) AttrNumber attnum; static char *validnsps[] = HEAP_RELOPT_NAMESPACES; Oid ofTypeId; + int numPartitionAttrs; + AttrNumber partKeyAttrNumbers[PARTITION_MAX_KEYS]; + Oid classObjectId[PARTITION_MAX_KEYS]; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -690,6 +712,43 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId) true, true, false); /* + * Store the partition key into pg_partitioned_rel if one defined. + * It consists of table attributes constituting the key and opclass + * to use with it. + */ + if(stmt->partitionby) + { + /* + * count attributes in the partition key + */ + numPartitionAttrs = list_length(stmt->partitionby->partcols); + if (numPartitionAttrs < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("must specify at least one column in partition key"))); + if (numPartitionAttrs > PARTITION_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot use more than %d columns in partition key", + INDEX_MAX_KEYS))); + if (stmt->partitionby->strategy != 'r' && numPartitionAttrs > 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot use more than 1 column in partition key when" + " using list partitioning strategy"))); + + ComputePartitionAttrs(relationId, + stmt->partitionby, + partKeyAttrNumbers, + classObjectId); + StorePartitionKey(rel, + numPartitionAttrs, + partKeyAttrNumbers, + classObjectId, + stmt->partitionby->strategy); + } + + /* * Clean up. 
We keep lock on new relation (although it shouldn't be * visible to anyone else anyway, until commit). */ @@ -2939,6 +2998,15 @@ AlterTableGetLockLevel(List *cmds) break; /* + * CREATE TABLE ... PARTITION OF <parent> FOR VALUES ... + * + * XXX: the target of this command is <parent> + */ + case AT_AttachPartition: + cmd_lockmode = AccessExclusiveLock; + break; + + /* * These subcommands affect implicit row type conversion. They * have affects similar to CREATE/DROP CAST on queries. don't * provide for invalidating parse trees as a result of such @@ -3305,6 +3373,10 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* No command-specific prep needed */ pass = AT_PASS_MISC; break; + case AT_AttachPartition: + ATSimplePermissions(rel, ATT_TABLE); + pass = AT_PASS_MISC; + break; case AT_GenericOptions: ATSimplePermissions(rel, ATT_FOREIGN_TABLE); /* No command-specific prep needed */ @@ -3597,6 +3669,9 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, case AT_DisableRowSecurity: ATExecDisableRowSecurity(rel); break; + case AT_AttachPartition: + ATExecAttachPartition(rel, (PartitionDef *) cmd->def, lockmode); + break; case AT_GenericOptions: ATExecGenericOptions(rel, (List *) cmd->def); break; @@ -10722,6 +10797,63 @@ ATExecDisableRowSecurity(Relation rel) } /* + * ATExecAttachPartition + * + * CREATE TABLE ... PARTITION OF <parent> FOR VALUES ... 
+ */ +static void +ATExecAttachPartition(Relation parentrel, + PartitionDef *partition, + LOCKMODE lockmode) +{ + Relation childrel; + Relation catalogRelation; /* pg_inherits */ + int listnvalues; + Datum *values = NULL; + + childrel = heap_openrv(partition->name, AccessShareLock); + + /* cannot attach a partition to a non-partitioned table */ + if (!parentrel->rd_rel->relispartitioned) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot be a partition of non-partitioned relation \"%s\"", + RelationGetRelationName(parentrel)))); + + /* Permanent rels cannot be partitions of temporary ones */ + if (parentrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + childrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("a permanent relation cannot be a partition of temporary relation \"%s\"", + RelationGetRelationName(parentrel)))); + + /* If parent rel is temp, it must belong to this session */ + if (parentrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !parentrel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot be a partition of temporary relation of another session"))); + + /* Match up the columns and bump attinhcount as needed */ + MergeAttributesIntoExisting(childrel, parentrel); + + /* Now validate partition values and store in pg_partition */ + values = EvalPartitionValues(parentrel, partition, &listnvalues); + + StorePartitionValues(childrel, parentrel, listnvalues, values); + + catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock); + StoreCatalogInheritance1(RelationGetRelid(childrel), + RelationGetRelid(parentrel), + 1, + catalogRelation); + + heap_close(catalogRelation, RowExclusiveLock); + heap_close(childrel, AccessShareLock); +} + +/* * ALTER FOREIGN TABLE <name> OPTIONS (...) 
*/ static void @@ -11609,3 +11741,337 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, ReleaseSysCache(tuple); } + +/* + * Compute per-partition-column information viz. partition key + * attribute numbers, opclasses. + */ +static void +ComputePartitionAttrs(Oid relId, PartitionBy *partby, + AttrNumber *partKeyAttrNumbers, + Oid *partClassOids) +{ + int attn; + ListCell *lc; + + /* Process partColumns list */ + attn = 0; + foreach(lc, partby->partcols) + { + PartitionElem *partcol = (PartitionElem *) lfirst(lc); + Oid atttype; + + if(partcol->name != NULL) + { + HeapTuple atttuple; + Form_pg_attribute attform; + + atttuple = SearchSysCacheAttName(relId, partcol->name); + if (!HeapTupleIsValid(atttuple)) + { + /* difference in error message spellings is historical */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" named in partition key does not exist", + partcol->name))); + } + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + partKeyAttrNumbers[attn] = attform->attnum; + atttype = attform->atttypid; + ReleaseSysCache(atttuple); + } + + /* + * Identify the opclass to use. At the moment, we use "btree" indexable + * operators for simpler cases of partitioning where we compare key column + * values with partition bound values with just these set of operators. 
+ */ + partClassOids[attn++] = GetIndexOpClass(partcol->opclass, + atttype, + "btree", + BTREE_AM_OID); + } +} + +/* + * EvalPartitionValues + * + * validate and evalulate partition values in a partition definition + */ +static Datum * +EvalPartitionValues(Relation parentrel, + PartitionDef *partition, + int *listnvalues) +{ + PartitionValues *values; + Datum *result; + Datum *rangelbounds; + Datum *rangeubounds; + int partnatts; + int rngnlvalues, rngnuvalues; + AttrNumber partattno; + int i; + Oid typid[PARTITION_MAX_KEYS]; + int32 typmod[PARTITION_MAX_KEYS]; + int16 typlen[PARTITION_MAX_KEYS]; + char typalign[PARTITION_MAX_KEYS]; + bool typbyval[PARTITION_MAX_KEYS]; + NameData attname[PARTITION_MAX_KEYS]; + + Assert(parentrel->rd_rel->relispartitioned); + + /* get the parent's tuple descriptor */ + partnatts = parentrel->rd_partnatts; + + /* + * collect type info for partition key attributes + */ + for(i = 0; i < partnatts; i++) + { + partattno = parentrel->rd_partattrs[i]; + typid[i] = parentrel->rd_att->attrs[partattno - 1]->atttypid; + typmod[i] = parentrel->rd_att->attrs[partattno - 1]->atttypmod; + attname[i] = parentrel->rd_att->attrs[partattno - 1]->attname; + get_typlenbyvalalign(typid[i], &typlen[i], &typbyval[i], &typalign[i]); + } + + /* now transform and evaluate value expressions for this partition */ + values = partition->values; + switch(parentrel->rd_partstrategy) + { + case 'l': + Assert(!(partnatts > 1)); + + if(!values->listvalues) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Specified values in FOR VALUES ... does not" + " match the partition key or strategy"))); + + *listnvalues = list_length(values->listvalues); + + if(*listnvalues < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Must contain at least one value in" + " FOR VALUES IN (..)"))); + /* + * Currently only one column is supported here. 
+ * + * datum[0] - list of allowed values for the only key column + */ + + result = evaluateListValues(values->listvalues, attname[0], + typid[0], typmod[0], typlen[0], typbyval[0], + *listnvalues); + break; + + case 'r': + if(!values->rangelbounds || !values->rangeubounds) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Specified values in FOR VALUES does not" + " match the partition key or strategy"))); + + rngnlvalues = list_length(values->rangelbounds); + rngnuvalues = list_length(values->rangeubounds); + + if((rngnlvalues < 1) || + (rngnuvalues < 1) || + (rngnlvalues != partnatts) || + (rngnuvalues != partnatts) || + (rngnlvalues != rngnuvalues)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Specified values in FOR VALUES BETWEEN does not" + " match the partition key"))); + + /* + * both lists have the same number of values equal to + * number of columns in the partition key. + */ + rangelbounds = evaluateRangeBounds(values->rangelbounds, attname, + typid, typmod, typlen, typbyval, partnatts); + rangeubounds = evaluateRangeBounds(values->rangeubounds, attname, + typid, typmod, typlen, typbyval, partnatts); + + result = (Datum *) palloc0(partnatts * sizeof(Datum)); + for(i = 0; i < partnatts; i++) + { + Datum datum[2]; + ArrayType *rangebounds; + + datum[0] = rangelbounds[i]; + datum[1] = rangeubounds[i]; + + rangebounds = construct_array(datum, 2, typid[i], + typlen[i], typbyval[i], typalign[i]); + + result[i] = PointerGetDatum(rangebounds); + } + pfree(rangelbounds); + pfree(rangeubounds); + break; + } + + return result; +} + +/* + * evaluateListValues + * + * evaluate a list of expressions to build a datum array + */ +static Datum * +evaluateListValues(List *values, NameData attname, + Oid typid, int32 typmod, int16 typlen, bool typbyval, + int nvalues) +{ + ListCell *cell; + ParseState *pstate; + EState *estate; + ExprContext *ecxt; + int i; + Datum *datum; + + datum = (Datum *) palloc(nvalues * 
sizeof(Datum)); + pstate = make_parsestate(NULL); + estate = CreateExecutorState(); + ecxt = GetPerTupleExprContext(estate); + + i = 0; + foreach(cell, values) + { + Node *value = (Node *) lfirst(cell); + bool isnull; + ExprState *expr; + MemoryContext oldcxt; + Oid valuetype; + + oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + /* commented since no longer necessary to transform again */ + //value = transformExpr(pstate, value, + // EXPR_KIND_PARTITION_VALUES); + + valuetype = exprType(value); + value = coerce_to_target_type(NULL, + value, valuetype, + typid, typmod, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("partition key column \"%s\" is of type %s" + " but specified value is of type %s", + NameStr(attname), + format_type_be(typid), + format_type_be(valuetype)), + errhint("You will need to rewrite or cast the expression."))); + + expr = ExecPrepareExpr((Expr *) value, estate); + + datum[i] = ExecEvalExpr(expr, ecxt, &isnull, NULL); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("a partition value must not be NULL"))); + + MemoryContextSwitchTo(oldcxt); + + if (!typbyval) + { + if (typlen == -1) + datum[i] = PointerGetDatum(PG_DETOAST_DATUM_COPY(datum[i])); + else + datum[i] = datumCopy(datum[i], false, typlen); + } + + ResetPerTupleExprContext(estate); + i++; + } + + return datum; +} + +/* + * evaluateRangeBounds + * + * evaluate a list of expressions to build a datum array for a range bound + */ +static Datum * +evaluateRangeBounds(List *values, NameData *attname, + Oid *typid, int32 *typmod, int16 *typlen, bool *typbyval, + int partnatts) +{ + ListCell *cell; + ParseState *pstate; + EState *estate; + ExprContext *ecxt; + int i; + Datum *datum; + + datum = (Datum *) palloc(partnatts * sizeof(Datum)); + pstate = make_parsestate(NULL); + estate = CreateExecutorState(); + ecxt = 
GetPerTupleExprContext(estate); + + i = 0; + foreach(cell, values) + { + Node *value = (Node *) lfirst(cell); + bool isnull; + ExprState *expr; + MemoryContext oldcxt; + Oid valuetype; + + oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + /* commented since no longer necessary to transform again */ + //value = transformExpr(pstate, value, + // EXPR_KIND_PARTITION_VALUES); + + valuetype = exprType(value); + + value = coerce_to_target_type(NULL, + value, valuetype, + typid[i], typmod[i], + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("partition key column \"%s\" is of type %s" + " but specified value is of type %s", + NameStr(attname[i]), + format_type_be(typid[i]), + format_type_be(valuetype)), + errhint("You will need to rewrite or cast the expression."))); + + expr = ExecPrepareExpr((Expr *) value, estate); + + datum[i] = ExecEvalExpr(expr, ecxt, &isnull, NULL); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("a partition value must not be NULL"))); + + MemoryContextSwitchTo(oldcxt); + + if (!typbyval[i]) + { + if (typlen[i] == -1) + datum[i] = PointerGetDatum(PG_DETOAST_DATUM_COPY(datum[i])); + else + datum[i] = datumCopy(datum[i], false, typlen[i]); + } + + ResetPerTupleExprContext(estate); + i++; + } + + return datum; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 5282a4f..51abd2c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2431,6 +2431,53 @@ _copyColumnDef(const ColumnDef *from) return newnode; } +static PartitionBy * +_copyPartitionBy(const PartitionBy *from) +{ + + PartitionBy *newnode = makeNode(PartitionBy); + + COPY_SCALAR_FIELD(strategy); + COPY_NODE_FIELD(partcols); + + return newnode; +} + +static PartitionElem * +_copyPartitionElem(const PartitionElem *from) +{ + PartitionElem *newnode = makeNode(PartitionElem); + + 
COPY_STRING_FIELD(name); + COPY_NODE_FIELD(opclass); + + return newnode; +} + +static PartitionValues * +_copyPartitionValues(const PartitionValues *from) +{ + PartitionValues *newnode = makeNode(PartitionValues); + + COPY_NODE_FIELD(listvalues); + COPY_NODE_FIELD(rangelbounds); + COPY_NODE_FIELD(rangeubounds); + + return newnode; +} + +static PartitionDef * +_copyPartitionDef(const PartitionDef *from) +{ + PartitionDef *newnode = makeNode(PartitionDef); + + COPY_NODE_FIELD(name); + COPY_NODE_FIELD(parent); + COPY_NODE_FIELD(values); + + return newnode; +} + static Constraint * _copyConstraint(const Constraint *from) { @@ -2806,8 +2853,11 @@ static void CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode) { COPY_NODE_FIELD(relation); + COPY_NODE_FIELD(partitionOf); COPY_NODE_FIELD(tableElts); COPY_NODE_FIELD(inhRelations); + COPY_NODE_FIELD(partValues); + COPY_NODE_FIELD(partitionby); COPY_NODE_FIELD(ofTypename); COPY_NODE_FIELD(constraints); COPY_NODE_FIELD(options); @@ -4694,6 +4744,18 @@ copyObject(const void *from) case T_ColumnDef: retval = _copyColumnDef(from); break; + case T_PartitionBy: + retval = _copyPartitionBy(from); + break; + case T_PartitionElem: + retval = _copyPartitionElem(from); + break; + case T_PartitionValues: + retval = _copyPartitionValues(from); + break; + case T_PartitionDef: + retval = _copyPartitionDef(from); + break; case T_Constraint: retval = _copyConstraint(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index fe509b0..66f04a0 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1102,8 +1102,11 @@ static bool _equalCreateStmt(const CreateStmt *a, const CreateStmt *b) { COMPARE_NODE_FIELD(relation); + COMPARE_NODE_FIELD(partitionOf); COMPARE_NODE_FIELD(tableElts); COMPARE_NODE_FIELD(inhRelations); + COMPARE_NODE_FIELD(partValues); + COMPARE_NODE_FIELD(partitionby); COMPARE_NODE_FIELD(ofTypename); COMPARE_NODE_FIELD(constraints); 
COMPARE_NODE_FIELD(options); @@ -2268,6 +2271,44 @@ _equalColumnDef(const ColumnDef *a, const ColumnDef *b) } static bool +_equalPartitionBy(const PartitionBy *a, const PartitionBy *b) +{ + COMPARE_SCALAR_FIELD(strategy); + COMPARE_NODE_FIELD(partcols); + + return true; +} + +static bool +_equalPartitionElem(const PartitionElem *a, const PartitionElem *b) +{ + COMPARE_STRING_FIELD(name); + COMPARE_NODE_FIELD(opclass); + + return true; +} + +static bool +_equalPartitionValues(const PartitionValues *a, const PartitionValues *b) +{ + COMPARE_NODE_FIELD(listvalues); + COMPARE_NODE_FIELD(rangelbounds); + COMPARE_NODE_FIELD(rangeubounds); + + return true; +} + +static bool +_equalPartitionDef(const PartitionDef *a, const PartitionDef *b) +{ + COMPARE_NODE_FIELD(name); + COMPARE_NODE_FIELD(parent); + COMPARE_NODE_FIELD(values); + + return true; +} + +static bool _equalConstraint(const Constraint *a, const Constraint *b) { COMPARE_SCALAR_FIELD(contype); @@ -3120,6 +3161,18 @@ equal(const void *a, const void *b) case T_ColumnDef: retval = _equalColumnDef(a, b); break; + case T_PartitionBy: + retval = _equalPartitionBy(a, b); + break; + case T_PartitionElem: + retval = _equalPartitionElem(a, b); + break; + case T_PartitionValues: + retval = _equalPartitionValues(a, b); + break; + case T_PartitionDef: + retval = _equalPartitionDef(a, b); + break; case T_Constraint: retval = _equalConstraint(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2f417fe..3db5e8b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2017,8 +2017,11 @@ static void _outCreateStmtInfo(StringInfo str, const CreateStmt *node) { WRITE_NODE_FIELD(relation); + WRITE_NODE_FIELD(partitionOf); WRITE_NODE_FIELD(tableElts); WRITE_NODE_FIELD(inhRelations); + WRITE_NODE_FIELD(partValues); + WRITE_NODE_FIELD(partitionby); WRITE_NODE_FIELD(ofTypename); WRITE_NODE_FIELD(constraints); WRITE_NODE_FIELD(options); @@ -2258,6 +2261,44 @@ 
_outIndexElem(StringInfo str, const IndexElem *node) } static void +_outPartitionBy(StringInfo str, const PartitionBy *node) +{ + WRITE_NODE_TYPE("PARTITIONBY"); + + WRITE_CHAR_FIELD(strategy); + WRITE_NODE_FIELD(partcols); +} + +static void +_outPartitionElem(StringInfo str, const PartitionElem *node) +{ + WRITE_NODE_TYPE("PARTITIONELEM"); + + WRITE_STRING_FIELD(name); + WRITE_NODE_FIELD(opclass); +} + +static void +_outPartitionValues(StringInfo str, const PartitionValues *node) +{ + WRITE_NODE_TYPE("PARTITIONVALUES"); + + WRITE_NODE_FIELD(listvalues); + WRITE_NODE_FIELD(rangelbounds); + WRITE_NODE_FIELD(rangeubounds); +} + +static void +_outPartitionDef(StringInfo str, const PartitionDef *node) +{ + WRITE_NODE_TYPE("PARTITIONDEF"); + + WRITE_NODE_FIELD(name); + WRITE_NODE_FIELD(parent); + WRITE_NODE_FIELD(values); +} + +static void _outQuery(StringInfo str, const Query *node) { WRITE_NODE_TYPE("QUERY"); @@ -3240,6 +3281,18 @@ _outNode(StringInfo str, const void *obj) case T_IndexElem: _outIndexElem(str, obj); break; + case T_PartitionBy: + _outPartitionBy(str, obj); + break; + case T_PartitionElem: + _outPartitionElem(str, obj); + break; + case T_PartitionValues: + _outPartitionValues(str, obj); + break; + case T_PartitionDef: + _outPartitionDef(str, obj); + break; case T_Query: _outQuery(str, obj); break; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 6c21002..54b3fea 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -222,6 +222,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct ImportQual *importqual; InsertStmt *istmt; VariableSetStmt *vsetstmt; + PartitionElem *pelem; + PartitionValues *pvalues; + PartitionBy *partby; } %type <node> stmt schema_stmt @@ -519,6 +522,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); opt_frame_clause frame_extent frame_bound %type <str> opt_existing_window_name %type <boolean> opt_if_not_exists +%type 
<partby> OptPartitionBy PartitionBy +%type <list> part_params +%type <pelem> part_elem +%type <pvalues> PartitionValues +%type <list> ValuesList /* * Non-keyword token types. These are hard-wired into the "flex" lexer. @@ -582,7 +590,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); KEY LABEL LANGUAGE LARGE_P LAST_P LATERAL_P - LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL + LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LIST LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOCKED LOGGED MAPPING MATCH MATERIALIZED MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE @@ -2756,22 +2764,23 @@ copy_generic_opt_arg_list_item: *****************************************************************************/ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' - OptInherit OptWith OnCommitOption OptTableSpace + OptInherit OptPartitionBy OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; n->relation = $4; n->tableElts = $6; n->inhRelations = $8; + n->partitionby = $9; n->constraints = NIL; - n->options = $9; - n->oncommit = $10; - n->tablespacename = $11; + n->options = $10; + n->oncommit = $11; + n->tablespacename = $12; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '(' - OptTableElementList ')' OptInherit OptWith OnCommitOption + OptTableElementList ')' OptInherit OptPartitionBy OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); @@ -2779,15 +2788,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->relation = $7; n->tableElts = $9; n->inhRelations = $11; + n->partitionby = $12; n->constraints = NIL; - n->options = $12; - n->oncommit = $13; - n->tablespacename = $14; + n->options = $13; + n->oncommit = $14; + n->tablespacename = $15; n->if_not_exists = true; $$ = (Node *)n; } | CREATE OptTemp TABLE qualified_name OF any_name - 
OptTypedTableElementList OptWith OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionBy OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; @@ -2795,15 +2805,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->tableElts = $7; n->ofTypename = makeTypeNameFromNameList($6); n->ofTypename->location = @6; + n->partitionby = $8; n->constraints = NIL; - n->options = $8; - n->oncommit = $9; - n->tablespacename = $10; + n->options = $9; + n->oncommit = $10; + n->tablespacename = $11; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name - OptTypedTableElementList OptWith OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionBy OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; @@ -2811,10 +2822,43 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->tableElts = $10; n->ofTypename = makeTypeNameFromNameList($9); n->ofTypename->location = @9; + n->partitionby = $11; n->constraints = NIL; - n->options = $11; - n->oncommit = $12; - n->tablespacename = $13; + n->options = $12; + n->oncommit = $13; + n->tablespacename = $14; + n->if_not_exists = true; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name + PartitionValues OptPartitionBy OptWith OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $4->relpersistence = $2; + n->relation = $4; + n->partitionOf = $7; + n->partValues = $8; + n->partitionby = $9; + n->constraints = NIL; + n->options = $10; + n->oncommit = $11; + n->tablespacename = $12; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF qualified_name + PartitionValues OptPartitionBy OptWith OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $7->relpersistence = $2; + 
n->relation = $7; + n->partitionOf = $10; + n->partValues = $11; + n->partitionby = $12; + n->constraints = NIL; + n->options = $13; + n->oncommit = $14; + n->tablespacename = $15; n->if_not_exists = true; $$ = (Node *)n; } @@ -2896,6 +2940,7 @@ TypedTableElement: | TableConstraint { $$ = $1; } ; + columnDef: ColId Typename create_generic_options ColQualList { ColumnDef *n = makeNode(ColumnDef); @@ -3357,6 +3402,76 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; } | /*EMPTY*/ { $$ = NIL; } ; +/* Optional partition key definition: PARTITION BY {RANGE | LIST} ON (column [opclass], ...); yields NULL when the clause is absent */ +OptPartitionBy: PartitionBy { $$ = $1; } + | /*EMPTY*/ { $$ = NULL; } + ; + +PartitionBy: PARTITION BY RANGE ON '(' part_params ')' + { + PartitionBy *n = makeNode(PartitionBy); + + n->strategy = PARTITION_STRAT_RANGE; + n->partcols = $6; + + $$ = n; + } + | PARTITION BY LIST ON '(' part_params ')' + { + PartitionBy *n = makeNode(PartitionBy); + + n->strategy = PARTITION_STRAT_LIST; + n->partcols = $6; + + $$ = n; + } + ; + +part_params: part_elem { $$ = list_make1($1); } + | part_params ',' part_elem { $$ = lappend($1, $3); } + ; + +part_elem: ColId opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = $1; + n->opclass = $2; + $$ = n; + } + ; + +/* Values held by a partition: FOR VALUES [IN] (...) fills listvalues; FOR VALUES BETWEEN (...) AND (...) fills rangelbounds and rangeubounds */ +PartitionValues: + FOR VALUES opt_in '(' ValuesList ')' + { + PartitionValues *n = makeNode(PartitionValues); + + n->listvalues = $5; + n->rangelbounds = NIL; + n->rangeubounds = NIL; + $$ = n; + } + | FOR VALUES BETWEEN '(' ValuesList ')' AND '(' ValuesList ')' + { + PartitionValues *n = makeNode(PartitionValues); + + n->listvalues = NIL; + n->rangelbounds = $5; + n->rangeubounds = $9; + $$ = n; + } + ; + +opt_in: IN_P {} + | /*EMPTY*/ {} + ; + +ValuesList: + a_expr { $$ = list_make1($1); } + | ValuesList ',' a_expr { $$ = lappend($1, $3); } + ; + /* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */ OptWith: WITH reloptions { $$ = $2; } @@ -13289,6 +13404,7 @@ unreserved_keyword: |
LAST_P | LEAKPROOF | LEVEL + | LIST | LISTEN | LOAD | LOCAL diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 7829bcb..4e77256 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -2760,6 +2760,8 @@ ParseExprKindName(ParseExprKind exprKind) return "EXECUTE"; case EXPR_KIND_TRIGGER_WHEN: return "WHEN"; + case EXPR_KIND_PARTITION_VALUES: + return "PARTITION FOR VALUES"; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index c29f106..acea185 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -83,6 +83,7 @@ typedef struct List *alist; /* "after list" of things to do after creating * the table */ IndexStmt *pkey; /* PRIMARY KEY index, if any */ + List *partitionElts; /* table elements built for CREATE TABLE ... PARTITION OF ... */ } CreateStmtContext; /* State shared by transformCreateSchemaStmt and its subroutines */ @@ -123,6 +124,11 @@ static void transformConstraintAttrs(CreateStmtContext *cxt, List *constraintList); static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column); static void setSchemaName(char *context_schema, char **stmt_schema_name); +static void transformPartitionOf(CreateStmtContext *cxt, + RangeVar *parent, + PartitionValues *values); +static PartitionValues* transformPartitionValues(CreateStmtContext *cxt, + PartitionValues *values); /* @@ -217,14 +223,37 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) cxt.blist = NIL; cxt.alist = NIL; cxt.pkey = NULL; + cxt.partitionElts = NIL; cxt.hasoids = interpretOidsOption(stmt->options, true); Assert(!stmt->ofTypename || !stmt->inhRelations); /* grammar enforces */ + Assert(!stmt->ofTypename || !stmt->partitionOf); /* grammar enforces */ + Assert(!stmt->inhRelations || !stmt->partitionOf); /* grammar enforces */ if (stmt->ofTypename) transformOfType(&cxt, stmt->ofTypename); /* + * Transform
"PARTITION OF <parent> FOR VALUES ..." + * 1) + * (LIKE <parent> INCLUDING ALL) + * + * 2) add to cxt->alist, an AlterTableCmd of subtype + * AT_AttachPartition on the <parent> which performs: + * + * ALTER TABLE <thisrel> INHERITS(<parent>), and store the partition + * values (FOR VALUES) into catalog pg_partition + */ + if (stmt->partitionOf) + { + transformPartitionOf(&cxt, stmt->partitionOf, stmt->partValues); + + /* override based on the result of transformPartitionOf */ + stmt->tableElts = cxt.partitionElts; + stmt->relation->schemaname = cxt.relation->schemaname; + } + + /* * Run through each primary element in the table creation clause. Separate * column defs from constraints, and do preliminary analysis. */ @@ -2831,3 +2860,104 @@ setSchemaName(char *context_schema, char **stmt_schema_name) "different from the one being created (%s)", *stmt_schema_name, context_schema))); } + +/* + * transformPartitionOf + * + * transform PARTITION OF <parent> into: + * (LIKE <parent> INCLUDING ALL) + */ +static void +transformPartitionOf(CreateStmtContext *cxt, + RangeVar *parent, + PartitionValues *values) +{ + TableLikeClause *like; + PartitionDef *partition; + AlterTableCmd *attachcmd; + AlterTableStmt *alter; + + /* Use the same schema as the parent if not specified. 
*/ + if (cxt->relation->schemaname == NULL) + cxt->relation->schemaname = parent->schemaname; + + like = makeNode(TableLikeClause); + like->relation = parent; + like->options = CREATE_TABLE_LIKE_ALL; + + cxt->partitionElts = list_make1(like); + + /* + * following is supposed to perform: + * ALTER TABLE <partition> INHERITS(<parent>) + * + * and arrange to store the values into pg_partition + */ + partition = makeNode(PartitionDef); + partition->name = cxt->relation; + partition->parent = parent; + partition->values = transformPartitionValues(cxt, values); + + attachcmd = makeNode(AlterTableCmd); + attachcmd->subtype = AT_AttachPartition; + attachcmd->def = (Node *) partition; + + alter = makeNode(AlterTableStmt); + alter->relation = parent; + alter->cmds = list_make1(attachcmd); + alter->relkind = OBJECT_TABLE; + + cxt->alist = lappend(cxt->alist, alter); +} + +/* + * transformPartitionValues + * + * transform partition value as returned by grammar into something + * we can evaluate later to store into pg_partition + */ +static PartitionValues* +transformPartitionValues(CreateStmtContext *cxt, PartitionValues *values) +{ + ListCell *cell1, *cell2; + int len1, len2; + PartitionValues *result = (PartitionValues *) makeNode(PartitionValues); + + if(values->listvalues) + { + foreach(cell1, values->listvalues) + { + Node *value = (Node *) lfirst(cell1); + + result->listvalues = lappend(result->listvalues, + transformExpr(cxt->pstate, value, + EXPR_KIND_PARTITION_VALUES)); + } + } + else + { + len1 = list_length(values->rangelbounds); + len2 = list_length(values->rangeubounds); + + /* shouldn't get here without this holding true */ + Assert(len1 == len2); + + forboth(cell1, values->rangelbounds, cell2, values->rangeubounds) + { + Node *value; + + /* rangelbounds */ + value = (Node *) lfirst(cell1); + result->rangelbounds = lappend(result->rangelbounds, + transformExpr(cxt->pstate, value, + EXPR_KIND_PARTITION_VALUES)); + /* rangeubounds */ + value = (Node *) 
lfirst(cell2); + result->rangeubounds = lappend(result->rangeubounds, + transformExpr(cxt->pstate, value, + EXPR_KIND_PARTITION_VALUES)); + } + } + + return result; +} diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index c037b05..ded01aa 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -31,6 +31,9 @@ #include "postgres.h" #include "access/hash.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/pg_range.h" #include "lib/stringinfo.h" #include "libpq/pqformat.h" #include "utils/builtins.h" diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 1db4ba8..c1958fd 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -30,6 +30,7 @@ #include <fcntl.h> #include <unistd.h> +#include "access/nbtree.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/reloptions.h" @@ -46,9 +47,13 @@ #include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partition_fn.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_tablespace.h" @@ -282,7 +287,9 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); static void unlink_initfile(const char *initfilename); - +static void RelationBuildPartitionKey(Relation relation); +static void RelationBuildPartitionInfo(Relation relation); +static int partition_cmp(const void *a, const void *b, void *arg); /* * ScanPgRelation @@ -1055,6 +1062,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) else relation->rd_rsdesc = 
NULL; + if (relation->rd_rel->relispartitioned) + { + RelationBuildPartitionKey(relation); + RelationBuildPartitionInfo(relation); + } + else + { + relation->rd_partnatts = 0; + relation->rd_partstrategy = '\0'; + relation->rd_partattrs = NULL; + relation->rd_partclass = NULL; + relation->rd_partitioninfo = NULL; + } + /* * if it's an index, initialize index-related information */ @@ -2014,6 +2035,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) bms_free(relation->rd_keyattr); bms_free(relation->rd_idattr); FreeTriggerDesc(relation->trigdesc); + if(relation->rd_partattrs) + pfree(relation->rd_partattrs); + if(relation->rd_partclass) + pfree(relation->rd_partclass); + if(relation->rd_partattcmpfn) + pfree(relation->rd_partattcmpfn); if (relation->rd_options) pfree(relation->rd_options); if (relation->rd_indextuple) @@ -2026,6 +2053,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_rulescxt); if (relation->rd_rsdesc) MemoryContextDelete(relation->rd_rsdesc->rscxt); + if (relation->rd_partitioninfo) + MemoryContextDelete(relation->rd_partitioninfo->picxt); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); pfree(relation); @@ -4439,6 +4468,271 @@ RelationGetExclusionInfo(Relation indexRelation, MemoryContextSwitchTo(oldcxt); } +/* + * RelationBuildPartitionKey + * + * Initializes rd_partattrs and rd_partclass + */ +static void +RelationBuildPartitionKey(Relation relation) +{ + HeapTuple tuple; + Form_pg_partitioned_rel prelform; + int partnatts; + char partstrategy; + oidvector *partclass; + int i; + Datum datum; + bool isnull; + MemoryContext oldcxt; + StrategyNumber stratno; + + tuple = SearchSysCache1(PARTITIONEDRELID, + RelationGetRelid(relation)); + + /* if no tuple found, it means the entry was just dropped */ + if (!HeapTupleIsValid(tuple)) + return; + + prelform = (Form_pg_partitioned_rel) GETSTRUCT(tuple); + partnatts = prelform->partnatts; + partstrategy = 
prelform->partstrategy; + /* Extract partclass from the pg_partitioned_rel tuple */ + datum = SysCacheGetAttr(PARTITIONEDRELID, tuple, + Anum_pg_partitioned_rel_partclass, &isnull); + Assert(!isnull); + partclass = (oidvector *) DatumGetPointer(datum); + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_partnatts = partnatts; + relation->rd_partstrategy = partstrategy; + relation->rd_partattrs = (AttrNumber *) palloc(partnatts * sizeof(AttrNumber)); + relation->rd_partclass = (Oid *) palloc(partnatts * sizeof(Oid)); + relation->rd_partattcmpfn = (FmgrInfo *) palloc(partnatts * sizeof(FmgrInfo)); + + MemoryContextSwitchTo(oldcxt); + + switch(partstrategy) + { + case 'l': + stratno = BTEqualStrategyNumber; + break; + + case 'r': + stratno = BTLessStrategyNumber; + break; + } + + for(i = 0; i < partnatts; i++) + { + HeapTuple tuple; + Form_pg_opclass form; + AttrNumber partattno; + Oid cmpid; + Oid opfamily; + Oid opcintype; + Oid typid; + + relation->rd_partattrs[i] = (AttrNumber) prelform->partkey.values[i]; + relation->rd_partclass[i] = (Oid) partclass->values[i]; + + tuple = SearchSysCache(CLAOID, + ObjectIdGetDatum(relation->rd_partclass[i]), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for opclass %u", + relation->rd_partclass[i]); + form = (Form_pg_opclass) GETSTRUCT(tuple); + opfamily = form->opcfamily; + opcintype = form->opcintype; + ReleaseSysCache(tuple); + + partattno = relation->rd_partattrs[i]; + typid = relation->rd_att->attrs[partattno - 1]->atttypid; + + cmpid = get_opfamily_proc(opfamily, + (opcintype != InvalidOid ? opcintype : typid), + (opcintype != InvalidOid ? 
opcintype : typid), + BTORDER_PROC); + + fmgr_info(cmpid, &relation->rd_partattcmpfn[i]); + } + + ReleaseSysCache(tuple); +} + +static int +partition_cmp(const void *a, const void *b, void *arg) /* qsort_arg() comparator: orders Partition pointers by their rangemaxs, column by column, calling the FmgrInfo array passed as arg */ +{ + const Partition *lhs = *(const Partition **) a; + const Partition *rhs = *(const Partition **) b; + FmgrInfo *cmpfn = (FmgrInfo *) arg; + int i, result; + + for (i = 0; i < lhs->rangenbounds; i++) + { + result = DatumGetInt32(FunctionCall2Coll(&cmpfn[i], + DEFAULT_COLLATION_OID, + lhs->rangemaxs[i], rhs->rangemaxs[i])); + + /* multicolumn range partitions: move on to the next column only when this one compares equal */ + if (!result) + continue; + else + return result; + } + + /* every compared upper bound was equal (e.g. a partition compared with itself) */ + return 0; +} + +/* + * RelationBuildPartitionInfo + * + * Initializes rd_partitioninfo: scans pg_partition for the partitions of this relation and stores their OIDs and range bounds or list values in a PartitionInfo allocated in its own memory context + */ +static void +RelationBuildPartitionInfo(Relation relation) +{ + MemoryContext picxt; + MemoryContext oldcxt = CurrentMemoryContext; + Partition **partitions = NULL; /* NOTE(review): assigned inside PG_TRY and read in PG_CATCH but, unlike pinfo, not declared volatile -- confirm */ + PartitionInfo *volatile pinfo = NULL; + + /* + * Create a memory context to hold everything associated with this + * relation's partitions. This makes it easy to clean up during a + * relcache flush. + */ + picxt = AllocSetContextCreate(CacheMemoryContext, + "relation partition info", + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_SMALL_MAXSIZE); + + /* + * Since picxt lives under CacheMemoryContext, it is long-lived. Use + * a PG_TRY block to ensure it'll get freed if we fail partway through.
+ */ + PG_TRY(); + { + Relation partitionRel; + Form_pg_partition form; + SysScanDesc scan; + ScanKeyData skey[1]; + HeapTuple tuple; + Oid parentid; + List *partitionids = NIL; + ListCell *cell; + int partnatts = relation->rd_partnatts; + FmgrInfo *partcmpfn = relation->rd_partattcmpfn; + int numparts = 0; + int i, j; + + partitionRel = heap_open(PartitionRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_partition_partparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + scan = systable_beginscan(partitionRel, PartitionParentIndexId, true, + NULL, 1, skey); + + while ((tuple = systable_getnext(scan)) != NULL) + { + form = (Form_pg_partition) GETSTRUCT(tuple); + parentid = form->partparent; + + if (parentid == RelationGetRelid(relation)) + { + partitionids = lappend_oid(partitionids, form->partitionid); + numparts++; + } + } + systable_endscan(scan); + heap_close(partitionRel, AccessShareLock); + + /* partitions found; NOTE(review): when numparts is 0 nothing below runs, pinfo stays NULL and picxt is neither attached nor deleted -- confirm whether it is leaked */ + if(numparts) + { + /* read info about all the partitions from the catalog (still in the caller's memory context; the switch to picxt happens below) */ + partitions = (Partition **) palloc0(numparts * sizeof(Partition *)); + + i = 0; + foreach(cell, partitionids) + { + Oid partitionid = lfirst_oid(cell); + + partitions[i++] = GetPartition(relation, partitionid); + } + + /* + * Build a PartitionInfo to put into the relation descriptor + */ + oldcxt = MemoryContextSwitchTo(picxt); + pinfo = MemoryContextAllocZero(picxt, sizeof(PartitionInfo)); + pinfo->picxt = picxt; + + pinfo->numpartitions = numparts; + pinfo->strategy = relation->rd_partstrategy; + pinfo->oids = (Oid *) palloc0(numparts * sizeof(Oid)); + + switch(relation->rd_partstrategy) + { + case 'r': /* NOTE(review): rangemins/rangemaxs are indexed below with no visible allocation of the outer arrays (listvalues is palloc'd explicitly in the 'l' case) -- confirm PartitionInfo declares them as fixed-size arrays */ + for(i = 0; i < partnatts; i++) + { + pinfo->rangemins[i] = (Datum *) palloc0(numparts * sizeof(Datum)); + pinfo->rangemaxs[i] = (Datum *) palloc0(numparts * sizeof(Datum)); + } + + /* sort on rangemax using comparator partition_cmp() */ + qsort_arg(partitions, numparts, sizeof(Partition *), + partition_cmp,
partcmpfn); + for(i = 0; i < numparts; i++) + { + pinfo->oids[i] = partitions[i]->oid; + + for(j = 0; j < partnatts; j++) + { + pinfo->rangemins[j][i] = partitions[i]->rangemins[j]; /* NOTE(review): Datum copied by value only; presumably unsafe for pass-by-reference key types built outside picxt -- confirm */ + pinfo->rangemaxs[j][i] = partitions[i]->rangemaxs[j]; + } + } + break; + + case 'l': + /* simply copy list partitions; no optimizations devised yet. NOTE(review): only the listvalues pointer is copied, not the array it points to -- confirm it lives as long as picxt */ + pinfo->listnvalues = (int *) palloc0(numparts * sizeof(int)); + pinfo->listvalues = (Datum **) palloc0(numparts * sizeof(Datum *)); + for(i = 0; i < numparts; i++) + { + pinfo->oids[i] = partitions[i]->oid; + pinfo->listnvalues[i] = partitions[i]->listnvalues; + pinfo->listvalues[i] = partitions[i]->listvalues; + } + break; + + } /* switch(relation->rd_partstrategy) */ + + MemoryContextSwitchTo(oldcxt); + pfree(partitions); + } /* if(numparts) */ + + } + PG_CATCH(); + { + /* Delete picxt, first making sure it isn't active */ + MemoryContextSwitchTo(oldcxt); + pfree(partitions); /* NOTE(review): partitions is still NULL if the error was raised before the palloc0 above -- confirm pfree() accepts NULL */ + MemoryContextDelete(picxt); + PG_RE_THROW(); + } + PG_END_TRY(); + + /* Success --- attach the partition info to the relation descriptor (pinfo is NULL when no partitions were found) */ + relation->rd_partitioninfo = pinfo; +} /* * Routines to support ereport() reports of relation-related errors diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index bd27168..50528b7 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -47,6 +47,8 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_partition.h" +#include "catalog/pg_partitioned_rel.h" #include "catalog/pg_proc.h" #include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" @@ -565,6 +567,28 @@ static const struct cachedesc cacheinfo[] = { }, 8 }, + {PartitionRelationId, /* PARTITIONID */ + PartitionIdIndexId, + 1, + { + Anum_pg_partition_partitionid, + 0, + 0, + 0 + }, + 64 + }, + {PartitionedRelRelationId, /* PARTITIONEDRELID */ + PartitionedRelrelidIndexId, + 1, + { + Anum_pg_partitioned_rel_partrelid, + 0, + 0,
+ 0 + }, + 64 + }, {ProcedureRelationId, /* PROCNAMEARGSNSP */ ProcedureNameArgsNspIndexId, 3, diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 6481ac8..d44c5f0 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -148,6 +148,8 @@ typedef enum ObjectClass OCLASS_EXTENSION, /* pg_extension */ OCLASS_EVENT_TRIGGER, /* pg_event_trigger */ OCLASS_POLICY, /* pg_policy */ + OCLASS_PARTITIONED_REL, /* pg_partitioned_rel */ + OCLASS_PARTITION, /* pg_partition */ MAX_OCLASS /* MUST BE LAST */ } ObjectClass; diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index e5c204d..6f7c1a5 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -130,5 +130,14 @@ extern void CheckAttributeType(const char *attname, Oid atttypid, Oid attcollation, List *containing_rowtypes, bool allow_system_table_mods); - +extern void StorePartitionKey(Relation rel, int nattrs, + AttrNumber *partKeyAttrNumbers, + Oid *partClassOids, + char strategy); +extern void RemovePartitionKeyByRelId(Oid relid); +extern void RemovePartitionDefByRelId(Oid relid); +extern void StorePartitionValues(Relation childrel, + Relation parentrel, + int listnvalues, + Datum *datum); #endif /* HEAP_H */ diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index a680229..fc77502 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -305,6 +305,15 @@ DECLARE_UNIQUE_INDEX(pg_policy_oid_index, 3257, on pg_policy using btree(oid oid DECLARE_UNIQUE_INDEX(pg_policy_polrelid_polname_index, 3258, on pg_policy using btree(polrelid oid_ops, polname name_ops)); #define PolicyPolrelidPolnameIndexId 3258 +DECLARE_UNIQUE_INDEX(pg_partitioned_rel_partrelid_index, 3278, on pg_partitioned_rel using btree(partrelid oid_ops)); +#define PartitionedRelrelidIndexId 3278 + +DECLARE_UNIQUE_INDEX(pg_partition_partitionid_index, 3280, on pg_partition using btree(partitionid oid_ops)); 
+#define PartitionIdIndexId 3280 + +DECLARE_INDEX(pg_partition_parent_index, 3281, on pg_partition using btree(partparent oid_ops)); +#define PartitionParentIndexId 3281 + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 8b4c35c..7ae11a1 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -65,6 +65,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO bool relhasrules; /* has (or has had) any rules */ bool relhastriggers; /* has (or has had) any TRIGGERs */ bool relhassubclass; /* has (or has had) derived classes */ + bool relispartitioned; /* has (or has had) partition key */ bool relrowsecurity; /* row security is enabled or not */ bool relispopulated; /* matview currently holds query results */ char relreplident; /* see REPLICA_IDENTITY_xxx constants */ @@ -95,7 +96,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 30 +#define Natts_pg_class 31 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -119,13 +120,14 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relhasrules 21 #define Anum_pg_class_relhastriggers 22 #define Anum_pg_class_relhassubclass 23 -#define Anum_pg_class_relrowsecurity 24 -#define Anum_pg_class_relispopulated 25 -#define Anum_pg_class_relreplident 26 -#define Anum_pg_class_relfrozenxid 27 -#define Anum_pg_class_relminmxid 28 -#define Anum_pg_class_relacl 29 -#define Anum_pg_class_reloptions 30 +#define Anum_pg_class_relispartitioned 24 +#define Anum_pg_class_relrowsecurity 25 +#define Anum_pg_class_relispopulated 26 +#define Anum_pg_class_relreplident 27 +#define Anum_pg_class_relfrozenxid 28 +#define Anum_pg_class_relminmxid 29 +#define Anum_pg_class_relacl 30 +#define Anum_pg_class_reloptions 31 /* ---------------- * initial contents of pg_class @@ 
-140,13 +142,13 @@ typedef FormData_pg_class *Form_pg_class; * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId; * similarly, "1" in relminmxid stands for FirstMultiXactId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 27 0 t f f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f f t n 3 1 _null_ _null_ )); DESCR(""); diff --git a/src/include/catalog/pg_partition.h b/src/include/catalog/pg_partition.h new file mode 100644 index 0000000..46a69c2 --- /dev/null +++ b/src/include/catalog/pg_partition.h @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------- + * + * pg_partition.h + * definition of the system "partition" relation (pg_partition) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * + * $PostgreSQL: pgsql/src/include/catalog/pg_partition.h $ + * + * NOTES + * the genbki.sh script reads this file and generates .bki + * information from the DATA() statements. 
+ * + *------------------------------------------------------------------------- + */ +#ifndef PG_PARTITION_H +#define PG_PARTITION_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_partition definition. cpp turns this into + * typedef struct FormData_pg_partition + * ---------------- + */ +#define PartitionRelationId 3279 + +CATALOG(pg_partition,3279) BKI_WITHOUT_OIDS +{ + Oid partitionid; /* partition oid */ + Oid partparent; /* parent oid */ + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + anyarray partlistvalues; /* list of allowed values of the only + * partition column */ + anyarray partrangebounds; /* list of bounds of ranges of + * allowed values per partition key + * column */ +#endif +} FormData_pg_partition; + +/* ---------------- + * Form_pg_partition corresponds to a pointer to a tuple with + * the format of pg_partition relation. + * ---------------- + */ +typedef FormData_pg_partition *Form_pg_partition; + +/* ---------------- + * compiler constants for pg_partition + * ---------------- + */ +#define Natts_pg_partition 4 +#define Anum_pg_partition_partitionid 1 +#define Anum_pg_partition_partparent 2 +#define Anum_pg_partition_partlistvalues 3 +#define Anum_pg_partition_partrangebounds 4 + +#endif /* PG_PARTITION_H */ diff --git a/src/include/catalog/pg_partition_fn.h b/src/include/catalog/pg_partition_fn.h new file mode 100644 index 0000000..2734e42 --- /dev/null +++ b/src/include/catalog/pg_partition_fn.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * pg_partition_fn.h + * prototypes for functions in catalog/pg_partition.c + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_partition_fn.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PARTITION_FN_H +#define 
PG_PARTITION_FN_H + +#include "nodes/pg_list.h" +#include "storage/lock.h" +#include "utils/rel.h" + +/* Bound info of a single partition */ +typedef struct Partition +{ + Oid oid; + int listnvalues; + int rangenbounds; + Datum *listvalues; + Datum rangemins[PARTITION_MAX_KEYS]; + Datum rangemaxs[PARTITION_MAX_KEYS]; +} Partition; + +extern Partition *GetPartition(Relation parentrel, Oid partitionid); + +#endif /* PG_PARTITION_FN_H */ diff --git a/src/include/catalog/pg_partitioned_rel.h b/src/include/catalog/pg_partitioned_rel.h new file mode 100644 index 0000000..5dd9d16 --- /dev/null +++ b/src/include/catalog/pg_partitioned_rel.h @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------- + * + * pg_partitioned_rel.h + * definition of the system "partitioned" relation (pg_partitioned_rel) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * + * $PostgreSQL: pgsql/src/include/catalog/pg_partitioned_rel.h $ + * + * NOTES + * the genbki.sh script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PARTITIONED_REL_H +#define PG_PARTITIONED_REL_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_partitioned_rel definition. 
cpp turns this into + * typedef struct FormData_pg_partitioned_rel + * ---------------- + */ +#define PartitionedRelRelationId 3277 + +CATALOG(pg_partitioned_rel,3277) BKI_WITHOUT_OIDS +{ + Oid partrelid; /* partitioned table oid */ + char partstrategy; /* partitioning strategy */ + int16 partnatts; /* number of partition columns */ + + /* variable-length fields start here, but we allow direct access to partkey */ + int2vector partkey; /* column numbers of partition columns */ + +#ifdef CATALOG_VARLEN + oidvector partclass; /* operator class to compare keys */ +#endif +} FormData_pg_partitioned_rel; + +/* ---------------- + * Form_pg_partitioned_rel corresponds to a pointer to a tuple with + * the format of pg_partitioned_rel relation. + * ---------------- + */ +typedef FormData_pg_partitioned_rel *Form_pg_partitioned_rel; + +/* ---------------- + * compiler constants for pg_partitioned_rel + * ---------------- + */ +#define Natts_pg_partitioned_rel 5 +#define Anum_pg_partitioned_rel_partrelid 1 +#define Anum_pg_partitioned_rel_partstrategy 2 +#define Anum_pg_partitioned_rel_partnatts 3 +#define Anum_pg_partitioned_rel_partkey 4 +#define Anum_pg_partitioned_rel_partclass 5 + +#endif /* PG_PARTITIONED_REL_H */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index cf586fe..dd94810 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -40,6 +40,8 @@ extern bool CheckIndexCompatible(Oid oldId, List *attributeList, List *exclusionOpNames); extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); +extern Oid GetIndexOpClass(List *opclass, Oid attrType, + const char *accessMethodName, Oid accessMethodId); /* commands/functioncmds.c */ extern Oid CreateFunction(CreateFunctionStmt *stmt, const char *queryString); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 40fde83..a795b0d 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -369,5 +369,6 @@ extern 
void RegisterExprContextCallback(ExprContext *econtext, extern void UnregisterExprContextCallback(ExprContext *econtext, ExprContextCallbackFunction function, Datum arg); - +extern Relation ExecFindPartition(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot); #endif /* EXECUTOR_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 97ef0fc..1eca560 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -413,6 +413,10 @@ typedef enum NodeTag T_XmlSerialize, T_WithClause, T_CommonTableExpr, + T_PartitionBy, + T_PartitionElem, + T_PartitionValues, + T_PartitionDef, /* * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index ac13302..a3abfcb 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -654,6 +654,62 @@ typedef struct XmlSerialize int location; /* token location, or -1 if unknown */ } XmlSerialize; +/* + * Partitioning related definitions + */ + +/* + * PartitionElem - partition key columns (used in PARTITION BY ... ON + * of CREATE TABLE) + * + * 'name' is the name of the table column to use as part of the partition + * key of the table and 'opclass' is the operator class to be used with + * this column. 
+ */ +typedef struct PartitionElem +{ + NodeTag type; + char *name; /* name of column to partition on, or NULL */ + List *opclass; /* name of desired opclass; NIL = default */ +} PartitionElem; + +/* + * PartitionBy - partition key definition including the strategy + */ +#define PARTITION_STRAT_LIST 'l' +#define PARTITION_STRAT_RANGE 'r' + +typedef struct PartitionBy +{ + NodeTag type; + char strategy; + List *partcols; +} PartitionBy; + +/* + * PartitionValues - partition bounding values + * + * Currently, notions of list of values (for a single column) + * and range of values (for multiple columns) are supported + */ +typedef struct PartitionValues +{ + NodeTag type; + List *listvalues; + List *rangelbounds; + List *rangeubounds; +} PartitionValues; + +/* + * PartitionDef - a single partition definition + */ +typedef struct PartitionDef +{ + NodeTag type; + RangeVar *name; + RangeVar *parent; + PartitionValues *values; +} PartitionDef; /**************************************************************************** * Nodes for a Query tree @@ -1347,6 +1403,7 @@ typedef enum AlterTableType AT_ReplicaIdentity, /* REPLICA IDENTITY */ AT_EnableRowSecurity, /* ENABLE ROW SECURITY */ AT_DisableRowSecurity, /* DISABLE ROW SECURITY */ + AT_AttachPartition, /* PARTITION OF parent FOR VALUES */ AT_GenericOptions /* OPTIONS (...) 
*/ } AlterTableType; @@ -1574,9 +1631,13 @@ typedef struct CreateStmt { NodeTag type; RangeVar *relation; /* relation to create */ + RangeVar *partitionOf; /* parent named by PARTITION OF */ List *tableElts; /* column definitions (list of ColumnDef) */ List *inhRelations; /* relations to inherit from (list of * inhRelation) */ + PartitionValues *partValues; /* bounding values (FOR VALUES) of + * the partition being created */ + PartitionBy *partitionby; /* partition key definition */ TypeName *ofTypename; /* OF typename */ List *constraints; /* constraints (list of Constraint nodes) */ List *options; /* options from WITH clause */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 7c243ec..26764ed 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -223,6 +223,7 @@ PG_KEYWORD("left", LEFT, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("level", LEVEL, UNRESERVED_KEYWORD) PG_KEYWORD("like", LIKE, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("limit", LIMIT, RESERVED_KEYWORD) +PG_KEYWORD("list", LIST, UNRESERVED_KEYWORD) PG_KEYWORD("listen", LISTEN, UNRESERVED_KEYWORD) PG_KEYWORD("load", LOAD, UNRESERVED_KEYWORD) PG_KEYWORD("local", LOCAL, UNRESERVED_KEYWORD) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 3103b71..bb6e005 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -63,7 +63,8 @@ typedef enum ParseExprKind EXPR_KIND_INDEX_PREDICATE, /* index predicate */ EXPR_KIND_ALTER_COL_TRANSFORM, /* transform expr in ALTER COLUMN TYPE */ EXPR_KIND_EXECUTE_PARAMETER, /* parameter value in EXECUTE */ - EXPR_KIND_TRIGGER_WHEN /* WHEN condition in CREATE TRIGGER */ + EXPR_KIND_TRIGGER_WHEN, /* WHEN condition in CREATE TRIGGER */ + EXPR_KIND_PARTITION_VALUES /* FOR VALUES in CREATE TABLE ... 
PARTITION OF */ } ParseExprKind; diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 5cfc0ae..802022d 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -46,6 +46,12 @@ #define INDEX_MAX_KEYS 32 /* + * Maximum number of columns in a partition key. This is entirely arbitrary + * at this point + */ +#define PARTITION_MAX_KEYS 16 + +/* * Set the upper and lower bounds of sequence values. */ #define SEQ_MAXVALUE INT64CONST(0x7FFFFFFFFFFFFFFF) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 6bd786d..6fa11e6 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -45,7 +45,6 @@ typedef struct LockInfoData typedef LockInfoData *LockInfo; - /* * Cached lookup information for the frequently used index access method * functions, defined by the pg_am row associated with an index relation. @@ -63,6 +62,18 @@ typedef struct RelationAmInfo FmgrInfo amcanreturn; } RelationAmInfo; +/* Details of partitions of a partitioned relation */ +typedef struct PartitionInfo +{ + MemoryContext picxt; + int numpartitions; + char strategy; + Oid *oids; + int *listnvalues; + Datum **listvalues; + Datum *rangemins[PARTITION_MAX_KEYS]; + Datum *rangemaxs[PARTITION_MAX_KEYS]; +} PartitionInfo; /* * Here are the contents of a relation cache entry. 
@@ -118,6 +129,17 @@ typedef struct RelationData Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ Bitmapset *rd_idattr; /* included in replica identity index */ + /* Partition key info */ + int rd_partnatts; /* number of partition key attributes */ + char rd_partstrategy;/* list or range partitions */ + AttrNumber *rd_partattrs; /* partition key attributes */ + Oid *rd_partclass; /* OIDs of the opclass for each key attribute */ + FmgrInfo *rd_partattcmpfn; /* compare functions for each key attribute */ + + /* Partition info */ + PartitionInfo *rd_partitioninfo; /* Partitions making up this partitioned + * relation */ + /* * rd_options is set whenever rd_rel is loaded into the relcache entry. * Note that you can NOT look into rd_rel for this data. NULL means "use diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index ba0b090..7377309 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -72,6 +72,8 @@ enum SysCacheIdentifier OPEROID, OPFAMILYAMNAMENSP, OPFAMILYOID, + PARTITIONID, + PARTITIONEDRELID, PROCNAMEARGSNSP, PROCOID, RANGETYPE,
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 33b172b..0cac472 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -42,6 +42,7 @@ #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/pg_collation.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" @@ -2702,3 +2703,317 @@ EvalPlanQualEnd(EPQState *epqstate) epqstate->planstate = NULL; epqstate->origslot = NULL; } + +/* + * ExecFindPartition - find a partition to insert a tuple (in slot) + * + * XXX - if not found, returns itself. Currently only good for the + * range strategy (including multi-column range strategy). + */ +Relation +ExecFindPartition(ResultRelInfo *resultRelInfo, TupleTableSlot *slot) +{ + Relation rel = resultRelInfo->ri_RelationDesc; + Relation partition; + + /* partition key and bound info */ + int partnatts = rel->rd_partnatts; + AttrNumber *partattrs = rel->rd_partattrs; + FmgrInfo *partattcmpfn = rel->rd_partattcmpfn; + PartitionInfo *pinfo = rel->rd_partitioninfo; + + FunctionCallInfoData fcinfo; + + /* input tuple to find a home for */ + Datum *values = slot->tts_values; + bool *isnull = slot->tts_isnull; + + AttrNumber attno; + AttrNumber finalattno; + int attidx; + int finalattidx; + + int least, highest; + int lobound, hibound; + int newlo, newhi; + int probe; + int cmpval; + bool bsearch_aborted; + + int j; + + /* let's get this out of the way */ + for(attidx = 0; attidx < partnatts; attidx++) + { + AttrNumber attno = partattrs[attidx]; + + if(isnull[attno - 1]) + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("value in partition key column \"%s\" cannot be null", + NameStr(rel->rd_att->attrs[attno - 1]->attname)))); + } + + /* + * Let the binary search begin! + * The invariant is kind of complicated because of incorporating + * the logic for multi-column range strategy. 
+ */ + attidx = 0; + finalattno = partattrs[0]; + finalattidx = attidx; + + least = lobound = 0; + highest = hibound = pinfo->numpartitions - 1; + + newlo = -1; + newhi = pinfo->numpartitions; + + bsearch_aborted = false; + while(lobound < hibound) + { + bool isless; + probe = (lobound + hibound) / 2; + + attno = partattrs[attidx]; + InitFunctionCallInfoData(fcinfo, &(partattcmpfn[attidx]), 2, + DEFAULT_COLLATION_OID, NULL, NULL); + /* tuple(attno) < rangemax(attidx)? */ + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemaxs[attidx][probe]; + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + if(!cmpval) + { + /* + * tuple(attno) == rangemax(attidx). + */ + + /* + * The last attribute matching a rangemax means we are + * pretty close to the partition we're looking for - namely + * probe+1 + */ + if(attidx == partnatts - 1) + { + /* + * memorize to use after the binary search ends + */ + probe = probe + 1; + + if(probe > highest) + probe = highest; + + finalattidx = attidx; + finalattno = attno; + bsearch_aborted = true; + break; + } + + /* + * Before moving on to the next attribute, restrict the binary + * search space to the sub-range where rangemax(attidx) is the + * same. 
The sub-range consists of partitions newlo..newhi + */ + + /* look left of probe */ + j = probe; + do + { + --j; + + if(j < 0) + break; + + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemaxs[attidx][j]; + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + } while(!cmpval); + + /* possibly new lobound */ + newlo = ++j; + + /* look right of probe */ + j = probe; + do + { + ++j; + + if(j > pinfo->numpartitions - 1) + break; + + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemaxs[attidx][j]; + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + } while(!cmpval); + + /* possibly new hibound */ + newhi = --j; + + /* + * We indeed are moving to our next binary search + */ + if(newlo != newhi) + { + attidx++; + lobound = newlo; + hibound = newhi; + } + } + else + { + /* + * tuple(attno) != rangemax(attidx). + * + * Move a level down in the current binary search + */ + isless = (cmpval < 0); + + if (isless) + hibound = probe; + else + lobound = probe + 1; + + /* + * Remember newlo, newhi are the sub-range bounds covering the + * range of values rangemax[attidx] for which rangemax[attidx-1] + * is the same. As determined when attidx was set to its current + * value. + * + * It is however possible that the desired partition may be the + * one just after the last one of the partitions newlo..newhi, + * or the one just before. 
+ */ + if(lobound >= newhi) + { + /* + * Before concluding so, check if this is not the one + */ + probe = (lobound + hibound) / 2; + InitFunctionCallInfoData(fcinfo, &(partattcmpfn[attidx]), 2, + DEFAULT_COLLATION_OID, NULL, NULL); + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemaxs[attidx][probe]; + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + if(cmpval < 0) + { + InitFunctionCallInfoData(fcinfo, + &(partattcmpfn[finalattidx]), + 2, DEFAULT_COLLATION_OID, NULL, NULL); + + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemins[attidx][probe]; + + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + if(cmpval >= 0) + { + /* Yes, this is it, return. */ + partition = heap_open(pinfo->oids[probe], RowExclusiveLock); + return partition; + } + } + + /* + * We are about to abort the bsearch. + * Reset attidx. + */ + attidx = 0; + attno = partattrs[attidx]; + probe = newhi + 1; + + if(probe > highest) + probe = highest; + + /* + * memorize to use after the binary search ends + */ + finalattidx = attidx; + finalattno = attno; + bsearch_aborted = true; + break; + } + else if(hibound <= newlo) + { + /* + * Before concluding so, check if this is not the one + */ + probe = (lobound + hibound) / 2; + InitFunctionCallInfoData(fcinfo, &(partattcmpfn[finalattidx]), + 2, DEFAULT_COLLATION_OID, NULL, NULL); + + fcinfo.arg[0] = values[attno - 1]; + fcinfo.arg[1] = pinfo->rangemins[attidx][probe]; + + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + if(cmpval >= 0) + { + /* Yes, this is it, return. */ + partition = heap_open(pinfo->oids[probe], RowExclusiveLock); + return partition; + } + + /* + * We're about to abort the bsearch. + * Reset attidx. 
+ */ + attidx = 0; + attno = partattrs[attidx]; + probe = newlo - 1; + + if(probe < least) + probe = least; + + /* + * memorize to use after the binary search ends + */ + finalattidx = attidx; + finalattno = attno; + bsearch_aborted = true; + break; + } + } + + /* + * memorize to use after the binary search ends + */ + finalattidx = attidx; + finalattno = attno; + } + + if(!bsearch_aborted) + probe = (lobound + hibound) / 2; + + /* tuple < rangemaxs(probe)? */ + InitFunctionCallInfoData(fcinfo, &(partattcmpfn[finalattidx]), 2, + DEFAULT_COLLATION_OID, NULL, NULL); + + fcinfo.arg[0] = values[finalattno - 1]; + fcinfo.arg[1] = pinfo->rangemaxs[finalattidx][probe]; + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + /* + * So, tuple < rangemaxs(probe). + * Is tuple >= rangemins(probe)? + */ + if(cmpval < 0) + { + InitFunctionCallInfoData(fcinfo, &(partattcmpfn[finalattidx]), 2, + DEFAULT_COLLATION_OID, NULL, NULL); + + fcinfo.arg[0] = values[finalattno - 1]; + fcinfo.arg[1] = pinfo->rangemins[finalattidx][probe]; + + cmpval = DatumGetInt32(FunctionCallInvoke(&fcinfo)); + + if(cmpval >= 0) + { + partition = heap_open(pinfo->oids[probe], RowExclusiveLock); + return partition; + } + } + + return rel; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index f96fb24..5cd24f5 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -168,7 +168,9 @@ ExecInsert(TupleTableSlot *slot, { HeapTuple tuple; ResultRelInfo *resultRelInfo; + ResultRelInfo *saved_resultRelInfo; Relation resultRelationDesc; + Relation saved_resultRelationDesc; Oid newId; List *recheckIndexes = NIL; @@ -259,6 +261,33 @@ ExecInsert(TupleTableSlot *slot, ExecConstraints(resultRelInfo, slot, estate); /* + * a hack/idea to route tuples to a valid partition + * if none found, resultRelationDesc remains unchanged + * + * XXX - should this be before ExecConstraints()? 
+ */ + saved_resultRelationDesc = resultRelationDesc; + saved_resultRelInfo = resultRelInfo; + if(resultRelationDesc->rd_rel->relispartitioned) + { + resultRelationDesc = ExecFindPartition(resultRelInfo, slot); + + /* for ExecInsertIndexTuples() */ + if(resultRelationDesc != saved_resultRelationDesc) + { + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, + resultRelationDesc, + 1, + 0); + + ExecOpenIndices(resultRelInfo); + + estate->es_result_relation_info = resultRelInfo; + } + } + + /* + * insert the tuple + * + * Note: heap_insert returns the tid (location) of the new tuple in @@ -273,6 +302,18 @@ ExecInsert(TupleTableSlot *slot, if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate); + + /* close the partition's indexes and heap, and reset estate */ + if(resultRelationDesc != saved_resultRelationDesc) + { + ExecCloseIndices(resultRelInfo); + pfree(resultRelInfo); + heap_close(resultRelationDesc, RowExclusiveLock); + estate->es_result_relation_info = saved_resultRelInfo; + resultRelInfo = saved_resultRelInfo; + } + + resultRelationDesc = saved_resultRelationDesc; } if (canSetTag)
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers