Well, I was regretting missing the deadline for this CommitFest and then realized today was only the 14th, so I finished this up while the kids were napping.
I ended up not reusing the reloptions.c code. It looks like a lot of extra complexity for no obvious benefit, considering that there is no equivalent of AMs for tablespaces and therefore no need to support AM-specific options. I did reuse the reloptions syntax, and I think the internal representation could always be redone later, if we find that there's a use case for something more complicated. ...Robert
*** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *************** *** 1935,1940 **** archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows --- 1935,1943 ---- <para> Sets the planner's estimate of the cost of a disk page fetch that is part of a series of sequential fetches. The default is 1.0. + This value can be overridden for a particular tablespace by setting + the tablespace parameter of the same name + (see <xref linkend="sql-altertablespace">). </para> </listitem> </varlistentry> *************** *** 1948,1953 **** archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows --- 1951,1962 ---- <para> Sets the planner's estimate of the cost of a non-sequentially-fetched disk page. The default is 4.0. + This value can be overridden for a particular tablespace by setting + the tablespace parameter of the same name + (see <xref linkend="sql-altertablespace">). + </para> + + <para> Reducing this value relative to <varname>seq_page_cost</> will cause the system to prefer index scans; raising it will make index scans look relatively more expensive. You can raise *** a/doc/src/sgml/ref/alter_tablespace.sgml --- b/doc/src/sgml/ref/alter_tablespace.sgml *************** *** 23,28 **** PostgreSQL documentation --- 23,30 ---- <synopsis> ALTER TABLESPACE <replaceable>name</replaceable> RENAME TO <replaceable>new_name</replaceable> ALTER TABLESPACE <replaceable>name</replaceable> OWNER TO <replaceable>new_owner</replaceable> + ALTER TABLESPACE <replaceable>name</replaceable> SET ( <replaceable class="PARAMETER">tablespace_parameter</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] ) + ALTER TABLESPACE <replaceable>name</replaceable> RESET ( <replaceable class="PARAMETER">tablespace_parameter</replaceable> [, ... 
] ) </synopsis> </refsynopsisdiv> *************** *** 74,79 **** ALTER TABLESPACE <replaceable>name</replaceable> OWNER TO <replaceable>new_owner --- 76,99 ---- </para> </listitem> </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">tablespace_parameter</replaceable></term> + <listitem> + <para> + A tablespace parameter to be set or reset. Currently, the only + available parameters are <varname>seq_page_cost</> and + <varname>random_page_cost</>. Setting either value for a particular + tablespace will override the planner's usual estimate of the cost of + reading pages from tables in that tablespace, as established by + the configuration parameters of the same name (see + <xref linkend="guc-seq-page-cost">, + <xref linkend="guc-random-page-cost">). This may be useful if one + tablespace is located on a disk which is faster or slower than the + remainder of the I/O subsystem. + </para> + </listitem> + </varlistentry> </variablelist> </refsect1> *** a/src/backend/catalog/aclchk.c --- b/src/backend/catalog/aclchk.c *************** *** 2621,2638 **** ExecGrant_Tablespace(InternalGrant *istmt) int nnewmembers; Oid *oldmembers; Oid *newmembers; - ScanKeyData entry[1]; - SysScanDesc scan; HeapTuple tuple; ! /* There's no syscache for pg_tablespace, so must look the hard way */ ! ScanKeyInit(&entry[0], ! ObjectIdAttributeNumber, ! BTEqualStrategyNumber, F_OIDEQ, ! ObjectIdGetDatum(tblId)); ! scan = systable_beginscan(relation, TablespaceOidIndexId, true, ! SnapshotNow, 1, entry); ! tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for tablespace %u", tblId); --- 2621,2631 ---- int nnewmembers; Oid *oldmembers; Oid *newmembers; HeapTuple tuple; ! /* Search syscache for pg_tablespace */ ! tuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(tblId), ! 
0, 0, 0); if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for tablespace %u", tblId); *************** *** 2703,2709 **** ExecGrant_Tablespace(InternalGrant *istmt) noldmembers, oldmembers, nnewmembers, newmembers); ! systable_endscan(scan); pfree(new_acl); --- 2696,2702 ---- noldmembers, oldmembers, nnewmembers, newmembers); ! ReleaseSysCache(tuple); pfree(new_acl); *************** *** 3443,3451 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid, AclMode mask, AclMaskHow how) { AclMode result; - Relation pg_tablespace; - ScanKeyData entry[1]; - SysScanDesc scan; HeapTuple tuple; Datum aclDatum; bool isNull; --- 3436,3441 ---- *************** *** 3458,3474 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid, /* * Get the tablespace's ACL from pg_tablespace ! * ! * There's no syscache for pg_tablespace, so must look the hard way ! */ ! pg_tablespace = heap_open(TableSpaceRelationId, AccessShareLock); ! ScanKeyInit(&entry[0], ! ObjectIdAttributeNumber, ! BTEqualStrategyNumber, F_OIDEQ, ! ObjectIdGetDatum(spc_oid)); ! scan = systable_beginscan(pg_tablespace, TablespaceOidIndexId, true, ! SnapshotNow, 1, entry); ! tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), --- 3448,3456 ---- /* * Get the tablespace's ACL from pg_tablespace ! */ ! tuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(spc_oid), ! 0, 0, 0); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), *************** *** 3476,3483 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid, ownerId = ((Form_pg_tablespace) GETSTRUCT(tuple))->spcowner; ! aclDatum = heap_getattr(tuple, Anum_pg_tablespace_spcacl, ! RelationGetDescr(pg_tablespace), &isNull); if (isNull) { --- 3458,3466 ---- ownerId = ((Form_pg_tablespace) GETSTRUCT(tuple))->spcowner; ! aclDatum = SysCacheGetAttr(TABLESPACEOID, tuple, ! Anum_pg_tablespace_spcacl, ! 
&isNull); if (isNull) { *************** *** 3497,3504 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid, if (acl && (Pointer) acl != DatumGetPointer(aclDatum)) pfree(acl); ! systable_endscan(scan); ! heap_close(pg_tablespace, AccessShareLock); return result; } --- 3480,3486 ---- if (acl && (Pointer) acl != DatumGetPointer(aclDatum)) pfree(acl); ! ReleaseSysCache(tuple); return result; } *************** *** 4025,4033 **** pg_namespace_ownercheck(Oid nsp_oid, Oid roleid) bool pg_tablespace_ownercheck(Oid spc_oid, Oid roleid) { - Relation pg_tablespace; - ScanKeyData entry[1]; - SysScanDesc scan; HeapTuple spctuple; Oid spcowner; --- 4007,4012 ---- *************** *** 4035,4051 **** pg_tablespace_ownercheck(Oid spc_oid, Oid roleid) if (superuser_arg(roleid)) return true; ! /* There's no syscache for pg_tablespace, so must look the hard way */ ! pg_tablespace = heap_open(TableSpaceRelationId, AccessShareLock); ! ScanKeyInit(&entry[0], ! ObjectIdAttributeNumber, ! BTEqualStrategyNumber, F_OIDEQ, ! ObjectIdGetDatum(spc_oid)); ! scan = systable_beginscan(pg_tablespace, TablespaceOidIndexId, true, ! SnapshotNow, 1, entry); ! ! spctuple = systable_getnext(scan); ! if (!HeapTupleIsValid(spctuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), --- 4014,4022 ---- if (superuser_arg(roleid)) return true; ! /* Search syscache for pg_tablespace */ ! spctuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(spc_oid), ! 0, 0, 0); if (!HeapTupleIsValid(spctuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), *************** *** 4053,4060 **** pg_tablespace_ownercheck(Oid spc_oid, Oid roleid) spcowner = ((Form_pg_tablespace) GETSTRUCT(spctuple))->spcowner; ! systable_endscan(scan); ! heap_close(pg_tablespace, AccessShareLock); return has_privs_of_role(roleid, spcowner); } --- 4024,4030 ---- spcowner = ((Form_pg_tablespace) GETSTRUCT(spctuple))->spcowner; ! 
ReleaseSysCache(spctuple); return has_privs_of_role(roleid, spcowner); } *** a/src/backend/commands/tablespace.c --- b/src/backend/commands/tablespace.c *************** *** 56,61 **** --- 56,62 ---- #include "catalog/indexing.h" #include "catalog/pg_tablespace.h" #include "commands/comment.h" + #include "commands/defrem.h" #include "commands/tablespace.h" #include "miscadmin.h" #include "postmaster/bgwriter.h" *************** *** 77,82 **** char *temp_tablespaces = NULL; --- 78,84 ---- static bool remove_tablespace_directories(Oid tablespaceoid, bool redo); static void set_short_version(const char *path); + static double interpret_page_cost(DefElem *opt); /* *************** *** 284,289 **** CreateTableSpace(CreateTableSpaceStmt *stmt) --- 286,295 ---- DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename)); values[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(ownerId); + values[Anum_pg_tablespace_spcseq_page_cost - 1] = + Float8GetDatum(-1.0); + values[Anum_pg_tablespace_spcrandom_page_cost - 1] = + Float8GetDatum(-1.0); values[Anum_pg_tablespace_spclocation - 1] = CStringGetTextDatum(location); nulls[Anum_pg_tablespace_spcacl - 1] = true; *************** *** 910,915 **** AlterTableSpaceOwner(const char *name, Oid newOwnerId) --- 916,1029 ---- /* + * Alter table space options + */ + void + AlterTableSpace(AlterTableSpaceStmt *stmt) + { + Relation rel; + ScanKeyData entry[1]; + HeapScanDesc scandesc; + HeapTuple tup; + Datum repl_val[Natts_pg_tablespace]; + bool repl_null[Natts_pg_tablespace]; + bool repl_repl[Natts_pg_tablespace]; + HeapTuple newtuple; + ListCell *lc; + + /* Search pg_tablespace */ + rel = heap_open(TableSpaceRelationId, RowExclusiveLock); + + ScanKeyInit(&entry[0], + Anum_pg_tablespace_spcname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->tablespacename)); + scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); + tup = heap_getnext(scandesc, ForwardScanDirection); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + 
(errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tablespace \"%s\" does not exist", + stmt->tablespacename))); + + /* Must be owner of the existing object */ + if (!pg_tablespace_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE, + stmt->tablespacename); + + /* Prepare to build new tuple. */ + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + /* Parse options list. */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "seq_page_cost") == 0) + { + double newval = interpret_page_cost(opt); + repl_repl[Anum_pg_tablespace_spcseq_page_cost - 1] = true; + repl_val[Anum_pg_tablespace_spcseq_page_cost - 1] = + Float8GetDatum(newval); + } + else if (strcmp(opt->defname, "random_page_cost") == 0) + { + double newval = interpret_page_cost(opt); + repl_repl[Anum_pg_tablespace_spcrandom_page_cost - 1] = true; + repl_val[Anum_pg_tablespace_spcrandom_page_cost - 1] = + Float8GetDatum(newval); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized parameter \"%s\"", opt->defname))); + } + } + + /* Update system catalog. */ + newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, + repl_null, repl_repl); + simple_heap_update(rel, &newtuple->t_self, newtuple); + CatalogUpdateIndexes(rel, newtuple); + heap_freetuple(newtuple); + + /* Conclude heap scan. */ + heap_endscan(scandesc); + heap_close(rel, NoLock); + } + + /* + * Friendly helper function for making sense of page cost parameters. 
+ */ + static double + interpret_page_cost(DefElem *opt) + { + double newval; + + if (opt->defaction == DEFELEM_DROP) + { + if (opt->arg != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RESET must not include values for parameters"))); + newval = -1.0; + } + else + { + newval = defGetNumeric(opt); + if (newval < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("%g is outside the valid range for parameter \"%s\" (%g .. %g)", + newval, opt->defname, 0.0, DBL_MAX))); + } + + return newval; + } + + /* * Routines for handling the GUC variable 'default_tablespace'. */ *** a/src/backend/nodes/copyfuncs.c --- b/src/backend/nodes/copyfuncs.c *************** *** 3058,3063 **** _copyDropTableSpaceStmt(DropTableSpaceStmt *from) --- 3058,3074 ---- return newnode; } + static AlterTableSpaceStmt * + _copyAlterTableSpaceStmt(AlterTableSpaceStmt *from) + { + AlterTableSpaceStmt *newnode = makeNode(AlterTableSpaceStmt); + + COPY_STRING_FIELD(tablespacename); + COPY_NODE_FIELD(options); + + return newnode; + } + static CreateFdwStmt * _copyCreateFdwStmt(CreateFdwStmt *from) { *************** *** 4021,4026 **** copyObject(void *from) --- 4032,4040 ---- case T_DropTableSpaceStmt: retval = _copyDropTableSpaceStmt(from); break; + case T_AlterTableSpaceStmt: + retval = _copyAlterTableSpaceStmt(from); + break; case T_CreateFdwStmt: retval = _copyCreateFdwStmt(from); break; *** a/src/backend/nodes/equalfuncs.c --- b/src/backend/nodes/equalfuncs.c *************** *** 1569,1574 **** _equalDropTableSpaceStmt(DropTableSpaceStmt *a, DropTableSpaceStmt *b) --- 1569,1583 ---- } static bool + _equalAlterTableSpaceStmt(AlterTableSpaceStmt *a, AlterTableSpaceStmt *b) + { + COMPARE_STRING_FIELD(tablespacename); + COMPARE_NODE_FIELD(options); + + return true; + } + + static bool _equalCreateFdwStmt(CreateFdwStmt *a, CreateFdwStmt *b) { COMPARE_STRING_FIELD(fdwname); *************** *** 2714,2719 **** equal(void *a, void *b) --- 2723,2731 ---- case 
T_DropTableSpaceStmt: retval = _equalDropTableSpaceStmt(a, b); break; + case T_AlterTableSpaceStmt: + retval = _equalAlterTableSpaceStmt(a, b); + break; case T_CreateFdwStmt: retval = _equalCreateFdwStmt(a, b); break; *** a/src/backend/nodes/outfuncs.c --- b/src/backend/nodes/outfuncs.c *************** *** 1585,1590 **** _outRelOptInfo(StringInfo str, RelOptInfo *node) --- 1585,1591 ---- WRITE_NODE_FIELD(cheapest_total_path); WRITE_NODE_FIELD(cheapest_unique_path); WRITE_UINT_FIELD(relid); + WRITE_UINT_FIELD(reltablespace); WRITE_ENUM_FIELD(rtekind, RTEKind); WRITE_INT_FIELD(min_attr); WRITE_INT_FIELD(max_attr); *** a/src/backend/optimizer/path/costsize.c --- b/src/backend/optimizer/path/costsize.c *************** *** 27,32 **** --- 27,37 ---- * detail. Note that all of these parameters are user-settable, in case * the default values are drastically off for a particular platform. * + * seq_page_cost and random_page_cost can also be overridden for an individual + * tablespace, in case some data is on a fast disk and other data is on a slow + * disk. Per-tablespace overrides never apply to temporary work files such as + * an external sort or a materialize node that overflows work_mem. + * * We compute two separate costs for each path: * total_cost: total estimated cost to fetch all tuples * startup_cost: cost that is expended before first tuple is fetched *************** *** 164,169 **** void --- 169,175 ---- cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) { + double spc_seq_page_cost; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; *************** *** 175,184 **** cost_seqscan(Path *path, PlannerInfo *root, if (!enable_seqscan) startup_cost += disable_cost; /* * disk costs */ ! 
run_cost += seq_page_cost * baserel->pages; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup; --- 181,195 ---- if (!enable_seqscan) startup_cost += disable_cost; + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + NULL, + &spc_seq_page_cost); + /* * disk costs */ ! run_cost += spc_seq_page_cost * baserel->pages; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup; *************** *** 226,231 **** cost_index(IndexPath *path, PlannerInfo *root, --- 237,244 ---- Selectivity indexSelectivity; double indexCorrelation, csquared; + double spc_seq_page_cost, + spc_random_page_cost; Cost min_IO_cost, max_IO_cost; Cost cpu_per_tuple; *************** *** 272,284 **** cost_index(IndexPath *path, PlannerInfo *root, /* estimate number of main-table tuples fetched */ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); /*---------- * Estimate number of main-table pages fetched, and compute I/O cost. * * When the index ordering is uncorrelated with the table ordering, * we use an approximation proposed by Mackert and Lohman (see * index_pages_fetched() for details) to compute the number of pages ! * fetched, and then charge random_page_cost per page fetched. * * When the index ordering is exactly correlated with the table ordering * (just after a CLUSTER, for example), the number of pages fetched should --- 285,302 ---- /* estimate number of main-table tuples fetched */ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + /* fetch estimated page costs for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + /*---------- * Estimate number of main-table pages fetched, and compute I/O cost. 
* * When the index ordering is uncorrelated with the table ordering, * we use an approximation proposed by Mackert and Lohman (see * index_pages_fetched() for details) to compute the number of pages ! * fetched, and then charge spc_random_page_cost per page fetched. * * When the index ordering is exactly correlated with the table ordering * (just after a CLUSTER, for example), the number of pages fetched should *************** *** 286,292 **** cost_index(IndexPath *path, PlannerInfo *root, * will be sequential fetches, not the random fetches that occur in the * uncorrelated case. So if the number of pages is more than 1, we * ought to charge ! * random_page_cost + (pages_fetched - 1) * seq_page_cost * For partially-correlated indexes, we ought to charge somewhere between * these two estimates. We currently interpolate linearly between the * estimates based on the correlation squared (XXX is that appropriate?). --- 304,310 ---- * will be sequential fetches, not the random fetches that occur in the * uncorrelated case. So if the number of pages is more than 1, we * ought to charge ! * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost * For partially-correlated indexes, we ought to charge somewhere between * these two estimates. We currently interpolate linearly between the * estimates based on the correlation squared (XXX is that appropriate?). *************** *** 309,315 **** cost_index(IndexPath *path, PlannerInfo *root, (double) index->pages, root); ! max_IO_cost = (pages_fetched * random_page_cost) / num_scans; /* * In the perfectly correlated case, the number of pages touched by --- 327,333 ---- (double) index->pages, root); ! max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; /* * In the perfectly correlated case, the number of pages touched by *************** *** 328,334 **** cost_index(IndexPath *path, PlannerInfo *root, (double) index->pages, root); ! 
min_IO_cost = (pages_fetched * random_page_cost) / num_scans; } else { --- 346,352 ---- (double) index->pages, root); ! min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; } else { *************** *** 342,354 **** cost_index(IndexPath *path, PlannerInfo *root, root); /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ ! max_IO_cost = pages_fetched * random_page_cost; /* min_IO_cost is for the perfectly correlated case (csquared=1) */ pages_fetched = ceil(indexSelectivity * (double) baserel->pages); ! min_IO_cost = random_page_cost; if (pages_fetched > 1) ! min_IO_cost += (pages_fetched - 1) * seq_page_cost; } /* --- 360,372 ---- root); /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ ! max_IO_cost = pages_fetched * spc_random_page_cost; /* min_IO_cost is for the perfectly correlated case (csquared=1) */ pages_fetched = ceil(indexSelectivity * (double) baserel->pages); ! min_IO_cost = spc_random_page_cost; if (pages_fetched > 1) ! min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost; } /* *************** *** 553,558 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, --- 571,578 ---- Cost cost_per_page; double tuples_fetched; double pages_fetched; + double spc_seq_page_cost, + spc_random_page_cost; double T; /* Should only be applied to base relations */ *************** *** 571,576 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, --- 591,601 ---- startup_cost += indexTotalCost; + /* Fetch estimated page costs for tablespace containing table. */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + /* * Estimate number of main-table pages fetched. */ *************** *** 609,625 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, pages_fetched = ceil(pages_fetched); /* ! 
* For small numbers of pages we should charge random_page_cost apiece, * while if nearly all the table's pages are being read, it's more ! * appropriate to charge seq_page_cost apiece. The effect is nonlinear, * too. For lack of a better idea, interpolate like this to determine the * cost per page. */ if (pages_fetched >= 2.0) ! cost_per_page = random_page_cost - ! (random_page_cost - seq_page_cost) * sqrt(pages_fetched / T); else ! cost_per_page = random_page_cost; run_cost += pages_fetched * cost_per_page; --- 634,651 ---- pages_fetched = ceil(pages_fetched); /* ! * For small numbers of pages we should charge spc_random_page_cost apiece, * while if nearly all the table's pages are being read, it's more ! * appropriate to charge spc_seq_page_cost apiece. The effect is nonlinear, * too. For lack of a better idea, interpolate like this to determine the * cost per page. */ if (pages_fetched >= 2.0) ! cost_per_page = spc_random_page_cost - ! (spc_random_page_cost - spc_seq_page_cost) ! * sqrt(pages_fetched / T); else ! cost_per_page = spc_random_page_cost; run_cost += pages_fetched * cost_per_page; *************** *** 783,788 **** cost_tidscan(Path *path, PlannerInfo *root, --- 809,815 ---- QualCost tid_qual_cost; int ntuples; ListCell *l; + double spc_random_page_cost; /* Should only be applied to base relations */ Assert(baserel->relid > 0); *************** *** 835,842 **** cost_tidscan(Path *path, PlannerInfo *root, */ cost_qual_eval(&tid_qual_cost, tidquals, root); /* disk costs --- assume each tuple on a different page */ ! run_cost += random_page_cost * ntuples; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup + --- 862,874 ---- */ cost_qual_eval(&tid_qual_cost, tidquals, root); + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + NULL); + /* disk costs --- assume each tuple on a different page */ ! 
run_cost += spc_random_page_cost * ntuples; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup + *** a/src/backend/optimizer/util/plancat.c --- b/src/backend/optimizer/util/plancat.c *************** *** 91,96 **** get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, --- 91,97 ---- rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1; rel->max_attr = RelationGetNumberOfAttributes(relation); + rel->reltablespace = RelationGetForm(relation)->reltablespace; Assert(rel->max_attr >= rel->min_attr); rel->attr_needed = (Relids *) *************** *** 183,188 **** get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, --- 184,191 ---- info = makeNode(IndexOptInfo); info->indexoid = index->indexrelid; + info->reltablespace = + RelationGetForm(indexRelation)->reltablespace; info->rel = rel; info->ncolumns = ncolumns = index->indnatts; *** a/src/backend/parser/gram.y --- b/src/backend/parser/gram.y *************** *** 5622,5627 **** RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name --- 5622,5647 ---- n->newname = $6; $$ = (Node *)n; } + | ALTER TABLESPACE name SET reloptions + { + AlterTableSpaceStmt *n = makeNode(AlterTableSpaceStmt); + n->tablespacename = $3; + n->options = $5; + $$ = (Node *)n; + } + | ALTER TABLESPACE name RESET reloptions + { + AlterTableSpaceStmt *n = makeNode(AlterTableSpaceStmt); + ListCell *lc; + n->tablespacename = $3; + n->options = $5; + foreach (lc, n->options) + { + DefElem *def = lfirst(lc); + def->defaction = DEFELEM_DROP; + } + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name { RenameStmt *n = makeNode(RenameStmt); *** a/src/backend/tcop/utility.c --- b/src/backend/tcop/utility.c *************** *** 214,219 **** check_xact_readonly(Node *parsetree) --- 214,220 ---- case T_CreateUserMappingStmt: case T_AlterUserMappingStmt: case T_DropUserMappingStmt: + case T_AlterTableSpaceStmt: ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), 
errmsg("transaction is read-only"))); *************** *** 480,485 **** ProcessUtility(Node *parsetree, --- 481,490 ---- DropTableSpace((DropTableSpaceStmt *) parsetree); break; + case T_AlterTableSpaceStmt: + AlterTableSpace((AlterTableSpaceStmt *) parsetree); + break; + case T_CreateFdwStmt: CreateForeignDataWrapper((CreateFdwStmt *) parsetree); break; *************** *** 1386,1391 **** CreateCommandTag(Node *parsetree) --- 1391,1400 ---- tag = "DROP TABLESPACE"; break; + case T_AlterTableSpaceStmt: + tag = "ALTER TABLESPACE"; + break; + case T_CreateFdwStmt: tag = "CREATE FOREIGN DATA WRAPPER"; break; *************** *** 2165,2170 **** GetCommandLogLevel(Node *parsetree) --- 2174,2183 ---- lev = LOGSTMT_DDL; break; + case T_AlterTableSpaceStmt: + lev = LOGSTMT_DDL; + break; + case T_CreateFdwStmt: case T_AlterFdwStmt: case T_DropFdwStmt: *** a/src/backend/utils/adt/selfuncs.c --- b/src/backend/utils/adt/selfuncs.c *************** *** 5372,5377 **** genericcostestimate(PlannerInfo *root, --- 5372,5378 ---- QualCost index_qual_cost; double qual_op_cost; double qual_arg_cost; + double spc_random_page_cost; List *selectivityQuals; ListCell *l; *************** *** 5480,5485 **** genericcostestimate(PlannerInfo *root, --- 5481,5491 ---- else numIndexPages = 1.0; + /* fetch estimated page cost for tablespace containing index */ + get_tablespace_page_costs(index->reltablespace, + &spc_random_page_cost, + NULL); + /* * Now compute the disk access costs. * *************** *** 5526,5540 **** genericcostestimate(PlannerInfo *root, * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr, * since that's internal to the indexscan.) */ ! *indexTotalCost = (pages_fetched * random_page_cost) / num_outer_scans; } else { /* ! * For a single index scan, we just charge random_page_cost per page ! * touched. */ ! *indexTotalCost = numIndexPages * random_page_cost; } /* --- 5532,5547 ---- * share for each outer scan. 
(Don't pro-rate for ScalarArrayOpExpr, * since that's internal to the indexscan.) */ ! *indexTotalCost = (pages_fetched * spc_random_page_cost) ! / num_outer_scans; } else { /* ! * For a single index scan, we just charge spc_random_page_cost per ! * page touched. */ ! *indexTotalCost = numIndexPages * spc_random_page_cost; } /* *************** *** 5549,5559 **** genericcostestimate(PlannerInfo *root, * * We can deal with this by adding a very small "fudge factor" that * depends on the index size. The fudge factor used here is one ! * random_page_cost per 100000 index pages, which should be small enough ! * to not alter index-vs-seqscan decisions, but will prevent indexes of ! * different sizes from looking exactly equally attractive. */ ! *indexTotalCost += index->pages * random_page_cost / 100000.0; /* * CPU cost: any complex expressions in the indexquals will need to be --- 5556,5566 ---- * * We can deal with this by adding a very small "fudge factor" that * depends on the index size. The fudge factor used here is one ! * spc_random_page_cost per 100000 index pages, which should be small ! * enough to not alter index-vs-seqscan decisions, but will prevent ! * indexes of different sizes from looking exactly equally attractive. */ ! 
*indexTotalCost += index->pages * spc_random_page_cost / 100000.0; /* * CPU cost: any complex expressions in the indexquals will need to be *** a/src/backend/utils/cache/lsyscache.c --- b/src/backend/utils/cache/lsyscache.c *************** *** 26,34 **** --- 26,36 ---- #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_statistic.h" + #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "miscadmin.h" #include "nodes/makefuncs.h" + #include "optimizer/cost.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" *************** *** 2776,2778 **** get_roleid_checked(const char *rolname) --- 2778,2819 ---- errmsg("role \"%s\" does not exist", rolname))); return roleid; } + + /* ---------- PG_TABLESPACE CACHE ---------- */ + + /* + * get_tablespace_page_costs + * Returns random and sequential page costs for a given tablespace + */ + void + get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, + double *spc_seq_page_cost) + { + HeapTuple tp; + + /* Ensure output args are initialized on failure */ + if (spc_random_page_cost) + *spc_random_page_cost = random_page_cost; + if (spc_seq_page_cost) + *spc_seq_page_cost = seq_page_cost; + + /* spcid is always from a pg_class tuple, so InvalidOid implies the + * default */ + if (spcid == InvalidOid) + spcid = MyDatabaseTableSpace; + + tp = SearchSysCache(TABLESPACEOID, + ObjectIdGetDatum(spcid), + 0, 0, 0); + if (HeapTupleIsValid(tp)) + { + Form_pg_tablespace spctup = (Form_pg_tablespace) GETSTRUCT(tp); + + if (spc_random_page_cost && spctup->spcrandom_page_cost >= 0) + *spc_random_page_cost = (double) spctup->spcrandom_page_cost; + if (spc_seq_page_cost && spctup->spcseq_page_cost >= 0) + *spc_seq_page_cost = (double) spctup->spcseq_page_cost; + + ReleaseSysCache(tp); + } + } *** a/src/backend/utils/cache/syscache.c --- b/src/backend/utils/cache/syscache.c *************** *** 43,48 **** --- 43,49 ---- #include "catalog/pg_proc.h" #include 
"catalog/pg_rewrite.h" #include "catalog/pg_statistic.h" + #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_config_map.h" #include "catalog/pg_ts_dict.h" *************** *** 609,614 **** static const struct cachedesc cacheinfo[] = { --- 610,627 ---- }, 1024 }, + {TableSpaceRelationId, /* TABLESPACEOID */ + TablespaceOidIndexId, + 0, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0, + }, + 16 + }, {TSConfigMapRelationId, /* TSCONFIGMAP */ TSConfigMapIndexId, 0, *** a/src/bin/pg_dump/pg_dumpall.c --- b/src/bin/pg_dump/pg_dumpall.c *************** *** 956,974 **** dumpTablespaces(PGconn *conn) * Get all tablespaces except built-in ones (which we assume are named * pg_xxx) */ ! if (server_version >= 80200) res = executeQuery(conn, "SELECT spcname, " "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, " "spclocation, spcacl, " "pg_catalog.shobj_description(oid, 'pg_tablespace') " "FROM pg_catalog.pg_tablespace " "WHERE spcname !~ '^pg_' " "ORDER BY 1"); else res = executeQuery(conn, "SELECT spcname, " "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, " "spclocation, spcacl, " ! "null " "FROM pg_catalog.pg_tablespace " "WHERE spcname !~ '^pg_' " "ORDER BY 1"); --- 956,988 ---- * Get all tablespaces except built-in ones (which we assume are named * pg_xxx) */ ! 
if (server_version >= 80500) res = executeQuery(conn, "SELECT spcname, " "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, " "spclocation, spcacl, " + "array_to_string(ARRAY[" + "CASE WHEN spcseq_page_cost < 0 THEN NULL " + "ELSE 'seq_page_cost = ' || spcseq_page_cost END," + "CASE WHEN spcrandom_page_cost < 0 THEN NULL " + "ELSE 'random_page_cost = ' || spcrandom_page_cost END" + "], ', ')," "pg_catalog.shobj_description(oid, 'pg_tablespace') " "FROM pg_catalog.pg_tablespace " "WHERE spcname !~ '^pg_' " "ORDER BY 1"); else if (server_version >= 80200) + res = executeQuery(conn, "SELECT spcname, " + "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, " + "spclocation, spcacl, null, " + "pg_catalog.shobj_description(oid, 'pg_tablespace') " + "FROM pg_catalog.pg_tablespace " + "WHERE spcname !~ '^pg_' " + "ORDER BY 1"); else res = executeQuery(conn, "SELECT spcname, " "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, " "spclocation, spcacl, " ! "null, null " "FROM pg_catalog.pg_tablespace " "WHERE spcname !~ '^pg_' " "ORDER BY 1"); *************** *** 983,989 **** dumpTablespaces(PGconn *conn) char *spcowner = PQgetvalue(res, i, 1); char *spclocation = PQgetvalue(res, i, 2); char *spcacl = PQgetvalue(res, i, 3); ! char *spccomment = PQgetvalue(res, i, 4); char *fspcname; /* needed for buildACLCommands() */ --- 997,1004 ---- char *spcowner = PQgetvalue(res, i, 1); char *spclocation = PQgetvalue(res, i, 2); char *spcacl = PQgetvalue(res, i, 3); ! char *spcoptions = PQgetvalue(res, i, 4); ! 
char *spccomment = PQgetvalue(res, i, 5); char *fspcname; /* needed for buildACLCommands() */ *************** *** 996,1001 **** dumpTablespaces(PGconn *conn) --- 1011,1020 ---- appendStringLiteralConn(buf, spclocation, conn); appendPQExpBuffer(buf, ";\n"); + if (spcoptions && spcoptions[0] != '\0') + appendPQExpBuffer(buf, "ALTER TABLESPACE %s SET (%s);\n", + fspcname, spcoptions); + if (!skip_acls && !buildACLCommands(fspcname, NULL, "TABLESPACE", spcacl, spcowner, "", server_version, buf)) *** a/src/include/catalog/pg_tablespace.h --- b/src/include/catalog/pg_tablespace.h *************** *** 32,37 **** CATALOG(pg_tablespace,1213) BKI_SHARED_RELATION --- 32,39 ---- { NameData spcname; /* tablespace name */ Oid spcowner; /* owner of tablespace */ + float8 spcrandom_page_cost; /* per-tablespace random_page_cost */ + float8 spcseq_page_cost; /* per-tablespace seq_page_cost */ text spclocation; /* physical location (VAR LENGTH) */ aclitem spcacl[1]; /* access permissions (VAR LENGTH) */ } FormData_pg_tablespace; *************** *** 48,61 **** typedef FormData_pg_tablespace *Form_pg_tablespace; * ---------------- */ ! #define Natts_pg_tablespace 4 ! #define Anum_pg_tablespace_spcname 1 ! #define Anum_pg_tablespace_spcowner 2 ! #define Anum_pg_tablespace_spclocation 3 ! #define Anum_pg_tablespace_spcacl 4 ! DATA(insert OID = 1663 ( pg_default PGUID "" _null_ )); ! DATA(insert OID = 1664 ( pg_global PGUID "" _null_ )); #define DEFAULTTABLESPACE_OID 1663 #define GLOBALTABLESPACE_OID 1664 --- 50,65 ---- * ---------------- */ ! #define Natts_pg_tablespace 6 ! #define Anum_pg_tablespace_spcname 1 ! #define Anum_pg_tablespace_spcowner 2 ! #define Anum_pg_tablespace_spcrandom_page_cost 3 ! #define Anum_pg_tablespace_spcseq_page_cost 4 ! #define Anum_pg_tablespace_spclocation 5 ! #define Anum_pg_tablespace_spcacl 6 ! DATA(insert OID = 1663 ( pg_default PGUID -1 -1 "" _null_ )); ! 
DATA(insert OID = 1664 ( pg_global PGUID -1 -1 "" _null_ )); #define DEFAULTTABLESPACE_OID 1663 #define GLOBALTABLESPACE_OID 1664 *** a/src/include/commands/tablespace.h --- b/src/include/commands/tablespace.h *************** *** 35,40 **** typedef struct xl_tblspc_drop_rec --- 35,41 ---- extern void CreateTableSpace(CreateTableSpaceStmt *stmt); extern void DropTableSpace(DropTableSpaceStmt *stmt); + extern void AlterTableSpace(AlterTableSpaceStmt *stmt); extern void RenameTableSpace(const char *oldname, const char *newname); extern void AlterTableSpaceOwner(const char *name, Oid newOwnerId); *** a/src/include/nodes/nodes.h --- b/src/include/nodes/nodes.h *************** *** 346,351 **** typedef enum NodeTag --- 346,352 ---- T_CreateUserMappingStmt, T_AlterUserMappingStmt, T_DropUserMappingStmt, + T_AlterTableSpaceStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) *** a/src/include/nodes/parsenodes.h --- b/src/include/nodes/parsenodes.h *************** *** 1464,1469 **** typedef struct DropTableSpaceStmt --- 1464,1476 ---- bool missing_ok; /* skip error if missing? */ } DropTableSpaceStmt; + typedef struct AlterTableSpaceStmt + { + NodeTag type; + char *tablespacename; + List *options; + } AlterTableSpaceStmt; + /* ---------------------- * Create/Drop FOREIGN DATA WRAPPER Statements * ---------------------- *** a/src/include/nodes/relation.h --- b/src/include/nodes/relation.h *************** *** 361,366 **** typedef struct RelOptInfo --- 361,367 ---- /* information about a base rel (not set for join rels!) 
*/ Index relid; + Oid reltablespace; /* containing tablespace */ RTEKind rtekind; /* RELATION, SUBQUERY, or FUNCTION */ AttrNumber min_attr; /* smallest attrno of rel (often <0) */ AttrNumber max_attr; /* largest attrno of rel */ *************** *** 425,430 **** typedef struct IndexOptInfo --- 426,432 ---- NodeTag type; Oid indexoid; /* OID of the index relation */ + Oid reltablespace; /* tablespace of index (not table) */ RelOptInfo *rel; /* back-link to index's table */ /* statistics from pg_class */ *** a/src/include/utils/lsyscache.h --- b/src/include/utils/lsyscache.h *************** *** 137,142 **** extern void free_attstatsslot(Oid atttype, --- 137,144 ---- extern char *get_namespace_name(Oid nspid); extern Oid get_roleid(const char *rolname); extern Oid get_roleid_checked(const char *rolname); + extern void get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost, + float8 *spc_seq_page_cost); #define type_is_array(typid) (get_element_type(typid) != InvalidOid) *** a/src/include/utils/syscache.h --- b/src/include/utils/syscache.h *************** *** 71,76 **** enum SysCacheIdentifier --- 71,77 ---- RELOID, RULERELNAME, STATRELATT, + TABLESPACEOID, TSCONFIGMAP, TSCONFIGNAMENSP, TSCONFIGOID,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers