Consider the following schema: create table foo_archive (a int, b timestamp); create index foo_archive_idx on foo_archive(b); CREATE TABLE foo_archive_2007_01_01 (CONSTRAINT foo_archive_2007_01_01_b_check CHECK (((b >= '2007-01-01'::date) AND (b < '2007-01-02'::date)))) INHERITS (foo_archive); CREATE INDEX foo_archive_2007_01_01_idx ON foo_archive_2007_01_01 USING btree (b); CREATE TABLE foo_archive_2007_01_02 (CONSTRAINT foo_archive_2007_01_02_b_check CHECK (((b >= '2007-01-02'::date) AND (b < '2007-01-03'::date)))) INHERITS (foo_archive); CREATE INDEX foo_archive_2007_01_02_idx ON foo_archive_2007_01_02 USING btree (b); ...
Currently the optimizer yields the following plan: postgres=# explain select max(b) from foo_archive; QUERY PLAN ----------------------------------------------------------------------------------------------------- Aggregate (cost=18602.00..18602.01 rows=1 width=8) -> Append (cost=0.00..16005.00 rows=1038800 width=8) -> Seq Scan on foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_01 foo_archive (cost=0.00..1331.99 rows=86399 width=8) -> Seq Scan on foo_archive_2007_01_02 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_03 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_04 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_05 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_06 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_07 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_08 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_09 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_10 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_11 foo_archive (cost=0.00..1332.00 rows=86400 width=8) -> Seq Scan on foo_archive_2007_01_12 foo_archive (cost=0.00..765.01 rows=49601 width=8) -> Seq Scan on foo_archive_2007_01_13 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_14 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_15 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_16 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_17 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_18 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_19 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_20 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_21 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_22 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_23 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_24 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_25 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_26 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_27 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_28 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_29 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_30 foo_archive (cost=0.00..29.40 rows=1940 width=8) -> Seq Scan on foo_archive_2007_01_31 foo_archive (cost=0.00..29.40 rows=1940 width=8) (34 rows) As we can see, the optimizer does not take advantage of the indexes on column b in the children relations. Attached is a patch that will take advantage of the indexes (when they're available and if the index path is cheaper) and yield the following plan. postgres=# explain select max(b) from foo_archive; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------- Aggregate (cost=1.54..1.55 rows=1 width=0) InitPlan 1 (returns $0) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_idx on foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 2 (returns $1) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_01_idx on foo_archive_2007_01_01 foo_archive (cost=0.00..2719.24 rows=86399 width=8) Filter: (b IS NOT NULL) InitPlan 3 (returns $2) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_02_idx on foo_archive_2007_01_02 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 4 (returns $3) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_03_idx on foo_archive_2007_01_03 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 5 (returns $4) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_04_idx on foo_archive_2007_01_04 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 6 (returns $5) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_05_idx on foo_archive_2007_01_05 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 7 (returns $6) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_06_idx on foo_archive_2007_01_06 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 8 (returns $7) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_07_idx on foo_archive_2007_01_07 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 9 (returns $8) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_08_idx on foo_archive_2007_01_08 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 10 (returns $9) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_09_idx on foo_archive_2007_01_09 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 11 (returns $10) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_10_idx on foo_archive_2007_01_10 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 12 (returns $11) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_11_idx on foo_archive_2007_01_11 foo_archive (cost=0.00..2719.26 rows=86400 width=8) Filter: (b IS NOT NULL) InitPlan 13 (returns $12) -> Limit (cost=0.00..0.03 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_12_idx on foo_archive_2007_01_12 foo_archive (cost=0.00..1568.27 rows=49601 width=8) Filter: (b IS NOT NULL) InitPlan 14 (returns $13) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_13_idx on foo_archive_2007_01_13 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 15 (returns $14) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_14_idx on foo_archive_2007_01_14 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 16 (returns $15) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_15_idx on foo_archive_2007_01_15 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 17 (returns $16) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_16_idx on foo_archive_2007_01_16 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 18 (returns $17) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_17_idx on foo_archive_2007_01_17 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 19 (returns $18) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_18_idx on foo_archive_2007_01_18 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 20 (returns $19) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_19_idx on foo_archive_2007_01_19 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 21 (returns $20) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_20_idx on foo_archive_2007_01_20 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 22 (returns $21) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_21_idx on foo_archive_2007_01_21 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 23 (returns $22) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_22_idx on foo_archive_2007_01_22 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 24 (returns $23) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_23_idx on foo_archive_2007_01_23 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 25 (returns $24) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_24_idx on foo_archive_2007_01_24 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 26 (returns $25) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_25_idx on foo_archive_2007_01_25 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 27 (returns $26) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_26_idx on foo_archive_2007_01_26 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 28 (returns $27) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_27_idx on foo_archive_2007_01_27 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 29 (returns $28) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_28_idx on foo_archive_2007_01_28 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 30 (returns $29) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_29_idx on foo_archive_2007_01_29 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 31 (returns $30) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_30_idx on foo_archive_2007_01_30 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) InitPlan 32 (returns $31) -> Limit (cost=0.00..0.04 rows=1 width=8) -> Index Scan Backward using foo_archive_2007_01_31_idx on foo_archive_2007_01_31 foo_archive (cost=0.00..73.35 rows=1940 width=8) Filter: (b IS NOT NULL) -> Append (cost=0.00..0.32 rows=32 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) -> Result (cost=0.00..0.01 rows=1 width=0) (162 rows) Does this patch make sense for 8.5 to optimize for this particular class of queries? Specifically queries of the form: SELECT MAX(col) FROM tab WHERE ...; SELECT MIN(col) FROM tab WHERE ...; Thanks, Alan
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 762dfb1..5f88466 100644 *** a/src/backend/optimizer/plan/createplan.c --- b/src/backend/optimizer/plan/createplan.c *************** *** 27,32 **** --- 27,33 ---- #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "optimizer/predtest.h" + #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "optimizer/var.h" *************** create_append_plan(PlannerInfo *root, Ap *** 546,551 **** --- 547,553 ---- List *tlist = build_relation_tlist(best_path->path.parent); List *subplans = NIL; ListCell *subpaths; + bool can_optimize_minmax; /* * It is possible for the subplans list to contain only one entry, or even *************** create_append_plan(PlannerInfo *root, Ap *** 566,577 **** NULL); } /* Normal case with multiple subpaths */ foreach(subpaths, best_path->subpaths) { Path *subpath = (Path *) lfirst(subpaths); ! subplans = lappend(subplans, create_plan(root, subpath)); } plan = make_append(subplans, false, tlist); --- 568,618 ---- NULL); } + can_optimize_minmax = can_optimize_append_minmax(root); + /* Normal case with multiple subpaths */ foreach(subpaths, best_path->subpaths) { Path *subpath = (Path *) lfirst(subpaths); ! RelOptInfo *parentrel = best_path->path.parent; ! RelOptInfo *childrel = subpath->parent; ! AppendRelInfo *appinfo = NULL; ! ListCell *lc; ! Plan *subplan = NULL; ! ! if (!can_optimize_minmax) ! { ! subplans = lappend(subplans, create_plan(root, subpath)); ! continue; ! } ! ! /* only try to optimize children rel's */ ! foreach (lc, root->append_rel_list) ! { ! AppendRelInfo *a = (AppendRelInfo *) lfirst(lc); ! ! if (a->child_relid == childrel->relid && ! a->parent_relid == parentrel->relid) ! { ! appinfo = a; ! break; ! } ! } ! ! if (appinfo) ! { ! List *targetlist = ! (List *) adjust_appendrel_attrs((Node *) root->parse->targetList, ! appinfo); ! subplan = do_optimize_minmax_aggregates(root, targetlist, childrel, ! subpath); ! } ! ! if (subplan) ! subplans = lappend(subplans, subplan); ! else ! subplans = lappend(subplans, create_plan(root, subpath)); } plan = make_append(subplans, false, tlist); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 34e9617..959cc2c 100644 *** a/src/backend/optimizer/plan/planagg.c --- b/src/backend/optimizer/plan/planagg.c *************** static void make_agg_subplan(PlannerInfo *** 53,59 **** static Node *replace_aggs_with_params_mutator(Node *node, List **context); static Oid fetch_agg_sort_op(Oid aggfnoid); - /* * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes * --- 53,58 ---- *************** optimize_minmax_aggregates(PlannerInfo * *** 77,89 **** RangeTblRef *rtr; RangeTblEntry *rte; RelOptInfo *rel; - List *aggs_list; - ListCell *l; - Cost total_cost; - Path agg_p; - Plan *plan; - Node *hqual; - QualCost tlist_cost; /* Nothing to do if query has no aggregates */ if (!parse->hasAggs) --- 76,81 ---- *************** optimize_minmax_aggregates(PlannerInfo * *** 124,129 **** --- 116,137 ---- return NULL; rel = find_base_rel(root, rtr->rtindex); + return do_optimize_minmax_aggregates(root, tlist, rel, best_path); + } + + Plan * + do_optimize_minmax_aggregates(PlannerInfo *root, List *tlist, RelOptInfo *rel, + Path *best_path) + { + Query *parse = root->parse; + List *aggs_list; + ListCell *l; + Cost total_cost; + Path agg_p; + Plan *plan; + Node *hqual; + QualCost tlist_cost; + /* * Since this optimization is not applicable all that often, we want to * fall out before doing very much work if possible. Therefore we do the *************** optimize_minmax_aggregates(PlannerInfo * *** 214,219 **** --- 222,273 ---- } /* + * can_optimize_append_minmax - check whether optimizing MIN/MAX via indexes + * can potentially be pushed down to child relations. + * + * This checks whether the query is of the form: + * 1. SELECT MAX(col) FROM tab WHERE ...; + * 2. SELECT MIN(col) FROM tab WHERE ...; + */ + bool + can_optimize_append_minmax(PlannerInfo *root) + { + Query *parse = root->parse; + List *tlist = parse->targetList; + List *aggs_list; + + /* Nothing to do if query has no aggregates */ + if (!parse->hasAggs) + return false; + + Assert(!parse->setOperations); /* shouldn't get here if a setop */ + Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ + + /* + * Reject unoptimizable cases. + * + * We don't handle GROUP BY or windowing, because our current + * implementations of grouping require looking at all the rows anyway, and + * so there's not much point in optimizing MIN/MAX. + */ + if (parse->groupClause || parse->hasWindowFuncs) + return false; + + /* We're only supporting a single MIN/MAX in the targetlist. */ + if (list_length(tlist) > 1 || parse->havingQual) + return false; + + aggs_list = NIL; + if (find_minmax_aggs_walker((Node *) tlist, &aggs_list)) + return false; + + if (list_length(aggs_list) > 1) + return false; + + return true; + } + + /* * find_minmax_aggs_walker * Recursively scan the Aggref nodes in an expression tree, and check * that each one is a MIN/MAX aggregate. If so, build a list of the diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 0dd2bcc..939c1ef 100644 *** a/src/include/optimizer/planmain.h --- b/src/include/optimizer/planmain.h *************** extern void query_planner(PlannerInfo *r *** 34,39 **** --- 34,42 ---- */ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path); + extern Plan *do_optimize_minmax_aggregates(PlannerInfo *root, List *tlist, + RelOptInfo *rel, Path *best_path); + extern bool can_optimize_append_minmax(PlannerInfo *root); /* * prototypes for plan/createplan.c
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers