While hacking around the planner, the "callback" mechanism in
query_planner started to feel awkward. It would seem more natural to
split query_planner() into two parts: one function to do all the
pre-processing of the jointree, building equivalence classes etc. And a
second function to generate the Paths.
So where the callers currently do this:
/* set up callback arguments */
qp_extra.foo = ...;
qp_extra.bar = ...;
query_planner(root, qp_callback, &qp_extra);
static void
qp_callback(PlannerInfo *root, void *qp_extra)
{
root->sort_pathkeys = ...;
root->query_pathkeys = ...;
}
This would feel more natural to me:
/* process the jointree, build equivalence classes */
process_jointree(root);
/* set up query_pathkeys */
root->sort_pathkeys = ...;
root->query_pathkeys = ...;
query_planner(root);
Attached is a more full-fledged patch to do that.
The callback was introduced by commit db9f0e1d9a. The commit message
explains the choice to use a callback like this:
There are several ways in which we could implement that without making
query_planner itself deal with grouping/sorting features (which are
supposed to be the province of grouping_planner). I chose to add a
callback function to query_planner's API; other alternatives would have
required adding more fields to PlannerInfo, which while not bad in itself
would create an ABI break for planner-related plugins in the 9.2 release
series. This still breaks ABI for anything that calls query_planner
directly, but it seems somewhat unlikely that there are any such plugins.
In v12, we can change the ABI.
- Heikki
>From 482e86a864f7ae5f4a5ffac09e0d1d21dfd8f762 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Fri, 15 Jun 2018 15:13:41 +0300
Subject: [PATCH 1/1] Refactor query_planner() into two parts.
Commit db9f0e1d9a introduced a callback to query_planner(), which gave the
caller a chance to build query_pathkeys, in the middle of query_planner()
steps. That feels a bit awkward. Let's split query_planner() into two
functions instead, one to process the jointree, building the equivalence
classes, and another to produce the Paths. The caller can build the
query_pathkeys between the two function calls.
---
src/backend/optimizer/plan/planagg.c | 51 ++++++-------
src/backend/optimizer/plan/planmain.c | 125 ++++++++++++++++++-------------
src/backend/optimizer/plan/planner.c | 45 +++++------
src/backend/optimizer/util/placeholder.c | 2 +-
src/include/nodes/relation.h | 9 ++-
src/include/optimizer/planmain.h | 7 +-
6 files changed, 131 insertions(+), 108 deletions(-)
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 95cbffbd69..07f0734559 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -50,7 +50,6 @@
static bool find_minmax_aggs_walker(Node *node, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
Oid eqop, Oid sortop, bool nulls_first);
-static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -63,9 +62,9 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* the (UPPERREL_GROUP_AGG, NULL) upperrel.
*
* This should be called by grouping_planner() just before it's ready to call
- * query_planner(), because we generate indexscan paths by cloning the
- * planner's state and invoking query_planner() on a modified version of
- * the query parsetree. Thus, all preprocessing needed before query_planner()
+ * process_jointree(), because we generate indexscan paths by cloning the
+ * planner's state and invoking the scan/join planner on a modified version of
+ * the query parsetree. Thus, all preprocessing needed before process_jointree()
* must already be done.
*
* Note: we are passed the preprocessed targetlist separately, because it's
@@ -435,13 +434,33 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
FLOAT8PASSBYVAL);
/*
- * Generate the best paths for this query, telling query_planner that we
- * have LIMIT 1.
+ * Build base relation and equivalence classes, knowing that we have
+ * LIMIT 1.
*/
subroot->tuple_fraction = 1.0;
subroot->limit_tuples = 1.0;
- final_rel = query_planner(subroot, tlist, minmax_qp_callback, NULL);
+ process_jointree(subroot, tlist);
+
+ /*
+ * Compute query_pathkeys to represent the desired order. There is no
+ * GROUP BY, window functions, or DISTINCT in the generated query.
+ */
+ subroot->group_pathkeys = NIL;
+ subroot->window_pathkeys = NIL;
+ subroot->distinct_pathkeys = NIL;
+
+ subroot->sort_pathkeys =
+ make_pathkeys_for_sortclauses(subroot,
+ subroot->parse->sortClause,
+ subroot->parse->targetList);
+
+ subroot->query_pathkeys = subroot->sort_pathkeys;
+
+ /*
+ * Generate the best paths for the generated query.
+ */
+ final_rel = query_planner(subroot);
/*
* Since we didn't go through subquery_planner() to handle the subquery,
@@ -495,24 +514,6 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
}
/*
- * Compute query_pathkeys and other pathkeys during query_planner()
- */
-static void
-minmax_qp_callback(PlannerInfo *root, void *extra)
-{
- root->group_pathkeys = NIL;
- root->window_pathkeys = NIL;
- root->distinct_pathkeys = NIL;
-
- root->sort_pathkeys =
- make_pathkeys_for_sortclauses(root,
- root->parse->sortClause,
- root->parse->targetList);
-
- root->query_pathkeys = root->sort_pathkeys;
-}
-
-/*
* Get the OID of the sort operator, if any, associated with an aggregate.
* Returns InvalidOid if there is no such operator.
*/
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 7a34abca04..dc5cc110a9 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -29,73 +29,35 @@
/*
- * query_planner
- * Generate a path (that is, a simplified plan) for a basic query,
- * which may involve joins but not any fancier features.
+ * process_jointree
+ * Analyze the jointree of a query.
*
- * Since query_planner does not handle the toplevel processing (grouping,
- * sorting, etc) it cannot select the best path by itself. Instead, it
- * returns the RelOptInfo for the top level of joining, and the caller
- * (grouping_planner) can choose among the surviving paths for the rel.
+ * This builds the base relations, restrict/join quals, and equivalence
+ * classes.
*
* root describes the query to plan
* tlist is the target list the query should produce
* (this is NOT necessarily root->parse->targetList!)
- * qp_callback is a function to compute query_pathkeys once it's safe to do so
- * qp_extra is optional extra data to pass to qp_callback
- *
- * Note: the PlannerInfo node also includes a query_pathkeys field, which
- * tells query_planner the sort order that is desired in the final output
- * plan. This value is *not* available at call time, but is computed by
- * qp_callback once we have completed merging the query's equivalence classes.
- * (We cannot construct canonical pathkeys until that's done.)
*/
-RelOptInfo *
-query_planner(PlannerInfo *root, List *tlist,
- query_pathkeys_callback qp_callback, void *qp_extra)
+void
+process_jointree(PlannerInfo *root, List *tlist)
{
Query *parse = root->parse;
- List *joinlist;
- RelOptInfo *final_rel;
Index rti;
double total_pages;
+ List *joinlist;
/*
- * If the query has an empty join tree, then it's something easy like
- * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
+ * If the query has an empty join tree, fall through quickly.
*/
if (parse->jointree->fromlist == NIL)
{
- /* We need a dummy joinrel to describe the empty set of baserels */
- final_rel = build_empty_join_rel(root);
-
- /*
- * If query allows parallelism in general, check whether the quals are
- * parallel-restricted. (We need not check final_rel->reltarget
- * because it's empty at this point. Anything parallel-restricted in
- * the query tlist will be dealt with later.)
- */
- if (root->glob->parallelModeOK)
- final_rel->consider_parallel =
- is_parallel_safe(root, parse->jointree->quals);
-
- /* The only path for it is a trivial Result path */
- add_path(final_rel, (Path *)
- create_result_path(root, final_rel,
- final_rel->reltarget,
- (List *) parse->jointree->quals));
-
- /* Select cheapest path (pretty easy in this case...) */
- set_cheapest(final_rel);
-
/*
- * We still are required to call qp_callback, in case it's something
- * like "SELECT 2+2 ORDER BY 1".
+ * Initialize canon_pathkeys, in case it's something like
+ * "SELECT 2+2 ORDER BY 1".
*/
root->canon_pathkeys = NIL;
- (*qp_callback) (root, qp_extra);
-
- return final_rel;
+ return;
}
/*
@@ -171,10 +133,11 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* We have completed merging equivalence sets, so it's now possible to
- * generate pathkeys in canonical form; so compute query_pathkeys and
- * other pathkeys fields in PlannerInfo.
+ * generate pathkeys in canonical form. (We don't do that here, though.
+ * The caller will compute query_pathkeys and other pathkeys fields in
+ * PlannerInfo, based on the "upper" parts of the query, like GROUP BY
+ * and ORDER BY.)
*/
- (*qp_callback) (root, qp_extra);
/*
* Examine any "placeholder" expressions generated during subquery pullup.
@@ -190,7 +153,7 @@ query_planner(PlannerInfo *root, List *tlist,
* jointree preprocessing, but the necessary information isn't available
* until we've built baserel data structures and classified qual clauses.
*/
- joinlist = remove_useless_joins(root, joinlist);
+ root->join_subproblem_list = remove_useless_joins(root, joinlist);
/*
* Also, reduce any semijoins with unique inner rels to plain inner joins.
@@ -252,11 +215,65 @@ query_planner(PlannerInfo *root, List *tlist,
total_pages += (double) brel->pages;
}
root->total_table_pages = total_pages;
+}
+
+/*
+ * query_planner
+ * Generate paths (that is, simplified plans) for a basic query,
+ * which may involve joins but not any fancier features.
+ *
+ * Since query_planner does not handle the toplevel processing (grouping,
+ * sorting, etc) it cannot select the best path by itself. Instead, it
+ * returns the RelOptInfo for the top level of joining, and the caller
+ * (grouping_planner) can choose among the surviving paths for the rel.
+ *
+ * The PlannerInfo node also includes a query_pathkeys field, which tells
+ * query_planner the sort order that is desired in the final output plan.
+ * The pathkeys must be in canonical form, therefore they can only be
+ * computed after we have completed merging the query's equivalence classes,
+ * ie. after process_jointree().
+ */
+RelOptInfo *
+query_planner(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ RelOptInfo *final_rel;
+
+ /*
+ * If the query has an empty join tree, then it's something easy like
+ * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
+ */
+ if (parse->jointree->fromlist == NIL)
+ {
+ /* We need a dummy joinrel to describe the empty set of baserels */
+ final_rel = build_empty_join_rel(root);
+
+ /*
+ * If query allows parallelism in general, check whether the quals are
+ * parallel-restricted. (We need not check final_rel->reltarget
+ * because it's empty at this point. Anything parallel-restricted in
+ * the query tlist will be dealt with later.)
+ */
+ if (root->glob->parallelModeOK)
+ final_rel->consider_parallel =
+ is_parallel_safe(root, parse->jointree->quals);
+
+ /* The only path for it is a trivial Result path */
+ add_path(final_rel, (Path *)
+ create_result_path(root, final_rel,
+ final_rel->reltarget,
+ (List *) parse->jointree->quals));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(final_rel);
+
+ return final_rel;
+ }
/*
* Ready to do the primary planning.
*/
- final_rel = make_one_rel(root, joinlist);
+ final_rel = make_one_rel(root, root->join_subproblem_list);
/* Check that we got at least one usable path */
if (!final_rel || !final_rel->cheapest_total_path ||
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..e73007f3ba 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -116,6 +116,7 @@ static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void inheritance_planner(PlannerInfo *root);
static void grouping_planner(PlannerInfo *root, bool inheritance_update,
double tuple_fraction);
+static void compute_pathkeys(PlannerInfo *root, List *tlist, List *activeWindows, List *groupClause);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -128,7 +129,6 @@ static void remove_useless_groupby_columns(PlannerInfo *root);
static List *preprocess_groupclause(PlannerInfo *root, List *force);
static List *extract_rollup_sets(List *groupingSets);
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
-static void standard_qp_callback(PlannerInfo *root, void *extra);
static double get_number_of_groups(PlannerInfo *root,
double path_rows,
grouping_sets_data *gd,
@@ -1787,7 +1787,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
grouping_sets_data *gset_data = NULL;
- standard_qp_extra qp_extra;
/* A recursive query should always have setOperations */
Assert(!root->hasRecursion);
@@ -1880,29 +1879,34 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
else
root->limit_tuples = limit_tuples;
- /* Set up data needed by standard_qp_callback */
- qp_extra.tlist = tlist;
- qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause = (gset_data
- ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
- : parse->groupClause);
+ /*
+ * Build the base relations and equivalence classes, based on the
+ * scan/join portion of this query, ie the FROM/WHERE clauses.
+ */
+ process_jointree(root, tlist);
+
+ /*
+ * Generate pathkey representations of the query's sort clause,
+ * distinct clause, etc.
+ */
+ compute_pathkeys(root, tlist, activeWindows,
+ gset_data
+ ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
+ : parse->groupClause);
/*
* Generate the best unsorted and presorted paths for the scan/join
* portion of this Query, ie the processing represented by the
* FROM/WHERE clauses. (Note there may not be any presorted paths.)
- * We also generate (in standard_qp_callback) pathkey representations
- * of the query's sort clause, distinct clause, etc.
*/
- current_rel = query_planner(root, tlist,
- standard_qp_callback, &qp_extra);
+ current_rel = query_planner(root);
/*
* Convert the query's result tlist into PathTarget format.
*
- * Note: it's desirable to not do this till after query_planner(),
+ * Note: it's desirable to not do this till after process_jointree(), (FIXME: or query_planner()?)
* because the target width estimates can use per-Var width numbers
- * that were obtained within query_planner().
+ * that were obtained within process_jointree().
*/
final_target = create_pathtarget(root, tlist);
final_target_parallel_safe =
@@ -3440,26 +3444,23 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
/*
- * Compute query_pathkeys and other pathkeys during plan generation
+ * Compute query_pathkeys and other pathkeys, to tell query_planner() which
+ * orderings would be useful for the later planner stages.
*/
static void
-standard_qp_callback(PlannerInfo *root, void *extra)
+compute_pathkeys(PlannerInfo *root, List *tlist, List *activeWindows, List *groupClause)
{
Query *parse = root->parse;
- standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
- List *tlist = qp_extra->tlist;
- List *activeWindows = qp_extra->activeWindows;
/*
* Calculate pathkeys that represent grouping/ordering requirements. The
* sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
* be, in which case we just leave their pathkeys empty.
*/
- if (qp_extra->groupClause &&
- grouping_is_sortable(qp_extra->groupClause))
+ if (groupClause && grouping_is_sortable(groupClause))
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
- qp_extra->groupClause,
+ groupClause,
tlist);
else
root->group_pathkeys = NIL;
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index c79d0f25d4..1b393aa936 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -62,7 +62,7 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
* We build PlaceHolderInfos only for PHVs that are still present in the
* simplified query passed to query_planner().
*
- * Note: this should only be called after query_planner() has started. Also,
+ * Note: this should only be called after process_jointree() (FIXME: or query_planner()?). Also,
* create_new_ph must not be true after deconstruct_jointree begins, because
* make_outerjoininfo assumes that we already know about all placeholders.
*/
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 5af484024a..6bf9e84c4a 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -218,6 +218,13 @@ typedef struct PlannerInfo
Relids nullable_baserels;
/*
+ * join_subprolem_list represents the join tree, as a tree of join order
+ * decisions that need to be made by make_one_rel(). See
+ * deconstruct_jointree().
+ */
+ List *join_subproblem_list;
+
+ /*
* join_rel_list is a list of all join-relation RelOptInfos we have
* considered in this planning run. For small problems we just scan the
* list to do lookups, but when there are many join relations we build a
@@ -339,7 +346,7 @@ typedef struct PlannerInfo
/*
* In places where it's known that simple_rte_array[] must have been prepared
* already, we just index into it to fetch RTEs. In code that might be
- * executed before or after entering query_planner(), use this macro.
+ * executed before or after entering process_jointree(), use this macro.
*/
#define planner_rt_fetch(rti, root) \
((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index c8ab0280d2..4e61dff241 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -31,14 +31,11 @@ extern double cursor_tuple_fraction;
extern int force_parallel_mode;
extern bool parallel_leader_participation;
-/* query_planner callback to compute query_pathkeys */
-typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
-
/*
* prototypes for plan/planmain.c
*/
-extern RelOptInfo *query_planner(PlannerInfo *root, List *tlist,
- query_pathkeys_callback qp_callback, void *qp_extra);
+extern void process_jointree(PlannerInfo *root, List *tlist);
+extern RelOptInfo *query_planner(PlannerInfo *root);
/*
* prototypes for plan/planagg.c
--
2.11.0