Hi list, Attached patch allows semijoin/antijoin/hashed SubPlan optimization when an EXISTS subquery contains a LIMIT clause with a positive constant. It seems to be a fairly common meme to put LIMIT 1 into EXISTS() subqueries, and it even makes sense when you're not aware that the database already does this optimization.
Do we want this? It has come up in #postgresql, and at twice times on mailing lists: http://www.postgresql.org/message-id/53279529.2070...@freemail.hu http://www.postgresql.org/message-id/50a36820.4030...@pingpong.net And there may even be good reasons, such as writing performant portable SQL code for Other Databases: https://dev.mysql.com/doc/refman/5.1/en/optimizing-subqueries.html ---- The code is fairly straightforward. The only ugly part is that I need to call eval_const_expressions() on the LIMIT expression because subquery_planner() does subquery optimizations before constant folding. A "LIMIT 1" clause will actually produce an int8(1) expression. And I have to drag along PlannerInfo for that. If it fails to yield a constant we've done some useless work, but it should be nothing compared to the caller doing a deep copy of the whole subquery. Regards, Marti
From 3448408121e7e32a12fc16403c9d48bce63503f5 Mon Sep 17 00:00:00 2001 From: Marti Raudsepp <ma...@juffo.org> Date: Wed, 1 Oct 2014 02:17:21 +0300 Subject: [PATCH] Simplify EXISTS subqueries containing LIMIT --- src/backend/optimizer/plan/subselect.c | 40 +++++++++++++++++++++++++++------ src/test/regress/expected/subselect.out | 14 ++++++++++++ src/test/regress/sql/subselect.sql | 7 ++++++ 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3e7dc85..09b153e 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node, static bool subplan_is_hashable(Plan *plan); static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); -static bool simplify_EXISTS_query(Query *query); +static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); @@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * If it's an EXISTS subplan, we might be able to simplify it. */ if (subLinkType == EXISTS_SUBLINK) - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the @@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* Make a second copy of the original subquery */ subquery = (Query *) copyObject(orig_subquery); /* and re-simplify */ - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); Assert(simple_exists); /* See if it can be converted to an ANY query */ subquery = convert_EXISTS_to_ANY(root, subquery, @@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * targetlist, we have to fail, because the pullup operation leaves us * with noplace to evaluate the targetlist. */ - if (!simplify_EXISTS_query(subselect)) + if (!simplify_EXISTS_query(root, subselect)) return NULL; /* @@ -1486,11 +1486,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * Returns TRUE if was able to discard the targetlist, else FALSE. */ static bool -simplify_EXISTS_query(Query *query) +simplify_EXISTS_query(PlannerInfo *root, Query *query) { /* * We don't try to simplify at all if the query uses set operations, - * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; + * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; * none of these seem likely in normal usage and their possible effects * are complex. */ @@ -1501,7 +1501,6 @@ simplify_EXISTS_query(Query *query) query->hasModifyingCTE || query->havingQual || query->limitOffset || - query->limitCount || query->rowMarks) return false; @@ -1513,6 +1512,33 @@ simplify_EXISTS_query(Query *query) return false; /* + * LIMIT clause can be removed if it's a positive constant or ALL, to + * prevent it from being an optimization barrier. It's a common meme to put + * LIMIT 1 within EXISTS subqueries. + */ + if (query->limitCount) + { + /* + * eval_const_expressions has not been called yet by subquery_planner, + * may still contain int64 coercions etc. + */ + Node *node = eval_const_expressions(root, query->limitCount); + Const *limit; + + if (! IsA(node, Const)) + return false; + + limit = (Const *) node; + Assert(limit->consttype == INT8OID); + + /* Checking for negative values is done later; 0 is just silly */ + if (! limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) + return false; + + query->limitCount = NULL; + } + + /* * Otherwise, we can throw away the targetlist, as well as any GROUP, * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will * change a nonzero-rows result to zero rows or vice versa. (Furthermore, diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 01c9130..91daee2 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -775,6 +775,20 @@ select * from int4_tbl o where (f1, f1) in (1 row) -- +-- Check EXISTS simplification with LIMIT +-- +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); + f1 +---- +(0 rows) + +select * from int4_tbl o where exists (select 1 limit 0); + f1 +---- +(0 rows) + +-- -- check for over-optimization of whole-row Var referencing an Append plan -- select (select q from diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 56707e2..086a9cb 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -433,6 +433,13 @@ select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); -- +-- Check EXISTS simplification with LIMIT +-- +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); +select * from int4_tbl o where exists (select 1 limit 0); + +-- -- check for over-optimization of whole-row Var referencing an Append plan -- select (select q from -- 2.1.2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers