Hi list,

Attached patch allows semijoin/antijoin/hashed SubPlan optimization
when an EXISTS subquery contains a LIMIT clause with a positive
constant. It seems to be a fairly common meme to put LIMIT 1 into
EXISTS() subqueries, and it even makes sense when you're not aware
that the database already does this optimization.

Do we want this?

It has come up in #postgresql, and at twice times on mailing lists:
http://www.postgresql.org/message-id/53279529.2070...@freemail.hu
http://www.postgresql.org/message-id/50a36820.4030...@pingpong.net

And there may even be good reasons, such as writing performant
portable SQL code for Other Databases:
https://dev.mysql.com/doc/refman/5.1/en/optimizing-subqueries.html

----
The code is fairly straightforward. The only ugly part is that I need
to call eval_const_expressions() on the LIMIT expression because
subquery_planner() does subquery optimizations before constant
folding. A "LIMIT 1" clause will actually produce an int8(1)
expression. And I have to drag along PlannerInfo for that.

If it fails to yield a constant we've done some useless work, but it
should be nothing compared to the caller doing a deep copy of the
whole subquery.

Regards,
Marti
From 3448408121e7e32a12fc16403c9d48bce63503f5 Mon Sep 17 00:00:00 2001
From: Marti Raudsepp <ma...@juffo.org>
Date: Wed, 1 Oct 2014 02:17:21 +0300
Subject: [PATCH] Simplify EXISTS subqueries containing LIMIT

---
 src/backend/optimizer/plan/subselect.c  | 40 +++++++++++++++++++++++++++------
 src/test/regress/expected/subselect.out | 14 ++++++++++++
 src/test/regress/sql/subselect.sql      |  7 ++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3e7dc85..09b153e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
 static bool subplan_is_hashable(Plan *plan);
 static bool testexpr_is_hashable(Node *testexpr);
 static bool hash_ok_operator(OpExpr *expr);
-static bool simplify_EXISTS_query(Query *query);
+static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
 static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
 					  Node **testexpr, List **paramIds);
 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 	 * If it's an EXISTS subplan, we might be able to simplify it.
 	 */
 	if (subLinkType == EXISTS_SUBLINK)
-		simple_exists = simplify_EXISTS_query(subquery);
+		simple_exists = simplify_EXISTS_query(root, subquery);
 
 	/*
 	 * For an EXISTS subplan, tell lower-level planner to expect that only the
@@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 		/* Make a second copy of the original subquery */
 		subquery = (Query *) copyObject(orig_subquery);
 		/* and re-simplify */
-		simple_exists = simplify_EXISTS_query(subquery);
+		simple_exists = simplify_EXISTS_query(root, subquery);
 		Assert(simple_exists);
 		/* See if it can be converted to an ANY query */
 		subquery = convert_EXISTS_to_ANY(root, subquery,
@@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * targetlist, we have to fail, because the pullup operation leaves us
 	 * with noplace to evaluate the targetlist.
 	 */
-	if (!simplify_EXISTS_query(subselect))
+	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
 	/*
@@ -1486,11 +1486,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
  * Returns TRUE if was able to discard the targetlist, else FALSE.
  */
 static bool
-simplify_EXISTS_query(Query *query)
+simplify_EXISTS_query(PlannerInfo *root, Query *query)
 {
 	/*
 	 * We don't try to simplify at all if the query uses set operations,
-	 * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
+	 * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE;
 	 * none of these seem likely in normal usage and their possible effects
 	 * are complex.
 	 */
@@ -1501,7 +1501,6 @@ simplify_EXISTS_query(Query *query)
 		query->hasModifyingCTE ||
 		query->havingQual ||
 		query->limitOffset ||
-		query->limitCount ||
 		query->rowMarks)
 		return false;
 
@@ -1513,6 +1512,33 @@ simplify_EXISTS_query(Query *query)
 		return false;
 
 	/*
+	 * LIMIT clause can be removed if it's a positive constant or ALL, to
+	 * prevent it from being an optimization barrier. It's a common meme to put
+	 * LIMIT 1 within EXISTS subqueries.
+	 */
+	if (query->limitCount)
+	{
+		/*
+		 * eval_const_expressions has not been called yet by subquery_planner,
+		 * may still contain int64 coercions etc.
+		 */
+		Node	   *node = eval_const_expressions(root, query->limitCount);
+		Const	   *limit;
+
+		if (! IsA(node, Const))
+			return false;
+
+		limit = (Const *) node;
+		Assert(limit->consttype == INT8OID);
+
+		/* Checking for negative values is done later; 0 is just silly */
+		if (! limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
+			return false;
+
+		query->limitCount = NULL;
+	}
+
+	/*
 	 * Otherwise, we can throw away the targetlist, as well as any GROUP,
 	 * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
 	 * change a nonzero-rows result to zero rows or vice versa.  (Furthermore,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 01c9130..91daee2 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -775,6 +775,20 @@ select * from int4_tbl o where (f1, f1) in
 (1 row)
 
 --
+-- Check EXISTS simplification with LIMIT
+--
+select * from int4_tbl o where not exists
+	(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+ f1 
+----
+(0 rows)
+
+select * from int4_tbl o where exists (select 1 limit 0);
+ f1 
+----
+(0 rows)
+
+--
 -- check for over-optimization of whole-row Var referencing an Append plan
 --
 select (select q from
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 56707e2..086a9cb 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -433,6 +433,13 @@ select * from int4_tbl o where (f1, f1) in
   (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
 
 --
+-- Check EXISTS simplification with LIMIT
+--
+select * from int4_tbl o where not exists
+	(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
+select * from int4_tbl o where exists (select 1 limit 0);
+
+--
 -- check for over-optimization of whole-row Var referencing an Append plan
 --
 select (select q from
-- 
2.1.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to