Hi list,

PostgreSQL 9.0 introduced the "join removal" feature for cases where
an left join can't return more than one output row. This feature
relies on checking whether a UNIQUE constraint exists on the joined
column in the referenced table.

However, there was another new feature in 9.0: deferrable unique
constraints. It seems that the join removal code currently doesn't
check whether the constraint is deferrable or not. Since deferrable
unique constraints may contain duplicate rows in the course of a
transaction, join removal changes the results returned from a query.

This probably doesn't affect many real-world applications, but it
seems wrong that a performance feature can affect results returned by
a query.

Test case:

create table uniq (i int unique deferrable initially deferred);
begin;
insert into uniq values(1),(1);
select count(*) from uniq a left join uniq b using (i);
 count
-------
     2

An inner join performs as expected:
marti=# select count(*) from uniq a inner join uniq b using (i);
 count
-------
     4

----
Attached is a patch with an attempt to fix it. I'm not at all sure
this is the right approach, but it seems the cheapest way to make sure
is walk through *all* rows in pg_constraint for the given table, since
there is no index on pg_constraint.conindid. I'm not sure whether the
cost of this check outweighs the usefulness of this patch.

Catalog changes are a no-no for backported patches, right?
Or should I just go ahead and create this index, and not worry about
backporting?

I'm also adding lots of includes to this file. Maybe
unique_index_is_consistent() should be moved to another file instead?

While passing by, I also added an unrelated check to
check_functional_grouping() for the validity of the constraint. This
isn't necessary for now (unique constraints are always valid), but
seems useful just in case this changes in the future.

Regards,
Marti
From 23f33298485ecff80f01feb0dbd349cca2b38032 Mon Sep 17 00:00:00 2001
From: Marti Raudsepp <ma...@juffo.org>
Date: Tue, 18 Oct 2011 21:31:42 +0300
Subject: [PATCH] Don't allow join removal for deferrable unique constraints

This used to be allowed, but could return incorrect results if the
deferrable constraint is invalid in the middle of a transaction.
---
 src/backend/catalog/pg_constraint.c   |    4 +-
 src/backend/optimizer/path/indxpath.c |  101 ++++++++++++++++++++++++++++++++-
 src/test/regress/expected/join.out    |   39 +++++++++++++
 src/test/regress/sql/join.sql         |   24 ++++++++
 4 files changed, 164 insertions(+), 4 deletions(-)

diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 6997994..7dbaca0 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -842,8 +842,8 @@ check_functional_grouping(Oid relid,
 		/* Only PK constraints are of interest for now, see comment above */
 		if (con->contype != CONSTRAINT_PRIMARY)
 			continue;
-		/* Constraint must be non-deferrable */
-		if (con->condeferrable)
+		/* Constraint must be non-deferrable and valid */
+		if (con->condeferrable || !con->convalidated)
 			continue;
 
 		/* Extract the conkey array, ie, attnums of PK's columns */
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 940efb3..6d46e75 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -17,10 +17,14 @@
 
 #include <math.h>
 
+#include "access/genam.h"
+#include "access/heapam.h"
 #include "access/skey.h"
 #include "access/sysattr.h"
+#include "catalog/indexing.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
@@ -34,9 +38,12 @@
 #include "optimizer/var.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
 #include "utils/selfuncs.h"
+#include "utils/tqual.h"
+#include "storage/lock.h"
 
 
 #define IsBooleanOpfamily(opfamily) \
@@ -116,6 +123,8 @@ static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
 				  Relids outer_relids);
 static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
 					  Relids outer_relids, bool isouterjoin);
+static bool unique_index_is_consistent(Oid reloid, Oid indexoid,
+								Oid *conoid);
 static bool match_boolean_index_clause(Node *clause, int indexcol,
 						   IndexOptInfo *index);
 static bool match_special_index_operator(Expr *clause,
@@ -2248,6 +2257,71 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
 }
 
 /*
+ * unique_constraint_is_consistent
+ *	  Make sure that a unique index's respective constraint is validated and
+ *	  not deferrable. Sets *conoid to the found constraint OID, or InvalidOid
+ *	  if not found.
+ *
+ * This is expensive; there is on index on pg_constraint.conindid, so we have
+ * to scan all constraints on the relation.
+ */
+static bool
+unique_index_is_consistent(Oid reloid, Oid indexoid,
+						   Oid *conoid)
+{
+	/* If no constraint is found, then the index cannot be invalid/deferrable */
+	bool		result = true;
+	Relation	pg_constraint;
+	HeapTuple	tuple;
+	SysScanDesc scan;
+	ScanKeyData skey[1];
+
+	*conoid = InvalidOid;
+
+	/* Scan pg_constraint for constraints of the target rel */
+	pg_constraint = heap_open(ConstraintRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(reloid));
+
+	scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true,
+							  SnapshotNow, 1, skey);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		if (con->conindid == indexoid)
+		{
+			if (con->condeferrable)
+				result = false;
+
+			/*
+			 * Currently unique constraints are always valid, but that could
+			 * change in the future. This check costs us nothing.
+			 */
+			if (!con->convalidated)
+				result = false;
+
+			*conoid = HeapTupleGetOid(tuple);
+
+			/*
+			 * Stop searching since we found a constraint. Assumes that
+			 * conindid is unique.
+			 */
+			break;
+		}
+	}
+
+	systable_endscan(scan);
+	heap_close(pg_constraint, AccessShareLock);
+
+	return result;
+}
+
+/*
  * relation_has_unique_index_for
  *	  Determine whether the relation provably has at most one row satisfying
  *	  a set of equality conditions, because the conditions constrain all
@@ -2274,6 +2348,8 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 	foreach(ic, rel->indexlist)
 	{
 		IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
+		RangeTblEntry *rte;
+		Oid			conoid = InvalidOid;
 		int			c;
 
 		/*
@@ -2326,8 +2402,29 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 		}
 
 		/* Matched all columns of this index? */
-		if (c == ind->ncolumns)
-			return true;
+		if (c != ind->ncolumns)
+			continue;
+
+		/*
+		 * Deferrable or invalid constraints don't qualify for removal. This
+		 * check is last since it's the most expensive -- requires a catalog
+		 * lookup.
+		 *
+		 * It's tempting to peek into the queue of deferred unique checks
+		 * and see if it's empty, but there's no mechanism to invalidate the
+		 * plan when that queue changes.
+		 */
+		rte = root->simple_rte_array[rel->relid];
+		Assert(rte->rtekind == RTE_RELATION);
+
+		if (!unique_index_is_consistent(rte->relid, ind->indexoid, &conoid))
+			continue;
+
+		/* Query plan now relies on this constraint */
+		if (conoid != InvalidOid)
+			root->parse->constraintDeps = lappend_oid(root->parse->constraintDeps, conoid);
+
+		return true;
 	}
 
 	return false;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a54000b..9217714 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2755,3 +2755,42 @@ SELECT * FROM
 (5 rows)
 
 rollback;
+-- and just one more bug, deferrable unique constraints can't be relied on
+create temp table uniq (i int);
+create unique index uniq_i_idx on uniq (i);
+begin;
+prepare join_removal as
+  select a.* from uniq as a left join uniq as b using (i);
+explain (costs off) execute join_removal;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uniq a
+(1 row)
+
+-- query must be replanned after a deferrable constraint is added
+alter table uniq add constraint uniq_i_idx
+	unique using index uniq_i_idx deferrable initially deferred;
+explain (costs off) execute join_removal;
+           QUERY PLAN           
+--------------------------------
+ Hash Left Join
+   Hash Cond: (a.i = b.i)
+   ->  Seq Scan on uniq a
+   ->  Hash
+         ->  Seq Scan on uniq b
+(5 rows)
+
+-- test actual results
+insert into uniq values(1),(1);
+execute join_removal;
+ i 
+---
+ 1
+ 1
+ 1
+ 1
+(4 rows)
+
+rollback;
+deallocate join_removal;
+drop table uniq;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1be80b8..ca46bae 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -736,3 +736,27 @@ SELECT * FROM
   ON true;
 
 rollback;
+
+-- and just one more bug, deferrable unique constraints can't be relied on
+create temp table uniq (i int);
+create unique index uniq_i_idx on uniq (i);
+
+begin;
+prepare join_removal as
+  select a.* from uniq as a left join uniq as b using (i);
+
+explain (costs off) execute join_removal;
+
+-- query must be replanned after a deferrable constraint is added
+alter table uniq add constraint uniq_i_idx
+	unique using index uniq_i_idx deferrable initially deferred;
+
+explain (costs off) execute join_removal;
+
+-- test actual results
+insert into uniq values(1),(1);
+execute join_removal;
+
+rollback;
+deallocate join_removal;
+drop table uniq;
-- 
1.7.7

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to