I wrote:
> However, in the case at hand, the complaint basically is why aren't we
> treating the boolean function expression like a boolean variable, and
> looking to see if there are stats available for it, like this other
> bit in clause_selectivity:

>             /*
>              * A Var at the top of a clause must be a bool Var. This is
>              * equivalent to the clause reln.attribute = 't', so we compute
>              * the selectivity as if that is what we have.
>              */
>             s1 = restriction_selectivity(root,
>                                          BooleanEqualOperator,
>                                          list_make2(var,
>                                                     makeBoolConst(true,
>                                                                   false)),
>                                          InvalidOid,
>                                          varRelid);

> Indeed you could argue that this ought to be the fallback behavior for
> *any* unhandled case, not just function expressions.  Not sure if we'd
> need to restrict it to single-relation expressions or not.

> The implication of doing it like this would be that the default estimate
> in the absence of any matching stats would be 0.5 (since eqsel defaults
> to 1/ndistinct, and get_variable_numdistinct will report 2.0 for any
> boolean-type expression it has no stats for).  That's not a huge change
> from the existing 0.3333333 estimate, which seems pretty unprincipled
> anyway ... but it would probably be enough to annoy people if we did it in
> stable branches.  So I'd be inclined to propose changing this in HEAD and
> maybe 9.5, but not further back.  (For non-function expressions, 0.5 is
> the default already, so those would not change behavior.)

I experimented with the attached patch.  The change in the default
estimate for a function results in just one change in the standard
regression test results, so far as I can find.

Comments, objections?

                        regards, tom lane

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index dcac1c1..c862b70 100644
*** a/src/backend/optimizer/path/clausesel.c
--- b/src/backend/optimizer/path/clausesel.c
***************
*** 14,20 ****
   */
  #include "postgres.h"
  
- #include "catalog/pg_operator.h"
  #include "nodes/makefuncs.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
--- 14,19 ----
*************** clause_selectivity(PlannerInfo *root,
*** 568,585 ****
  		if (var->varlevelsup == 0 &&
  			(varRelid == 0 || varRelid == (int) var->varno))
  		{
! 			/*
! 			 * A Var at the top of a clause must be a bool Var. This is
! 			 * equivalent to the clause reln.attribute = 't', so we compute
! 			 * the selectivity as if that is what we have.
! 			 */
! 			s1 = restriction_selectivity(root,
! 										 BooleanEqualOperator,
! 										 list_make2(var,
! 													makeBoolConst(true,
! 																  false)),
! 										 InvalidOid,
! 										 varRelid);
  		}
  	}
  	else if (IsA(clause, Const))
--- 567,574 ----
  		if (var->varlevelsup == 0 &&
  			(varRelid == 0 || varRelid == (int) var->varno))
  		{
! 			/* Use the restriction selectivity function for a bool Var */
! 			s1 = boolvarsel(root, (Node *) var, varRelid);
  		}
  	}
  	else if (IsA(clause, Const))
*************** clause_selectivity(PlannerInfo *root,
*** 680,704 ****
  		if (IsA(clause, DistinctExpr))
  			s1 = 1.0 - s1;
  	}
- 	else if (is_funcclause(clause))
- 	{
- 		/*
- 		 * This is not an operator, so we guess at the selectivity. THIS IS A
- 		 * HACK TO GET V4 OUT THE DOOR.  FUNCS SHOULD BE ABLE TO HAVE
- 		 * SELECTIVITIES THEMSELVES.       -- JMH 7/9/92
- 		 */
- 		s1 = (Selectivity) 0.3333333;
- 	}
- #ifdef NOT_USED
- 	else if (IsA(clause, SubPlan) ||
- 			 IsA(clause, AlternativeSubPlan))
- 	{
- 		/*
- 		 * Just for the moment! FIX ME! - vadim 02/04/98
- 		 */
- 		s1 = (Selectivity) 0.5;
- 	}
- #endif
  	else if (IsA(clause, ScalarArrayOpExpr))
  	{
  		/* Use node specific selectivity calculation function */
--- 669,674 ----
*************** clause_selectivity(PlannerInfo *root,
*** 766,771 ****
--- 736,752 ----
  								jointype,
  								sjinfo);
  	}
+ 	else
+ 	{
+ 		/*
+ 		 * For anything else, see if we can consider it as a boolean variable.
+ 		 * This only works if it's an immutable expression in Vars of a single
+ 		 * relation; but there's no point in us checking that here because
+ 		 * boolvarsel() will do it internally, and return a suitable default
+ 		 * selectivity (0.5) if not.
+ 		 */
+ 		s1 = boolvarsel(root, clause, varRelid);
+ 	}
  
  	/* Cache the result if possible */
  	if (cacheable)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 72bc502..a643c6f 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 105,110 ****
--- 105,111 ----
  #include "access/sysattr.h"
  #include "catalog/index.h"
  #include "catalog/pg_collation.h"
+ #include "catalog/pg_operator.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
  #include "catalog/pg_type.h"
*************** icnlikesel(PG_FUNCTION_ARGS)
*** 1440,1445 ****
--- 1441,1479 ----
  }
  
  /*
+  *		boolvarsel		- Selectivity of Boolean variable.
+  *
+  * This can actually be called on any expression involving only Vars of
+  * the specified relation.  If there are statistics about the Var or
+  * expression (the latter can happen, if it's indexed) then we'll produce
+  * a real estimate; otherwise we default to 0.5.
+  */
+ Selectivity
+ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+ {
+ 	VariableStatData vardata;
+ 	double		selec;
+ 
+ 	examine_variable(root, arg, varRelid, &vardata);
+ 	if (HeapTupleIsValid(vardata.statsTuple))
+ 	{
+ 		/*
+ 		 * A boolean variable V is equivalent to the clause V = 't', so we
+ 		 * compute the selectivity as if that is what we have.
+ 		 */
+ 		selec = var_eq_const(&vardata, BooleanEqualOperator,
+ 							 BoolGetDatum(true), false, true);
+ 	}
+ 	else
+ 	{
+ 		/* Punt and estimate 0.5 */
+ 		selec = 0.5;
+ 	}
+ 	ReleaseVariableStats(vardata);
+ 	return selec;
+ }
+ 
+ /*
   *		booltestsel		- Selectivity of BooleanTest Node.
   */
  Selectivity
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index b3d8017..a7433d9 100644
*** a/src/include/utils/selfuncs.h
--- b/src/include/utils/selfuncs.h
*************** extern Datum icregexnejoinsel(PG_FUNCTIO
*** 164,169 ****
--- 164,170 ----
  extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
  extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
  
+ extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
  extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
  			Node *arg, int varRelid,
  			JoinType jointype, SpecialJoinInfo *sjinfo);
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 9d3540f..88dcca3 100644
*** a/src/test/regress/expected/rowsecurity.out
--- b/src/test/regress/expected/rowsecurity.out
*************** EXPLAIN (COSTS OFF) SELECT * FROM docume
*** 186,204 ****
  (7 rows)
  
  EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
!                               QUERY PLAN                              
! ----------------------------------------------------------------------
   Hash Join
!    Hash Cond: (category.cid = document.cid)
!    ->  Seq Scan on category
     ->  Hash
!          ->  Subquery Scan on document
!                Filter: f_leak(document.dtitle)
!                ->  Seq Scan on document document_1
!                      Filter: (dlevel <= $0)
!                      InitPlan 1 (returns $0)
!                        ->  Index Scan using uaccount_pkey on uaccount
!                              Index Cond: (pguser = "current_user"())
  (11 rows)
  
  -- only owner can change policies
--- 186,204 ----
  (7 rows)
  
  EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
!                            QUERY PLAN                           
! ----------------------------------------------------------------
   Hash Join
!    Hash Cond: (document.cid = category.cid)
!    ->  Subquery Scan on document
!          Filter: f_leak(document.dtitle)
!          ->  Seq Scan on document document_1
!                Filter: (dlevel <= $0)
!                InitPlan 1 (returns $0)
!                  ->  Index Scan using uaccount_pkey on uaccount
!                        Index Cond: (pguser = "current_user"())
     ->  Hash
!          ->  Seq Scan on category
  (11 rows)
  
  -- only owner can change policies
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to