[PATCHES] Patch to inline stable SQL set returning UDFs

Richard Rowell Mon, 01 Oct 2007 09:44:31 -0700

I discussed this hack with Neil and Josh in Ottawa back in May.  I
took a stab at it a few weeks ago and below are the results.  It is
mostly modeled after the existing inline_function.  The approach taken
is to recursively cleanup the RTable where stable SQL functions are
found in it.


I've been testing this in our dev environment for a few weeks now.
I'm sure it still has issues but I think it is about as clean as I can
get it without help from a wider audience.  This is my first backend
patch so please be gentle.

I want to thank Neil for answering many of my supremely ignorant
questions on IRC.

-- 
"An eye for eye only ends up making the whole world blind." -- Mohandas Gandhi

diff -r -c3 postgres-8.3/src/backend/optimizer/plan/planner.c postgres-8.3-hacked/src/backend/optimizer/plan/planner.c
*** postgres-8.3/src/backend/optimizer/plan/planner.c	2007-05-26 14:23:01.000000000 -0400
--- postgres-8.3-hacked/src/backend/optimizer/plan/planner.c	2007-10-01 11:49:08.000000000 -0400
***************
*** 256,261 ****
--- 256,268 ----
  		parse->jointree->quals = pull_up_IN_clauses(root,
  													parse->jointree->quals);
  
+ 
+ 	/*
+ 	 * Examine the rtable and inline any stable SQL set returning functions
+ 	 * if possible.
+ 	 */
+ 	parse->rtable = inline_stable_sql_funcs(parse->rtable);
+ 
  	/*
  	 * Check to see if any subqueries in the rangetable can be merged into
  	 * this query.
diff -r -c3 postgres-8.3/src/backend/optimizer/util/clauses.c postgres-8.3-hacked/src/backend/optimizer/util/clauses.c
*** postgres-8.3/src/backend/optimizer/util/clauses.c	2007-09-06 13:31:58.000000000 -0400
--- postgres-8.3-hacked/src/backend/optimizer/util/clauses.c	2007-10-01 11:49:57.000000000 -0400
***************
*** 101,106 ****
--- 101,117 ----
  static void sql_inline_error_callback(void *arg);
  static Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod);
  
+ static Node *
+ substitute_actual_parameters_srf_inliner_mutator(Node *node,
+ 							   substitute_actual_parameters_context *context);
+ static Query *
+ substitute_actual_parameters_srf_inliner(Query *querytree, int nargs, List *args,
+ 							 int *usecounts);
+ static List * inline_stable_sql_funcs_mutator( List* rtable, eval_const_expressions_context *context);
+ static Query* inline_stable_sql_func(FuncExpr *expr, eval_const_expressions_context *context);
+ List * inline_stable_sql_funcs(List* rtable);
+ 
+ 
  
  /*****************************************************************************
   *		OPERATOR clause functions
***************
*** 4419,4421 ****
--- 4430,4792 ----
  	else
  		return mutator(node, context);
  }
+ 
+ /*
+  * Replace Param nodes with the actual arguments passed to the function.
+  */
+ static Query *
+ substitute_actual_parameters_srf_inliner(Query *querytree, int nargs, List *args,
+ 										 int *usecounts)
+ {
+ 	substitute_actual_parameters_context context;
+ 
+ 	context.nargs = nargs;
+ 	context.args = args;
+ 	context.usecounts = usecounts;
+ 
+ 	return query_tree_mutator(querytree, substitute_actual_parameters_srf_inliner_mutator, &context, 0);
+ }
+ 
+ 
+ /*
+  * This mutator cleanses the RTable of function body's to be inlined.  Each parameter reference
+  * in the function body must be replaced with the appropriate function argument.  Sub-queries
+  * are recursed, and sublink parameters are copied as is.
+  */
+ static Node *
+ substitute_actual_parameters_srf_inliner_mutator(Node *node,
+ 							   substitute_actual_parameters_context *context)
+ {
+ 	if (node == NULL)
+ 		return NULL;
+ 	if (IsA(node, Query))
+ 	{
+ 		Query	   *nquery;
+ 
+ 		nquery = query_tree_mutator((Query *) node, substitute_actual_parameters_srf_inliner_mutator, context, 0);
+ 		return (Node *) nquery;
+ 	}
+ 	if (IsA(node, Param))
+ 	{
+ 		Param	   *param = (Param *) node;
+ 
+                 if(param->paramkind == PARAM_SUBLINK )
+                 {
+                         return copyObject( node ); 
+                 }
+ 		if (param->paramkind != PARAM_SUBLINK && param->paramkind != PARAM_EXTERN)
+                 {
+ 			elog(ERROR, "unhanled parameter type");
+                 }
+ 		if (param->paramid <= 0 || param->paramid > context->nargs)
+ 			elog(ERROR, "invalid paramid: %d", param->paramid);
+ 
+ 		/* Count usage of parameter */
+ 		context->usecounts[param->paramid - 1]++;
+ 
+ 		/* Select the appropriate actual arg and replace the Param with it */
+ 		/* We don't need to copy at this time (it'll get done later) */
+ 		return list_nth(context->args, param->paramid - 1);
+ 	}
+ 	return expression_tree_mutator(node, substitute_actual_parameters_srf_inliner_mutator,
+ 								   (void *) context);
+ }
+ 
+ /*
+  * Inline stable SQL set-returning functions
+  *
+  *
+  * Takes a function expression as a parameter.	If the FuncExpr
+  * is that of a stable SQL set-returning function then we can
+  * replace the function's RTE with a subquery RTE containing the
+  * body of the function.  We also recurse into the subqueries RTE
+  * looking for inlineable functions within those.
+  *
+  * Returns the inlined function body (Query) if successfull, NULL on failure.
+  */
+ static Query *
+ inline_stable_sql_func(FuncExpr *expr, eval_const_expressions_context *context)
+ {
+ 	Form_pg_proc funcform;
+ 	HeapTuple	func_tuple;
+ 	Oid		   *argtypes;
+ 	char	   *src;
+ 	Datum		tmp;
+ 	bool		isNull;
+ 	MemoryContext oldcxt;
+ 	MemoryContext mycxt;
+ 	ErrorContextCallback sqlerrcontext;
+ 	List	   *rewritten_parsetree_list;
+ 	Query	   *querytree;
+ 	int		   *usecounts;
+ 	ListCell   *arg;
+ 	int			i;
+ 
+ 
+ 	/* Check for recursive function, and give up trying to expand if so */
+ 	if (list_member_oid(context->active_fns, expr->funcid))
+ 		return NULL;
+ 
+ 	/* Check permission to call function (fail later, if not) */
+ 	if (pg_proc_aclcheck(expr->funcid, GetUserId(), ACL_EXECUTE) != ACLCHECK_OK)
+ 		return NULL;
+ 
+ 
+ 	func_tuple = SearchSysCache(PROCOID, ObjectIdGetDatum(expr->funcid), 0, 0, 0);
+ 
+ 	/*
+ 	 * Setup error traceback support for ereport().  This is so that we can
+ 	 * finger the function that bad information came from.
+ 	 */
+ 	sqlerrcontext.callback = sql_inline_error_callback;
+ 	sqlerrcontext.arg = func_tuple;
+ 	sqlerrcontext.previous = error_context_stack;
+ 	error_context_stack = &sqlerrcontext;
+ 
+ 	if (!HeapTupleIsValid(func_tuple))
+ 	{
+ 		elog(ERROR, "cache lookup failed for function %u", expr->funcid);
+ 		ReleaseSysCache(func_tuple);
+ 		error_context_stack = sqlerrcontext.previous;
+ 		return NULL;
+ 	}
+ 
+ 
+ 	funcform = (Form_pg_proc) GETSTRUCT(func_tuple);
+ 
+ 	/*
+ 	 * If the function is not SQL, doesn't return a set, or isn't stable then
+ 	 * get out now
+ 	 */
+ 	if (funcform->prolang != SQLlanguageId ||
+ 		!funcform->proretset ||
+ 		funcform->provolatile != PROVOLATILE_STABLE)
+ 	{
+ 		ReleaseSysCache(func_tuple);
+ 		error_context_stack = sqlerrcontext.previous;
+ 		return NULL;
+ 	}
+ 
+ 	/* If this is the top-level function then record the context */
+ 	mycxt = AllocSetContextCreate(CurrentMemoryContext,
+ 								  "inline_stable_sql_func",
+ 								  ALLOCSET_DEFAULT_MINSIZE,
+ 								  ALLOCSET_DEFAULT_INITSIZE,
+ 								  ALLOCSET_DEFAULT_MAXSIZE);
+ 	oldcxt = MemoryContextSwitchTo(mycxt);
+ 
+ 	/* Check for polymorphic arguments, and substitute actual arg types */
+ 	argtypes = (Oid *) palloc(funcform->pronargs * sizeof(Oid));
+ 	memcpy(argtypes, funcform->proargtypes.values,
+ 		   funcform->pronargs * sizeof(Oid));
+ 	for (i = 0; i < funcform->pronargs; i++)
+ 	{
+ 		if (IsPolymorphicType(argtypes[i]))
+ 		{
+ 			argtypes[i] = exprType((Node *) list_nth(expr->args, i));
+ 		}
+ 	}
+ 
+ 	/* Fetch and parse and re-write the function body */
+ 	tmp = SysCacheGetAttr(PROCOID,
+ 						  func_tuple,
+ 						  Anum_pg_proc_prosrc,
+ 						  &isNull);
+ 	if (isNull)
+ 		elog(ERROR, "null prosrc for function %u", expr->funcid);
+ 	src = DatumGetCString(DirectFunctionCall1(textout, tmp));
+ 
+ 	/*
+ 	 * We punt if we detect more than one command in the function body.
+ 	 */
+ 	rewritten_parsetree_list = pg_parse_and_rewrite(src, argtypes, funcform->pronargs);
+ 	if (list_length(rewritten_parsetree_list) != 1)
+ 		goto fail;
+ 
+ 	querytree = (linitial(rewritten_parsetree_list));
+ 
+ 	/*
+ 	 * Assure that the function body is a straight SELECT statement.
+ 	 *
+ 	 * REVIEWER: We could probably use more sanity checks here, but certainly
+ 	 * less then those in inline_function.
+ 	 *
+ 	 * I do use limit and order by in functions to pull the "top" rows in some
+ 	 * cases and I'd hate to see those critical functions lost down the
+ 	 * performance hole that is a function_scan.  In general since we are
+ 	 * translating into a subquery would not most anything acceptable in a
+ 	 * subquery fly?
+ 	 */
+ 	if (!IsA(querytree, Query) ||
+ 		querytree->commandType != CMD_SELECT ||
+ 		querytree->utilityStmt ||
+ 		querytree->intoClause ||
+ 		querytree->hasAggs)
+         {
+ 		goto fail;
+         }
+ 
+ 	usecounts = (int *) palloc0(funcform->pronargs * sizeof(int));
+ 
+ 	/*
+ 	 * we substitute all params in the entire function querytree as the
+ 	 * function body may contain subqueries, and there may be paramaters in
+ 	 * the jointree, etc
+ 	 */
+ 	querytree = substitute_actual_parameters_srf_inliner(querytree, funcform->pronargs, expr->args, usecounts);
+ 
+ 
+ 	/* Now check for parameter usage */
+ 
+ 	/*
+ 	 * REVIEWER: Is this enough sanity checking?  This is unchanged from
+ 	 * inline_function, is it enough here?
+ 	 */
+ 	i = 0;
+ 	foreach(arg, expr->args)
+ 	{
+ 		Node	   *param = lfirst(arg);
+ 
+ 		if (usecounts[i] == 0)
+ 		{
+ 			/* Param not used at all: uncool if func is strict */
+ 			if (funcform->proisstrict)
+ 				goto fail;
+ 		}
+ 		else if (usecounts[i] != 1)
+ 		{
+ 			/* Param used multiple times: uncool if expensive or volatile */
+ 			QualCost	eval_cost;
+ 
+ 			/*
+ 			 * We define "expensive" as "contains any subplan or more than 10
+ 			 * operators".  Note that the subplan search has to be done
+ 			 * explicitly, since cost_qual_eval() will barf on unplanned
+ 			 * subselects.
+ 			 */
+ 			if (contain_subplans(param))
+ 				goto fail;
+ 			cost_qual_eval(&eval_cost, list_make1(param), NULL);
+ 			if (eval_cost.startup + eval_cost.per_tuple >
+ 				10 * cpu_operator_cost)
+ 				goto fail;
+ 
+ 			/*
+ 			 * Check volatility last since this is more expensive than the
+ 			 * above tests
+ 			 */
+ 			if (contain_volatile_functions(param))
+ 				goto fail;
+ 		}
+ 		i++;
+ 	}
+ 	ReleaseSysCache(func_tuple);
+ 
+ 	/* scan the function body for inlinable functions and recurse */
+ 	context->active_fns = lcons_oid(expr->funcid, context->active_fns);
+ 	querytree->rtable = inline_stable_sql_funcs_mutator(querytree->rtable, context);
+ 	context->active_fns = list_delete_first(context->active_fns);
+ 
+ 	/*
+ 	 * Whew --- we can make the substitution.  Copy the modified expression
+ 	 * out of the temporary memory context, and clean up.
+ 	 */
+ 	MemoryContextSwitchTo(oldcxt);
+ 
+ 	querytree = copyObject(querytree);
+ 
+ 	MemoryContextDelete(mycxt);
+ 	error_context_stack = sqlerrcontext.previous;
+ 
+ 	return querytree;
+ 
+ fail:
+ 	ReleaseSysCache(func_tuple);
+ 	MemoryContextSwitchTo(oldcxt);
+ 	MemoryContextDelete(mycxt);
+ 	error_context_stack = sqlerrcontext.previous;
+ 	return NULL;
+ }
+ 
+ /*
+  * Walk the range-table inlining stable SQL set functions along the way
+  *
+  * Based heavily off of range_table_mutator which we can not use
+  * since we are transforming some RTE_FUNCTION nodes into RTE_SUBQUERY nodes.
+  * Scans the range table for inlinable function expressions. For stable SQL set 
+  * functions we replace the function's RTE with a RTE_SUBQUERY containing the
+  * body of the function.  We also recurse into the subqueries RTE
+  * looking for inlineable functions within those.
+  */
+ List *
+ inline_stable_sql_funcs(List *rtable)
+ {
+ 	eval_const_expressions_context context;
+ 
+ 	context.boundParams = NULL; /* don't use any bound params */
+ 	context.active_fns = NIL;	/* nothing being recursively simplified */
+ 	context.case_val = NULL;	/* no CASE being examined */
+ 	context.estimate = false;	/* safe transformations only */
+ 
+ 	return inline_stable_sql_funcs_mutator(rtable, &context);
+ }
+ 
+ static List *
+ inline_stable_sql_funcs_mutator(List *rtable, eval_const_expressions_context *context)
+ {
+ 	List	   *newrt = NIL;
+ 	ListCell   *rt;
+ 	Query	   *func_query;
+ 
+ 	foreach(rt, rtable)
+ 	{
+ 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
+ 		RangeTblEntry *newrte;
+ 
+ 		FLATCOPY(newrte, rte, RangeTblEntry);
+ 		switch (rte->rtekind)
+ 		{
+ 			case RTE_RELATION:
+ 			case RTE_SPECIAL:
+ 				/* we don't bother to copy eref, aliases, etc; OK? */
+ 				break;
+ 			case RTE_SUBQUERY:
+ 				{
+ 					newrte->subquery = copyObject(rte->subquery);
+ 
+ 					/*
+ 					 * recurse into sub-queries to inline any stable SQL
+ 					 * functions there
+ 					 */
+ 
+ 					newrte->subquery->rtable = inline_stable_sql_funcs_mutator(newrte->subquery->rtable, context);
+ 				}
+ 				break;
+ 			case RTE_JOIN:
+ 				{
+ 					/* copy join aliases as-is */
+ 					newrte->joinaliasvars = copyObject(rte->joinaliasvars);
+ 				}
+ 				break;
+ 			case RTE_FUNCTION:
+ 				{
+ 					func_query = inline_stable_sql_func((FuncExpr *) rte->funcexpr, context);
+ 					if (func_query != NULL)
+ 					{
+ 						newrte->rtekind = RTE_SUBQUERY;
+ 						newrte->subquery = func_query;
+ 					}
+ 					else
+ 					{
+ 						newrte->funcexpr = copyObject(rte->funcexpr);
+ 					}
+ 				}
+ 				break;
+ 			case RTE_VALUES:
+ 				newrte->values_lists = copyObject(rte->values_lists);
+ 				break;
+ 		}
+ 		newrt = lappend(newrt, newrte);
+ 	}
+ 	return newrt;
+ }
diff -r -c3 postgres-8.3/src/include/optimizer/prep.h postgres-8.3-hacked/src/include/optimizer/prep.h
*** postgres-8.3/src/include/optimizer/prep.h	2007-01-05 17:19:56.000000000 -0500
--- postgres-8.3-hacked/src/include/optimizer/prep.h	2007-09-19 23:18:21.000000000 -0400
***************
*** 24,29 ****
--- 24,32 ----
  extern Node *pull_up_IN_clauses(PlannerInfo *root, Node *node);
  extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode,
  				   bool below_outer_join, bool append_rel_member);
+ 
+ extern List *inline_stable_sql_funcs( List* rtable );
+ 
  extern void reduce_outer_joins(PlannerInfo *root);
  extern Relids get_relids_in_jointree(Node *jtnode);
  extern Relids get_relids_for_join(PlannerInfo *root, int joinrelid);

---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

[PATCHES] Patch to inline stable SQL set returning UDFs

Reply via email to