On Mon, 13 Jun 2022 at 16:50, Greg Stark <st...@mit.edu> wrote:
>
> For that matter.... the gotchas are a bit .... convoluted....
>
> Perhaps we should automatically fix up the current search patch and
> attach it to functions where necessary for users instead of just
> whingeing at them....

So I reviewed my own patch and.... it was completely broken... I fixed
it to actually check the right variables.

I also implemented the other idea above of actually fixing up
search_path in proconfig for the user by default. I think this is
going to be more practical.

The problem with expecting the user to provide search_path is that
they probably aren't today so the warnings would be firing for
everything...

Providing a fixed up search_path for users would give them a smoother
upgrade process where we can give a warning only if the search_path is
changed substantively which is much less likely.

I'm still quite concerned about the performance impact of having
search_path on so many functions. It causes a cache flush which could
be pretty painful on a frequently called function such as one in an
index expression during a data load....

The other issue is that having proconfig set does prevent these
functions from being inlined which can be seen in the regression test
as seen below. I'm not sure how big a problem this is for users.
Inlining is important for many use cases I think. Maybe we can go
ahead and inline things even if they have a proconfig if it matches
the proconfig on the caller? Or maybe even check if we get the same
objects from both search_paths?


Of course this patch is still very WIP. Only one or the other function
makes sense to keep. And I'm not opposed to having a GUC to
enable/disable the enforcement or warnings. And the code itself needs
to be cleaned up with parts of it moving to guc.c and/or namespace.c.


Example of regression tests noticing that immutable functions with
proconfig set become non-inlineable:

diff -U3 /home/stark/src/postgresql/src/test/regress/expected/rangefuncs.out
/home/stark/src/postgresql/src/test/regress/results/rangefuncs.out
--- /home/stark/src/postgresql/src/test/regress/expected/rangefuncs.out
2022-01-17 12:01:54.958628564 -0500
+++ /home/stark/src/postgresql/src/test/regress/results/rangefuncs.out
2022-06-16 02:16:47.589703966 -0400
@@ -1924,14 +1924,14 @@
 select * from array_to_set(array['one', 'two']) as t(f1 point,f2 text);
 ERROR:  return type mismatch in function declared to return record
 DETAIL:  Final statement returns integer instead of point at column 1.
-CONTEXT:  SQL function "array_to_set" during inlining
+CONTEXT:  SQL function "array_to_set" during startup
 explain (verbose, costs off)
   select * from array_to_set(array['one', 'two']) as t(f1
numeric(4,2),f2 text);
-                          QUERY PLAN
---------------------------------------------------------------
- Function Scan on pg_catalog.generate_subscripts i
-   Output: i.i, ('{one,two}'::text[])[i.i]
-   Function Call: generate_subscripts('{one,two}'::text[], 1)
+                     QUERY PLAN
+----------------------------------------------------
+ Function Scan on public.array_to_set t
+   Output: f1, f2
+   Function Call: array_to_set('{one,two}'::text[])
 (3 rows)

 create temp table rngfunc(f1 int8, f2 int8);
@@ -2064,11 +2064,12 @@

 explain (verbose, costs off)
 select * from testrngfunc();
-                       QUERY PLAN
---------------------------------------------------------
- Result
-   Output: 7.136178::numeric(35,6), 7.14::numeric(35,2)
-(2 rows)
+             QUERY PLAN
+-------------------------------------
+ Function Scan on public.testrngfunc
+   Output: f1, f2
+   Function Call: testrngfunc()
+(3 rows)

 select * from testrngfunc();
     f1    |  f2


--
greg
From 02f16014c3458e549719aaaaf4bb6c338e55b6e2 Mon Sep 17 00:00:00 2001
From: Greg Stark <stark@mit.edu>
Date: Thu, 16 Jun 2022 01:40:49 -0400
Subject: [PATCH 2/2] Instead of checks actually force search_path on immutable
 and security definer functions

---
 src/backend/commands/functioncmds.c | 79 ++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 9347ed70e2..20da751202 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -41,6 +41,7 @@
 #include "catalog/indexing.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_aggregate.h"
+#include "catalog/pg_authid.h"
 #include "catalog/pg_cast.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_namespace.h"
@@ -683,9 +684,83 @@ update_proconfig_value(ArrayType *a, List *set_items)
  * https://www.postgresql.org/docs/current/sql-createfunction.html#SQL-CREATEFUNCTION-SECURITY
  */ 
 
-/* XXX This logic should perhaps be moved to namespace.c XXX */
+/* XXX Some or all of this logic should perhaps be moved to namespace.c XXX */
 #include "utils/varlena.h"
 
+static ArrayType *
+fixup_proconfig_value(ArrayType *a, char provolatile, bool prosecdef) {
+	char *search_path = NULL;
+
+	List	   *namelist;
+	ListCell   *l;
+	StringInfoData string;
+
+	if (provolatile != PROVOLATILE_IMMUTABLE && !prosecdef) {
+		return a;
+	}
+	if (a != NULL) {
+		search_path = GUCArrayLookup(a, "search_path");
+	}
+	if (!search_path) {
+		search_path = pstrdup(namespace_search_path);
+	}
+
+	/* Parse string into list of identifiers
+	 * GUCArrayLookup returns a pstrdup'd string so this is safe
+	 */
+	if (!SplitIdentifierString(search_path, ',', &namelist))
+	{
+		/* syntax error in name list */
+		elog(ERROR, "invalid list syntax in search_path setting on function");
+	}
+
+	initStringInfo(&string);
+	appendStringInfoString(&string, "pg_catalog");
+	foreach(l, namelist)
+	{
+		char	   *curname = (char *) lfirst(l);
+
+		if (strcmp(curname, "pg_catalog") == 0)
+		{
+			if (foreach_current_index(l) != 0)
+				elog(WARNING, "moving pg_catalog to first position in search_path for IMMUTABLE or SECURITY DEFINER function");
+			continue;
+		}
+		if (strcmp(curname, "pg_temp") == 0)
+		{
+			if (foreach_current_index(l) != namelist->length-1)
+				elog(WARNING, "moving pg_temp to last position in search_path for IMMUTABLE or SECURITY DEFINER function");
+			continue;
+		}
+		if (strcmp(curname, "$user") == 0)
+		{
+			/* $user --- substitute namespace matching user name, if any */
+			HeapTuple	tuple;
+
+			tuple = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
+			if (HeapTupleIsValid(tuple))
+			{
+				curname = NameStr(((Form_pg_authid) GETSTRUCT(tuple))->rolname);
+				ReleaseSysCache(tuple);
+				/*elog(WARNING, "IMMUTABLE or SECURITY DEFINER functions should not have a search path including $user");*/
+			} else {
+				/* No great options here */
+				ReleaseSysCache(tuple);
+				elog(WARNING, "removing invalid $user from search_path on IMMUTABLE or SECURITY DEFINER function");
+				continue;
+			}
+		}
+		appendStringInfoChar(&string, ',');
+		appendStringInfoString(&string, quote_identifier(curname));
+	}
+	appendStringInfoChar(&string, ',');
+	appendStringInfoString(&string, "pg_temp");
+
+	a = GUCArrayAdd(a, "search_path", string.data);
+	return a;
+}
+	
+
 static void
 verify_proconfig_value(ArrayType *a, char provolatile, bool prosecdef) {
 	char *proconfig_search_path = NULL;
@@ -1233,6 +1308,7 @@ CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt)
 		}
 	}
 
+	proconfig = fixup_proconfig_value(proconfig, volatility, security);
 	verify_proconfig_value(proconfig, volatility, security);
 
 	/*
@@ -1564,6 +1640,7 @@ AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt)
 		/* update according to each SET or RESET item, left to right */
 		a = update_proconfig_value(a, set_items);
 
+		a = fixup_proconfig_value(a, procForm->provolatile, procForm->prosecdef);
 		verify_proconfig_value(a, procForm->provolatile, procForm->prosecdef);
 
 		/* update the tuple */
-- 
2.36.1

From 94fcda77f88b55a4c6e06a2e4367c1bd322c07b4 Mon Sep 17 00:00:00 2001
From: Greg Stark <stark@mit.edu>
Date: Mon, 13 Jun 2022 16:47:49 -0400
Subject: [PATCH 1/2] Add checks on search_path for IMMUTABLE and SECURITY
 DEFINER functions

---
 src/backend/commands/functioncmds.c | 76 +++++++++++++++++++++++++++++
 src/backend/utils/misc/guc.c        | 57 +++++++++++++++++++++-
 src/include/utils/guc.h             |  1 +
 3 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 00a6d282cf..9347ed70e2 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -675,6 +675,78 @@ update_proconfig_value(ArrayType *a, List *set_items)
 	return a;
 }
 
+/* We only enforce constraints on IMMUTABLE functions (because they can be
+ * used in indexes and search_path hacking can corrupt indexes for everyone)
+ * or SECURITY DEFINER functions (for obvious reasons). For these functions we
+ * enforce that the proconfig GUC settings should include search_path and that
+ * that search path should follow the recommendations listed at
+ * https://www.postgresql.org/docs/current/sql-createfunction.html#SQL-CREATEFUNCTION-SECURITY
+ */ 
+
+/* XXX This logic should perhaps be moved to namespace.c XXX */
+#include "utils/varlena.h"
+
+static void
+verify_proconfig_value(ArrayType *a, char provolatile, bool prosecdef) {
+	char *proconfig_search_path = NULL;
+	List	   *namelist;
+	ListCell   *l;
+	bool has_dollar_user = false;
+	bool pg_catalog_first = true;
+	bool pg_temp_last = false;
+	bool is_first = true;
+	
+	if (provolatile != PROVOLATILE_IMMUTABLE && !prosecdef) {
+		return;
+	}
+
+	if (a != NULL) {
+		proconfig_search_path = GUCArrayLookup(a, "search_path");
+	}
+
+	if (!proconfig_search_path)
+	{
+		elog(WARNING, "IMMUTABLE and SECURITY DEFINER functions must have search_path set");
+		return;
+	}
+
+	/* Parse string into list of identifiers
+	 * GUCArrayLookup returns a pstrdup'd string so this is safe
+	 */
+	if (!SplitIdentifierString(proconfig_search_path, ',', &namelist))
+	{
+		/* syntax error in name list */
+		elog(ERROR, "invalid list syntax in search_path setting on function");
+	}
+
+	foreach(l, namelist)
+	{
+		char	   *curname = (char *) lfirst(l);
+
+		/* It's not last unless it's last... If it's missing pg_temp is
+		 * implicitly added as first */
+		pg_temp_last = false;
+
+		if (strcmp(curname, "$user") == 0)
+			has_dollar_user = true;
+		if (strcmp(curname, "pg_catalog") == 0 && !is_first)
+			pg_catalog_first = false;
+		if (strcmp(curname,"pg_temp") == 0)
+			pg_temp_last = true;
+		
+		is_first = false;
+	}
+
+	if (has_dollar_user)
+		elog(WARNING, "IMMUTABLE or SECURITY DEFINER functions should not have a search path including $user");
+
+	if (!pg_catalog_first)
+		elog(WARNING, "IMMUTABLE or SECURITY DEFINER functions should have pg_catalog first in search_path (or omit it implicitly placing it first)");
+
+	if (!pg_temp_last)
+		elog(WARNING, "IMMUTABLE or SECURITY DEFINER functions should explicitly place pg_temp last or else it's implicitly first");
+}
+
 static Oid
 interpret_func_support(DefElem *defel)
 {
@@ -1161,6 +1233,8 @@ CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt)
 		}
 	}
 
+	verify_proconfig_value(proconfig, volatility, security);
+
 	/*
 	 * Convert remaining parameters of CREATE to form wanted by
 	 * ProcedureCreate.
@@ -1490,6 +1564,8 @@ AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt)
 		/* update according to each SET or RESET item, left to right */
 		a = update_proconfig_value(a, set_items);
 
+		verify_proconfig_value(a, procForm->provolatile, procForm->prosecdef);
+
 		/* update the tuple */
 		memset(repl_repl, false, sizeof(repl_repl));
 		repl_repl[Anum_pg_proc_proconfig - 1] = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a7cc49898b..55eed02851 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -11435,7 +11435,6 @@ ProcessGUCArray(ArrayType *array,
 	}
 }
 
-
 /*
  * Add an entry to an option array.  The array parameter may be NULL
  * to indicate the current table entry is NULL.
@@ -11514,6 +11513,62 @@ GUCArrayAdd(ArrayType *array, const char *name, const char *value)
 	return a;
 }
 
+/*
+ * Return palloced string with the value for a GUC in a GUCArray (i.e. array
+ * of foo=bar text datums) with name search_name. Returns NULL if the value of
+ * the GUC can not be parsed using ParseLongOption or if it's absent from the
+ * array.
+ */
+char *
+GUCArrayLookup(ArrayType *array, const char *search_name)
+{
+	int			i;
+
+	Assert(array != NULL);
+	Assert(ARR_ELEMTYPE(array) == TEXTOID);
+	Assert(ARR_NDIM(array) == 1);
+	Assert(ARR_LBOUND(array)[0] == 1);
+
+	for (i = 1; i <= ARR_DIMS(array)[0]; i++)
+	{
+		Datum		d;
+		bool		isnull;
+		char	   *s;
+		char	   *name;
+		char	   *value;
+		char	   *valuecopy = NULL;
+		bool        done = false;
+
+		d = array_ref(array, 1, &i,
+					  -1 /* varlenarray */ ,
+					  -1 /* TEXT's typlen */ ,
+					  false /* TEXT's typbyval */ ,
+					  TYPALIGN_INT /* TEXT's typalign */ ,
+					  &isnull);
+
+		if (isnull)
+			continue;
+
+		s = TextDatumGetCString(d);
+
+		/* ParseLongOption returns malloced storage */
+		ParseLongOption(s, &name, &value);
+		done = (strcmp(name, search_name) == 0);
+		
+		if (done && value) {
+			valuecopy = pstrdup(value);
+		}
+		if (name)
+			free(name);
+		if (value)
+			free(value);
+
+		if (done)
+			return valuecopy;
+	}
+	return NULL;
+}
+
 
 /*
  * Delete an entry from an option array.  The array parameter may be NULL
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index d5976ecbfb..24aa13f9e6 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -404,6 +404,7 @@ extern char *ExtractSetVariableArgs(VariableSetStmt *stmt);
 extern void ProcessGUCArray(ArrayType *array,
 							GucContext context, GucSource source, GucAction action);
 extern ArrayType *GUCArrayAdd(ArrayType *array, const char *name, const char *value);
+extern char * GUCArrayLookup(ArrayType *array, const char *search_name);
 extern ArrayType *GUCArrayDelete(ArrayType *array, const char *name);
 extern ArrayType *GUCArrayReset(ArrayType *array);
 
-- 
2.36.1

Reply via email to