Le 02/12/2020 à 11:46, Drouvot, Bertrand a écrit :

I ended up making more changes in QualifiedNameGetCreationNamespace <https://doxygen.postgresql.org/namespace_8c.html#a2cde9c8897e89ae47a99c103c4f96d31>() to mimic the retry() logic that is in RangeVarGetAndCheckCreationNamespace().

The attached v2 patch, now protects the function to be orphaned in those 2 scenarios:

Scenario 1: first, session 1 begin create function, then session 2 drops the schema: drop schema is locked and would produce (once the create function finishes):

bdt=# 2020-12-02 10:12:46.364 UTC [15810] ERROR:  cannot drop schema tobeorph because other objects depend on it 2020-12-02 10:12:46.364 UTC [15810] DETAIL:  function tobeorph.bdttime() depends on schema tobeorph 2020-12-02 10:12:46.364 UTC [15810] HINT:  Use DROP ... CASCADE to drop the dependent objects too.
2020-12-02 10:12:46.364 UTC [15810] STATEMENT:  drop schema tobeorph;

Scenario 2: first, session 1 begin drop schema, then session 2 creates the function: create function is locked and would produce (once the drop schema finishes):

2020-12-02 10:14:29.468 UTC [15822] ERROR:  schema "tobeorph" does not exist

Thanks

Bertrand


Hi,

This is a review for the orphaned function bug fix https://www.postgresql.org/message-id/flat/5a9daaae-5538-b209-6279-e903c3ea2...@amazon.com


Contents & Purpose
==================

This patch fixes an historical race condition in PostgreSQL that leave a function orphan of a namespace. It happens when a function is created in a namespace inside a transaction not yet committed and that another session drop the namespace. The function become orphan and can not be used anymore.

postgres=# \df *.bdttime
                List of functions
  Schema |  Name   |      Result data type       | Argument data types | Type
 --------+---------+-----------------------------+---------------------+------
         | bdttime | timestamp without time zone |                     | func


Initial Run
===========

The patch applies cleanly to HEAD. The regression tests all pass successfully with the new behavior. Given the example of the case to reproduce, the second session now waits until the first session is committed and when it is done it thrown error:

    postgres=# drop schema tobeorph;
    ERROR:  cannot drop schema tobeorph because other objects depend on it
    DETAIL:  function tobeorph.bdttime() depends on schema tobeorph
    HINT:  Use DROP ... CASCADE to drop the dependent objects too.

the function is well defined in the catalog.


Comments
========

As Tom mention there is still pending DDL concurrency problems. For example if session 1 execute the following:

    CREATE TYPE public.foo as enum ('one', 'two');
    BEGIN;
    CREATE OR REPLACE FUNCTION tobeorph.bdttime(num foo) RETURNS TIMESTAMP AS $$
    DECLARE
       outTS TIMESTAMP;
    BEGIN
       perform pg_sleep(10);
       RETURN CURRENT_TIMESTAMP;
    END;
    $$ LANGUAGE plpgsql volatile;

if session 2 drop the data type before the session 1 is committed, the function will be declared with invalid parameter data type.

The same problem applies if the returned type or the procedural language is dropped. I have tried to fix that in ProcedureCreate() by using an AccessSharedLock for the data types and languages involved in the function declaration. With this patch the race condition with parameters types, return types and PL languages are fixed. Only data types and procedural languages with Oid greater than FirstBootstrapObjectId will be locked locked. But this is probably more complex than this fix so it is proposed as a separate patch (v1-003-orphaned_function_types_language.patch) to not interfere with the applying of Bertran's patch.


Conclusion
==========

I tag this patch (v1-002-orphaned_function.patch) as ready for committers review as it fixes the bug reported.


--
Gilles Darold
LzLabs GmbH
https://www.lzlabs.com/

diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index 1dd9ecc063..9b305896a3 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -34,6 +34,7 @@
 #include "nodes/nodeFuncs.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_type.h"
+#include "storage/lmgr.h"
 #include "tcop/pquery.h"
 #include "tcop/tcopprot.h"
 #include "utils/acl.h"
@@ -55,7 +56,7 @@ static int	match_prosrc_to_query(const char *prosrc, const char *queryText,
 								  int cursorpos);
 static bool match_prosrc_to_literal(const char *prosrc, const char *literal,
 									int cursorpos, int *newcursorpos);
-
+static ObjectAddress ObjectAddressSetWithShareLock(Oid classId, Oid objectId);
 
 /* ----------------------------------------------------------------
  *		ProcedureCreate
@@ -115,6 +116,7 @@ ProcedureCreate(const char *procedureName,
 	int			i;
 	Oid			trfid;
 	ObjectAddresses *addrs;
+	List *types_locked = NIL;
 
 	/*
 	 * sanity checks
@@ -600,11 +602,28 @@ ProcedureCreate(const char *procedureName,
 	add_exact_object_address(&referenced, addrs);
 
 	/* dependency on implementation language */
-	ObjectAddressSet(referenced, LanguageRelationId, languageObjectId);
+	if (languageObjectId > FirstBootstrapObjectId)
+		/*
+		 * at the same time apply an AccessSharedLock as the language
+		 * could be dropped before our transaction commits
+		 */
+		referenced = ObjectAddressSetWithShareLock(LanguageRelationId, languageObjectId);
+	else
+		ObjectAddressSet(referenced, LanguageRelationId, languageObjectId);
 	add_exact_object_address(&referenced, addrs);
 
 	/* dependency on return type */
-	ObjectAddressSet(referenced, TypeRelationId, returnType);
+	if (returnType > FirstBootstrapObjectId)
+	{
+		/*
+		 * at the same time apply an AccessSharedLock as the returned
+		 * data type could be dropped before our transaction commits
+		 */
+		referenced = ObjectAddressSetWithShareLock(TypeRelationId, returnType);
+		types_locked = lappend(types_locked, (Oid *) &returnType);
+	}
+	else
+		ObjectAddressSet(referenced, TypeRelationId, returnType);
 	add_exact_object_address(&referenced, addrs);
 
 	/* dependency on transform used by return type, if any */
@@ -617,7 +636,30 @@ ProcedureCreate(const char *procedureName,
 	/* dependency on parameter types */
 	for (i = 0; i < allParamCount; i++)
 	{
-		ObjectAddressSet(referenced, TypeRelationId, allParams[i]);
+		ListCell *lc;
+		bool nolocked = true;
+		/* do not lock twice a parameter type or if it below FirstBootstrapObjectId */
+		foreach(lc, types_locked)
+		{
+			Oid *savedId = (Oid *) lfirst(lc);
+
+			if (*savedId < FirstBootstrapObjectId || *savedId == allParams[i])
+			{
+				nolocked = false;
+				break;
+			}
+		}	
+		if (nolocked)
+		{
+			/*
+			 * at the same time apply an AccessSharedLock as the parameter
+			 * data type could be dropped before our transaction commits
+			 */
+			referenced = ObjectAddressSetWithShareLock(TypeRelationId, allParams[i]);
+			types_locked = lappend(types_locked, (Oid *) &allParams[i]);
+		}
+		else
+			ObjectAddressSet(referenced, TypeRelationId, allParams[i]);
 		add_exact_object_address(&referenced, addrs);
 
 		/* dependency on transform used by parameter type, if any */
@@ -626,7 +668,11 @@ ProcedureCreate(const char *procedureName,
 			ObjectAddressSet(referenced, TransformRelationId, trfid);
 			add_exact_object_address(&referenced, addrs);
 		}
+		/* lock parameter type to avoid concurrent drop */
+		if (allParams[i] != returnType)
+			LockDatabaseObject(TypeRelationId, allParams[i], 0, AccessShareLock);
 	}
+	list_free(types_locked);
 
 	/* dependency on support function, if any */
 	if (OidIsValid(prosupport))
@@ -1150,3 +1196,36 @@ oid_array_to_list(Datum datum)
 		result = lappend_oid(result, values[i]);
 	return result;
 }
+
+/*
+ * Return an ObjectAddress for the object after we take an AccessShareLock
+ * on the object to guard against concurrent DDL operations. Without this,
+ * the object could be dropped before our transaction commits leaving behind
+ * relations with object id pointing to a no-longer-existent object.
+ *
+ * Used in ProcedureCreate() to protect from concuttent drop of type used
+ * in function parameters or returned type, as well as language.
+ */
+static ObjectAddress
+ObjectAddressSetWithShareLock(Oid classId, Oid objectId)
+{
+	ObjectAddress referenced;
+
+	switch (classId)
+	{
+		case LanguageRelationId:
+		case TypeRelationId:
+			/* lock object */
+			LockDatabaseObject(classId, objectId, 0, AccessShareLock);
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("this class of object %d can not be locked here.", classId)));
+			break;
+	}
+
+	ObjectAddressSet(referenced, classId, objectId);
+
+	return referenced;
+}

Reply via email to