This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 688bff0280f66b0a7b92c7fb83a7a2d4e5519408
Author: Soumyadeep Chakraborty <soumyadeep2...@gmail.com>
AuthorDate: Mon Jun 26 14:05:13 2023 -0700

    resqueue: Fix statement leak for holdable cursors
    
    Creating cursors "with hold" would clean up the locallock prematurely
    during transaction commit for the DECLARE CURSOR command:
    
    RemoveLocalLock lock.c:1406
    LockReleaseAll lock.c:2313
    ProcReleaseLocks proc.c:900
    ResourceOwnerReleaseInternal resowner.c:320
    ResourceOwnerRelease resowner.c:225
    CommitTransaction xact.c:2818
    
    This would cause an early exit in ResLockRelease():
    LOG: Resource queue %d: no lock to release
    
    and subsequently the assertion failure:
    FailedAssertion("!(SHMQueueEmpty(&(MyProc->myProcLocks[i])))", File: "proc.c", Line: 994
    
    indicating a failure to clean up the resource queue locks, leading to a
    statement leak in the associated resource queue.
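    
    For context, the pre-fix sweep in LockReleaseAll() looked like this
    (a simplified sketch, not the verbatim code):
    
        /*
         * Resource queue LOCALLOCKs keep nLocks == 0 for their entire
         * lifetime, so this sweep removed them at transaction commit,
         * even though ResLockRelease() still needed the entry when the
         * holdable cursor was eventually dropped.
         */
        if (locallock->nLocks == 0)
        {
            RemoveLocalLock(locallock);
            continue;
        }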
    
    This behavior was inadvertently introduced when we merged upstream
    commit 3cba899 for 6X_STABLE.
    
    The fix here is to ensure that LockReleaseAll() doesn't accidentally
    remove a LOCALLOCK associated with a resource queue.
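    
    In sketch form (mirroring the lock.c hunk below):
    
        if (locallock->nLocks == 0)
        {
            /* keep resource queue entries alive for ResLockRelease() */
            if (locallock->lock &&
                locallock->lock->tag.locktag_type == LOCKTAG_RESOURCE_QUEUE)
                Assert(locallock->numLockOwners == 0);
            else
            {
                RemoveLocalLock(locallock);
                continue;
            }
        }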
    
    We also take the trouble to add more commentary about how we expect
    LOCALLOCK fields to be used in the context of resource queues.
    
    Co-authored-by: xuejing zhao <zxuej...@vmware.com>
    Co-authored-by: Zhenghua Lyu <kain...@gmail.com>
---
 src/backend/storage/lmgr/lock.c                 | 16 ++++++++++++--
 src/backend/utils/resscheduler/resqueue.c       |  9 +++++---
 src/include/storage/lock.h                      |  7 +++++++
 src/test/isolation2/expected/resource_queue.out | 28 +++++++++++++++++++++++++
 src/test/isolation2/sql/resource_queue.sql      | 15 +++++++++++++
 5 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 719049a646..4b99181030 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -2517,6 +2517,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
        int                     partition;
        bool            have_fast_path_lwlock = false;
 
+       Assert(lockmethodid != RESOURCE_LOCKMETHOD);
+
        if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
                elog(ERROR, "unrecognized lock method: %d", lockmethodid);
        lockMethodTable = LockMethods[lockmethodid];
@@ -2553,11 +2555,21 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
                 * If the LOCALLOCK entry is unused, we must've run out of shared
                 * memory while trying to set up this lock.  Just forget the local
                 * entry.
+                *
+                * GPDB: Add an exception for resource queue based locallocks. 
Neither
+                * do we maintain nLocks for them, nor do we use the resource 
owner
+                * mechanism for them.
                 */
                if (locallock->nLocks == 0)
                {
-                       RemoveLocalLock(locallock);
-                       continue;
+                       if (locallock->lock &&
+                               locallock->lock->tag.locktag_type == LOCKTAG_RESOURCE_QUEUE)
+                               Assert(locallock->numLockOwners == 0);
+                       else
+                       {
+                               RemoveLocalLock(locallock);
+                               continue;
+                       }
                }
 
                /* Ignore items that are not of the lockmethod to be removed */
diff --git a/src/backend/utils/resscheduler/resqueue.c b/src/backend/utils/resscheduler/resqueue.c
index 7ee6afbe6f..9bbfb5c386 100644
--- a/src/backend/utils/resscheduler/resqueue.c
+++ b/src/backend/utils/resscheduler/resqueue.c
@@ -179,13 +179,16 @@ ResLockAcquire(LOCKTAG *locktag, ResPortalIncrement *incrementSet)
                locallock->lock = NULL;
                locallock->proclock = NULL;
                locallock->hashcode = LockTagHashCode(&(localtag.lock));
-               locallock->istemptable = false;
+
+               /* must remain 0 for the entire lifecycle of the LOCALLOCK */
                locallock->nLocks = 0;
                locallock->numLockOwners = 0;
-               locallock->maxLockOwners = 8;
+
+               /* initialized but unused for the entire lifecycle of the LOCALLOCK */
+               locallock->istemptable = false;
                locallock->holdsStrongLockCount = false;
                locallock->lockCleared = false;
-               locallock->lockOwners = NULL;
+               locallock->maxLockOwners = 8;
                locallock->lockOwners = (LOCALLOCKOWNER *)
                        MemoryContextAlloc(TopMemoryContext, 
locallock->maxLockOwners * sizeof(LOCALLOCKOWNER));
        }
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 5f77b5f942..df684d7025 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -468,6 +468,13 @@ typedef struct LOCALLOCKOWNER
        int64           nLocks;                 /* # of times held by this owner */
 } LOCALLOCKOWNER;
 
+/*
+ * GPDB: For resource queue based LOCALLOCKs, we don't maintain nLocks or any
+ * of the owner related fields, after their initialization in ResLockAcquire().
+ * We don't use the resource owner mechanism for resource queue based
+ * LOCALLOCKs. This is key to avoid inadvertently operating on these in upstream
+ * lock routines where such LOCALLOCKs aren't meant to be manipulated.
+ */
 typedef struct LOCALLOCK
 {
        /* tag */
diff --git a/src/test/isolation2/expected/resource_queue.out b/src/test/isolation2/expected/resource_queue.out
index df438a16f2..05c4d2c429 100644
--- a/src/test/isolation2/expected/resource_queue.out
+++ b/src/test/isolation2/expected/resource_queue.out
@@ -86,6 +86,34 @@ END
  1             | 0             
 (1 row)
 
+-- Introduce a holdable cursor.
+4:SET role role_concurrency_test;
+SET
+4:DECLARE c_hold CURSOR WITH HOLD FOR SELECT 1;
+DECLARE
+
+-- Sanity check: The holdable cursor should be accounted for in pg_locks.
+0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid();
+ granted | locktype       | mode          
+---------+----------------+---------------
+ t       | resource queue | ExclusiveLock 
+(1 row)
+
+4q: ... <quitting>
+
+-- Sanity check: Ensure that all locks were released.
+0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid();
+ granted | locktype | mode 
+---------+----------+------
+(0 rows)
+
+-- Sanity check: Ensure that the resource queue is now empty.
+0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test';
+ rsqcountlimit | rsqcountvalue 
+---------------+---------------
+ 1             | 0             
+(1 row)
+
 0:DROP role role_concurrency_test;
 DROP
 0:DROP RESOURCE QUEUE rq_concurrency_test;
diff --git a/src/test/isolation2/sql/resource_queue.sql b/src/test/isolation2/sql/resource_queue.sql
index 61bdabec08..1523ada116 100644
--- a/src/test/isolation2/sql/resource_queue.sql
+++ b/src/test/isolation2/sql/resource_queue.sql
@@ -40,5 +40,20 @@
 -- Sanity check: Ensure that the resource queue is now empty.
 0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test';
 
+-- Introduce a holdable cursor.
+4:SET role role_concurrency_test;
+4:DECLARE c_hold CURSOR WITH HOLD FOR SELECT 1;
+
+-- Sanity check: The holdable cursor should be accounted for in pg_locks.
+0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid();
+
+4q:
+
+-- Sanity check: Ensure that all locks were released.
+0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid();
+
+-- Sanity check: Ensure that the resource queue is now empty.
+0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test';
+
 0:DROP role role_concurrency_test;
 0:DROP RESOURCE QUEUE rq_concurrency_test;

