This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 688bff0280f66b0a7b92c7fb83a7a2d4e5519408 Author: Soumyadeep Chakraborty <soumyadeep2...@gmail.com> AuthorDate: Mon Jun 26 14:05:13 2023 -0700 resqueue: Fix statement leak for holdable cursors Creating cursors "with hold" would clean up the locallock prematurely during transaction commit for the DECLARE CURSOR command: RemoveLocalLock lock.c:1406 LockReleaseAll lock.c:2313 ProcReleaseLocks proc.c:900 ResourceOwnerReleaseInternal resowner.c:320 ResourceOwnerRelease resowner.c:225 CommitTransaction xact.c:2818 This would cause an early exit in ResLockRelease(): LOG: Resource queue %d: no lock to release and then subsequently the assertion failure: FailedAssertion(""!(SHMQueueEmpty(&(MyProc->myProcLocks[i])))"", File: ""proc.c"", Line: 994 indicating a failure to clean up the resource queue locks, leading to a statement leak in the associated resource queue. This behavior was inadvertently introduced when we merged upstream commit 3cba899, for 6X_STABLE. The fix here is to ensure we don't accidentally remove a LOCALLOCK associated with a resource queue, in LockReleaseAll(). We also take the trouble to add more commentary about how we expect LOCALLOCK fields to be used in the context of resource queues. Co-authored-by: xuejing zhao <zxuej...@vmware.com> Co-authored-by: Zhenghua Lyu <kain...@gmail.com> --- src/backend/storage/lmgr/lock.c | 16 ++++++++++++-- src/backend/utils/resscheduler/resqueue.c | 9 +++++--- src/include/storage/lock.h | 7 +++++++ src/test/isolation2/expected/resource_queue.out | 28 +++++++++++++++++++++++++ src/test/isolation2/sql/resource_queue.sql | 15 +++++++++++++ 5 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 719049a646..4b99181030 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -2517,6 +2517,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) int partition; bool have_fast_path_lwlock = false; + Assert(lockmethodid != RESOURCE_LOCKMETHOD); + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; @@ -2553,11 +2555,21 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) * If the LOCALLOCK entry is unused, we must've run out of shared * memory while trying to set up this lock. Just forget the local * entry. + * + * GPDB: Add an exception for resource queue based locallocks. Neither + * do we maintain nLocks for them, nor do we use the resource owner + * mechanism for them. */ if (locallock->nLocks == 0) { - RemoveLocalLock(locallock); - continue; + if (locallock->lock && + locallock->lock->tag.locktag_type == LOCKTAG_RESOURCE_QUEUE) + Assert(locallock->numLockOwners == 0); + else + { + RemoveLocalLock(locallock); + continue; + } } /* Ignore items that are not of the lockmethod to be removed */ diff --git a/src/backend/utils/resscheduler/resqueue.c b/src/backend/utils/resscheduler/resqueue.c index 7ee6afbe6f..9bbfb5c386 100644 --- a/src/backend/utils/resscheduler/resqueue.c +++ b/src/backend/utils/resscheduler/resqueue.c @@ -179,13 +179,16 @@ ResLockAcquire(LOCKTAG *locktag, ResPortalIncrement *incrementSet) locallock->lock = NULL; locallock->proclock = NULL; locallock->hashcode = LockTagHashCode(&(localtag.lock)); - locallock->istemptable = false; + + /* must remain 0 for the entire lifecycle of the LOCALLOCK */ locallock->nLocks = 0; locallock->numLockOwners = 0; - locallock->maxLockOwners = 8; + + /* initialized but unused for the entire lifecycle of the LOCALLOCK */ + locallock->istemptable = false; locallock->holdsStrongLockCount = false; locallock->lockCleared = false; - locallock->lockOwners = NULL; + locallock->maxLockOwners = 8; locallock->lockOwners = (LOCALLOCKOWNER *) MemoryContextAlloc(TopMemoryContext, locallock->maxLockOwners * sizeof(LOCALLOCKOWNER)); } diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 5f77b5f942..df684d7025 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -468,6 +468,13 @@ typedef struct LOCALLOCKOWNER int64 nLocks; /* # of times held by this owner */ } LOCALLOCKOWNER; +/* + * GPDB: For resource queue based LOCALLOCKs, we don't maintain nLocks or any + * of the owner related fields, after their initialization in ResLockAcquire(). + * We don't use the resource owner mechanism for resource queue based + * LOCALLOCKs. This is key to avoid inadvertently operating on these in upstream + * lock routines where such LOCALLOCKs aren't meant to be manipulated. + */ typedef struct LOCALLOCK { /* tag */ diff --git a/src/test/isolation2/expected/resource_queue.out b/src/test/isolation2/expected/resource_queue.out index df438a16f2..05c4d2c429 100644 --- a/src/test/isolation2/expected/resource_queue.out +++ b/src/test/isolation2/expected/resource_queue.out @@ -86,6 +86,34 @@ END 1 | 0 (1 row) +-- Introduce a holdable cursor. +4:SET role role_concurrency_test; +SET +4:DECLARE c_hold CURSOR WITH HOLD FOR SELECT 1; +DECLARE + +-- Sanity check: The holdable cursor should be accounted for in pg_locks. +0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid(); + granted | locktype | mode +---------+----------------+--------------- + t | resource queue | ExclusiveLock +(1 row) + +4q: ... <quitting> + +-- Sanity check: Ensure that all locks were released. +0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid(); + granted | locktype | mode +---------+----------+------ +(0 rows) + +-- Sanity check: Ensure that the resource queue is now empty. +0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test'; + rsqcountlimit | rsqcountvalue +---------------+--------------- + 1 | 0 +(1 row) + 0:DROP role role_concurrency_test; DROP 0:DROP RESOURCE QUEUE rq_concurrency_test; diff --git a/src/test/isolation2/sql/resource_queue.sql b/src/test/isolation2/sql/resource_queue.sql index 61bdabec08..1523ada116 100644 --- a/src/test/isolation2/sql/resource_queue.sql +++ b/src/test/isolation2/sql/resource_queue.sql @@ -40,5 +40,20 @@ -- Sanity check: Ensure that the resource queue is now empty. 0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test'; +-- Introduce a holdable cursor. +4:SET role role_concurrency_test; +4:DECLARE c_hold CURSOR WITH HOLD FOR SELECT 1; + +-- Sanity check: The holdable cursor should be accounted for in pg_locks. +0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid(); + +4q: + +-- Sanity check: Ensure that all locks were released. +0:SELECT granted, locktype, mode FROM pg_locks where locktype = 'resource queue' and pid != pg_backend_pid(); + +-- Sanity check: Ensure that the resource queue is now empty. +0: SELECT rsqcountlimit, rsqcountvalue from pg_resqueue_status WHERE rsqname = 'rq_concurrency_test'; + 0:DROP role role_concurrency_test; 0:DROP RESOURCE QUEUE rq_concurrency_test; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org