Repository: incubator-hawq Updated Branches: refs/heads/master 8f753f32d -> f7e6fc5d0
HAWQ-344. When resource queue capacity is shrunk, deadlock detection may not be triggered Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/f7e6fc5d Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/f7e6fc5d Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/f7e6fc5d Branch: refs/heads/master Commit: f7e6fc5d06da0b74daadb8fd10ded4f69645721f Parents: 8f753f3 Author: YI JIN <[email protected]> Authored: Wed Jan 20 17:27:17 2016 +1100 Committer: YI JIN <[email protected]> Committed: Wed Jan 20 17:27:17 2016 +1100 ---------------------------------------------------------------------- .../resourcemanager/include/resqueuemanager.h | 4 +- src/backend/resourcemanager/resqueuedeadlock.c | 30 ++++++++-- src/backend/resourcemanager/resqueuemanager.c | 62 +++++++++++++------- 3 files changed, 67 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/include/resqueuemanager.h ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/include/resqueuemanager.h b/src/backend/resourcemanager/include/resqueuemanager.h index 86fb69a..0b38520 100644 --- a/src/backend/resourcemanager/include/resqueuemanager.h +++ b/src/backend/resourcemanager/include/resqueuemanager.h @@ -528,8 +528,8 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow); void cancelQueryRequestToBreakDeadLockInShadow(DynResourceQueueTrack shadowtrack, DQueueNode iter, - uint32_t expmemorymb, - uint32_t availmemorymb); + int32_t expmemorymb, + int32_t availmemorymb); /* Dump resource queue status to file system. 
*/ void dumpResourceQueueStatus(const char *filename); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/resqueuedeadlock.c ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/resqueuedeadlock.c b/src/backend/resourcemanager/resqueuedeadlock.c index 90404f2..3922b4b 100644 --- a/src/backend/resourcemanager/resqueuedeadlock.c +++ b/src/backend/resourcemanager/resqueuedeadlock.c @@ -59,6 +59,13 @@ int addSessionInUseResource(ResqueueDeadLockDetector detector, addResourceBundleData(&(sessiontrack->InUseTotal), memorymb, core); addResourceBundleData(&(detector->InUseTotal), memorymb, core); + elog(DEBUG3, "Deadlock detector adds in-use %d MB from session "INT64_FORMAT", " + "has %d MB in use %d MB locked.", + memorymb, + sessionid, + detector->InUseTotal.MemoryMB, + detector->LockedTotal.MemoryMB); + return FUNC_RETURN_OK; } @@ -97,6 +104,13 @@ int minusSessionInUseResource(ResqueueDeadLockDetector detector, removeHASHTABLENode(&(detector->Sessions), &key); } + elog(DEBUG3, "Deadlock detector reduces in-use %d MB from session "INT64_FORMAT", " + "has %d MB in use %d MB locked.", + memorymb, + sessionid, + detector->InUseTotal.MemoryMB, + detector->LockedTotal.MemoryMB); + return FUNC_RETURN_OK; } @@ -132,9 +146,11 @@ void createAndLockSessionResource(ResqueueDeadLockDetector detector, addResourceBundleDataByBundle(&(detector->LockedTotal), &(curstrack->InUseTotal)); - elog(RMLOG, "Locked session "INT64_FORMAT" Locked %d MB", - sessionid, - detector->LockedTotal.MemoryMB); + elog(DEBUG3, "Deadlock detector locked session "INT64_FORMAT + ", has %d MB in use %d MB locked", + sessionid, + detector->InUseTotal.MemoryMB, + detector->LockedTotal.MemoryMB); } void unlockSessionResource(ResqueueDeadLockDetector detector, @@ -156,9 +172,11 @@ void unlockSessionResource(ResqueueDeadLockDetector detector, &(sessiontrack->InUseTotal)); sessiontrack->Locked = false; - 
elog(DEBUG3, "Unlocked session "INT64_FORMAT " Locked %d MB", - sessionid, - detector->LockedTotal.MemoryMB); + elog(DEBUG3, "Deadlock detector unlocked session "INT64_FORMAT + ", has %d MB in use %d MB locked", + sessionid, + detector->InUseTotal.MemoryMB, + detector->LockedTotal.MemoryMB); } Assert(detector->LockedTotal.Core >= 0.0 && http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/resqueuemanager.c ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/resqueuemanager.c b/src/backend/resourcemanager/resqueuemanager.c index f358b3f..9ccf619 100644 --- a/src/backend/resourcemanager/resqueuemanager.c +++ b/src/backend/resourcemanager/resqueuemanager.c @@ -2968,6 +2968,9 @@ void dispatchResourceToQueries(void) { bool hasresourceallocated = false; bool hasrequest = false; + + elog(DEBUG3, "Resource manager tries to dispatch resource to queries."); + /* *-------------------------------------------------------------------------- * STEP 1. Re-balance resource among different mem/core ratio trackers. After @@ -2991,6 +2994,14 @@ void dispatchResourceToQueries(void) if ( (mctrack->ClusterMemoryMaxMB == 0 || mctrack->ClusterVCoreMax == 0) || (mctrack->TotalAllocated.MemoryMB == 0 && mctrack->TotalAllocated.Core == 0) ) { + elog(DEBUG3, "Resource manager skipped memory core ratio index %d, " + "memory max limit %d MB, %lf CORE, " + "total allocated %d MB, %lf CORE", + i, + mctrack->ClusterMemoryMaxMB, + mctrack->ClusterVCoreMax, + mctrack->TotalAllocated.MemoryMB, + mctrack->TotalAllocated.Core); continue; } @@ -3014,6 +3025,8 @@ void dispatchResourceToQueries(void) /* Ignore the queues not in use. 
*/ if ( !track->isBusy ) { + elog(DEBUG3, "Resource manager skips idle resource queue %s", + track->QueueInfo->Name); continue; } @@ -3043,6 +3056,10 @@ void dispatchResourceToQueries(void) expweight, track->TotalUsed.MemoryMB, track->TotalUsed.Core); + + /* We still need to handle the resource queue dead lock here. */ + detectAndDealWithDeadLock(track); + } else { @@ -3834,7 +3851,7 @@ void refreshResourceQueuePercentageCapacityInternal(uint32_t clustermemmb, } else { - track->ClusterVCoreMax = track->ClusterMemoryMaxMB / track->MemCoreRatio; + track->ClusterVCoreMax = 1.0 * track->ClusterMemoryMaxMB / track->MemCoreRatio; } /* Decide cluster segment resource quota. */ @@ -3949,6 +3966,9 @@ void dispatchResourceToQueriesInOneQueue(DynResourceQueueTrack track) int policy = 0; Assert( track != NULL ); + elog(DEBUG3, "Resource manager dispatch resource in queue %s", + track->QueueInfo->Name); + if ( track->QueryResRequests.NodeCount > 0 ) { ConnectionTrack topwaiter = getDQueueHeadNodeData(&(track->QueryResRequests)); @@ -4168,7 +4188,6 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track) if ( counter == 0 ) { - /* TODO:: Maybe too conservative. */ detectAndDealWithDeadLock(track); return FUNC_RETURN_OK; /* Expect requests are processed in next loop. */ } @@ -5186,6 +5205,10 @@ int rebuildResourceQueueTrackDynamicStatusInShadow(DynResourceQueueTrack quetra copyResourceDeadLockDetectorWithoutLocking(&(quetrack->DLDetector), &(shadowtrack->DLDetector)); + elog(DEBUG3, "Deadlock detector in shadow has %d MB in use %d MB locked.", + shadowtrack->DLDetector.InUseTotal.MemoryMB, + quetrack->DLDetector.LockedTotal.MemoryMB); + /* Go through all queued query resource requests, recalculate the request. 
*/ DQUEUE_LOOP_BEGIN(&(quetrack->QueryResRequests), iter, ConnectionTrack, conn) @@ -5263,6 +5286,11 @@ int rebuildResourceQueueTrackDynamicStatusInShadow(DynResourceQueueTrack quetra } DQUEUE_LOOP_END + elog(DEBUG3, "Deadlock detector in shadow has %d MB in use %d MB locked " + "after rebuilding.", + shadowtrack->DLDetector.InUseTotal.MemoryMB, + shadowtrack->DLDetector.LockedTotal.MemoryMB); + elog(LOG, "Finished rebuilding resource queue %s dynamic status in its shadow.", quetrack->QueueInfo->Name); @@ -5276,12 +5304,12 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack, Assert(quetrack->ShadowQueueTrack != NULL); DynResourceQueueTrack shadowtrack = quetrack->ShadowQueueTrack; - elog(DEBUG3, "Deadlock detector has %d MB in use, %d MB locked", + elog(DEBUG3, "Deadlock detector in shadow has %d MB in use, %d MB locked", shadowtrack->DLDetector.InUseTotal.MemoryMB, shadowtrack->DLDetector.LockedTotal.MemoryMB); /* Assume more available resource unlocked queued requests. */ - uint32_t pavailmemorymb = 0; + int32_t pavailmemorymb = 0; /* Go through all queued query resource requests, recalculate the request. */ DQUEUE_LOOP_BEGIN(&(shadowtrack->QueryResRequests), iter, ConnectionTrack, conn) @@ -5293,10 +5321,10 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack, } /* Check if this connection has deadlock issue. 
*/ - uint32_t expmemorymb = conn->SegMemoryMB * conn->SegNumMin; - uint32_t availmemorymb = shadowtrack->ClusterMemoryMaxMB - - shadowtrack->DLDetector.LockedTotal.MemoryMB + - pavailmemorymb; + int32_t expmemorymb = conn->SegMemoryMB * conn->SegNumMin; + int32_t availmemorymb = shadowtrack->ClusterMemoryMaxMB - + shadowtrack->DLDetector.LockedTotal.MemoryMB + + pavailmemorymb; /*---------------------------------------------------------------------- * If the queue already uses more resource than its maximum capability, @@ -5308,6 +5336,7 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack, shadowtrack->ClusterMemoryMaxMB : availmemorymb; + /* NOTE: availmemorymb maybe less than 0. */ if ( expmemorymb > availmemorymb ) { /* We encounter a deadlock issue. */ @@ -5347,8 +5376,8 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack, void cancelQueryRequestToBreakDeadLockInShadow(DynResourceQueueTrack shadowtrack, DQueueNode iter, - uint32_t expmemorymb, - uint32_t availmemorymb) + int32_t expmemorymb, + int32_t availmemorymb) { static char errorbuf[ERRORMESSAGE_SIZE]; DQueueNode tailiter = getDQueueContainerTail(&(shadowtrack->QueryResRequests)); @@ -5441,8 +5470,8 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow) /* The deadlock detector should use the new one completely. */ resetResourceDeadLockDetector(&(quetrack->DLDetector)); - copyResourceDeadLockDetectorWithoutLocking(&(quetrack->DLDetector), - &(shadowtrack->DLDetector)); + copyResourceDeadLockDetectorWithoutLocking(&(shadowtrack->DLDetector), + &(quetrack->DLDetector)); resetResourceBundleDataByBundle(&(quetrack->TotalUsed), &(shadowtrack->TotalUsed)); @@ -5498,15 +5527,6 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow) MEMORY_CONTEXT_SWITCH_TO(PCONTEXT) PCONTRACK->ConnToSend = lappend(PCONTRACK->ConnToSend, conn); MEMORY_CONTEXT_SWITCH_BACK - - /* Recycle connection track instance. 
*/ - quetrack->CurConnCounter--; - if ( quetrack->CurConnCounter == 0 ) - { - quetrack->isBusy = false; - refreshMemoryCoreRatioLimits(); - refreshMemoryCoreRatioWaterMark(); - } } else {
