Repository: incubator-hawq Updated Branches: refs/heads/master f2066eb4c -> d9b1848a4
HAWQ-755. Fix validateResourcePoolStatus assertion Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/d9b1848a Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/d9b1848a Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/d9b1848a Branch: refs/heads/master Commit: d9b1848a448671bbb93eda7f0cd46ba3d715fd2f Parents: f2066eb Author: Wen Lin <[email protected]> Authored: Fri Jun 17 10:58:44 2016 +0800 Committer: Wen Lin <[email protected]> Committed: Fri Jun 17 10:58:44 2016 +0800 ---------------------------------------------------------------------- .../resourcemanager/include/resourcepool.h | 4 +++ src/backend/resourcemanager/resourcepool.c | 29 ++++++++++---------- src/backend/resourcemanager/resqueuemanager.c | 6 ++-- 3 files changed, 22 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d9b1848a/src/backend/resourcemanager/include/resourcepool.h ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/include/resourcepool.h b/src/backend/resourcemanager/include/resourcepool.h index e957bb0..4caf139 100644 --- a/src/backend/resourcemanager/include/resourcepool.h +++ b/src/backend/resourcemanager/include/resourcepool.h @@ -712,6 +712,10 @@ SimpStringPtr build_segment_status_description(SegStat segstat); #define VALIDATE_RATIO_BIAS 0.005 #define VALIDATE_RESOURCE_BIAS 0.0001 +#define EPSILON 1e-7 +#define IS_DOUBLE_ZERO(d) (fabs(d) < EPSILON) +#define IS_DOUBLE_EQ(x, y) ((fabs((x) - (y))) <= (EPSILON)) + void validateResourcePoolStatus(bool refquemgr); /* http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d9b1848a/src/backend/resourcemanager/resourcepool.c ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/resourcepool.c b/src/backend/resourcemanager/resourcepool.c index 7221066..dc48e70 100644 --- a/src/backend/resourcemanager/resourcepool.c +++ b/src/backend/resourcemanager/resourcepool.c @@ -3219,7 +3219,7 @@ void returnAllGRMResourceFromSegment(SegResource segres) reorderSegResourceCombinedWorkloadIndex(segres); Assert(segres->Allocated.MemoryMB == 0); - Assert(segres->Allocated.Core == 0.0); + Assert(IS_DOUBLE_ZERO(segres->Allocated.Core)); segres->GRMContainerCount = 0; elog(DEBUG3, "HAWQ RM: returnAllResourceForSegment: %u containers have been " @@ -3339,7 +3339,7 @@ int notifyToBeAcceptedGRMContainersToRMSEG(void) { GRMContainerSet ctns = (GRMContainerSet)(((PAIR)lfirst(cell))->Value); - if ( ctns->Allocated.MemoryMB == 0 && ctns->Allocated.Core == 0 ) + if ( ctns->Allocated.MemoryMB == 0 && IS_DOUBLE_ZERO(ctns->Allocated.Core)) { continue; } @@ -3390,7 +3390,7 @@ int notifyToBeKickedGRMContainersToRMSEG(void) { GRMContainerSet ctns = (GRMContainerSet)(((PAIR)lfirst(cell))->Value); - if (ctns->Allocated.Core == 0 || ctns->Allocated.MemoryMB == 0) + if (IS_DOUBLE_ZERO(ctns->Allocated.Core) || ctns->Allocated.MemoryMB == 0) { continue; } @@ -3972,7 +3972,7 @@ void dropAllToAcceptGRMContainersToKicked(void) { GRMContainerSet ctns = (GRMContainerSet)(((PAIR)lfirst(cell))->Value); - if ( ctns->Allocated.MemoryMB == 0 && ctns->Allocated.Core == 0 ) + if ( ctns->Allocated.MemoryMB == 0 && IS_DOUBLE_ZERO(ctns->Allocated.Core)) { continue; } @@ -4019,9 +4019,9 @@ void validateResourcePoolStatus(bool refquemgr) &availmem, &availcore); if ( segres->Allocated.MemoryMB != allocmem || - segres->Allocated.Core != alloccore || + !IS_DOUBLE_EQ(segres->Allocated.Core, alloccore) || segres->Available.MemoryMB != availmem || - segres->Available.Core != availcore ) + !IS_DOUBLE_EQ(segres->Available.Core, availcore) ) { elog(ERROR, "HAWQ RM Validation. Wrong resource counter. " "Host %s. " @@ -4043,11 +4043,11 @@ void validateResourcePoolStatus(bool refquemgr) /* Validation 2. The ratio should be correct. */ double r1 = alloccore == 0 ? 0 : allocmem/alloccore; double r2 = availcore == 0 ? 0 : availmem/availcore; - if ( (allocmem == 0 && alloccore != 0) || - (allocmem != 0 && alloccore == 0) || - (availmem == 0 && availcore != 0) || - (availmem != 0 && availcore == 0) || - (alloccore != 0 && availcore != 0 && + if ( (allocmem == 0 && !IS_DOUBLE_ZERO(alloccore)) || + (allocmem != 0 && IS_DOUBLE_ZERO(alloccore)) || + (availmem == 0 && !IS_DOUBLE_ZERO(availcore)) || + (availmem != 0 && IS_DOUBLE_ZERO(availcore)) || + (!IS_DOUBLE_ZERO(alloccore) && !IS_DOUBLE_ZERO(availcore) && 2 * fabs(r1-r2) / (r1 + r2) > VALIDATE_RATIO_BIAS) ) { elog(ERROR, "HAWQ RM Validation. Wrong resource counter ratio. " @@ -4135,7 +4135,7 @@ void validateResourcePoolStatus(bool refquemgr) Assert( PQUEMGR->RatioTrackers[0] != NULL ); if ( PQUEMGR->RatioTrackers[0]->TotalAllocated.MemoryMB != totalallocmem || - PQUEMGR->RatioTrackers[0]->TotalAllocated.Core != totalalloccore ) + !IS_DOUBLE_EQ(PQUEMGR->RatioTrackers[0]->TotalAllocated.Core,totalalloccore)) { elog(ERROR, "HAWQ RM Validation. Wrong total allocated resource. " "In resource pool allocated (%d MB, %lf CORE), " @@ -4147,7 +4147,8 @@ void validateResourcePoolStatus(bool refquemgr) } if ( totalavailmem > totalallocmem || - totalavailcore > totalalloccore * (1+VALIDATE_RESOURCE_BIAS) ) + (( totalavailcore > totalalloccore * (1+VALIDATE_RESOURCE_BIAS)) && + (!IS_DOUBLE_ZERO(totalavailcore) || !IS_DOUBLE_ZERO(totalalloccore)))) { elog(ERROR, "HAWQ RM Validation. Wrong total allocated resource. " "In resource pool available (%d MB, %lf CORE), " @@ -4473,7 +4474,7 @@ void getSegResResourceCountersByMemCoreCounters(SegResource resinfo, core += ((GRMContainer)lfirst(cell))->Core; } - if ( mem != ctns->Allocated.MemoryMB || core != ctns->Allocated.Core ) + if ( mem != ctns->Allocated.MemoryMB || !IS_DOUBLE_EQ(core,ctns->Allocated.Core) ) { elog(ERROR, "HAWQ RM Validation. Wrong container set counter. " "Host %s.", http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d9b1848a/src/backend/resourcemanager/resqueuemanager.c ---------------------------------------------------------------------- diff --git a/src/backend/resourcemanager/resqueuemanager.c b/src/backend/resourcemanager/resqueuemanager.c index 2916df9..ca58507 100644 --- a/src/backend/resourcemanager/resqueuemanager.c +++ b/src/backend/resourcemanager/resqueuemanager.c @@ -2851,8 +2851,8 @@ void dispatchResourceToQueries(void) DynMemoryCoreRatioTrack mctrack = PQUEMGR->RatioTrackers[i]; /* Ignore the memory/core ratio 1) not in use. 2) no resource allocated. */ - if ( (mctrack->ClusterMemoryMaxMB == 0 || mctrack->ClusterVCoreMax == 0) || - (mctrack->TotalAllocated.MemoryMB == 0 && mctrack->TotalAllocated.Core == 0) ) + if ( (mctrack->ClusterMemoryMaxMB == 0 || IS_DOUBLE_ZERO(mctrack->ClusterVCoreMax)) || + (mctrack->TotalAllocated.MemoryMB == 0 && IS_DOUBLE_ZERO(mctrack->TotalAllocated.Core)) ) { elog(DEBUG3, "Resource manager skipped memory core ratio index %d, " "memory max limit %d MB, %lf CORE, " @@ -3012,7 +3012,7 @@ void dispatchResourceToQueries(void) track->TotalAllocated.Core, track->QueueInfo->Name); - double evalcore = track->TotalAllocated.Core == 0 ? + double evalcore = IS_DOUBLE_ZERO(track->TotalAllocated.Core) ? VALIDATE_RESOURCE_BIAS : track->TotalAllocated.Core * (1+VALIDATE_RESOURCE_BIAS); Assert(evalcore >= track->TotalUsed.Core);
