[GitHub] [geode] BenjaminPerryRoss commented on a change in pull request #5391: GEODE-7846: Adding Stats for Partitioned Region Clear
BenjaminPerryRoss commented on a change in pull request #5391: URL: https://github.com/apache/geode/pull/5391#discussion_r464609432 ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/CachePerfStats.java ## @@ -274,7 +278,14 @@ "Current number of regions configured for reliablity that are missing required roles with Limited access"; final String reliableRegionsMissingNoAccessDesc = "Current number of regions configured for reliablity that are missing required roles with No access"; -final String clearsDesc = "The total number of times a clear has been done on this cache."; +final String regionClearsDesc = +"The total number of times a clear has been done on this cache."; +final String bucketClearsDesc = +"The total number of times a clear has been done on this region and it's bucket regions"; +final String partitionedRegionClearLocalDurationDesc = +"The time in nanoseconds partitioned region clear has been running for the region on this member"; Review comment: I actually switched this from the original metric I used to nanoseconds because other stats in the file seem to be using them and I wanted to be consistent (such as EventQueueThrottleTime or any of the tx lifetime stats in that class). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [geode] BenjaminPerryRoss commented on a change in pull request #5391: GEODE-7846: Adding Stats for Partitioned Region Clear
BenjaminPerryRoss commented on a change in pull request #5391: URL: https://github.com/apache/geode/pull/5391#discussion_r464609432 ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/CachePerfStats.java ## @@ -274,7 +278,14 @@ "Current number of regions configured for reliablity that are missing required roles with Limited access"; final String reliableRegionsMissingNoAccessDesc = "Current number of regions configured for reliablity that are missing required roles with No access"; -final String clearsDesc = "The total number of times a clear has been done on this cache."; +final String regionClearsDesc = +"The total number of times a clear has been done on this cache."; +final String bucketClearsDesc = +"The total number of times a clear has been done on this region and it's bucket regions"; +final String partitionedRegionClearLocalDurationDesc = +"The time in nanoseconds partitioned region clear has been running for the region on this member"; Review comment: I actually switch this from the original metric I used to nanoseconds because other stats in the file seem to be using them and I wanted to be consistent (such as EventQueueThrottleTime or any of the tx lifetime stats in that class). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [geode] BenjaminPerryRoss commented on a change in pull request #5391: GEODE-7846: Adding Stats for Partitioned Region Clear
BenjaminPerryRoss commented on a change in pull request #5391: URL: https://github.com/apache/geode/pull/5391#discussion_r462642093 ## File path: geode-core/src/distributedTest/java/org/apache/geode/internal/cache/PartitionedRegionClearDUnitTest.java ## @@ -330,6 +337,101 @@ public void normalClearFromAccessorWithoutWriterButWithWriterOnDataStore() { .isEqualTo(0); } + @Test + public void normalClearFromDataStoreUpdatesStats() { +configureServers(false, true); +client1.invoke(this::initClientCache); +client2.invoke(this::initClientCache); + +// Verify no clears have been recorded in stats +dataStore1.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + + for (BucketRegion bucket : region.getDataStore().getAllLocalBucketRegions()) { +long clearCount = bucket.getCachePerfStats().getRegionClearCount(); +assertThat(clearCount).isEqualTo(0); + } +}); + +accessor.invoke(() -> feed(false)); +verifyServerRegionSize(NUM_ENTRIES); +dataStore1.invoke(() -> getRegion(false).clear()); +verifyServerRegionSize(0); + +// Verify the stats were properly updated for the bucket regions +dataStore1.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + long clearCount = 0L; Review comment: Done. ## File path: geode-core/src/distributedTest/java/org/apache/geode/internal/cache/PartitionedRegionClearDUnitTest.java ## @@ -330,6 +337,101 @@ public void normalClearFromAccessorWithoutWriterButWithWriterOnDataStore() { .isEqualTo(0); } + @Test + public void normalClearFromDataStoreUpdatesStats() { +configureServers(false, true); +client1.invoke(this::initClientCache); +client2.invoke(this::initClientCache); + +// Verify no clears have been recorded in stats +dataStore1.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + + for (BucketRegion bucket : region.getDataStore().getAllLocalBucketRegions()) { +long clearCount = bucket.getCachePerfStats().getRegionClearCount(); +assertThat(clearCount).isEqualTo(0); + } +}); + +accessor.invoke(() -> feed(false)); +verifyServerRegionSize(NUM_ENTRIES); +dataStore1.invoke(() -> getRegion(false).clear()); +verifyServerRegionSize(0); + +// Verify the stats were properly updated for the bucket regions +dataStore1.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + long clearCount = 0L; + + for (BucketRegion bucket : region.getDataStore().getAllLocalBucketRegions()) { +if (clearCount == 0) { + clearCount = bucket.getCachePerfStats().getBucketClearCount(); +} + assertThat(bucket.getCachePerfStats().getBucketClearCount()).isEqualTo(clearCount) +.isNotEqualTo(0); + } + + assertThat(region.getCachePerfStats().getRegionClearCount()).isEqualTo(1); + assertThat(region.getCachePerfStats().getPartitionedRegionClearDuration()).isGreaterThan(0); +}); + +dataStore2.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + long clearCount = 0L; + + for (BucketRegion bucket : region.getDataStore().getAllLocalBucketRegions()) { +if (clearCount == 0) { + clearCount = bucket.getCachePerfStats().getBucketClearCount(); +} + assertThat(bucket.getCachePerfStats().getBucketClearCount()).isEqualTo(clearCount) +.isNotEqualTo(0); + } + + assertThat(region.getCachePerfStats().getRegionClearCount()).isEqualTo(1); +}); + +dataStore3.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + long clearCount = 0L; + + for (BucketRegion bucket : region.getDataStore().getAllLocalBucketRegions()) { +if (clearCount == 0) { + clearCount = bucket.getCachePerfStats().getBucketClearCount(); +} + assertThat(bucket.getCachePerfStats().getBucketClearCount()).isEqualTo(clearCount) +.isNotEqualTo(0); + } + + assertThat(region.getCachePerfStats().getRegionClearCount()).isEqualTo(1); +}); + + +// The accessor shouldn't increment the region clear count +accessor.invoke(() -> { + PartitionedRegion region = (PartitionedRegion) getRegion(false); + assertThat(region.getCachePerfStats().getRegionClearCount()).isEqualTo(0); +}); + +// do the region destroy to compare that the same callbacks will be triggered +dataStore1.invoke(() -> { + Region region = getRegion(false); + region.destroyRegion(); +}); + + assertThat(dataStore1.invoke(getWriterDestroys)).isEqualTo(dataStore1.invoke(getWriterClears)) +.isEqualTo(0); + assertThat(dataStore2.invoke(getWriterDestroys)).isEqualTo(dataStore2.invoke(getWriterClears)) +.isEqualTo(0); +
[GitHub] [geode] BenjaminPerryRoss commented on a change in pull request #5391: GEODE-7846: Adding Stats for Partitioned Region Clear
BenjaminPerryRoss commented on a change in pull request #5391: URL: https://github.com/apache/geode/pull/5391#discussion_r461218632 ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionClear.java ## @@ -162,6 +162,7 @@ protected void waitForPrimary(PartitionedRegion.RetryTimeKeeper retryTimer) { doAfterClear(regionEvent); } } +incClearCount(); Review comment: This is part of a larger discussion about what we're actually trying to record with the duration. It's something Gester and I have touched on in the operations standup and I'll probably put something up regarding it in the slack channel but basically the duration in the initial version (the one you commented on) was measuring the total duration of the entire clear operation from the perspective of the coordinator. The way it's been changed to (in line with your suggestion) is to move it into the actual local clear. This makes the stat present on all members which have the region (as long as they have data) but it means that this value ONLY captures the actual process of clearing the region and not any of the stuff that happens in between (messaging, any work done on the coordinator other than the local clear, etc). For now it's been changed to something that matches with your thought process here, but if you have ideas on this I would love to hear them. ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/CachePerfStats.java ## @@ -442,7 +451,10 @@ f.createIntCounter("retries", "Number of times a concurrent destroy followed by a create has caused an entry operation to need to retry.", "operations"), -f.createLongCounter("clears", clearsDesc, "operations"), +f.createLongCounter("regionClears", regionClearsDesc, "operations"), Review comment: Agreed. I recall there was a documentation story for PR clear, but I'm not sure what happened to it. I'll hunt it down and if this documentation isn't covered by it I'll make sure to add it to this PR. ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionClear.java ## @@ -367,12 +370,38 @@ void doClear(RegionEventImpl regionEvent, boolean cacheWrite) { releaseLockForClear(regionEvent); } } - } finally { releaseDistributedClearLock(lockName); + incClearDuration(System.nanoTime() - clearStartTime); +} + } + + void incClearCount() { +if (partitionedRegion != null && partitionedRegion.getDataStore() != null +&& partitionedRegion.getDataStore().getAllLocalBucketRegions() != null +&& partitionedRegion.getDataStore().getAllLocalBucketRegions().size() != 0) { + CachePerfStats stats = partitionedRegion.getCachePerfStats(); + if (stats != null) { +logger.info("BR inc PR Region count:" + stats.getClass().getName() + ":" Review comment: Done ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionClear.java ## @@ -367,12 +370,38 @@ void doClear(RegionEventImpl regionEvent, boolean cacheWrite) { releaseLockForClear(regionEvent); } } - } finally { releaseDistributedClearLock(lockName); + incClearDuration(System.nanoTime() - clearStartTime); +} + } + + void incClearCount() { +if (partitionedRegion != null && partitionedRegion.getDataStore() != null +&& partitionedRegion.getDataStore().getAllLocalBucketRegions() != null +&& partitionedRegion.getDataStore().getAllLocalBucketRegions().size() != 0) { + CachePerfStats stats = partitionedRegion.getCachePerfStats(); + if (stats != null) { +logger.info("BR inc PR Region count:" + stats.getClass().getName() + ":" ++ partitionedRegion.getFullPath(), new Exception()); +stats.incClearCount(); + } } } + void incClearDuration(long durationNanos) { +if (partitionedRegion != null && partitionedRegion.getTotalNumberOfBuckets() != 0) { + CachePerfStats stats = partitionedRegion.getCachePerfStats(); + if (stats != null) { +logger.info("BR inc PR Duration by + " + durationNanos + " ns:" + stats.getClass().getName() Review comment: Done. ## File path: geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionClear.java ## @@ -367,12 +370,38 @@ void doClear(RegionEventImpl regionEvent, boolean cacheWrite) { releaseLockForClear(regionEvent); } } - } finally { releaseDistributedClearLock(lockName); + incClearDuration(System.nanoTime() - clearStartTime); +} + } + + void incClearCount() { +if (partitionedRegion != null && partitionedRegion.getDataStore() != null +&& partitionedRegion.getDataStore().getAllLocalBucketRegions() != null +