adoroszlai commented on a change in pull request #3180: URL: https://github.com/apache/ozone/pull/3180#discussion_r827963447
########## File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OMLockMetrics.java ########## @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.om.lock; + +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableStat; +import org.apache.hadoop.ozone.OzoneConsts; + +/** + * This class is for maintaining the various Ozone Manager Lock Metrics. + */ +@InterfaceAudience.Private +@Metrics(about = "Ozone Manager Lock Metrics", context = OzoneConsts.OZONE) +public class OMLockMetrics { + private static final String SOURCE_NAME = + OMLockMetrics.class.getSimpleName(); + + private @Metric MutableStat readLockWaitingTimeMsStat; + private @Metric MutableStat readLockHeldTimeMsStat; Review comment: We need to set the `extended` flag to `true` on these metrics to enable publishing min/max/stddev information. Otherwise, only avg and count are exposed. 
Something like this: ```diff diff --git hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OMLockMetrics.java hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OMLockMetrics.java index 7ff3a6d49..22b0ab253 100644 --- hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OMLockMetrics.java +++ hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OMLockMetrics.java @@ -18,10 +18,15 @@ package org.apache.hadoop.ozone.om.lock; import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.lib.MutableStat; import org.apache.hadoop.ozone.OzoneConsts; @@ -30,12 +35,23 @@ */ @InterfaceAudience.Private @Metrics(about = "Ozone Manager Lock Metrics", context = OzoneConsts.OZONE) -public class OMLockMetrics { +public class OMLockMetrics implements MetricsSource { private static final String SOURCE_NAME = OMLockMetrics.class.getSimpleName(); - private @Metric MutableStat readLockWaitingTimeMsStat; - private @Metric MutableStat readLockHeldTimeMsStat; + private final MetricsRegistry registry; + private final MutableStat readLockWaitingTimeMsStat; + private final MutableStat readLockHeldTimeMsStat; + + private OMLockMetrics() { + registry = new MetricsRegistry(SOURCE_NAME); + readLockWaitingTimeMsStat = registry.newStat("ReadLockWaitTime", + "Time (in milliseconds) spent waiting for acquiring the lock", + "Ops", "Time", true); + readLockHeldTimeMsStat = registry.newStat("ReadLockHeldTime", + "Time (in milliseconds) spent 
holding the lock", + "Ops", "Time", true); + } /** * Registers OMLockMetrics source. @@ -115,4 +131,11 @@ public String getReadLockHeldTimeMsStat() { public long getLongestReadLockHeldTimeMs() { return (long) readLockHeldTimeMsStat.lastStat().max(); } + + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + MetricsRecordBuilder builder = collector.addRecord(SOURCE_NAME); + readLockHeldTimeMsStat.snapshot(builder, all); + readLockWaitingTimeMsStat.snapshot(builder, all); + } } ``` ########## File path: hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/lock/TestOzoneManagerLock.java ########## @@ -341,4 +357,66 @@ public void testMultiLockResourceParallel() throws Exception { Thread.sleep(100); Assert.assertTrue(gotLock.get()); } + + @Test + public void testLockHoldCount() throws Exception { + String[] resourceName; + String resourceLockName; + for (OzoneManagerLock.Resource resource : + OzoneManagerLock.Resource.values()) { + // USER_LOCK, S3_SECRET_LOCK and PREFIX_LOCK disallow lock re-acquire by + // the same thread. 
+ if (resource != OzoneManagerLock.Resource.USER_LOCK && + resource != OzoneManagerLock.Resource.S3_SECRET_LOCK && + resource != OzoneManagerLock.Resource.PREFIX_LOCK) { + resourceName = generateResourceName(resource); + resourceLockName = generateResourceLockName(resource, resourceName); + testLockHoldCountUtil(resource, resourceName, resourceLockName); + } + } + } + + private void testLockHoldCountUtil(OzoneManagerLock.Resource resource, + String[] resourceName, + String resourceLockName) throws Exception { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + + assertEquals(0, lock.getHoldCount(resourceLockName)); + + lock.acquireReadLock(resource, resourceName); + assertEquals(1, lock.getHoldCount(resourceLockName)); + + lock.acquireReadLock(resource, resourceName); + assertEquals(2, lock.getHoldCount(resourceLockName)); + + lock.acquireReadLock(resource, resourceName); + assertEquals(3, lock.getHoldCount(resourceLockName)); + + lock.releaseReadLock(resource, resourceName); + assertEquals(2, lock.getHoldCount(resourceLockName)); + + lock.releaseReadLock(resource, resourceName); + assertEquals(1, lock.getHoldCount(resourceLockName)); + + lock.releaseReadLock(resource, resourceName); + assertEquals(0, lock.getHoldCount(resourceLockName)); + + lock.acquireWriteLock(resource, resourceName); + assertEquals(1, lock.getHoldCount(resourceLockName)); + + lock.acquireWriteLock(resource, resourceName); + assertEquals(2, lock.getHoldCount(resourceLockName)); + + lock.acquireWriteLock(resource, resourceName); + assertEquals(3, lock.getHoldCount(resourceLockName)); + + lock.releaseWriteLock(resource, resourceName); + assertEquals(2, lock.getHoldCount(resourceLockName)); + + lock.releaseWriteLock(resource, resourceName); + assertEquals(1, lock.getHoldCount(resourceLockName)); + + lock.releaseWriteLock(resource, resourceName); + assertEquals(0, lock.getHoldCount(resourceLockName)); Review comment: Can we reduce duplication with loops? 
########## File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/lock/OzoneManagerLock.java ########## @@ -172,7 +176,16 @@ private boolean lock(Resource resource, String resourceName, LOG.error(errorMessage); throw new RuntimeException(errorMessage); } else { + long startWaitingTimeNanos = Time.monotonicNowNanos(); + /** + * holdCount helps in metric updation only once in case of reentrant + * locks. + */ + int holdCount = manager.getActiveLockCount(resourceName); lockFn.accept(resourceName); + if (holdCount == 0) { + updateReadLockMetrics(resource, lockType, startWaitingTimeNanos); + } Review comment: We should consider the case that another thread is holding the lock when this thread tries to acquire it. My understanding is that `holdCount` will be `>= 1`, so we skip updating lock metrics. Similarly, `unlock` only updates held time for the last thread that releases the lock (read lock can be held at the same time by multiple threads). Since lock acquire/release and count increase/decrease are two separate, non-atomic steps, I don't think we can rely on `holdCount` for this. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
