Repository: hadoop Updated Branches: refs/heads/branch-2 38f86c084 -> 5c5362980
HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed by Mingliang Liu. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5c536298 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5c536298 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5c536298 Branch: refs/heads/branch-2 Commit: 5c5362980c429a5ad0b58e0dd1933de9cde7f369 Parents: 38f86c0 Author: Haohui Mai <[email protected]> Authored: Tue Oct 13 13:20:11 2015 -0700 Committer: Haohui Mai <[email protected]> Committed: Tue Oct 13 13:20:17 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/test/GenericTestUtils.java | 4 ++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../hdfs/server/namenode/FSNamesystem.java | 21 ++++++++ .../hdfs/server/namenode/TestFSNamesystem.java | 56 ++++++++++++++++++++ 4 files changed, 84 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 1e5852b..5bc1252 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -196,6 +196,10 @@ public abstract class GenericTestUtils { logger.removeAppender(appender); } + + public void clearOutput() { + sw.getBuffer().setLength(0); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 16a6d3b..881e901 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -717,6 +717,9 @@ Release 2.8.0 - UNRELEASED HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit (Chris Nauroth and Vinayakumar B via vinayakumarb) + HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. + (Mingliang Liu via wheat9) + BUG FIXES HDFS-8091: ACLStatus and XAttributes should be presented to http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9dc9d42..76e14b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1443,6 +1443,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return Util.stringCollectionAsURIs(dirNames); } + /** Threshold (ms) for long holding write lock report. */ + static final short WRITELOCK_REPORTING_THRESHOLD = 1000; + /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/ + private long writeLockHeldTimeStamp; + @Override public void readLock() { this.fsLock.readLock().lock(); @@ -1454,14 +1459,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Override public void writeLock() { this.fsLock.writeLock().lock(); + if (fsLock.getWriteHoldCount() == 1) { + writeLockHeldTimeStamp = monotonicNow(); + } } @Override public void writeLockInterruptibly() throws InterruptedException { this.fsLock.writeLock().lockInterruptibly(); + if (fsLock.getWriteHoldCount() == 1) { + writeLockHeldTimeStamp = monotonicNow(); + } } @Override public void writeUnlock() { + final boolean needReport = fsLock.getWriteHoldCount() == 1 && + fsLock.isWriteLockedByCurrentThread(); this.fsLock.writeLock().unlock(); + + if (needReport) { + long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp; + if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) { + LOG.info("FSNamesystem write lock held for " + writeLockInterval + + " ms via\n" + StringUtils.getStackTrace(Thread.currentThread())); + } + } } @Override public boolean hasWriteLock() { http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java index d533cf6..15fc40e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -39,6 +39,9 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; +import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -268,6 +271,59 @@ public class TestFSNamesystem { threadCount, rwLock.getQueueLength()); } + /** + * Test when FSNamesystem lock is held for a long time, logger will report it. + */ + @Test(timeout=45000) + public void testFSLockLongHoldingReport() throws Exception { + Configuration conf = new Configuration(); + FSImage fsImage = Mockito.mock(FSImage.class); + FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); + Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); + FSNamesystem fsn = new FSNamesystem(conf, fsImage); + + LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO); + + // Don't report if the write lock is held for a short time + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + + // Report if the write lock is held for a long time + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + // Report if the write lock is held (interruptibly) for a long time + fsn.writeLockInterruptibly(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + // Report if it's held for a long time when re-entering write lock + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + fsn.writeLockInterruptibly(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + logs.clearOutput(); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + logs.clearOutput(); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + } + @Test public void testSafemodeReplicationConf() throws IOException { Configuration conf = new Configuration();
