Repository: hadoop
Updated Branches:
  refs/heads/branch-2 38f86c084 -> 5c5362980


HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed 
by Mingliang Liu.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5c536298
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5c536298
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5c536298

Branch: refs/heads/branch-2
Commit: 5c5362980c429a5ad0b58e0dd1933de9cde7f369
Parents: 38f86c0
Author: Haohui Mai <[email protected]>
Authored: Tue Oct 13 13:20:11 2015 -0700
Committer: Haohui Mai <[email protected]>
Committed: Tue Oct 13 13:20:17 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/test/GenericTestUtils.java    |  4 ++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hdfs/server/namenode/FSNamesystem.java      | 21 ++++++++
 .../hdfs/server/namenode/TestFSNamesystem.java  | 56 ++++++++++++++++++++
 4 files changed, 84 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
index 1e5852b..5bc1252 100644
--- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
+++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
@@ -196,6 +196,10 @@ public abstract class GenericTestUtils {
       logger.removeAppender(appender);
 
     }
+
+    public void clearOutput() {
+      sw.getBuffer().setLength(0);
+    }
   }
   
   

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt 
b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 16a6d3b..881e901 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -717,6 +717,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
     (Chris Nauroth and Vinayakumar B via vinayakumarb)
 
+    HDFS-9145. Tracking methods that hold FSNamesytemLock for too long.
+    (Mingliang Liu via wheat9)
+
   BUG FIXES
 
     HDFS-8091: ACLStatus and XAttributes should be presented to

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 9dc9d42..76e14b0 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1443,6 +1443,11 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
     return Util.stringCollectionAsURIs(dirNames);
   }
 
+  /** Threshold (ms) for long holding write lock report. */
+  static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
+  /** Last time stamp for write lock. Keep the longest one for 
multi-entrance.*/
+  private long writeLockHeldTimeStamp;
+
   @Override
   public void readLock() {
     this.fsLock.readLock().lock();
@@ -1454,14 +1459,30 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
   @Override
   public void writeLock() {
     this.fsLock.writeLock().lock();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeLockInterruptibly() throws InterruptedException {
     this.fsLock.writeLock().lockInterruptibly();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeUnlock() {
+    final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
+        fsLock.isWriteLockedByCurrentThread();
     this.fsLock.writeLock().unlock();
+
+    if (needReport) {
+      long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
+      if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
+        LOG.info("FSNamesystem write lock held for " + writeLockInterval +
+            " ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
+      }
+    }
   }
   @Override
   public boolean hasWriteLock() {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5c536298/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
index d533cf6..15fc40e 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
@@ -39,6 +39,9 @@ import 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
+import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
@@ -268,6 +271,59 @@ public class TestFSNamesystem {
                         threadCount, rwLock.getQueueLength());
   }
 
+  /**
+   * Test when FSNamesystem lock is held for a long time, logger will report 
it.
+   */
+  @Test(timeout=45000)
+  public void testFSLockLongHoldingReport() throws Exception {
+    Configuration conf = new Configuration();
+    FSImage fsImage = Mockito.mock(FSImage.class);
+    FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
+    Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
+    FSNamesystem fsn = new FSNamesystem(conf, fsImage);
+
+    LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
+    GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
+
+    // Don't report if the write lock is held for a short time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+
+    // Report if the write lock is held for a long time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if the write lock is held (interruptibly) for a long time
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if it's held for a long time when re-entering write lock
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+  }
+
   @Test
   public void testSafemodeReplicationConf() throws IOException {
     Configuration conf = new Configuration();

Reply via email to