Repository: hadoop Updated Branches: refs/heads/branch-2.8 dd4acebb4 -> 5025a898e
HDFS-11180. Intermittent deadlock in NameNode when failover happens. (cherry picked from commit 5a7941a4fc193259ab4a306f7fe4f68bf101d0e0) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5025a898 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5025a898 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5025a898 Branch: refs/heads/branch-2.8 Commit: 5025a898e13b0d442aa8c7a3433a90367e013a42 Parents: dd4aceb Author: Akira Ajisaka <aajis...@apache.org> Authored: Fri Dec 2 11:34:05 2016 +0900 Committer: Akira Ajisaka <aajis...@apache.org> Committed: Fri Dec 2 11:35:22 2016 +0900 ---------------------------------------------------------------------- .../dev-support/findbugsExcludeFile.xml | 27 ++++++++ .../hadoop/hdfs/server/namenode/FSEditLog.java | 72 +++++++++++++++++--- .../hadoop/hdfs/server/namenode/FSImage.java | 13 +++- .../hdfs/server/namenode/FSNamesystem.java | 27 ++++++-- .../hdfs/server/namenode/NameNodeRpcServer.java | 2 +- .../server/namenode/ha/StandbyCheckpointer.java | 4 +- .../server/namenode/TestFSNamesystemMBean.java | 24 +++++++ 7 files changed, 147 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 426fb72..e6e4057 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -109,6 +109,33 @@ <Field name="journalSet" /> <Bug pattern="IS2_INCONSISTENT_SYNC" /> </Match> + <!-- + FSEditLog#getTotalSyncCount is not synchronized because this method is + used by metrics. NullPointerException can happen and it is ignored. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="editLogStream" /> + <Bug pattern="IS2_INCONSISTENT_SYNC" /> + </Match> + <!-- + FSEditLog#isOpenForWriteWithoutLock and FSEditLog#isSegmentOpenWithoutLock + are not synchronized because these methods are used by metrics. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="state" /> + <Bug pattern="IS2_INCONSISTENT_SYNC" /> + </Match> + <!-- + All of the threads which update/increment txid are synchronized, + so make txid volatile instead of AtomicLong. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="txid" /> + <Bug pattern="VO_VOLATILE_INCREMENT" /> + </Match> <!-- This method isn't performance-critical and is much clearer to write as it's written. --> http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 3eda0f5..2fa70df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -156,14 +156,16 @@ public class FSEditLog implements LogsPurgeable { private EditLogOutputStream editLogStream = null; // a monotonically increasing counter that represents transactionIds. - private long txid = 0; + // All of the threads which update/increment txid are synchronized, + // so make txid volatile instead of AtomicLong. + private volatile long txid = 0; // stores the last synced transactionId. private long synctxid = 0; // the first txid of the log that's currently open for writing. // If this value is N, we are currently writing to edits_inprogress_N - private long curSegmentTxId = HdfsServerConstants.INVALID_TXID; + private volatile long curSegmentTxId = HdfsServerConstants.INVALID_TXID; // the time of printing the statistics to the log file. private long lastPrintTime; @@ -339,7 +341,18 @@ public class FSEditLog implements LogsPurgeable { return state == State.IN_SEGMENT || state == State.BETWEEN_LOG_SEGMENTS; } - + + /** + * Return true if the log is currently open in write mode. + * This method is not synchronized and must be used only for metrics. + * @return true if the log is currently open in write mode, regardless + * of whether it actually has an open segment. + */ + boolean isOpenForWriteWithoutLock() { + return state == State.IN_SEGMENT || + state == State.BETWEEN_LOG_SEGMENTS; + } + /** * @return true if the log is open in write mode and has a segment open * ready to take edits. @@ -349,6 +362,16 @@ public class FSEditLog implements LogsPurgeable { } /** + * Return true the state is IN_SEGMENT. + * This method is not synchronized and must be used only for metrics. + * @return true if the log is open in write mode and has a segment open + * ready to take edits. + */ + boolean isSegmentOpenWithoutLock() { + return state == State.IN_SEGMENT; + } + + /** * @return true if the log is open in read mode. */ public synchronized boolean isOpenForRead() { @@ -523,7 +546,16 @@ public class FSEditLog implements LogsPurgeable { public synchronized long getLastWrittenTxId() { return txid; } - + + /** + * Return the transaction ID of the last transaction written to the log. + * This method is not synchronized and must be used only for metrics. + * @return The transaction ID of the last transaction written to the log + */ + long getLastWrittenTxIdWithoutLock() { + return txid; + } + /** * @return the first transaction ID in the current log segment */ @@ -532,7 +564,16 @@ public class FSEditLog implements LogsPurgeable { "Bad state: %s", state); return curSegmentTxId; } - + + /** + * Return the first transaction ID in the current log segment. + * This method is not synchronized and must be used only for metrics. + * @return The first transaction ID in the current log segment + */ + long getCurSegmentTxIdWithoutLock() { + return curSegmentTxId; + } + /** * Set the transaction ID to use for the next transaction written. */ @@ -1183,7 +1224,9 @@ public class FSEditLog implements LogsPurgeable { /** * Get all the journals this edit log is currently operating on. */ - synchronized List<JournalAndStream> getJournals() { + List<JournalAndStream> getJournals() { + // The list implementation is CopyOnWriteArrayList, + // so we don't need to synchronize this method. return journalSet.getAllJournalStreams(); } @@ -1191,7 +1234,7 @@ public class FSEditLog implements LogsPurgeable { * Used only by tests. */ @VisibleForTesting - synchronized public JournalSet getJournalSet() { + public JournalSet getJournalSet() { return journalSet; } @@ -1677,11 +1720,18 @@ public class FSEditLog implements LogsPurgeable { * Return total number of syncs happened on this edit log. * @return long - count */ - public synchronized long getTotalSyncCount() { - if (editLogStream != null) { - return editLogStream.getNumSync(); - } else { + public long getTotalSyncCount() { + // Avoid NPE as possible. + if (editLogStream == null) { return 0; } + long count = 0; + try { + count = editLogStream.getNumSync(); + } catch (NullPointerException ignore) { + // This method is used for metrics, so we don't synchronize it. + // Therefore NPE can happen even if there is a null check before. + } + return count; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 778d053..8cd611e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -927,7 +927,7 @@ public class FSImage implements Closeable { Canceler canceler) throws IOException { FSImageCompression compression = FSImageCompression.createCompression(conf); - long txid = getLastAppliedOrWrittenTxId(); + long txid = getCorrectLastAppliedOrWrittenTxId(); SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid, canceler); FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx); @@ -1025,7 +1025,7 @@ public class FSImage implements Closeable { if (editLogWasOpen) { editLog.endCurrentLogSegment(true); } - long imageTxId = getLastAppliedOrWrittenTxId(); + long imageTxId = getCorrectLastAppliedOrWrittenTxId(); if (!addToCheckpointing(imageTxId)) { throw new IOException( "FS image is being downloaded from another NN at txid " + imageTxId); @@ -1397,6 +1397,15 @@ public class FSImage implements Closeable { public long getLastAppliedOrWrittenTxId() { return Math.max(lastAppliedTxId, + editLog != null ? editLog.getLastWrittenTxIdWithoutLock() : 0); + } + + /** + * This method holds a lock of FSEditLog to get the correct value. + * This method must not be used for metrics. + */ + public long getCorrectLastAppliedOrWrittenTxId() { + return Math.max(lastAppliedTxId, editLog != null ? editLog.getLastWrittenTxId() : 0); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index cbbf869..7bb6b66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3694,7 +3694,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), - getFSImage().getLastAppliedOrWrittenTxId()); + getFSImage().getCorrectLastAppliedOrWrittenTxId()); return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId); @@ -3820,7 +3820,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, public void run() { while (fsRunning && shouldRun) { try { - long numEdits = getTransactionsSinceLastLogRoll(); + long numEdits = getCorrectTransactionsSinceLastLogRoll(); if (numEdits > rollThreshold) { FSNamesystem.LOG.info("NameNode rolling its own edit log because" + " number of edits in open segment exceeds threshold of " @@ -3969,17 +3969,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Metric({"TransactionsSinceLastLogRoll", "Number of transactions since last edit log roll"}) public long getTransactionsSinceLastLogRoll() { + if (isInStandbyState() || !getEditLog().isSegmentOpenWithoutLock()) { + return 0; + } else { + return getEditLog().getLastWrittenTxIdWithoutLock() - + getEditLog().getCurSegmentTxIdWithoutLock() + 1; + } + } + + /** + * Get the correct number of transactions since last edit log roll. + * This method holds a lock of FSEditLog and must not be used for metrics. + */ + private long getCorrectTransactionsSinceLastLogRoll() { if (isInStandbyState() || !getEditLog().isSegmentOpen()) { return 0; } else { return getEditLog().getLastWrittenTxId() - - getEditLog().getCurSegmentTxId() + 1; + getEditLog().getCurSegmentTxId() + 1; } } - + @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) public long getLastWrittenTransactionId() { - return getEditLog().getLastWrittenTxId(); + return getEditLog().getLastWrittenTxIdWithoutLock(); } @Metric({"LastCheckpointTime", @@ -6294,7 +6307,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, List<Map<String, String>> jasList = new ArrayList<Map<String, String>>(); FSEditLog log = getFSImage().getEditLog(); if (log != null) { - boolean openForWrite = log.isOpenForWrite(); + // This flag can be false because we cannot hold a lock of FSEditLog + // for metrics. + boolean openForWrite = log.isOpenForWriteWithoutLock(); for (JournalAndStream jas : log.getJournals()) { final Map<String, String> jasMap = new HashMap<String, String>(); String manager = jas.getManager().toString(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 683e90d..03f77dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1198,7 +1198,7 @@ public class NameNodeRpcServer implements NamenodeProtocols { checkNNStartup(); namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.checkSuperuserPrivilege(); - return namesystem.getFSImage().getLastAppliedOrWrittenTxId(); + return namesystem.getFSImage().getCorrectLastAppliedOrWrittenTxId(); } @Override // NamenodeProtocol http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index 1e40368..31fa5a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -165,7 +165,7 @@ public class StandbyCheckpointer { FSImage img = namesystem.getFSImage(); long prevCheckpointTxId = img.getStorage().getMostRecentCheckpointTxId(); - long thisCheckpointTxId = img.getLastAppliedOrWrittenTxId(); + long thisCheckpointTxId = img.getCorrectLastAppliedOrWrittenTxId(); assert thisCheckpointTxId >= prevCheckpointTxId; if (thisCheckpointTxId == prevCheckpointTxId) { LOG.info("A checkpoint was triggered but the Standby Node has not " + @@ -253,7 +253,7 @@ public class StandbyCheckpointer { private long countUncheckpointedTxns() { FSImage img = namesystem.getFSImage(); - return img.getLastAppliedOrWrittenTxId() - + return img.getCorrectLastAppliedOrWrittenTxId() - img.getStorage().getMostRecentCheckpointTxId(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/5025a898/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java index 3c0d54a..e8db18f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java @@ -162,6 +162,30 @@ public class TestFSNamesystemMBean { } } + // The test makes sure JMX request can be processed even if FSEditLog + // is synchronized. + @Test + public void testWithFSEditLogLock() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + synchronized (cluster.getNameNode().getFSImage().getEditLog()) { + MBeanClient client = new MBeanClient(); + client.start(); + client.join(20000); + assertTrue("JMX calls are blocked when FSEditLog" + + " is synchronized by another thread", client.succeeded); + client.interrupt(); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + @Test(timeout = 120000) public void testFsEditLogMetrics() throws Exception { final Configuration conf = new Configuration(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org