HDFS-11180. Intermittent deadlock in NameNode when failover happens.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e0fa4923 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e0fa4923 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e0fa4923 Branch: refs/heads/YARN-5085 Commit: e0fa49234fd37aca88e1caa95bac77bca192bae4 Parents: 1f7613b Author: Akira Ajisaka <aajis...@apache.org> Authored: Thu Dec 1 23:08:59 2016 +0900 Committer: Akira Ajisaka <aajis...@apache.org> Committed: Thu Dec 1 23:08:59 2016 +0900 ---------------------------------------------------------------------- .../dev-support/findbugsExcludeFile.xml | 27 ++++++++ .../hadoop/hdfs/server/namenode/FSEditLog.java | 72 +++++++++++++++++--- .../hadoop/hdfs/server/namenode/FSImage.java | 15 +++- .../hdfs/server/namenode/FSNamesystem.java | 27 ++++++-- .../hdfs/server/namenode/NameNodeRpcServer.java | 2 +- .../server/namenode/ha/StandbyCheckpointer.java | 4 +- .../server/namenode/TestFSNamesystemMBean.java | 24 +++++++ 7 files changed, 148 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 426fb72..e6e4057 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -109,6 +109,33 @@ <Field name="journalSet" /> <Bug pattern="IS2_INCONSISTENT_SYNC" /> </Match> + <!-- + FSEditLog#getTotalSyncCount is not synchronized because this method is + used by metrics. NullPointerException can happen and it is ignored. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="editLogStream" /> + <Bug pattern="IS2_INCONSISTENT_SYNC" /> + </Match> + <!-- + FSEditLog#isOpenForWriteWithoutLock and FSEditLog#isSegmentOpenWithoutLock + are not synchronized because these methods are used by metrics. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="state" /> + <Bug pattern="IS2_INCONSISTENT_SYNC" /> + </Match> + <!-- + All of the threads which update/increment txid are synchronized, + so make txid volatile instead of AtomicLong. + --> + <Match> + <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" /> + <Field name="txid" /> + <Bug pattern="VO_VOLATILE_INCREMENT" /> + </Match> <!-- This method isn't performance-critical and is much clearer to write as it's written. --> http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index ef9eb68..c9ee32b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -155,14 +155,16 @@ public class FSEditLog implements LogsPurgeable { private EditLogOutputStream editLogStream = null; // a monotonically increasing counter that represents transactionIds. - private long txid = 0; + // All of the threads which update/increment txid are synchronized, + // so make txid volatile instead of AtomicLong. + private volatile long txid = 0; // stores the last synced transactionId. private long synctxid = 0; // the first txid of the log that's currently open for writing. // If this value is N, we are currently writing to edits_inprogress_N - private long curSegmentTxId = HdfsServerConstants.INVALID_TXID; + private volatile long curSegmentTxId = HdfsServerConstants.INVALID_TXID; // the time of printing the statistics to the log file. private long lastPrintTime; @@ -338,7 +340,18 @@ public class FSEditLog implements LogsPurgeable { return state == State.IN_SEGMENT || state == State.BETWEEN_LOG_SEGMENTS; } - + + /** + * Return true if the log is currently open in write mode. + * This method is not synchronized and must be used only for metrics. + * @return true if the log is currently open in write mode, regardless + * of whether it actually has an open segment. + */ + boolean isOpenForWriteWithoutLock() { + return state == State.IN_SEGMENT || + state == State.BETWEEN_LOG_SEGMENTS; + } + /** * @return true if the log is open in write mode and has a segment open * ready to take edits. @@ -348,6 +361,16 @@ public class FSEditLog implements LogsPurgeable { } /** + * Return true the state is IN_SEGMENT. + * This method is not synchronized and must be used only for metrics. + * @return true if the log is open in write mode and has a segment open + * ready to take edits. + */ + boolean isSegmentOpenWithoutLock() { + return state == State.IN_SEGMENT; + } + + /** * @return true if the log is open in read mode. */ public synchronized boolean isOpenForRead() { @@ -522,7 +545,16 @@ public class FSEditLog implements LogsPurgeable { public synchronized long getLastWrittenTxId() { return txid; } - + + /** + * Return the transaction ID of the last transaction written to the log. + * This method is not synchronized and must be used only for metrics. + * @return The transaction ID of the last transaction written to the log + */ + long getLastWrittenTxIdWithoutLock() { + return txid; + } + /** * @return the first transaction ID in the current log segment */ @@ -531,7 +563,16 @@ public class FSEditLog implements LogsPurgeable { "Bad state: %s", state); return curSegmentTxId; } - + + /** + * Return the first transaction ID in the current log segment. + * This method is not synchronized and must be used only for metrics. + * @return The first transaction ID in the current log segment + */ + long getCurSegmentTxIdWithoutLock() { + return curSegmentTxId; + } + /** * Set the transaction ID to use for the next transaction written. */ @@ -1182,7 +1223,9 @@ public class FSEditLog implements LogsPurgeable { /** * Get all the journals this edit log is currently operating on. */ - synchronized List<JournalAndStream> getJournals() { + List<JournalAndStream> getJournals() { + // The list implementation is CopyOnWriteArrayList, + // so we don't need to synchronize this method. return journalSet.getAllJournalStreams(); } @@ -1190,7 +1233,7 @@ public class FSEditLog implements LogsPurgeable { * Used only by tests. */ @VisibleForTesting - synchronized public JournalSet getJournalSet() { + public JournalSet getJournalSet() { return journalSet; } @@ -1731,11 +1774,18 @@ public class FSEditLog implements LogsPurgeable { * Return total number of syncs happened on this edit log. * @return long - count */ - public synchronized long getTotalSyncCount() { - if (editLogStream != null) { - return editLogStream.getNumSync(); - } else { + public long getTotalSyncCount() { + // Avoid NPE as possible. + if (editLogStream == null) { return 0; } + long count = 0; + try { + count = editLogStream.getNumSync(); + } catch (NullPointerException ignore) { + // This method is used for metrics, so we don't synchronize it. + // Therefore NPE can happen even if there is a null check before. + } + return count; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index d02a6ca..c40c626 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -924,7 +924,7 @@ public class FSImage implements Closeable { Canceler canceler) throws IOException { FSImageCompression compression = FSImageCompression.createCompression(conf); - long txid = getLastAppliedOrWrittenTxId(); + long txid = getCorrectLastAppliedOrWrittenTxId(); SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid, canceler); FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx); @@ -1019,7 +1019,7 @@ public class FSImage implements Closeable { final long checkpointTxId = image.getCheckpointTxId(); final long checkpointAge = Time.now() - imageFile.lastModified(); if (checkpointAge <= timeWindow * 1000 && - checkpointTxId >= this.getLastAppliedOrWrittenTxId() - txGap) { + checkpointTxId >= this.getCorrectLastAppliedOrWrittenTxId() - txGap) { return false; } } @@ -1046,7 +1046,7 @@ public class FSImage implements Closeable { if (editLogWasOpen) { editLog.endCurrentLogSegment(true); } - long imageTxId = getLastAppliedOrWrittenTxId(); + long imageTxId = getCorrectLastAppliedOrWrittenTxId(); if (!addToCheckpointing(imageTxId)) { throw new IOException( "FS image is being downloaded from another NN at txid " + imageTxId); @@ -1418,6 +1418,15 @@ public class FSImage implements Closeable { public long getLastAppliedOrWrittenTxId() { return Math.max(lastAppliedTxId, + editLog != null ? editLog.getLastWrittenTxIdWithoutLock() : 0); + } + + /** + * This method holds a lock of FSEditLog to get the correct value. + * This method must not be used for metrics. + */ + public long getCorrectLastAppliedOrWrittenTxId() { + return Math.max(lastAppliedTxId, editLog != null ? editLog.getLastWrittenTxId() : 0); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index c78ef46..8a750a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3654,7 +3654,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), - getFSImage().getLastAppliedOrWrittenTxId()); + getFSImage().getCorrectLastAppliedOrWrittenTxId()); return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId); @@ -3779,7 +3779,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, public void run() { while (fsRunning && shouldRun) { try { - long numEdits = getTransactionsSinceLastLogRoll(); + long numEdits = getCorrectTransactionsSinceLastLogRoll(); if (numEdits > rollThreshold) { FSNamesystem.LOG.info("NameNode rolling its own edit log because" + " number of edits in open segment exceeds threshold of " @@ -3928,17 +3928,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Metric({"TransactionsSinceLastLogRoll", "Number of transactions since last edit log roll"}) public long getTransactionsSinceLastLogRoll() { + if (isInStandbyState() || !getEditLog().isSegmentOpenWithoutLock()) { + return 0; + } else { + return getEditLog().getLastWrittenTxIdWithoutLock() - + getEditLog().getCurSegmentTxIdWithoutLock() + 1; + } + } + + /** + * Get the correct number of transactions since last edit log roll. + * This method holds a lock of FSEditLog and must not be used for metrics. + */ + private long getCorrectTransactionsSinceLastLogRoll() { if (isInStandbyState() || !getEditLog().isSegmentOpen()) { return 0; } else { return getEditLog().getLastWrittenTxId() - - getEditLog().getCurSegmentTxId() + 1; + getEditLog().getCurSegmentTxId() + 1; } } - + @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) public long getLastWrittenTransactionId() { - return getEditLog().getLastWrittenTxId(); + return getEditLog().getLastWrittenTxIdWithoutLock(); } @Metric({"LastCheckpointTime", @@ -5613,7 +5626,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, List<Map<String, String>> jasList = new ArrayList<Map<String, String>>(); FSEditLog log = getFSImage().getEditLog(); if (log != null) { - boolean openForWrite = log.isOpenForWrite(); + // This flag can be false because we cannot hold a lock of FSEditLog + // for metrics. + boolean openForWrite = log.isOpenForWriteWithoutLock(); for (JournalAndStream jas : log.getJournals()) { final Map<String, String> jasMap = new HashMap<String, String>(); String manager = jas.getManager().toString(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index c55c4d1..0fc3e60 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1215,7 +1215,7 @@ public class NameNodeRpcServer implements NamenodeProtocols { checkNNStartup(); namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.checkSuperuserPrivilege(); - return namesystem.getFSImage().getLastAppliedOrWrittenTxId(); + return namesystem.getFSImage().getCorrectLastAppliedOrWrittenTxId(); } @Override // NamenodeProtocol http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index 099a6aa..569c6bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -172,7 +172,7 @@ public class StandbyCheckpointer { FSImage img = namesystem.getFSImage(); long prevCheckpointTxId = img.getStorage().getMostRecentCheckpointTxId(); - long thisCheckpointTxId = img.getLastAppliedOrWrittenTxId(); + long thisCheckpointTxId = img.getCorrectLastAppliedOrWrittenTxId(); assert thisCheckpointTxId >= prevCheckpointTxId; if (thisCheckpointTxId == prevCheckpointTxId) { LOG.info("A checkpoint was triggered but the Standby Node has not " + @@ -315,7 +315,7 @@ public class StandbyCheckpointer { private long countUncheckpointedTxns() { FSImage img = namesystem.getFSImage(); - return img.getLastAppliedOrWrittenTxId() - + return img.getCorrectLastAppliedOrWrittenTxId() - img.getStorage().getMostRecentCheckpointTxId(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java index 4687cfd..d311494 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java @@ -162,6 +162,30 @@ public class TestFSNamesystemMBean { } } + // The test makes sure JMX request can be processed even if FSEditLog + // is synchronized. + @Test + public void testWithFSEditLogLock() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + synchronized (cluster.getNameNode().getFSImage().getEditLog()) { + MBeanClient client = new MBeanClient(); + client.start(); + client.join(20000); + assertTrue("JMX calls are blocked when FSEditLog" + + " is synchronized by another thread", client.succeeded); + client.interrupt(); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + @Test(timeout = 120000) public void testFsEditLogMetrics() throws Exception { final Configuration conf = new Configuration(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org